R - create one variable out of three and merge over rows
So, I've been looking for a solution for days and am still not succeeding. Maybe someone can help me.
# Minimal example: 13 rows, 9 for school "a" and 4 for school "b".
a <- rep("a", 9)
b <- rep("b", 4)
schoolid <- c(a, b)

# Pupil id per subject; "" marks a cell that is empty on that row.
dc <- c("a", "b", "c", "", "", "", "", "", "", "a", "b", "i", "j")
mc <- c("", "", "", "a", "b", "c", "", "", "", "a", "b", "i", "j")
ec <- c("", "", "", "", "", "", "a", "b", "c", "a", "b", "i", "j")

# Points per subject. Mixing numbers with "" makes these character vectors,
# so they have to be converted before any arithmetic.
dpoints <- c(20, 15, 17, "", "", "", "", "", "", 14, 13, 13, 12)
mpoints <- c("", "", "", 18, 12, 20, "", "", "", 15, 11, 14, 9)
epoints <- c("", "", "", "", "", "", 13, 14, 15, 16, 21, 17, 7)

data <- data.frame(schoolid, dc, mc, ec, dpoints, mpoints, epoints)
This is the dataset I have, with:
# dc ... pupil id in Deutsch (German)
# mc ... pupil id in math
# ec ... pupil id in English
# dpoints, mpoints, epoints ... points achieved in the tests
#
# The pupils in school a did participate in the tests, but their
# information is spread over 3 rows per pupil.
# In school b everything is all right.
# Note: the 4 pupils in school b partly have the same codes as the
# pupils in school a.

# View() (capital V) is the base R data viewer; it needs an interactive
# session, so skip it when the script is run non-interactively.
if (interactive()) {
  View(data)
}
###############################################################################
So at first I'd like to have one variable "code" for the pupil id that combines the information of
## the 3 columns dc, mc and ec,
# in the following way:
x <- rep(c("a", "b", "c"), 3)

# Work on a copy so the original `data` stays untouched.
data1 <- data
# Hand-written target column: the 9 ids of school a, then the 4 of school b.
data1$code <- c(x, "a", "b", "i", "j")

# View() needs an interactive session.
if (interactive()) {
  View(data1)
}
Secondly, for the upper part of the data frame I'd like to merge the rows, like this:
# Result: the desired dataset, one row per pupil and school.
schoolid1 <- c("a", "a", "a", "b", "b", "b", "b")
code <- c("a", "b", "c", "a", "b", "i", "j")
dpoints1 <- c(20, 15, 17, 14, 13, 13, 12)
mpoints1 <- c(18, 12, 20, 15, 11, 14, 9)
epoints1 <- c(13, 14, 15, 16, 21, 17, 7)

result <- data.frame(schoolid1, code, dpoints1, mpoints1, epoints1)

# View() (capital V) needs an interactive session.
if (interactive()) {
  View(result)
}
###############################################################################
So for 1.) I tried the following (it doesn't work though):
# Corrected version of the attempted loop: fill data$code, row by row, with
# the pupil id found in dc, mc or ec.
#
# What was wrong with the attempt:
#   * `x == y == z` is not valid R; values must be compared pairwise.
#   * Empty cells hold "" rather than NA, so the is.na() branches never fire.
#   * `data$code <- value` overwrites the whole column; row i is
#     `data$code[i]`.
#   * `malsehen` is not defined anywhere; the data frame is called `data`.
#   * A for loop advances `i` itself, so no manual counter is needed.
data$code <- NA_character_

for (i in seq_len(nrow(data))) {
  # as.character() so this works for factor and character columns alike.
  ids <- vapply(data[i, c("dc", "mc", "ec")], as.character, character(1))
  # Keep only the filled-in ids; identical ids collapse to one value.
  ids <- unique(ids[!is.na(ids) & ids != ""])

  if (length(ids) == 1L) {
    data$code[i] <- ids
  } else if (length(ids) > 1L) {
    stop("Row ", i, " has conflicting pupil ids in dc/mc/ec", call. = FALSE)
  }
  # length(ids) == 0: no id on this row, code stays NA.
}
As for the second problem, I don't know.
Here's a solution using data.table:
library(data.table) # 1.9.5+

ints <- paste0(c("d", "m", "e"), "points") # dpoints, mpoints, epoints
chars <- paste0(c("d", "m", "e"), "c")     # dc, mc, ec

# Convert the point columns to integer; blank cells become NA.
# Going through as.character() works for factor and character columns alike.
setDT(data)[, (ints) := lapply(.SD, function(x) as.integer(as.character(x))),
            .SDcols = ints]

# Problem 1: one id column. "" sorts before every non-empty string, so the
# parallel maximum of the three id columns is the filled-in id of each row.
data[, code := pmax(as.character(dc), as.character(mc), as.character(ec),
                    na.rm = TRUE)]

# Problem 2: within each pupil (schoolid + code) copy the one known value of
# every points column onto all of that pupil's rows.
# A group with no value at all keeps NA instead of max()'s -Inf.
max_or_na <- function(x) {
  if (all(is.na(x))) NA_integer_ else max(x, na.rm = TRUE)
}
data[, (ints) := lapply(.SD, max_or_na),
     by = .(schoolid, code), .SDcols = ints]

# Remove excess information: drop the per-subject id columns and keep one row
# per pupil. `by` is given explicitly because unique() on a data.table
# compares all columns by default in current versions, not just the key.
data[, (chars) := NULL]
setkey(data, schoolid, code)
data <- unique(data, by = key(data))

data
#    schoolid dpoints mpoints epoints code
# 1:        a      20      18      13    a
# 2:        a      15      12      14    b
# 3:        a      17      20      15    c
# 4:        b      14      15      16    a
# 5:        b      13      11      21    b
# 6:        b      13      14      17    i
# 7:        b      12       9       7    j
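Note that you should check that the three id columns share the same factor levels, e.g. `identical(levels(data$dc), levels(data$ec)) && identical(levels(data$ec), levels(data$mc))` (`identical()` compares only two objects at a time), since a definition of `code` that takes `pmax()` over the factors' integer codes relies on this.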
Comments
Post a Comment