### ### Relabelling observations ### obs <- c("f7", "f8", "m1", "m2", "m3", "f3", "m4", "f1", "m7", "m7", "f4", "m5", "f5", "m6", "f6", "m8", "m9", "f9", "m10", "f10", "f2") sortObs <- function(x) { gender <- substr(x, 1, 1) gender <- factor(gender, levels = c("m", "f"), labels = c("male", "female")) integ <- as.integer(substr(x, 2, 2)) data.frame(integer = integ, gender = gender) } sortObs(obs) ### ### Ragged longitudinal data ### load("ragged.rda") ## (1) Find out how many people have 1,2,3 observations table(table(ragged\$id)) ## (2) Create a new variable [obs] that numbers the observations for each ## person as 1st, 2nd, 3rd, ... idx <- sort(unique(ragged\$id)) ragged\$obs <- NULL for (i in idx) { ragged\$obs[ragged\$id == i] <- order(ragged\$visittime[ragged\$id == i]) } ## (3) Lagging a var ## x = var name as a character string to lag ## ragged = ragged data frame ## id = character string identifying subjects ## time = character string identifying the time id var ## output is a data frame, with the lagged var named as lag.x makeLag <- function(x, ragged, time, id) { x1 <- ragged[[x]] id <- ragged[[id]] time <- ragged[[time]] lag.var <- NULL for (i in unique(id)) { idx <- id == i tmp <- ragged[idx, x][time[idx]] if (sum(idx) > 1) lag.var[idx] <- c(NA, tmp[1:(length(tmp) - 1)]) else lag.var[idx] <- NA } ragged[[paste("lag", x, sep = ".")]] <- lag.var ragged } lag.data <- makeLag("chol", ragged, id = "id", time = "obs") ## (4) Checking that a var is constant ## x = var name as a character string to check for constancy ## ragged = ragged data frame ## id = character string identifying subjects ## output is the group number of those that are not constant, ## returns NULL if all groups are constant checkConst <- function(x, ragged, id) { tab <- table(ragged[[x]], ragged[[id]]) g1 <- apply(tab, 2, '>', 0) g1 <- apply(g1, 2, sum) out <- which(g1 > 1) if (length(out) > 1) { return(names(out)) } else { return(NULL) } } checkConst("sex", ragged, id = "id") ## (5) Before and after ## helper function fun <- function(x, target.time, time) { if (target.time == 1) return( c(NA, x[2]) ) if (target.time == max(time)) { return( c(x[length(x) - 1], NA) ) } else { return(c(x[target.time - 1], x[target.time + 1])) } } ## x = character string with variable name ## target.time = integer for before / after ## time = character string, specifying name of time index / subject ## id = character string, specifying subject id ## ragged = ragged data frame ## output is a matrix with row names = subject ids, columns = {before, after} ## NAs returned as appropriate before.after <- function(x, target.time, time, id, ragged) { gid <- ragged[[id]] gidx <- sort(unique(gid)) out <- matrix(NA, nrow = length(gidx), ncol = 2, dimnames = list(gidx, c("before", "after"))) for (i in gidx) { out[i,] <- fun(ragged[[x]][gid == i], target.time = target.time, time = ragged[[time]]) } out } before.after("chol", target.time = 2, time = "obs", id = "id", ragged) ### ### Folding functions ### ## (a) reduce() x <- 1:10 reduce <- function(x, operator) { x1 <- x[1] for (i in 2:length(x)) x1 <- do.call(operator, list(x1, x[i])) x1 } reduce(x, "+") ## (b) accumulate() accumulate <- function(x, operator) { out <- NULL for (i in 2:length(x)) out[i] <- do.call(operator, list(x[i-1], x[i])) out[1] <- x[1] out } accumulate(x, "+")