Forum: help

2 nodes produced errors; first error: Invalid grouping factor specification, X1 [ Reply ]
By: Andreu Ferrero-Gregori on 2016-03-17 07:49

[forum:43081]

After trying to use this scrip, I always get same error about the random effect, no matter I do...

# Function for getting bootstrapped glmer predictions in parallel
glmmBoot <- function(dat, form, R, nc){
# dat = data for glmer (lme4) logistic regression
# form = formula of glmer equation for fitting
# R = total number of bootstrap draws - should be multiple of nc b/c divided among cores evenly
# nc = number of cores to use in parallel

library(parallel)
cl <- makeCluster(nc) # Request # cores
clusterExport(cl, c("dat", "form", "nc", "R"), envir = environment()) # Make these available to each core
clusterSetRNGStream(cl = cl, 1259)

out <- clusterEvalQ(cl, {
library(lme4)
library(boot) # used for the inv.logit function
b.star <- NULL
pres.star <- NULL
pred.star <- NULL
n <- length(dat$pres)
for(draw in 1:(R/nc)){ # within each core draw R/nc samples
df.star <- dat[sample(1:n, size=n, replace=T), ] # bootstrap data
mod <- glmer(form, family = binomial(link = "logit"), data = df.star, control = glmerControl(optimizer="bobyqa")) # regression on each bootstrapped data set
b.star <- rbind(b.star, coef(mod)) # combine coefficients from each draw (not currently used)
pres.star <- rbind(pres.star, df.star$pres) # combine "observed" presence-absences from each bootstrapped data iteration
pred.star <- rbind(pred.star, inv.logit(predict(mod, df.star, allow.new.levels = TRUE))) # # combine predicted presence probability from each draw
}

# make into lists formatted for the ROCR package
lab <- list(pres.star)
for(i in 1:dim(pres.star)[1]){
lab[[i]] <- as.integer(pres.star[i, ])
}

pred <- list(pred.star)
for(i in 1:dim(pred.star)[1]){
pred[[i]] <- as.numeric(pred.star[i,])
}

pred.all <- list(pred, lab)
names(pred.all) <- c("predictions", "observed")

return(pred.all)
}) # end cluster call
stopCluster(cl)

assign("out", out, envir = .GlobalEnv) # allow access outside function to help debugging

# combine lists from each core to format for ROCR
lab1 <- out[[1]]$observed
for(i in 2:length(out)){
foo <- out[[i]]$observed
lab1 <- c(lab1, foo)
}

pred1 <- out[[1]]$predictions
for(i in 2:length(out)){
foo <- out[[i]]$predictions
pred1 <- c(pred1, foo)
}

pred.mod <- list(pred1, lab1)
names(pred.mod) <- c("predictions", "observed")

return(pred.mod)
} # end function

# Run function for model glmm.M35
system.time(pred.M35 <- glmmBoot(dat=df.fit, form=formula(glmm.M35), R=9, nc=3)) # takes ~2 min with 9 draws on 3 cores

library(ROCR)
p35 <- prediction(pred.M35$predictions, pred.M35$observed)
perf35 <- performance(p35, "tpr", "fpr")

par(mfrow = c(1,1))
plot(perf35, lty=3, col="blue")
abline(0, 1)
plot(perf35, avg="vertical", lwd=2, col="black", spread.estimate="stderror", plotCI.lwd=2, add=TRUE)

# function to summarize AUC across all bootstrap draws
aucSummary <- function(pred.list, Mean=T, CI=c(0.025, 0.975), SE=F){
library(AUC)
auc.out <- NULL
for(i in 1:length(pred.list$predictions)) {
auc.out[i] <- auc(roc(as.numeric(pred.list$predictions[[i]]), as.factor(pred.list$observed[[i]])))
}
auc.CI <- quantile(auc.out, probs=CI)
auc.mean <- NA
auc.se <- NA
if(Mean == T){
auc.mean <- mean(auc.out)
}
if(SE == T) {
auc.se <- sd(auc.out)/sqrt(length(auc.out))
}
auc.summary <- data.frame(auc.mean, auc.CI[1], auc.CI[2], auc.se)
names(auc.summary) <- c('Mean', 'LCI', 'UCI', 'SE')
row.names(auc.summary) <- deparse(substitute(pred.list))
return(auc.summary)
} # end function

auc35 <- aucSummary(pred.M35, SE=T)

pred.fit <- inv.logit(predict(glmm.M35, df.fit, allow.new.levels = TRUE))
auc(roc(pred.fit, as.factor(df.fit$pres))) # 0.875 - why so diff than auc35??????????????????????????????

# alternative ways to get it - still different
auc(roc(as.numeric(pred.list$predictions[[1]]), as.factor(pred.list$observed[[1]])))
mean(unlist(performance(p35, "auc")@y.values))

(https://gist.github.com/djhocking/fee60fabe16e86e46bd7)