Forum: help
Monitor Forum | Start New Thread 2 nodes produced errors; first error: Invalid grouping factor specification, X1 [ Reply ] By: Andreu Ferrero-Gregori on 2016-03-17 07:49 | [forum:43081] |
After trying to use this scrip, I always get same error about the random effect, no matter I do... # Function for getting bootstrapped glmer predictions in parallel glmmBoot <- function(dat, form, R, nc){ # dat = data for glmer (lme4) logistic regression # form = formula of glmer equation for fitting # R = total number of bootstrap draws - should be multiple of nc b/c divided among cores evenly # nc = number of cores to use in parallel library(parallel) cl <- makeCluster(nc) # Request # cores clusterExport(cl, c("dat", "form", "nc", "R"), envir = environment()) # Make these available to each core clusterSetRNGStream(cl = cl, 1259) out <- clusterEvalQ(cl, { library(lme4) library(boot) # used for the inv.logit function b.star <- NULL pres.star <- NULL pred.star <- NULL n <- length(dat$pres) for(draw in 1:(R/nc)){ # within each core draw R/nc samples df.star <- dat[sample(1:n, size=n, replace=T), ] # bootstrap data mod <- glmer(form, family = binomial(link = "logit"), data = df.star, control = glmerControl(optimizer="bobyqa")) # regression on each bootstrapped data set b.star <- rbind(b.star, coef(mod)) # combine coefficients from each draw (not currently used) pres.star <- rbind(pres.star, df.star$pres) # combine "observed" presence-absences from each bootstrapped data iteration pred.star <- rbind(pred.star, inv.logit(predict(mod, df.star, allow.new.levels = TRUE))) # # combine predicted presence probability from each draw } # make into lists formatted for the ROCR package lab <- list(pres.star) for(i in 1:dim(pres.star)[1]){ lab[[i]] <- as.integer(pres.star[i, ]) } pred <- list(pred.star) for(i in 1:dim(pred.star)[1]){ pred[[i]] <- as.numeric(pred.star[i,]) } pred.all <- list(pred, lab) names(pred.all) <- c("predictions", "observed") return(pred.all) }) # end cluster call stopCluster(cl) assign("out", out, envir = .GlobalEnv) # allow access outside function to help debugging # combine lists from each core to format for ROCR lab1 <- out[[1]]$observed for(i in 2:length(out)){ foo <- out[[i]]$observed lab1 <- c(lab1, foo) } pred1 <- out[[1]]$predictions for(i in 2:length(out)){ foo <- out[[i]]$predictions pred1 <- c(pred1, foo) } pred.mod <- list(pred1, lab1) names(pred.mod) <- c("predictions", "observed") return(pred.mod) } # end function # Run function for model glmm.M35 system.time(pred.M35 <- glmmBoot(dat=df.fit, form=formula(glmm.M35), R=9, nc=3)) # takes ~2 min with 9 draws on 3 cores library(ROCR) p35 <- prediction(pred.M35$predictions, pred.M35$observed) perf35 <- performance(p35, "tpr", "fpr") par(mfrow = c(1,1)) plot(perf35, lty=3, col="blue") abline(0, 1) plot(perf35, avg="vertical", lwd=2, col="black", spread.estimate="stderror", plotCI.lwd=2, add=TRUE) # function to summarize AUC across all bootstrap draws aucSummary <- function(pred.list, Mean=T, CI=c(0.025, 0.975), SE=F){ library(AUC) auc.out <- NULL for(i in 1:length(pred.list$predictions)) { auc.out[i] <- auc(roc(as.numeric(pred.list$predictions[[i]]), as.factor(pred.list$observed[[i]]))) } auc.CI <- quantile(auc.out, probs=CI) auc.mean <- NA auc.se <- NA if(Mean == T){ auc.mean <- mean(auc.out) } if(SE == T) { auc.se <- sd(auc.out)/sqrt(length(auc.out)) } auc.summary <- data.frame(auc.mean, auc.CI[1], auc.CI[2], auc.se) names(auc.summary) <- c('Mean', 'LCI', 'UCI', 'SE') row.names(auc.summary) <- deparse(substitute(pred.list)) return(auc.summary) } # end function auc35 <- aucSummary(pred.M35, SE=T) pred.fit <- inv.logit(predict(glmm.M35, df.fit, allow.new.levels = TRUE)) auc(roc(pred.fit, as.factor(df.fit$pres))) # 0.875 - why so diff than auc35?????????????????????????????? # alternative ways to get it - still different auc(roc(as.numeric(pred.list$predictions[[1]]), as.factor(pred.list$observed[[1]]))) mean(unlist(performance(p35, "auc")@y.values)) (https://gist.github.com/djhocking/fee60fabe16e86e46bd7) |