########################################################## ## DRC 450K Methylation Arrays: Processing with Meffil #################### ## Install and launch ## packages #################### install.packages("BiocManager") library(BiocManager) BiocManager::install("bumphunter") BiocManager::install("IlluminaHumanMethylation450kmanifest") install.packages("devtools") library(devtools) install_github("perishky/meffil") library(meffil) detectCores() options(mc.cores=4) #################### ## Create and Edit ## Samplesheet #################### path_to_idat_files<-"" samplesheet <- meffil.create.samplesheet(path_to_idat_files) ######################################### ## Change "Sample_Name" to the DRC ID ######################################### linker = read.csv("ChipLayoutSheet.csv") SNid = paste0(linker[,13], "_", linker[,14]) o = match(samplesheet$Sample_Name, SNid) newID = linker[o, "Sample.ID"] ## Change ID in samplesheet to the DRC ID. samplesheet[,1] = newID ################ ## Quality Control ################ ## WHOLE BLOOD QC - Cell ref system.time( qc_objects_wholeblood <- meffil.qc(samplesheet, cell.type.reference="blood gse35069 complete", verbose=TRUE) ) #run QC, takes about 4 minutes for example dataset with 72 iDATS save(qc_objects_wholeblood,file="qc_objects_wholeblood.Robj") #### ## CORD BLOOD QC - Cell ref system.time( qc_objects_cord <- meffil.qc(samplesheet, cell.type.reference="andrews and bakulski cord blood", verbose=TRUE) ) #run QC, takes about 3.8 minutes for example dataset with 72 iDATS save(qc_objects_cord,file="qc_objects_cordblood.Robj") #### qc_parameters <- meffil.qc.parameters(#set QC parameters beadnum.samples.threshold = 0.1, detectionp.samples.threshold = 0.1, detectionp.cpgs.threshold = 0.1, beadnum.cpgs.threshold = 0.1, sex.outlier.sd = 5 ) qc_summary_wholeblood <- meffil.qc.summary(#summarise QC qc_objects_wholeblood, parameters = qc_parameters ) save(qc_summary_wholeblood, file="qc_summary_wholeblood.Robj") ######## ## Generate a QC report ######## meffil.qc.report(qc_summary_wholeblood, output.file="qc-report_wholeblood.html") outlier <- qc_summary_wholeblood$bad.samples table(outlier$issue) index <- outlier$issue %in% c("Control probe (dye.bias)", "Methylated vs Unmethylated", "X-Y ratio outlier", "Low bead numbers", "Detection p-value", "Control probe (bisulfite1)", "Control probe (bisulfite2)") outlier <- outlier[index,] if(nrow(outlier) > 0){ ### QC qc_objects_wholeblood <- meffil.remove.samples(qc_objects_wholeblood, outlier$sample.name) # length(qc_objects_wholeblood) save(qc_objects_wholeblood,file="qc.objects_wholeblood.clean.Robj") qc_summary <- meffil.qc.summary(qc_objects_wholeblood,parameters=qc_parameters) save(qc_summary, file="qcsummary.clean.Robj") meffil.qc.report(qc_summary, output.file="qc-report.clean.html") } ###################### ## Normalisation ###################### setwd("C:/Users/ccluk/Dropbox/My Word Docs/UF Anthropology Research/Congo Project/Hughs EWAS/Clukay EWAS/EWAS Analysis") #rm(list=ls()) #to clear your workspace options(mc.cores=4) #### LOAD DATA load("qc_objects_wholeblood.Robj"); length(qc_objects_wholeblood) # 72 load("qc_summary_wholeblood.Robj") y <- meffil.plot.pc.fit(qc_objects_wholeblood) ggsave(y$plot,filename="C:/Users/ccluk/Dropbox/Docs/UF_Anthro_Research/Congo Project/Hughs EWAS/Clukay EWAS/EWAS Analysis/pc_fit.pdf",height=6,width=6) ## Take a look at pdf and decide how many pcs to set for the normalization pc <- 6 ## execute the normalization, including the slide as a random effect. norm_objects <- meffil.normalize.quantiles(qc_objects_wholeblood,random.effects="Slide", number.pcs=pc) save(norm_objects ,file=paste("norm_obj_pc",pc,".Robj",sep="")) ############################# ## Generate Beta Values ############################# ## cpgs to remove length(qc_summary_wholeblood$bad.cpgs$name) # 1262 ## norm_beta <- meffil.normalize.samples(norm_objects, cpglist.remove=qc_summary_wholeblood$bad.cpgs$name) #Filter for only non-biased CpG sites based on Zhou et al. 2016 filter<-read.csv(file= "450ksitesbasedonAFRpop.csv") row.names(filter)<-filter$probeID order<-row.names(norm_beta) orderedfilter<-filter[order,] norm_beta<-norm_beta[orderedfilter$MASK_general_AFR == FALSE,] #save save(norm_beta,file=paste("norm_beta_pc",pc,".Robj",sep="")) ############################ ##Generate Normalization Report ############################ for (i in 1:length(norm_objects)){ norm_objects[[i]]$samplesheet$Slide<-as.factor(norm_objects[[i]]$samplesheet$Slide) norm_objects[[i]]$samplesheet$sentrix_row<-as.factor(norm_objects[[i]]$samplesheet$sentrix_row) norm_objects[[i]]$samplesheet$sentrix_col<-as.factor(norm_objects[[i]]$samplesheet$sentrix_col) norm_objects[[i]]$samplesheet$Sex<-as.factor(norm_objects[[i]]$samplesheet$Sex) } ## batch_var<-c("Slide", "sentrix_row","sentrix_col","disease_status") norm_parameters <- meffil.normalization.parameters( norm_objects, variables=batch_var, control.pcs=1:10, batch.pcs=1:10, batch.threshold=0.01 ) pcs <- meffil.methylation.pcs(norm_beta,probe.range=20000) save(pcs,file="pcs_norm_beta.Robj") norm_summary <- meffil.normalization.summary(norm_objects, pcs=pcs,parameters=norm_parameters) meffil.normalization.report(norm_summary, output.file="normalization-report.html")