#acl All:read DanieleMerico:write,delete,revert = How to generate gene-set collections from Bioconductor = {{{ # UPDATE LIBRARIES source("http://bioconductor.org/biocLite.R") biocLite ("GO.db") biocLite ("KEGG.db") biocLite ("org.Hs.eg.db") biocLite ("PFAM.db") # LOAD LIBRARIES library ("GO.db") library ("KEGG.db") library ("PFAM.db") library ("org.Hs.eg.db") # PACKAGE GO INTO LIST # (ID: GO:xxxxx) GO_Hs_eg.ls <- as.list (org.Hs.egGO2ALLEGS) GO_Hs_ID2name.chv <- unlist (lapply (as.list (GOTERM), Term)) # PACKAGE KEGG INTO LIST # (ID: KEGG_hsa:xxxxx) # Import KEGG_Hs_eg.ls <- as.list (org.Hs.egPATH2EG) KEGG_Hs_ID2name.chv <- unlist (as.list (KEGGPATHID2NAME)) # Add Prefix to IDs and Names ztemp_names.chv <- paste ( "KEGG:", KEGG_Hs_ID2name.chv[names (KEGG_Hs_eg.ls)], sep = "" ) ztemp_IDs.chv <- paste ( "KEGG:hsa", names (KEGG_Hs_ID2name.chv[names (KEGG_Hs_eg.ls)]), sep = "" ) KEGG_Hs_ID2name.chv <- ztemp_names.chv names (KEGG_Hs_ID2name.chv) <- ztemp_IDs.chv names (KEGG_Hs_eg.ls) <- paste ( "KEGG:hsa", names (KEGG_Hs_eg.ls), sep = "" ) # PACKAGE PFAM INTO LIST # (ID: KEGG_hsa:xxxxx) # Import PFAM_eg2pfam.ls <- as.list (org.Hs.egPFAM) PFAM2names.chv <- unlist (as.list (PFAMDE)) # Change Format # from EG->PFAM_ID to PFAM_ID->EG PFAM_eg2pfam.df <- stack (PFAM_eg2pfam.ls) colnames (PFAM_eg2pfam.df) <- c ("PFAM_ID", "EG") PFAM_eg.ls <- unstack (PFAM_eg2pfam.df, form = EG ~ PFAM_ID) # Warning: some PFAM entries don't have name # some problem with the R package (?) # UNITE GS_eg.ls <- c (GO_Hs_eg.ls, KEGG_Hs_eg.ls, PFAM_eg.ls) GS_ID2name.chv <- c (GO_Hs_ID2name.chv, KEGG_Hs_ID2name.chv, PFAM2names.chv) # PACKAGE INTO GMT f.collapse <- function (input.genes) {paste (input.genes, collapse = "\t")} genes.chv <- unlist (lapply (GS_eg.ls, f.collapse)) output.chv <- paste (names (GS_eg.ls), GS_ID2name.chv[names (GS_eg.ls)], genes.chv, sep = "\t") cat (output.chv, sep = "\n", file = "GO_KEGG_PFAM_Hs_eg.GMT") }}}