#acl All:read DanieleMerico:write,delete,revert
= CNV Enrichment Test Code =
== Reference ==
'''Functional impact of global rare copy number variation in autism spectrum disorders.'''<<BR>>
Pinto D, Pagnamenta AT, Klei L, Anney R, '''Merico D''', Regan R, Conroy J, Magalhaes TR, Correia C, Abrahams BS, Almeida J, Bacchelli E, '''Bader GD''', Bailey AJ, Baird G, Battaglia A, Berney T, Bolshakova N, Bölte S, Bolton PF, Bourgeron T, Brennan S, Brian J, Bryson SE, Carson AR, Casallo G, Casey J, Chung BH, Cochrane L, Corsello C, Crawford EL, Crossett A, Cytrynbaum C, Dawson G, de Jonge M, Delorme R, Drmic I, Duketis E, Duque F, Estes A, Farrar P, Fernandez BA, Folstein SE, Fombonne E, Freitag CM, Gilbert J, Gillberg C, Glessner JT, Goldberg J, Green A, Green J, Guter SJ, Hakonarson H, Heron EA, Hill M, Holt R, Howe JL, Hughes G, Hus V, Igliozzi R, Kim C, Klauck SM, Kolevzon A, Korvatska O, Kustanovich V, Lajonchere CM, Lamb JA, Laskawiec M, Leboyer M, Le Couteur A, Leventhal BL, Lionel AC, Liu XQ, Lord C, Lotspeich L, Lund SC, Maestrini E, Mahoney W, Mantoulan C, Marshall CR, McConachie H, McDougle CJ, McGrath J, McMahon WM, Merikangas A, Migita O, Minshew NJ, Mirza GK, Munson J, Nelson SF, Noakes C, Noor A, Nygren G, Oliveira G, Papanikolaou K, Parr JR, Parrini B, Paton T, Pickles A, Pilorge M, Piven J, Ponting CP, Posey DJ, Poustka A, Poustka F, Prasad A, Ragoussis J, Renshaw K, Rickaby J, Roberts W, Roeder K, Roge B, Rutter ML, Bierut LJ, Rice JP, Salt J, Sansom K, Sato D, Segurado R, Sequeira AF, Senman L, Shah N, Sheffield VC, Soorya L, Sousa I, Stein O, Sykes N, Stoppioni V, Strawbridge C, Tancredi R, Tansey K, Thiruvahindrapduram B, Thompson AP, Thomson S, Tryfon A, Tsiantis J, Van Engeland H, Vincent JB, Volkmar F, Wallace S, Wang K, Wang Z, Wassink TH, Webber C, Weksberg R, Wing K, Wittemeyer K, Wood S, Wu J, Yaspan BL, Zurawiecki D, Zwaigenbaum L, Buxbaum JD, Cantor RM, Cook EH, Coon H, Cuccaro ML, Devlin B, Ennis S, Gallagher L, Geschwind DH, Gill M, Haines JL, Hallmayer J, Miller J, Monaco AP, Nurnberger Jr JI, Paterson AD, Pericak-Vance MA, Schellenberg GD, Szatmari P, Vicente AM, Vieland VJ, Wijsman EM, Scherer SW, Sutcliffe JS, Betancur C<
>[[http://www.nature.com/nature/journal/vaop/ncurrent/abs/nature09146.html|Nature. 2010 Jun 9 (Epub ahead of print)]]

== Code ==
The script below calls the functions in the zipped file at the bottom.

 * Script
 {{{
#!rscript numbers=off
# CNV GENE-SET ENRICHMENT [LITE]
#
# Driver script: filters a CNV table, maps samples to gene-sets, runs the
# gene-set test, and annotates the resulting enrichment table. All the
# f.* functions are defined in the files sourced under LOAD FUNCTIONS.

# INPUT DATA
# (the objects described in this section must already exist in the
#  workspace; this script does not create them)

# CNV data
# 'CNV.df' is a data.frame with the following columns:
#   $Class      (values: "case", "control")
#   $SampleID   [i.e. patient identifier]
#   $Chr        [i.e. CNV genomic coordinate: chromosome]
#   $Coord_i    [i.e. CNV genomic coordinate: begin position]
#   $Coord_f    [i.e. CNV genomic coordinate: end position]
#   $Length     [i.e. CNV genomic coordinate: length]
#   $Type       (values: '0' = DEL, '1' = DEL, '3' = DUP)
#   $Genes_eg   (use ';' to separate multiple values)
#               [i.e. genes overlapped by the CNV, Entrez-gene identifier]
#   $Gene_count [i.e. number of genes in the previous field]

# 'Sample2Class.df' can be extracted from the previous:
# it is a data.frame with the following columns:
#   $SampleID
#   $Class

# Gene sets
# 'GS2Genes.ls' is a list, with
#   - gene-set IDs as names
#   - genes (entrez-gene IDs) as slot content
# 'GS2Name.chv' is a character vector, where
#   - names are the gene-set IDs
#   - values are the gene-set descriptions
# These can be generated by parsing the GMT-formatted gene-sets at this page:
# http://baderlab.org/GeneSetDB_02

# Gene annotation (used only by EXECUTE ANCILLARY)
# 'Ann_eg2sy.chv' is passed as 'eg2sy.chv' to f.MakeGeneTable.
# NOTE(review): presumably a named character vector mapping Entrez-gene IDs
# to gene symbols -- confirm against GeneTable_01.R

# LOAD FUNCTIONS
# - Load all functions
# The file names are given without a path, so source () resolves them
# relative to the current working directory.
function.names <- c (
  "Filter_CNV_01.R",
  "CNV_Key_01.R",
  "Sample2genes_from_CNV_01.R",
  "Sample2GS_from_Sample2genes_01.R",
  "FisherTestGS_01.R",
  "Add_GSname_01.R",
  "AddGSsize_toEnrdf_01.R",
  "GeneCounts_from_CNV_01.R",
  "AddSuppGenes_toEnrdf_01.R"
)
for (f.name in function.names) {
  source (f.name)
}

# EXECUTE MAIN

# 1) Set parameters
# The two settings below are alternatives: enable exactly one of them.
# (Previously both were executed in sequence, so the first one was
#  silently overwritten by the second; the effective setting is unchanged.)

# To keep only CNV-DEL, of any gene length
# CNV_Limits.nv <- c (+Inf, 0)
# names (CNV_Limits.nv) <- c ("DEL", "DUP")

# To keep both CNV-DEL and CNV-DUP, of any gene length
CNV_Limits.nv <- c (+Inf, +Inf)
names (CNV_Limits.nv) <- c ("DEL", "DUP")

# Add any Entrez-gene you want to remove from the analysis
# (use only for hypothesized false positives, e.g. unstable regions)
Blacklist.eg <- ""

# Number of randomizations for FDR computation
# NOTE(review): 5 looks like a quick-run value -- confirm before relying
# on the resulting FDR estimates
Iter.n <- 5

# 2) Filter
CNV_f.df <- f.Filter_CNV2gene (
  CNV.df        = CNV.df,
  CNV_limits.nv = CNV_Limits.nv,
  blacklist.eg  = Blacklist.eg
)

# 3) Generate 'sample 2 gene-sets' table
CNV_f.df <- f.MakeCNV_key (CNV_f.df)
Samples2Genes.ls <- f.Comp_Sample2genes (CNV.df = CNV_f.df)
Sample2GS.tab <- f.Comp_Sample2GS (
  GS2genes.ls     = GS2Genes.ls,
  Sample2genes.ls = Samples2Genes.ls,
  Sample2class.df = Sample2Class.df
)

# 4) Run Test
Enr_1.df <- f.FisherTestGS_FDR_Wrap (
  Sample2GS.tab   = Sample2GS.tab,
  Sample2class.df = Sample2Class.df,
  iter.n          = Iter.n
)

# 5) Add gene-set attributes
Enr_2.df <- f.AddGeneSize (
  GS2genes.ls = GS2Genes.ls,
  Enr.df      = Enr_1.df
)
Enr_2.df <- f.Add_GSname (
  GS2name.chv = GS2Name.chv,
  Enr.df      = Enr_2.df
)

# 6) Add support genes
# - defined as the genes with more mutations in cases than ctrls
CNV_Gene.tab <- f.CompGeneCounts (CNV.df = CNV_f.df)
Enr_3.df <- f.AddSupportGenes (
  GS_enr.df       = Enr_2.df,
  Genes2class.tab = CNV_Gene.tab,
  GS2genes.ls     = GS2Genes.ls
)
Enr_final.df <- Enr_3.df

# EXECUTE ANCILLARY

# 1) Compute Gene Tables
# - these have stats by-gene
source ("GeneTable_01.R")
GeneTable.ls <- f.MakeGeneTable (
  Enr.df         = Enr_final.df,
  eg2sy.chv      = Ann_eg2sy.chv,
  GeneCounts.tab = CNV_Gene.tab,
  GS2genes.ls    = GS2Genes.ls,
  CNV.df         = CNV_f.df
)
}}}
 * [[attachment:Pinto2010_Code.zip|Functions]]