Attachment 'Fisher_Enrichment_13.R'
#' Gene-set enrichment by one-sided Fisher's exact test
#'
#' For every gene-set in `GS.ls`, tests whether the genes of `exp.genes`
#' are over-represented in the set, relative to the universe `uni.genes`.
#' Gene-sets and `exp.genes` are first restricted to the universe, so genes
#' outside `uni.genes` never contribute to any count.
#'
#' @param GS.ls Named list of gene-sets; each element is a vector of gene
#'   identifiers and the element names are used as gene-set IDs.
#' @param GS_names.chv Vector of gene-set descriptions, indexed by gene-set
#'   ID (IDs without an entry yield `NA` in `GS_name`).
#' @param exp.genes Vector of gene identifiers of the sample under test.
#' @param uni.genes Vector of gene identifiers forming the universe.
#'   Repeated identifiers are counted once.
#'
#' @return A data.frame with one row per gene-set, sorted by increasing
#'   p-value, with columns:
#'   - `GS_ID`:   gene-set ID (name of the element in `GS.ls`)
#'   - `GS_name`: gene-set description taken from `GS_names.chv`
#'   - `GS_size`: number of gene-set genes in the universe
#'   - `enr_n`:   number of gene-set genes in the sample
#'   - `pvalue`:  uncorrected p-value (`fisher.test`, alternative "greater")
#'   - `ratio`:   (enr_n / sample size) / (GS_size / universe size);
#'                `NaN` when a 0 / 0 division occurs (e.g. a gene-set with
#'                no genes in the universe)
#'   A zero-row data.frame with the same columns is returned when `GS.ls`
#'   is empty.
f.Enr_Fisher <- function (
            GS.ls,
            GS_names.chv,
            exp.genes,
            uni.genes)
{

    # INPUT REQUIREMENTS
    # - no redundancy in gene-sets

    # 1) PRE-PROCESSING

    # An empty collection of gene-sets yields an empty result table.
    if (length (GS.ls) == 0L) {
        return (data.frame (
                    GS_ID     = character (0),
                    GS_name   = character (0),
                    GS_size   = integer (0),
                    enr_n     = integer (0),
                    pvalue    = numeric (0),
                    ratio     = numeric (0),
                    stringsAsFactors = FALSE))
    }

    # Gene-set IDs come from the list names; without them the result rows
    # cannot be identified, so fail early with an explicit message.
    GS_ID.chv <- names (GS.ls)
    if (is.null (GS_ID.chv)) {
        stop ("GS.ls must be a named list of gene-sets", call. = FALSE)
    }

    # The universe is treated as a set. All other counts below come from
    # intersect (), which drops duplicates, so the universe size must be
    # counted on unique identifiers to keep the contingency table coherent.
    uni.genes <- unique (uni.genes)

    # Reduce everything to the universe
    GS.ls     <- lapply (GS.ls, function (GS.genes) intersect (GS.genes, uni.genes))
    exp.genes <- intersect (exp.genes, uni.genes)

    # 2) COMPUTING ENRICHMENT

    exp.n <- length (exp.genes)
    uni.n <- length (uni.genes)

    # per gene-set sizes and overlaps with the sample
    GS.nv  <- vapply (GS.ls, length, integer (1), USE.NAMES = FALSE)
    enr.nv <- vapply (
                GS.ls,
                function (GS.genes) length (intersect (exp.genes, GS.genes)),
                integer (1),
                USE.NAMES = FALSE)

    # One-sided Fisher's exact test on the 2 x 2 table (filled by row):
    #
    #                   in gene-set      not in gene-set
    #   in sample       enr.n            exp.n - enr.n      <- second row
    #   ...
    # laid out exactly as: enr.n, GS.n - enr.n / exp.n - enr.n, remainder
    f.pvalue <- function (enr.n, GS.n)
        {
        table.mx <- matrix (
                        c (
                            enr.n,
                            GS.n  - enr.n,
                            exp.n - enr.n,
                            uni.n - (GS.n + exp.n - enr.n)
                            ),
                        nrow  = 2,
                        ncol  = 2,
                        byrow = TRUE)

        fisher.test (table.mx, alternative = "greater")$p.value
        }

    pvalue.nv <- vapply (
                    seq_along (GS.nv),
                    function (i) f.pvalue (enr.nv[i], GS.nv[i]),
                    numeric (1))

    # fraction of the sample in the gene-set over the fraction of the
    # universe in the gene-set
    ratio.nv <- (enr.nv / exp.n) / (GS.nv / uni.n)

    enr.df <- data.frame (
                GS_ID     = GS_ID.chv,
                GS_name   = unname (GS_names.chv[GS_ID.chv]),
                GS_size   = GS.nv,
                enr_n     = enr.nv,
                pvalue    = pvalue.nv,
                ratio     = ratio.nv,
                row.names = NULL,
                stringsAsFactors = FALSE)

    # sorting by increasing p-value
    enr.df <- enr.df[order (enr.df$pvalue, decreasing = FALSE), ]

    return (enr.df)

}
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.
