library (org.Hs.eg.db)
# Lookup vectors built from the org.Hs.eg.db annotation maps.
# Entrez gene ID -> gene symbol (the Entrez IDs end up as the vector names).
Ann_eg2sy.chv <- unlist (as.list (org.Hs.egSYMBOL))
# Entrez gene ID -> gene name, built the same way from org.Hs.egGENENAME.
Ann_eg2name.chv <- unlist (as.list (org.Hs.egGENENAME))
# Reverse lookup, gene symbol -> Entrez gene ID: the same entries as
# Ann_eg2sy.chv with names and values swapped.
Ann_sy2eg.chv <- setNames (names (Ann_eg2sy.chv), Ann_eg2sy.chv)
# Work inside the core data directory so the input files read by this script
# resolve by bare file name.
setwd ("C:/Users/Daniele/Documents/DM New/Università/Classes_Taught/OBI Pathway Analysis/L2_Assignment/Data_Core")
# Load the expression table: tab-separated text with a header row.
# quote = "" switches off quote processing, and strings stay character
# instead of being converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned.
expr.df <- read.table (
  file = "MCF7_ExprMx_v2.txt",
  sep = "\t",
  header = TRUE,
  quote = "",
  stringsAsFactors = FALSE
)
# Signed significance score for one vector of measurements.
#
# Runs a two-sided two-sample t.test between two groups of elements of the
# input and returns -log (p-value) (natural log) multiplied by the sign of the
# t statistic: the magnitude grows with significance, and the sign is positive
# when the first group's mean is the larger one.
#
# Arguments:
#   input.nv  numeric vector holding both groups of measurements.
#   grp1.ix   positions of the first group (x in t.test); default 1:3.
#   grp2.ix   positions of the second group (y in t.test); default 4:6.
# Value:
#   a single number (it carries over the name of the t.test statistic).
#
# Errors raised by t.test (e.g. too few observations in a group, or essentially
# constant data) are not caught here and propagate to the caller.
f.t_test <- function (input.nv, grp1.ix = 1: 3, grp2.ix = 4: 6)
{
  t.htest <- t.test (x = input.nv[grp1.ix], y = input.nv[grp2.ix], alternative = "two.sided")
  # p.value alone has no direction, so the sign of the statistic is attached to it
  logpv.n <- - log (t.htest$p.value) * sign (t.htest$statistic)
  return (logpv.n)
}
# Differential-expression scores: one row per row of expr.df, one column per
# time point.
diff_expr.mx <- matrix (ncol = 2, nrow = nrow (expr.df))
# The sample columns of each time point are selected by name pattern and every
# row is scored with f.t_test.
# NOTE(review): f.t_test compares elements 1-3 against 4-6, so each pattern is
# presumably expected to match exactly six numeric columns in that group
# order - confirm against the header of the expression file.
diff_expr.mx[, 1] <- apply (expr.df[, grep (colnames (expr.df), pattern = "_12h_")], 1, f.t_test)
diff_expr.mx[, 2] <- apply (expr.df[, grep (colnames (expr.df), pattern = "_24h_")], 1, f.t_test)
colnames (diff_expr.mx) <- c ("diff_12h", "diff_24h")
rownames (diff_expr.mx) <- expr.df$NAME
# Annotation columns: the NAME column is used as the key into the
# Entrez ID -> symbol and Entrez ID -> gene name lookups.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
diff_expr_1.df <- data.frame (
  GeneID = rownames (diff_expr.mx),
  GeneSy = Ann_eg2sy.chv [as.character (expr.df$NAME)],
  GeneName = Ann_eg2name.chv[as.character (expr.df$NAME)],
  stringsAsFactors = FALSE
)
# Scores as a data frame, then annotation and scores side by side.
diff_expr_2.df <- as.data.frame (diff_expr.mx)
diff_expr.df <- cbind (diff_expr_1.df, diff_expr_2.df)
# Load the interaction network from the working directory: tab-separated text
# without a header row; the three columns are named right after reading.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
int.df <- read.table (
  file = "homo-sapiens.sif",
  sep = "\t",
  header = FALSE,
  quote = "",
  stringsAsFactors = FALSE
)
colnames (int.df) <- c ("Source", "IntType", "Target")
# Keep only the rows whose interaction type is INTERACTS_WITH.
int_ppi.df <- int.df[int.df$IntType == "INTERACTS_WITH", ]
# Row counts before and after the filter (printed when run at top level).
nrow (int.df)
nrow (int_ppi.df)
# Translate both end points through the symbol -> Entrez ID lookup;
# identifiers that are not in the lookup come back as NA.
int_ppi_eg.df <- int_ppi.df
int_ppi_eg.df$Source <- Ann_sy2eg.chv[int_ppi.df$Source]
int_ppi_eg.df$Target <- Ann_sy2eg.chv[int_ppi.df$Target]
# Drop every interaction with at least one unmapped end point.
sel.ix <- which ((! is.na (int_ppi_eg.df$Source)) & (! is.na (int_ppi_eg.df$Target)))
int_ppi_eg.df <- int_ppi_eg.df[sel.ix, ]
nrow (int_ppi_eg.df)
# NOTE(review): hard-coded arithmetic; presumably the two row counts printed
# above in some earlier run (interactions lost in the ID mapping). It will not
# follow the data if the inputs change - confirm, and consider computing
# nrow (int_ppi.df) - nrow (int_ppi_eg.df) instead.
113203 - 109081
# Put the two end points of one interaction record into sorted order.
#
# Arguments:
#   input.chv  vector describing one interaction; elements 1 and 3 are the two
#              end points, element 2 is left untouched.
# Value:
#   the same vector with the smaller end point (in sort() order) at position 1
#   and the larger at position 3, so that A-B and B-A records become identical.
f.orderPairs <- function (input.chv)
{
  ends.ix <- c (1, 3)
  # sort () drops NAs; indexing with 1:2 pads the result back to two values
  ordered.chv <- sort (input.chv[ends.ix])[1: 2]
  replace (input.chv, ends.ix, ordered.chv)
}
# Collapse A-B / B-A duplicates: f.orderPairs puts the two end points of every
# row into sorted order, after which unique () drops the repeated rows.
# apply () returns one column per input row, hence the t () to get back to one
# row per interaction before converting to a data frame.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
int_ppi_eg_unq.df <- unique (
  as.data.frame (
    t (
      apply (int_ppi_eg.df, 1, f.orderPairs)
    ),
    stringsAsFactors = FALSE
  )
)
# Number of distinct interactions left (printed when run at top level).
nrow (int_ppi_eg_unq.df)
# Row indices of the interactions in netw.df whose Source and Target are both
# members of genes.chv.
f.edgesWithin <- function (netw.df, genes.chv)
{
  src_in.lv <- netw.df$Source %in% genes.chv
  tgt_in.lv <- netw.df$Target %in% genes.chv
  which (src_in.lv & tgt_in.lv)
}

# GO term -> gene IDs, as a list built from the org.Hs.egGO2ALLEGS map.
GO.ls <- as.list (org.Hs.egGO2ALLEGS)

# For each GO term below: collect its genes, keep the interactions with both
# end points in that gene set, then print the size of the gene set and the
# number of distinct genes that appear in the resulting sub-network.
# NOTE(review): the RepF / RepDNA / Prdeg prefixes presumably abbreviate the
# GO term names - confirm against the GO database.

# GO:0005657
RepF.genes <- unique (unlist (GO.ls[["GO:0005657"]]))
RepF.ix <- f.edgesWithin (int_ppi_eg_unq.df, RepF.genes)
int_ppi_eg_RepF.df <- int_ppi_eg_unq.df[RepF.ix, ]
length (RepF.genes)
length (unique (c (int_ppi_eg_RepF.df$Source, int_ppi_eg_RepF.df$Target)))

# GO:0006260
RepDNA.genes <- unique (unlist (GO.ls[["GO:0006260"]]))
RepDNA.ix <- f.edgesWithin (int_ppi_eg_unq.df, RepDNA.genes)
int_ppi_eg_RepDNA.df <- int_ppi_eg_unq.df[RepDNA.ix, ]
length (RepDNA.genes)
length (unique (c (int_ppi_eg_RepDNA.df$Source, int_ppi_eg_RepDNA.df$Target)))

# GO:0031145
Prdeg.genes <- unique (unlist (GO.ls[["GO:0031145"]]))
Prdeg.ix <- f.edgesWithin (int_ppi_eg_unq.df, Prdeg.genes)
int_ppi_eg_Prdeg.df <- int_ppi_eg_unq.df[Prdeg.ix, ]
length (Prdeg.genes)
length (unique (c (int_ppi_eg_Prdeg.df$Source, int_ppi_eg_Prdeg.df$Target)))
# Switch to the output directory; the files below are written there by bare
# file name.
setwd ("C:/Users/Daniele/Documents/DM New/Università/Classes_Taught/OBI Pathway Analysis/L2_Assignment/Data_Prep")

# Write a data frame as tab-separated text: header row, no row names, no
# quoting. All four exports share these settings, so they live in one place.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned.
f.writeTsv <- function (x.df, file.ch)
{
  write.table (
    x.df,
    file = file.ch,
    sep = "\t",
    quote = FALSE,
    col.names = TRUE,
    row.names = FALSE
  )
  invisible (NULL)
}

# Differential-expression table.
f.writeTsv (diff_expr.df, "ES_MCF7_Diff.txt")
# One interaction sub-network per GO term.
f.writeTsv (int_ppi_eg_RepF.df, "ppiNetw_RepF_eg.txt")
f.writeTsv (int_ppi_eg_RepDNA.df, "ppiNetw_RepDNA_eg.txt")
f.writeTsv (int_ppi_eg_Prdeg.df, "ppiNetw_Prdeg_eg.txt")