# MEMORANDUM:
# 1) CHANGED SAMPLE COLUMN
#    SPLITTING INTO CASES AND CONTROLS
# 2) ADD CNV TOTAL LENGTH (PER SAMPLE)
#    AND OTHER CNV STATS (MEDIAN LENGTH, NUMBER)

# Compute, for every row of an enrichment table, IQ and gender statistics
# of the samples listed in that row.
#
# Arguments:
# - GS_enr.df:       enrichment table; columns used here are
#                    GS_ID       (becomes the row names of the output),
#                    SamplesCase (";"-separated sample IDs),
#                    SamplesCtrl (";"-separated sample IDs)
# - Sample2class.df: columns used here are SampleID and Class
#                    (values tested: "case", "control")
# - Sample2stats.df: columns used here are SampleID, IQ and Gender
#                    (IQ values tested: "verylow", "low", "moderate", "high",
#                     "none", "no_record"; Gender values tested: "male", "female")
#
# Value: data frame with one row per row of GS_enr.df and the columns
#   Low, Mid, High, N.A.          IQ category counts among SamplesCase
#   Lo_Fsh_pv, Hi_Fsh_pv          one-sided Fisher p-values (low vs high IQ)
#   IQ_X2_pv                      chi-square p-value over low / mid / high
#   Male_cs, Female_cs, Ma_cs_Fsh_pv, Fe_cs_Fsh_pv   gender stats, SamplesCase
#   Male_ct, Female_ct, Ma_ct_Fsh_pv, Fe_ct_Fsh_pv   gender stats, SamplesCtrl
f.AddSampleStats <- function (GS_enr.df, Sample2class.df, Sample2stats.df)
	{

	# Re-order the stats table so that its rows are parallel to Sample2class.df;
	# samples without stats become all-NA rows, hence the NA-safe %in% below
	Sample2stats.df <- Sample2stats.df[match (Sample2class.df$SampleID, Sample2stats.df$SampleID), ]

	# as.character () because strsplit () refuses factor columns
	sample_case.ls <- strsplit (as.character (GS_enr.df$SamplesCase), split = ";")
	sample_ctrl.ls <- strsplit (as.character (GS_enr.df$SamplesCtrl), split = ";")

	# Background numbers: IQ categories over all samples in Sample2class.df
	Tot_hi.n <- sum (Sample2stats.df$IQ %in% c ("high"))
	Tot_md.n <- sum (Sample2stats.df$IQ %in% c ("moderate"))
	Tot_lo.n <- sum (Sample2stats.df$IQ %in% c ("verylow", "low"))

	# Background numbers: gender by class.
	# %in% (never NA) instead of ==, so a missing gender or class
	# counts as "no match" rather than turning the whole sum into NA
	is_male.lv <- Sample2stats.df$Gender %in% "male"
	is_feml.lv <- Sample2stats.df$Gender %in% "female"
	is_case.lv <- Sample2class.df$Class  %in% "case"
	is_ctrl.lv <- Sample2class.df$Class  %in% "control"

	Tot_male_case.n <- sum (is_male.lv & is_case.lv)
	Tot_feml_case.n <- sum (is_feml.lv & is_case.lv)
	Tot_male_ctrl.n <- sum (is_male.lv & is_ctrl.lv)
	Tot_feml_ctrl.n <- sum (is_feml.lv & is_ctrl.lv)

	f.X2_Unit <- function (counts.nv)
	# counts: low, moderate, high; second table row = background minus counts
		{
		table.mx <- 
			matrix (
				data  = c (counts.nv, c (Tot_lo.n, Tot_md.n, Tot_hi.n) - counts.nv), 
				ncol  = 3, 
				nrow  = 2, 
				byrow = TRUE)
		test.result <- chisq.test (x = table.mx)
		return (test.result$p.value)
		}

	f.Fisher_Unit <- function (counts.nv, totals.nv)
	# counts.nv: the two observed counts (x, y)
	# totals.nv: the two matching background totals
	# returns two p-values: alternative "greater", then alternative "less"
		{
		table.mx <- 
			matrix (
				data  = c (counts.nv, totals.nv - counts.nv), 
				ncol  = 2, 
				nrow  = 2, 
				byrow = TRUE)
		pvalue.nv <- numeric (2)
		pvalue.nv[1] <- fisher.test (table.mx, alternative = "greater")$p.value
		pvalue.nv[2] <- fisher.test (table.mx, alternative = "less")$p.value
		return (pvalue.nv)
		}

	f.SampleStats_IQ_Unit <- function (sample.chv)
	# returns: low, mid, high, n.a. counts, 2 Fisher p-values (low vs high), X2 p-value
		{
		iq.chv <- Sample2stats.df$IQ[match (sample.chv, Sample2stats.df$SampleID)]
		count.nv <- numeric (7)
		count.nv[1] <- sum (iq.chv %in% c ("verylow", "low"))
		count.nv[2] <- sum (iq.chv %in% c ("moderate"))
		count.nv[3] <- sum (iq.chv %in% c ("high"))
		count.nv[4] <- sum (iq.chv %in% c ("none", "no_record"))
		count.nv[5: 6] <- f.Fisher_Unit (count.nv[c (1, 3)], c (Tot_lo.n, Tot_hi.n))
		count.nv[7] <- f.X2_Unit (count.nv[1: 3])
		return (count.nv)
		}

	f.SampleStats_Gender_Unit <- function (sample.chv, totals.nv)
	# totals.nv: background male and female totals of the class being tested
	# returns: male count, female count, 2 Fisher p-values (male vs female)
		{
		gender.chv <- Sample2stats.df$Gender[match (sample.chv, Sample2stats.df$SampleID)]
		count.nv <- numeric (4)
		# %in% so that unknown samples / missing gender are simply not counted
		count.nv[1] <- sum (gender.chv %in% "male")
		count.nv[2] <- sum (gender.chv %in% "female")
		count.nv[3: 4] <- f.Fisher_Unit (count.nv[1: 2], totals.nv)
		return (count.nv)
		}

	f.Rows2df <- function (stat.mx, name.chv)
	# stat.mx: one column per gene-set (as returned by vapply); transposed to one row per gene-set
		{
		stat.df <- as.data.frame (t (stat.mx))
		colnames (stat.df) <- name.chv
		return (stat.df)
		}

	cat ("\n\t-- computing IQ stats")

	# vapply fixes the result shape, so an empty enrichment table gives 0 rows
	IQ.df <- f.Rows2df (
		vapply (sample_case.ls, f.SampleStats_IQ_Unit, numeric (7)),
		c ("Low", "Mid", "High", "N.A.", "Lo_Fsh_pv", "Hi_Fsh_pv", "IQ_X2_pv"))
	# NaNs are generated when all counts are 0
	IQ.df$IQ_X2_pv[ is.na (IQ.df$IQ_X2_pv) ] <- 1
	IQ.df$Lo_Fsh_pv[is.na (IQ.df$Lo_Fsh_pv)] <- 1
	IQ.df$Hi_Fsh_pv[is.na (IQ.df$Hi_Fsh_pv)] <- 1

	cat ("\n\t-- computing gender stats (case)")

	Gender_cs.df <- f.Rows2df (
		vapply (sample_case.ls, f.SampleStats_Gender_Unit, numeric (4),
			totals.nv = c (Tot_male_case.n, Tot_feml_case.n)),
		c ("Male_cs", "Female_cs", "Ma_cs_Fsh_pv", "Fe_cs_Fsh_pv"))

	cat ("\n\t-- computing gender stats (control)\n")

	Gender_ct.df <- f.Rows2df (
		vapply (sample_ctrl.ls, f.SampleStats_Gender_Unit, numeric (4),
			totals.nv = c (Tot_male_ctrl.n, Tot_feml_ctrl.n)),
		c ("Male_ct", "Female_ct", "Ma_ct_Fsh_pv", "Fe_ct_Fsh_pv"))

	# The enrichment table itself is not repeated in the output; rows are keyed by GS_ID
	output.df <- cbind (IQ.df, Gender_cs.df, Gender_ct.df)
	rownames (output.df) <- GS_enr.df$GS_ID

	return (output.df)
	}