package org.baderlab.pdzsvm.data.utils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.baderlab.brain.ProteinProfile;
import org.baderlab.pdzsvm.data.manager.DataFileManager;
import org.baderlab.pdzsvm.data.manager.SequencePoolManager;
import org.baderlab.pdzsvm.encoding.Chen16FeatureEncoding;
import org.baderlab.pdzsvm.utils.Constants;
import org.baderlab.pdzsvm.utils.PDZSVMUtils;
import org.biojava.bio.seq.Sequence;
import weka.core.Utils;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * Summarizes the profiles listed in the human PDZ project file by counting,
 * for each profile, how many of its peptides end in a C-terminal fragment that
 * also occurs in the sequence pool returned by {@link SequencePoolManager}
 * ("genomic") and how many do not ("non genomic"). Each profile is then put
 * into exactly one category (gapped, too small, genomic, non genomic, dual or
 * non specific) and everything is printed to standard output.
 *
 * <p>NOTE: the constructor runs {@link #summarizeData()} immediately, so simply
 * creating an instance reads the input files and prints the full report.
 */
public class GenomicPhageData {
    /** Number of C-terminal residues compared against the sequence pool. */
    private static final int NUM_FROM_C = 4;

    /** Threshold used both for the minimum profile size and for the per-category peptide counts. */
    private static final int MIN_NUM = 10;

    // Category labels stored in ProfileInfo.type (genomic / non genomic use the Constants values).
    private static final String TYPE_GAPPED = "GAPPED";
    private static final String TYPE_TOO_SMALL = "LESS THAN 10";
    private static final String TYPE_DUAL = "DUAL";
    private static final String TYPE_NON_SPECIFIC = "NON SPECIFIC";

    /** C-terminal fragments of the pool sequences; a set so that membership tests are constant time. */
    private Set<String> genomicSeqList;

    /** When true, {@link #summarizeData()} additionally prints the project-file path listings. */
    private boolean writeProjectFiles = false;

    /**
     * Creates the object and immediately runs {@link #summarizeData()}.
     */
    public GenomicPhageData()
    {
        summarizeData();
    }


    /**
     * Reads the profile list, counts genomic and non genomic peptides per
     * profile, classifies every profile and prints the per-profile details,
     * the per-category tables and the overall total to standard output.
     */
    public void summarizeData()
    {
        String fileName = DataFileManager.DATA_ROOT_DIR+"/Data/Human/PDZ/ProjectFiles/projectFileHuman-orig.txt";
        String codonBiasFile = DataFileManager.NNK_CODON_BIAS_FILENAME;
        // NOTE(review): the meaning of the arguments 5 and true is defined by PDZSVMUtils - confirm there.
        List<?> posTestProfileList = PDZSVMUtils.readProteinProfileList(fileName, codonBiasFile, 5, true);

        List<ProfileInfo> profileInfoList = new ArrayList<ProfileInfo>();

        initGenomicList(NUM_FROM_C, Constants.HUMAN);

        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();
        for (Object element : posTestProfileList)
        {
            ProfileInfo profileInfo = summarizeProfile((ProteinProfile) element, enc, NUM_FROM_C);
            profileInfoList.add(profileInfo);
            System.out.println(profileInfo.toString());
        }

        List<ProfileInfo> gappedProfileList = new ArrayList<ProfileInfo>();
        List<ProfileInfo> tooSmallProfileList = new ArrayList<ProfileInfo>();
        List<ProfileInfo> genomicProfileList = new ArrayList<ProfileInfo>();
        List<ProfileInfo> nonGenomicProfileList = new ArrayList<ProfileInfo>();
        List<ProfileInfo> dualProfileList = new ArrayList<ProfileInfo>();
        List<ProfileInfo> mixedProfileList = new ArrayList<ProfileInfo>();

        // Every profile ends up in exactly one list; the order of the checks matters
        // (gapped wins over too small, which wins over the count-based categories).
        for (ProfileInfo profileInfo : profileInfoList)
        {
            boolean enoughGenomic = profileInfo.numGenomic4 >= MIN_NUM;
            boolean enoughNonGenomic = profileInfo.numNonGenomic4 >= MIN_NUM;

            if (profileInfo.gapped)
            {
                gappedProfileList.add(profileInfo);
                profileInfo.type = TYPE_GAPPED;
            }
            else if (profileInfo.numTotal < MIN_NUM)
            {
                tooSmallProfileList.add(profileInfo);
                profileInfo.type = TYPE_TOO_SMALL;
            }
            else if (enoughGenomic && enoughNonGenomic)
            {
                dualProfileList.add(profileInfo);
                profileInfo.type = TYPE_DUAL;
            }
            else if (enoughGenomic)
            {
                genomicProfileList.add(profileInfo);
                profileInfo.type = Constants.GENOMIC;
            }
            else if (enoughNonGenomic)
            {
                nonGenomicProfileList.add(profileInfo);
                profileInfo.type = Constants.NON_GENOMIC;
            }
            else
            {
                // Large enough overall, but neither unique-peptide count reaches the threshold.
                mixedProfileList.add(profileInfo);
                profileInfo.type = TYPE_NON_SPECIFIC;
            }
        }

        String header = Utils.padLeft("",10) +"\t#Gen4\t#Gen5\t#Non Gen.4\t#Non Gen.5\tTotal";

        printGroup("LESS THAN 10", header, tooSmallProfileList);
        printGroup("GAPPED", header, gappedProfileList);
        printGroup("GENOMIC", header, genomicProfileList);
        printGroup("NON GENOMIC", header, nonGenomicProfileList);
        printGroup("DUAL", header, dualProfileList);
        printGroup("NON SPECIFIC", header, mixedProfileList);

        int total = tooSmallProfileList.size() + gappedProfileList.size() + genomicProfileList.size()
                + nonGenomicProfileList.size() + dualProfileList.size() + mixedProfileList.size();
        System.out.println("Total number of profiles: " + total);

        if (writeProjectFiles)
        {
            System.out.println("GENOMIC PROJECT FILE...");

            System.out.println("#ProjectFile");
            printProjectFile(genomicProfileList, Constants.GENOMIC);
            printProjectFile(dualProfileList, Constants.GENOMIC);
            printProjectFile(mixedProfileList, "MIXED");

            System.out.println();
            System.out.println("NON GENOMIC PROJECT FILE...");
            System.out.println("#ProjectFile");
            printProjectFile(nonGenomicProfileList, Constants.NON_GENOMIC);
            printProjectFile(dualProfileList, Constants.NON_GENOMIC);
            printProjectFile(mixedProfileList, "MIXED");

            System.out.println();
            System.out.println("DUAL PROJECT FILE...");
            System.out.println("#ProjectFile");
            printProjectFile(dualProfileList, Constants.BOTH);
            printProjectFile(mixedProfileList, Constants.BOTH);
        }
    }

    /**
     * Counts the genomic and non genomic peptides of one profile and prints one
     * trace line per peptide ("G" for genomic, "x" otherwise) followed by the
     * running unique counts for the C-terminal fragment and the full peptide.
     *
     * @param profile  the profile whose peptides are counted
     * @param enc      encoder applied to the profile's domain sequence; the profile is
     *                 flagged as gapped when the encoded string contains a '-'
     * @param numFromC number of C-terminal residues compared against the sequence pool
     * @return the filled-in counts for the profile (its type is not set here)
     */
    private ProfileInfo summarizeProfile(ProteinProfile profile, Chen16FeatureEncoding enc, int numFromC)
    {
        String domainSeq = profile.getDomainSequence();
        String domainFeatures = enc.getFeatures(domainSeq, Constants.HUMAN);

        Collection<?> seqs = profile.getSequenceMap();
        String profileName = profile.getName();
        System.out.println(profileName);

        ProfileInfo profileInfo = new ProfileInfo();
        // Set the name up front so that a profile without any peptides is still reported by name.
        profileInfo.name = profileName;
        profileInfo.gapped = domainFeatures.indexOf("-") >= 0;
        profileInfo.numTotal = seqs.size();

        // The "5" sets hold full peptides, the "4" sets hold the C-terminal fragments.
        Set<String> uniqueGenSeq5 = new HashSet<String>();
        Set<String> uniqueNonGenSeq5 = new HashSet<String>();
        Set<String> uniqueGenSeq4 = new HashSet<String>();
        Set<String> uniqueNonGenSeq4 = new HashSet<String>();

        for (Object element : seqs)
        {
            String origPeptide = ((Sequence) element).seqString();
            String peptide = cTerminus(origPeptide, numFromC);

            if (genomicSeqList.contains(peptide))
            {
                uniqueGenSeq5.add(origPeptide);
                uniqueGenSeq4.add(peptide);
                profileInfo.numGenomic++;
                System.out.println("\tG " + origPeptide + "\t" + uniqueGenSeq4.size() + "\t" + uniqueGenSeq5.size());
            }
            else
            {
                uniqueNonGenSeq5.add(origPeptide);
                uniqueNonGenSeq4.add(peptide);
                profileInfo.numNonGenomic++;
                System.out.println("\tx " + origPeptide + "\t" + uniqueNonGenSeq4.size() + "\t" + uniqueNonGenSeq5.size());
            }
        }

        profileInfo.numGenomic5 = uniqueGenSeq5.size();
        profileInfo.numNonGenomic5 = uniqueNonGenSeq5.size();
        profileInfo.numGenomic4 = uniqueGenSeq4.size();
        profileInfo.numNonGenomic4 = uniqueNonGenSeq4.size();
        return profileInfo;
    }

    /**
     * Returns the last {@code n} characters of {@code seq}, or {@code seq}
     * itself when it is not longer than {@code n}. A peptide shorter than
     * {@code n} therefore no longer raises an exception; it simply cannot match
     * any pool fragment and is counted as non genomic.
     */
    private static String cTerminus(String seq, int n)
    {
        return seq.length() <= n ? seq : seq.substring(seq.length() - n);
    }

    /**
     * Prints one category table: the title with the number of entries, the
     * column header and then every entry followed by a blank line.
     */
    private void printGroup(String title, String header, List<ProfileInfo> profileInfoList)
    {
        System.out.println(title + " ("+profileInfoList.size()+")" );
        System.out.println(header);
        print(profileInfoList);
    }

    /**
     * Prints one peptide-file path per profile. Profiles of type DUAL are
     * redirected to the genomic or non genomic directory and suffix when
     * {@code type} asks for one of those; all other combinations use the
     * default directory and suffix.
     *
     * @param profileInfoList profiles to list
     * @param type            requested project type (compared against the Constants values)
     */
    private void printProjectFile(List<ProfileInfo> profileInfoList, String type)
    {
        String parentdir = DataFileManager.DATA_ROOT_DIR+"/Data/Human/PDZ/PeptideFiles/data/PDZ/Human";

        for (ProfileInfo info : profileInfoList)
        {
            // Resolved per entry so that one DUAL profile cannot change the path of the entries after it.
            String dir = "/SidhuPhage";
            String suffix = ".pep.txt";
            if (TYPE_DUAL.equals(info.type))
            {
                if (type.equals(Constants.GENOMIC))
                {
                    dir = "/Genomic";
                    suffix = "-gen.pep.pos.txt";
                }
                else if (type.equals(Constants.NON_GENOMIC))
                {
                    dir = "/NonGenomic";
                    suffix = "-nongen.pep.pos.txt";
                }
            }
            System.out.println(parentdir + dir  + "/" + info.name + suffix);
        }
    }

    /**
     * Prints every entry on its own line, followed by one empty line.
     */
    private void print(List<ProfileInfo> profileInfoList)
    {
        for (ProfileInfo profileInfo : profileInfoList)
        {
            System.out.println(profileInfo.toString());
        }
        System.out.println();
    }

    /**
     * Per-profile counters. The "4" fields count unique C-terminal fragments,
     * the "5" fields count unique full peptides, and the unnumbered fields
     * count every peptide including duplicates.
     */
    private static class ProfileInfo
    {
        public String name = "";
        public int numGenomic5 = 0;
        public int numNonGenomic5 = 0;
        public int numGenomic4 = 0;
        public int numNonGenomic4 = 0;
        public int numGenomic = 0;
        public int numNonGenomic = 0;
        public int numTotal = 0;
        public boolean gapped = false;
        public String type = "";

        /**
         * Formats the name, the four unique counts and the total as one
         * tab-separated table row.
         */
        @Override
        public String toString()
        {
            return Utils.padLeft(name,10) +"\t" + Utils.padRight(Integer.toString(numGenomic4), 10) + "\t" +
                    Utils.padRight(Integer.toString(numGenomic5), 10) + "\t" +
                    Utils.padRight(Integer.toString(numNonGenomic4), 10) + "\t"+
                    Utils.padRight(Integer.toString(numNonGenomic5), 10) + "\t"+
                    numTotal;
        }
    }


    /**
     * Command line entry point; constructing the object prints the whole report.
     */
    public static void main(String[] args)
    {
        new GenomicPhageData();
    }

    /**
     * Fills the lookup set with the last {@code numFromC} characters of every
     * sequence in the pool for {@code organism}. Pool sequences shorter than
     * {@code numFromC} are skipped because they cannot supply a fragment of
     * that length.
     */
    private void initGenomicList(int numFromC, String organism)
    {
        SequencePoolManager gm = new SequencePoolManager(organism);
        List<?> genomicList = gm.getSequencePool();

        Set<String> fragments = new HashSet<String>();
        for (Object element : genomicList)
        {
            String seq = (String) element;
            if (seq.length() >= numFromC)
            {
                fragments.add(seq.substring(seq.length() - numFromC));
            }
        }
        genomicSeqList = fragments;
    }
}
