package org.baderlab.pdzsvm.data;

import org.baderlab.pdzsvm.data.manager.ArtificialNegativesDataManager;
import org.baderlab.brain.ProteinProfile;
import java.util.*;
import org.baderlab.pdzsvm.utils.Constants;
import org.baderlab.pdzsvm.predictor.pwm.PWM;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Assembles the positive and negative profile lists used for training and
 * testing.
 *
 * Profiles are taken from the shared {@link DataRepository} instance; where a
 * data set has no experimental negatives, artificial ones are requested from an
 * {@link ArtificialNegativesDataManager}.
 *
 * The <code>load...Train</code> methods accumulate: each call appends to the
 * training lists already held by this loader. The <code>load...Test</code>
 * methods replace the current test lists. Use {@link #clearTrain()},
 * {@link #clearTest()} or {@link #clearAll()} to start over.
 *
 * All list getters return <code>null</code> until the corresponding data has
 * been loaded or cleared at least once.
 */
public class DataLoader {
    private List posTrainProfileList = null;
    private List negTrainProfileList = null;
    private List posTestProfileList = null;
    private List negTestProfileList = null;
    private final DataRepository dr = DataRepository.getInstance();
    private final ArtificialNegativesDataManager am;

    /**
     * Creates a loader with no data loaded.
     */
    public DataLoader()
    {
        am = new ArtificialNegativesDataManager();
    }

    /**
     * @return the positive training profiles, or null if none were loaded
     */
    public List getPosTrainProfileList()
    {
        return posTrainProfileList;
    }

    /**
     * @return the negative training profiles, or null if none were loaded
     */
    public List getNegTrainProfileList()
    {
        return negTrainProfileList;
    }

    /**
     * @return the positive test profiles, or null if none were loaded. After a
     *         load...Test call this may be the repository's own list instance,
     *         so callers should treat it as read-only.
     */
    public List getPosTestProfileList()
    {
        return posTestProfileList;
    }

    /**
     * @return the negative test profiles, or null if none were loaded. After a
     *         load...Test call this may be the repository's own list instance,
     *         so callers should treat it as read-only.
     */
    public List getNegTestProfileList()
    {
        return negTestProfileList;
    }

    /**
     * @return the summed sequence count of all positive training profiles
     *         (0 if nothing is loaded)
     */
    public int getNumPosTrainInteractions()
    {
        return countSequences(posTrainProfileList);
    }

    /**
     * @return the summed sequence count of all positive test profiles
     *         (0 if nothing is loaded)
     */
    public int getNumPosTestInteractions()
    {
        return countSequences(posTestProfileList);
    }

    /**
     * @return the summed sequence count of all negative test profiles
     *         (0 if nothing is loaded)
     */
    public int getNumNegTestInteractions()
    {
        return countSequences(negTestProfileList);
    }

    /**
     * @return the summed sequence count of all negative training profiles
     *         (0 if nothing is loaded)
     */
    public int getNumNegTrainInteractions()
    {
        return countSequences(negTrainProfileList);
    }

    /**
     * Small driver: loads a fixed training/test combination and prints
     * summary counts to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        DataLoader dl = new DataLoader();
        dl.loadHumanTrain(Constants.PWM,Constants.GENOMIC,Constants.NUM_RED_PEPTIDES);
        dl.loadMouseChenTrain();
        dl.loadWormTest(Constants.PROTEIN_MICROARRAY);

        List posTestProfileList = emptyIfNull(dl.getPosTestProfileList());
        List negTestProfileList = emptyIfNull(dl.getNegTestProfileList());
        List posTrainProfileList = emptyIfNull(dl.getPosTrainProfileList());
        List negTrainProfileList = emptyIfNull(dl.getNegTrainProfileList());

        Data data = new Data();
        data.addRawData(posTrainProfileList,Constants.CLASS_YES);
        data.addRawData(negTrainProfileList,Constants.CLASS_NO);
        HashMap peptideMap = data.getPeptideNumToRawMap();
        data.printSummary();

        Data testdata = new Data();
        testdata.addRawData(posTestProfileList,Constants.CLASS_YES);
        testdata.addRawData(negTestProfileList,Constants.CLASS_NO);
        HashMap testpeptideMap = testdata.getPeptideNumToRawMap();

        System.out.println("TRAIN DOMAINS");
        System.out.println("Num Pos Domains: " + posTrainProfileList.size());
        System.out.println("Num Neg Domains: " + negTrainProfileList.size());
        System.out.println("TRAIN PEPTIDES");

        System.out.println("Num total: " + peptideMap.size());

        System.out.println("TRAIN INTERACTIONS");
        System.out.println("Num Pos Interactions:" + dl.getNumPosTrainInteractions());
        System.out.println("Num Neg Interactions:" + dl.getNumNegTrainInteractions());
        System.out.println("TEST DOMAINS");
        System.out.println("Num Pos Domains: " + posTestProfileList.size());
        System.out.println("Num Neg Domains: " + negTestProfileList.size());
        System.out.println("TEST PEPTIDES");

        System.out.println("Num total: " + testpeptideMap.size());
        System.out.println("TEST INTERACTIONS");
        System.out.println("Num Pos Interactions:" + dl.getNumPosTestInteractions());
        System.out.println("Num Neg Interactions:" + dl.getNumNegTestInteractions());
    }

    /**
     * Discards the training lists, leaving both empty (not null).
     */
    public void clearTrain()
    {
        posTrainProfileList = new ArrayList();
        negTrainProfileList = new ArrayList();
    }

    /**
     * Discards the test lists, leaving both empty (not null).
     */
    public void clearTest()
    {
        posTestProfileList = new ArrayList();
        negTestProfileList = new ArrayList();
    }

    /**
     * Discards both the training and the test lists.
     */
    public void clearAll()
    {
        clearTrain();
        clearTest();
    }

    /**
     * Appends the repository's mouse "Chen" protein microarray positives and
     * negatives to the training lists.
     */
    public void loadMouseChenTrain()
    {
        System.out.println("\n\tLoading mouse minus 3 (CHEN) protein microarray training data...");
        appendTrainProfiles(dr.mousePosPMChenList, dr.mouseNegPMChenList);
    }

    /**
     * Appends the repository's mouse "Stiffler" protein microarray positives
     * and negatives to the training lists.
     */
    public void loadMouseTrain()
    {
        System.out.println("\n\tLoading mouse (STIFFLER) protein microarray training data...");
        appendTrainProfiles(dr.mousePosPMStifflerList, dr.mouseNegPMStifflerList);
    }

    /**
     * Appends the repository's Sidhu human phage display positives to the
     * training positives, without size filtering and without adding negatives.
     *
     * The profiles are copied into this loader's own list. Adopting the
     * repository's list instance directly (as an earlier version did) would
     * make every subsequent load...Train call append into the shared
     * repository data, and would also drop positives loaded beforehand.
     */
    public void loadSidhuHumanTrain()
    {
        posTrainProfileList = freshIfEmpty(posTrainProfileList);
        if (dr.humanPosSidhuPDList != null)
            posTrainProfileList.addAll(dr.humanPosSidhuPDList);

        negTrainProfileList = freshIfEmpty(negTrainProfileList);
    }

    /**
     * Appends the repository's Sidhu human phage display positives (minus
     * profiles with fewer than Constants.MIN_NUM_PEPTIDES sequences) to the
     * training positives and, unless artNegMethod is Constants.NONE, appends
     * artificial negatives derived from those positives.
     *
     * @param artNegMethod   one of the Constants artificial negative method
     *                       names (PWM, RANDOM, SHUFFLED, RANDOM_SEL, NONE);
     *                       any other value adds no negatives
     * @param numRedPeptides passed through to the artificial negatives manager
     */
    public void loadSidhuHumanTrain(String artNegMethod, int numRedPeptides)
    {
        System.out.println("\tLoading SIDHU Human Training data...");
        System.out.println("\tArt neg method: " + artNegMethod + ", " + numRedPeptides );

        appendHumanTrain(dr.humanPosSidhuPDList, artNegMethod, numRedPeptides);
    }

    /**
     * Same as {@link #loadHumanTrain(String, String, int)} with the default
     * (genomic) type and Constants.NUM_RED_PEPTIDES.
     *
     * @param artNegMethod artificial negative method name
     */
    public void loadHumanTrain(String artNegMethod)
    {
        loadHumanTrain(artNegMethod, "", Constants.NUM_RED_PEPTIDES);
    }

    /**
     * Same as {@link #loadHumanTrain(String, String, int)} with
     * Constants.NUM_RED_PEPTIDES.
     *
     * @param artNegMethod artificial negative method name
     * @param type         "" or Constants.GENOMIC / Constants.NON_GENOMIC
     */
    public void loadHumanTrain(String artNegMethod, String type)
    {
        loadHumanTrain(artNegMethod, type, Constants.NUM_RED_PEPTIDES);
    }

    /**
     * Appends human phage display positives of the requested type (minus
     * profiles with fewer than Constants.MIN_NUM_PEPTIDES sequences) to the
     * training positives and, unless artNegMethod is Constants.NONE, appends
     * artificial negatives derived from those positives.
     *
     * @param artNegMethod   one of the Constants artificial negative method
     *                       names (PWM, RANDOM, SHUFFLED, RANDOM_SEL, NONE);
     *                       any other value adds no negatives
     * @param type           "" or Constants.GENOMIC for the genomic set,
     *                       Constants.NON_GENOMIC for the non genomic set; any
     *                       other value is reported and contributes no positives
     * @param numRedPeptides passed through to the artificial negatives manager
     */
    public void loadHumanTrain(String artNegMethod, String type, int numRedPeptides)
    {
        System.out.println("\tLoading SIDHU Human "+type +" Training data...");
        System.out.println("\tArt neg method: " + artNegMethod + ", " + numRedPeptides );

        List humanPosList;
        // if no type we will assume the optimal one which is GENOMIC
        if (type.equals("") || type.equals(Constants.GENOMIC))
        {
            humanPosList = dr.humanPosPDGenomicList;
        }
        else if (type.equals(Constants.NON_GENOMIC))
        {
            humanPosList = dr.humanPosPDNonGenomicList;
        }
        else
        {
            // Previously an unknown type was ignored without any notice;
            // report it the same way loadMouseTest does.
            System.out.println("\tDid not load human training positives.  No data for " + type + "...");
            humanPosList = new ArrayList();
        }

        appendHumanTrain(humanPosList, artNegMethod, numRedPeptides);
    }

    /**
     * Returns the profiles that have at least Constants.MIN_NUM_PEPTIDES
     * sequences, printing how many were kept.
     *
     * @param allProfileList list of ProteinProfile objects; not modified
     * @return a new list holding the profiles that were kept
     */
    public List filterSmallProfiles(List allProfileList)
    {
        System.out.println("\tFiltering out small profiles...");
        List filteredProfileList = new ArrayList();
        for (Iterator it = allProfileList.iterator(); it.hasNext();)
        {
            ProteinProfile profile = (ProteinProfile)it.next();
            if (profile.getNumSequences() >= Constants.MIN_NUM_PEPTIDES)
            {
                filteredProfileList.add(profile);
            }
        }
        System.out.println("\tNum profiles kept: " + filteredProfileList.size());

        return filteredProfileList;
    }

    /**
     * Replaces the test lists with human test data. Only
     * Constants.PHAGE_DISPLAY is supported: positives come from the
     * repository and negatives from the artificial negatives manager's
     * "mixed up" method. Any other type is reported and leaves the test lists
     * untouched.
     *
     * @param type the experiment type to load
     */
    public void loadHumanTest(String type)
    {
        if (type.equals(Constants.PHAGE_DISPLAY))
        {
            posTestProfileList = dr.humanPosPDTestList;
            negTestProfileList = am.getMixedUpNegatives(posTestProfileList);
        }
        else
        {
            // Previously an unknown type was ignored without any notice;
            // report it the same way loadMouseTest does.
            System.out.println("\tDid not load human test.  No data for " + type + "...");
        }
    }

    /**
     * Returns the profiles that have at least <code>size</code> sequences,
     * printing the name and sequence count of every profile that is dropped.
     *
     * @param profileList list of ProteinProfile objects; not modified
     * @param size        minimum number of sequences a profile must have
     * @return a new list holding the profiles that were kept
     */
    public static List removeSmallProfiles(List profileList, int size)
    {
        List newProfileList = new ArrayList();
        for (Iterator it = profileList.iterator(); it.hasNext();)
        {
            ProteinProfile profile = (ProteinProfile)it.next();
            if (profile.getNumSequences() >= size)
            {
                newProfileList.add(profile);
            }
            else
            {
                System.out.println("\tRemoving profile: " + profile.getName() + "\t" +  profile.getNumSequences());
            }
        }
        return newProfileList;
    }

    /**
     * Replaces the test lists with mouse test data. Only
     * Constants.MOUSE_ORPHAN is supported; any other type is reported and
     * leaves the test lists untouched.
     *
     * @param type the mouse test set to load
     */
    public void loadMouseTest(String type)
    {
        if (type.equals(Constants.MOUSE_ORPHAN))
        {
            posTestProfileList = dr.mousePosPMOrphanList;
            negTestProfileList = dr.mouseNegPMOrphanList;
        }
        else
        {
            System.out.println("\tDid not load mouse test.  No data for " + type + "...");
        }
    }

    /**
     * Replaces the test lists with worm test data. For
     * Constants.PHAGE_DISPLAY the positives come from the repository and the
     * negatives from the artificial negatives manager's "mixed up" method;
     * every other type value selects the repository's worm protein microarray
     * positives and negatives.
     *
     * @param type the experiment type to load
     */
    public void loadWormTest(String type)
    {
        if (type.equals(Constants.PHAGE_DISPLAY))
        {
            posTestProfileList = dr.wormPosPDTestList;
            negTestProfileList = am.getMixedUpNegatives(posTestProfileList);
        }
        else
        {
            posTestProfileList = dr.wormPosPMList;
            negTestProfileList = dr.wormNegPMList;
        }
    }

    /**
     * Replaces the test lists with the repository's fly protein microarray
     * positives and negatives.
     */
    public void loadFlyTest()
    {
        posTestProfileList = dr.flyPosPMList;
        negTestProfileList = dr.flyNegPMList;
    }

    /** Sums getNumSequences() over a list of ProteinProfile objects; a null list counts as 0. */
    private static int countSequences(List profileList)
    {
        if (profileList == null)
        {
            return 0;
        }
        int total = 0;
        for (Iterator it = profileList.iterator(); it.hasNext();)
        {
            ProteinProfile profile = (ProteinProfile)it.next();
            total += profile.getNumSequences();
        }
        return total;
    }

    /** Returns the given list, or a new empty list when it is null. */
    private static List emptyIfNull(List list)
    {
        return (list == null) ? new ArrayList() : list;
    }

    /** Returns the given list, or a new empty list when it is null or empty. */
    private static List freshIfEmpty(List list)
    {
        return (list == null || list.isEmpty()) ? new ArrayList() : list;
    }

    /** Appends the given positives and negatives to the training lists, creating the lists if needed. */
    private void appendTrainProfiles(List positives, List negatives)
    {
        posTrainProfileList = freshIfEmpty(posTrainProfileList);
        posTrainProfileList.addAll(positives);

        negTrainProfileList = freshIfEmpty(negTrainProfileList);
        negTrainProfileList.addAll(negatives);
    }

    /**
     * Appends size-filtered human positives to the training positives and,
     * unless the method is Constants.NONE, the matching artificial negatives
     * to the training negatives.
     */
    private void appendHumanTrain(List humanPosList, String artNegMethod, int numRedPeptides)
    {
        posTrainProfileList = freshIfEmpty(posTrainProfileList);

        List keptPosList = removeSmallProfiles(humanPosList, Constants.MIN_NUM_PEPTIDES);
        posTrainProfileList.addAll(keptPosList);

        negTrainProfileList = freshIfEmpty(negTrainProfileList);

        if (!artNegMethod.equals(Constants.NONE))
        {
            negTrainProfileList.addAll(createArtificialNegatives(keptPosList, artNegMethod, numRedPeptides));
        }
    }

    /**
     * Asks the artificial negatives manager for negatives using the named
     * method; an unrecognised method is reported and yields an empty list.
     */
    private List createArtificialNegatives(List posProfileList, String artNegMethod, int numRedPeptides)
    {
        if (artNegMethod.equals(Constants.PWM))
        {
            return am.getPWMNegatives(posProfileList, numRedPeptides);
        }
        if (artNegMethod.equals(Constants.RANDOM))
        {
            return am.getRandomNegatives(posProfileList, numRedPeptides);
        }
        if (artNegMethod.equals(Constants.SHUFFLED))
        {
            return am.getShuffledNegatives(posProfileList, numRedPeptides);
        }
        if (artNegMethod.equals(Constants.RANDOM_SEL))
        {
            return am.getRandomSelectionNegatives(posProfileList, numRedPeptides);
        }

        System.out.println("\tNot adding any negatives...");
        return new ArrayList();
    }

}
