package org.baderlab.pdzsvm.analysis;

import java.util.*;
import java.io.*;

import org.baderlab.pdzsvm.utils.Constants;
import org.baderlab.pdzsvm.utils.PDZSVMUtils;
import org.baderlab.pdzsvm.data.DataLoader;
import org.baderlab.pdzsvm.data.utils.IRefIndexInteractome;
import org.baderlab.pdzsvm.data.manager.DataFileManager;
import org.baderlab.brain.ProteinProfile;
import org.baderlab.brain.ProteinProfileDistance;
import org.biojava.bio.seq.db.HashSequenceDB;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.ProteinTools;
import libsvm.svm_parameter;
import org.baderlab.pdzsvm.predictor.svm.ContactMapSVMPredictor;
import org.baderlab.pdzsvm.predictor.additive.AdditivePredictor;
import org.baderlab.pdzsvm.predictor.pwm.PWMPredictor;
import org.baderlab.pdzsvm.predictor.pwm.PWMOptPredictor;
import org.baderlab.pdzsvm.predictor.mdsm.MDSMPredictor;
import org.baderlab.pdzsvm.predictor.nn.NN;
import org.baderlab.pdzsvm.predictor.BRAIN.BrainPredictor;
import org.baderlab.pdzsvm.evaluation.Prediction;
import weka.core.Utils;
import com.lowagie.text.Document;
import com.lowagie.text.Phrase;
import com.lowagie.text.pdf.PdfWriter;
import com.lowagie.text.pdf.PdfPTable;
import com.lowagie.text.pdf.PdfPCell;


/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Program to scan human, worm or fly proteomes for potential binders of
 * PDZ domains. This code generates output for Fig. 5.
 */
public class ProteomeScan
{
    // Sequence database handed to PDZSVMUtils.makeProfile() when scan() builds the test profile.
    // NOTE(review): presumably populated by initGenome() -- confirm.
    private HashSequenceDB genomeSeqDB;
    // Peptide strings checked by addValidationData(): non-PDZBase validation peptides are kept
    // only when they are contained in this list.
    private List genomeSeqList;
    // Number of residue positions passed to PDZSVMUtils.readProteinProfileList().
    private int numPos = Constants.NUM_RES_POS;

    // PDZBase domain key ("name&number") -> List of 5-residue C-terminal peptide strings;
    // filled by loadPDZBaseInteractions().
    private HashMap PDZBaseToSeqListMap;

    private HashMap genomeSeqToGeneNameMap ;
    private HashMap genomeSeqToTranscriptNameMap ;

    // Per-predictor profiles compared against the test profile with NN.getIdentity() in scan().
    // NOTE(review): presumably the nearest training neighbour set by each run*() method -- confirm.
    private ProteinProfile svmNNProfile;
    private ProteinProfile svmMouseNNProfile;

    private ProteinProfile addNNProfile;
    private ProteinProfile pwmOptNNProfile;
    private ProteinProfile pwmTop10NNProfile;
    private ProteinProfile mdsmNNProfile;
    private ProteinProfile brainNNProfile;
    private ProteinProfile pwmSidhuProfile;

    // Short organism identifier of the most recently scanned domain (compared with Constants.HUMAN etc.).
    private String organism = "";
    // Sub-directory of DataFileManager.OUTPUT_ROOT_DIR that receives all output of this class.
    private String parentDir = "/ProteomeScan";
    // Base name (no directory, no extension) of the scan file; set by readScanInfo() and used in stats file names.
    private String testType ="";
    // Directory for peptide files of the current organism; recomputed by scan().
    private String outDir = "";
    // Profiles built from the positive / negative validation peptides of the current domain.
    private ProteinProfile posValidProfile = null;
    private ProteinProfile negValidProfile = null;

    // Experimental method -> List of validation peptide strings (see addValidationData()).
    private HashMap posValidDataMap;
    private HashMap negValidDataMap;
    // ProfileInfo entries describing the validation data of the current domain.
    private List validationInfoList;

    private List svmAllTrainProfileList;
    private List svmMouseAllTrainProfileList;
    private List addAllTrainProfileList;
    private List mdsmAllTrainProfileList;
    private List pwmOptAllTrainProfileList;
    private List pwmTopAllTrainProfileList;
    private List brainAllTrainProfileList;
    private List pwmSidhuAllTrainProfileList;

    // Predictor instances; runMDSM() creates its predictor only while the field is still null.
    // NOTE(review): the other predictors are presumably cached the same way -- confirm.
    private ContactMapSVMPredictor svm = null;
    private ContactMapSVMPredictor svmMouse = null;

    private AdditivePredictor additive = null;
    private MDSMPredictor mdsm;
    private PWMOptPredictor pwmOpt = null;
    private PWMPredictor pwmTop = null;
    private PWMPredictor pwmSidhu = null;

    private Bingo bingoAlgorithm;

    // Run options; overwritten by run(List, boolean, boolean).
    boolean validate = true;
    boolean bingo = false;

    // Interactome for the current organism; created by scan() when validating.
    private IRefIndexInteractome iRefWeb;

    /**
     * Creates a scanner with no data loaded. Genome and validation data are
     * loaded by scan() the first time a domain of a given organism is scanned.
     */
    public ProteomeScan()
    {
    }
    /**
     * Appends one table row per line of a tab-separated statistics file.
     *
     * The column indices read here (1, 2, 4, 6-11) line up with the layout
     * produced by ScanResults.toString(). Rows for the domains "PICK-1" and
     * "NAB1-1" are left out. Each emitted row has ten cells: running row
     * number, domain, NN similarity (2 decimals), #predictions, #TP,
     * #positives, #FP, #negatives, and the two trailing columns shown under
     * the "HPRD" heading.
     *
     * Lines with fewer than 12 columns are skipped with a message instead of
     * aborting the whole table. Any other failure is reported on stdout and
     * stops processing; rows added so far remain in the table.
     *
     * @param table         table that receives the cells
     * @param statsFilename path of the statistics file to read
     */
    private void populateTable(PdfPTable table,String statsFilename)
    {
        final int minColumns = 12;
        BufferedReader br = null;
        try
        {
            br = new BufferedReader(new FileReader(new File(statsFilename)));
            String line;
            int rowNum = 0;
            while((line=br.readLine())!=null)
            {
                String[] splitLine = line.split("\t");
                if (splitLine.length < minColumns)
                {
                    System.out.println("\tSkipping malformed stats line: " + line);
                    continue;
                }
                String domainName = splitLine[1];
                if (domainName.equals("PICK-1") || domainName.equals("NAB1-1") ) continue;

                // Parse before adding any cell so that a bad number cannot leave a partial row behind.
                double nnSim = Double.parseDouble(splitLine[2]);
                rowNum = rowNum + 1;

                String[] rowText = new String[]
                {
                    Integer.toString(rowNum),
                    domainName,
                    Utils.doubleToString(nnSim,2),
                    splitLine[4],   // number of predictions
                    splitLine[6],   // #TP
                    splitLine[8],   // #positive validation
                    splitLine[7],   // #FP
                    splitLine[9],   // #negative validation
                    splitLine[10],
                    splitLine[11]
                };
                for (int c = 0; c < rowText.length; c++)
                {
                    table.addCell(new PdfPCell(new Phrase(rowText[c])));
                }
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            // Always release the file handle, even when parsing failed part-way through.
            if (br != null)
            {
                try
                {
                    br.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception: " + e);
                }
            }
        }
    }

    /**
     * Adds the three heading rows of the statistics table (ten columns wide).
     *
     * Rows one and two each start with a borderless blank cell spanning four
     * columns, followed by centred group labels; row three holds one plain
     * label per column.
     *
     * @param table table that receives the heading cells
     */
    private void addHeading(PdfPTable table)
    {
        final String[][] groupLabels =
        {
            {"PDZBase", "HPRD"},
            {"Positive", "Negative", "Positive"}
        };
        final int[][] groupSpans =
        {
            {4, 2},
            {2, 2, 2}
        };

        for (int row = 0; row < groupLabels.length; row++)
        {
            // Borderless filler above the first four columns.
            PdfPCell filler = new PdfPCell(new Phrase(" "));
            filler.setColspan(4);
            filler.setBorder(PdfPCell.NO_BORDER);
            table.addCell(filler);

            for (int g = 0; g < groupLabels[row].length; g++)
            {
                PdfPCell groupCell = new PdfPCell(new Phrase(groupLabels[row][g]));
                groupCell.setColspan(groupSpans[row][g]);
                groupCell.setHorizontalAlignment(PdfPCell.ALIGN_CENTER);
                table.addCell(groupCell);
            }
        }

        final String[] columnLabels =
        {
            " ", "Domain", "NN Sim", "#Pred.", "#TP", "#P", "#FP", "#N", "#TP", "#P"
        };
        for (int col = 0; col < columnLabels.length; col++)
        {
            table.addCell(new PdfPCell(new Phrase(columnLabels[col])));
        }
    }

    /**
     * Resets the per-domain state before a new scan: clears the SVM, SVM-mouse,
     * additive, PWM-opt, PWM-top and MDSM comparison profiles and both
     * validation profiles, and installs fresh, empty validation containers.
     */
    private void init()
    {
        // Comparison profiles of the individual predictors.
        svmNNProfile = svmMouseNNProfile = null;
        addNNProfile = pwmOptNNProfile = pwmTop10NNProfile = mdsmNNProfile = null;

        // Validation profiles of the previous domain.
        posValidProfile = negValidProfile = null;

        // Fresh containers for the next domain's validation data.
        validationInfoList = new ArrayList();
        posValidDataMap = new HashMap();
        negValidDataMap = new HashMap();
    }
    /**
     * Loads the PDZBase alignment file of the current organism into
     * PDZBaseToSeqListMap (domain key -> List of peptide strings).
     *
     * File layout as read here: a header line starting with "&gt;" whose first
     * two "&amp;"-separated fields are the domain name and number, followed by an
     * interaction line whose second "/"-separated field is the peptide. Only
     * the last five residues of the peptide are stored. The domain key is
     * cleared after every interaction line, so an interaction line that is not
     * directly preceded by a header is filed under the empty key.
     *
     * Any organism other than mouse, human or worm falls back to the fly file.
     * Blank lines are ignored. Other read or parse failures are reported on
     * stdout and end the load; entries read up to that point are kept.
     */
    public void loadPDZBaseInteractions()
    {
        System.out.println("\tLoading PDZBase interactions...");

        PDZBaseToSeqListMap = new HashMap();
        String pdzbaseFileName;
        if (organism.equals(Constants.MOUSE))
            pdzbaseFileName = DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/Mouse/PDZBase-Mouse-Alignment-Mar142010.txt";
        else if (organism.equals(Constants.HUMAN))
            pdzbaseFileName = DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/Human/PDZBase-Human-Alignment-Mar52010.txt";
        else if (organism.equals(Constants.WORM))
            pdzbaseFileName = DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/Worm/PDZBase-Worm-Alignment-Mar192010.txt";
        else
            pdzbaseFileName = DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/Fly/PDZBase-Fly-Alignment-Mar192010.txt";

        BufferedReader br = null;
        try
        {
            String line;
            String domainKey = "";

            br = new BufferedReader(new FileReader(new File(pdzbaseFileName)));
            while((line=br.readLine())!=null)
            {
                // A blank line carries no data; previously it aborted the load via an index error.
                if (line.trim().length() == 0)
                    continue;

                if (line.startsWith(">"))
                {
                    String[] splitName = line.split("&");
                    String domainName = splitName[0];
                    String domainNum = splitName[1];
                    // Drop the leading '>' of the header.
                    domainKey = domainName.substring(1) + "&" + domainNum;
                }
                else
                {
                    String[] interaction = line.split("/");
                    String peptideSeq = interaction[1];
                    // Keep only the five C-terminal residues.
                    peptideSeq = peptideSeq.substring(peptideSeq.length()-5);

                    List seqList = (List)PDZBaseToSeqListMap.get(domainKey);
                    if (seqList ==null)
                    {
                        seqList = new ArrayList();
                        PDZBaseToSeqListMap.put(domainKey, seqList);
                    }
                    seqList.add(peptideSeq);
                    domainKey = "";
                }
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            // Release the file handle on every path, including parse failures.
            if (br != null)
            {
                try
                {
                    br.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception: " + e);
                }
            }
        }

    }
    /**
     * Counters and similarity values collected for one predictor on one domain.
     * toString() renders them as a single tab-separated line.
     */
    private class ScanResults
    {
        // Identification
        public String method;
        public String domainName;

        // Similarity values
        public double domainSim;
        public double predProfileSim;

        // Prediction counts
        public int numPosPred;
        public int numNegPred;
        public int numTP;
        public int numFP;

        // Validation set sizes
        public int numPosValid;
        public int numNegValid;

        // iRefWeb counts
        public int numPosPredInIRefWeb;
        public int numIRefWeb;

        /**
         * Creates a result record with all counters and similarities at zero.
         *
         * @param predictorName stored in {@code method}
         * @param domainName    stored in {@code domainName}
         */
        public ScanResults(String predictorName, String domainName)
        {
            this.method = predictorName;
            this.domainName = domainName;

            this.domainSim = 0;
            this.predProfileSim = 0;

            this.numPosPred = 0;
            this.numNegPred = 0;
            this.numTP = 0;
            this.numFP = 0;

            this.numPosValid = 0;
            this.numNegValid = 0;

            this.numPosPredInIRefWeb = 0;
            this.numIRefWeb = 0;
        }

        /**
         * @return the twelve fields joined by tabs, in the order: method,
         *         domainName, domainSim, predProfileSim, numPosPred,
         *         numNegPred, numTP, numFP, numPosValid, numNegValid,
         *         numPosPredInIRefWeb, numIRefWeb
         */
        public String toString()
        {
            StringBuilder row = new StringBuilder();
            row.append(method).append("\t");
            row.append(domainName).append("\t");
            row.append(domainSim).append("\t");
            row.append(predProfileSim).append("\t");
            row.append(numPosPred).append("\t");
            row.append(numNegPred).append("\t");
            row.append(numTP).append("\t");
            row.append(numFP).append("\t");
            row.append(numPosValid).append("\t");
            row.append(numNegValid).append("\t");
            row.append(numPosPredInIRefWeb).append("\t");
            row.append(numIRefWeb);
            return row.toString();
        }

    }
    /**
     * @return the organism identifier of the most recently scanned domain,
     *         or the empty string if nothing has been scanned yet
     */
    public String getOrganism()
    {
        return this.organism;
    }

    /**
     * Builds a profile for the scanned domain from the distinct peptide
     * sequences found in a validation map.
     *
     * Each distinct sequence is added to a sequence database as "Peptide0",
     * "Peptide1", ... in encounter order. A sequence that cannot be converted
     * or added is reported on stdout and left out.
     *
     * @param validDataMap experimental method -> List of peptide strings
     * @param scanInfo     supplies organism, domain name, number and sequence
     * @return the validation profile, or null when no sequence could be added
     */
    private ProteinProfile makeValidProfile(HashMap validDataMap, ScanInfo scanInfo)
    {
        HashSequenceDB seqDB = new HashSequenceDB();

        // Hash-based lookup replaces the former linear List.contains() scan per sequence.
        Set addedSeqs = new HashSet();
        int peptideNum = 0;
        for (Iterator listIt = validDataMap.values().iterator(); listIt.hasNext();)
        {
            List seqList = (List)listIt.next();
            for (Iterator seqIt = seqList.iterator(); seqIt.hasNext();)
            {
                String seqString = (String)seqIt.next();
                if (addedSeqs.contains(seqString))
                    continue;
                try
                {
                    Sequence seq = ProteinTools.createProteinSequence(seqString,"Peptide" + peptideNum);
                    seqDB.addSequence(seq);
                    // Only count and remember the sequence once it is really in the database.
                    peptideNum = peptideNum+1;
                    addedSeqs.add(seqString);
                }
                catch(Exception e)
                {
                    System.out.println("Exception" +e);
                }
            }
        }

        if (seqDB.ids().size() == 0)
            return null;

        String organismLong = PDZSVMUtils.organismShortToLongForm(scanInfo.organism);
        return PDZSVMUtils.makeProfile(scanInfo.domainName, scanInfo.domainNum, scanInfo.domainSeq, organismLong, seqDB);
    }
    /**
     * Scans every domain in the list and, when validating, writes one
     * statistics file per predictor.
     *
     * Each statistics file is written to
     * {@code OUTPUT_ROOT_DIR + parentDir + organismDir + "/" + predictor + "-" + testType + "-stats.txt"}
     * and contains one ScanResults line per scanned domain. The organism
     * directory is taken from the organism of the last scanned domain.
     * Write failures are reported on stdout and do not stop the remaining files.
     *
     * @param scanInfoList List of ScanInfo entries to scan
     * @param validate     whether predictions are validated and statistics written
     * @param bingo        stored in the {@code bingo} field
     */
    public void run(List scanInfoList, boolean validate, boolean bingo)
    {
        this.bingo = bingo;
        this.validate = validate;

        // predictor name -> StringBuffer holding its accumulated result lines
        HashMap resultOutMap = new HashMap();
        System.out.println("\tScanning proteome for " + scanInfoList.size() + " domains...");
        for (int i=0; i < scanInfoList.size();i++)
        {
            ScanInfo scanInfo = (ScanInfo)scanInfoList.get(i);
            HashMap scanResultMap = scan(scanInfo);
            if (!validate)
                continue;

            for (Iterator it = scanResultMap.keySet().iterator(); it.hasNext();)
            {
                String predictorName = (String)it.next();
                ScanResults scanResults = (ScanResults)scanResultMap.get(predictorName);
                StringBuffer resultOut = (StringBuffer)resultOutMap.get(predictorName);
                if (resultOut == null)
                {
                    resultOut = new StringBuffer();
                    resultOutMap.put(predictorName, resultOut);
                }
                resultOut.append(scanResults.toString()).append("\n");
            }
        }

        if (validate)
        {
            // The target directory depends only on the organism, so resolve it once.
            String scannedOrganism = getOrganism();
            String dir = "";
            if (scannedOrganism.equals(Constants.HUMAN))
                dir = "/Human";
            else if (scannedOrganism.equals(Constants.FLY))
                dir = "/Fly";
            else if (scannedOrganism.equals(Constants.WORM))
                dir = "/Worm";
            else if (scannedOrganism.equals(Constants.MOUSE))
                dir = "/MouseOrphan";

            for (Iterator it = resultOutMap.keySet().iterator(); it.hasNext();)
            {
                String predictorName = (String)it.next();
                String resultOutString = resultOutMap.get(predictorName).toString();
                String fileName = DataFileManager.OUTPUT_ROOT_DIR + parentDir + dir + "/" + predictorName + "-" +testType+"-stats.txt";
                System.out.println("\tWriting: ");
                System.out.println(resultOutString);
                System.out.println("\tto file: " + fileName);

                BufferedWriter bw = null;
                try
                {
                    bw = new BufferedWriter(new FileWriter(new File(fileName)));
                    bw.write(resultOutString);
                }
                catch(Exception e)
                {
                    System.out.println("Exception: " + e);
                }
                finally
                {
                    // Close (and thereby flush) on every path so the handle is never leaked.
                    if (bw != null)
                    {
                        try
                        {
                            bw.close();
                        }
                        catch(IOException e)
                        {
                            System.out.println("Exception: " + e);
                        }
                    }
                }
            }
        }
        System.out.println("\tDone.");
    }
    /**
     * Scans the proteome of the domain's organism with every enabled predictor.
     *
     * Steps:
     * 1. Reset per-domain state; when the organism differs from the previous
     *    scan, reload the genome and drop organism-specific validation data.
     * 2. When validating, make sure PDZBase interactions and the iRefIndex
     *    interactome are loaded for the current organism, then collect the
     *    domain's positive and negative validation data.
     * 3. Build the test profile from the genome sequences and run the SVM,
     *    additive, MDSM, PWM-top and mouse-SVM predictors on it.
     * 4. Either validate the predictions (validate mode) or print them.
     *
     * The PWM-opt, BRAIN and Sidhu-PWM predictors (runPWMOpt,
     * runBrainPredictor, runPWMSidhu) are currently not run.
     *
     * @param scanInfo the domain to scan for
     * @return predictor name -> ScanResults when validating; an empty map otherwise
     */
    public HashMap scan(ScanInfo scanInfo)
    {
        String domainName = scanInfo.domainName;
        String organismLong = PDZSVMUtils.organismShortToLongForm(scanInfo.organism);

        System.out.println();
        System.out.println("\t========================================");
        System.out.println("\tScanning for: "+ domainName);

        init();
        System.out.println("\tOrganism: " + scanInfo.organism);
        if (this.organism.equals("") || !this.organism.equals(scanInfo.organism))
        {
            this.organism = scanInfo.organism;

            initGenome();
            // Validation data belongs to the previous organism; force a reload below.
            PDZBaseToSeqListMap = null;
            iRefWeb = null;
        }
        // Load lazily rather than only on an organism change, so that switching
        // validation on for an already-scanned organism still finds its data.
        if (validate && (PDZBaseToSeqListMap == null || iRefWeb == null))
        {
            loadPDZBaseInteractions();
            iRefWeb = new IRefIndexInteractome(organism);
        }

        // Rebuild outDir from scratch on every call; an organism outside the four
        // known ones used to keep the old value and grow by "/PeptideFiles" per scan.
        String organismDir = "";
        if (organism.equals(Constants.HUMAN))
            organismDir = "/Human";
        else if (organism.equals(Constants.FLY))
            organismDir = "/Fly";
        else if (organism.equals(Constants.WORM))
            organismDir = "/Worm";
        else if (organism.equals(Constants.MOUSE))
            organismDir = "/MouseOrphan";
        outDir = DataFileManager.OUTPUT_ROOT_DIR + parentDir + organismDir + "/PeptideFiles";
        System.out.println("\toutdir: " + outDir  );

        if (validate)
        {
            validationInfoList = scanInfo.validInfoList;

            for (int i=0;i < validationInfoList.size();i++)
            {
                ProfileInfo validInfo = (ProfileInfo)validationInfoList.get(i);
                if (validInfo.classIx.equals(Constants.CLASS_YES))
                    addValidationData(domainName, posValidDataMap, validInfo);
                else
                    addValidationData(domainName, negValidDataMap, validInfo);

            }

            if (!posValidDataMap.isEmpty())
                posValidProfile = makeValidProfile(posValidDataMap, scanInfo);
            else
                System.out.println("\tNo positive validation data...");
            if (!negValidDataMap.isEmpty())
                negValidProfile = makeValidProfile(negValidDataMap, scanInfo);
            else
                System.out.println("\tNo negative validation data...");
        }


        ProteinProfile testProfile= PDZSVMUtils.makeProfile(scanInfo.domainName, scanInfo.domainNum, scanInfo.domainSeq, organismLong, genomeSeqDB);

        List posTestProfileList = new ArrayList();
        posTestProfileList.add(testProfile);

        List svmPredictions = runSVM(posTestProfileList);
        List addPredictions = runAdditive(posTestProfileList);
        List mdsmPredictions = runMDSM(posTestProfileList);
        List pwmTop10Predictions = runPWMTopPercent(posTestProfileList);

        List svmMousePredictions = runSVMMouse(posTestProfileList);

        HashMap resultMap = new HashMap();

        if (validate)
        {
            System.out.println("\t==== VALIDATING SVM PREDICTIONS ====");
            double svmSim = NN.getIdentity(testProfile, svmNNProfile);
            System.out.println("\tSVM: " + testProfile.getName() + "\t" + svmNNProfile.getName()+"\t" + svmSim);
            ScanResults svmResults = validatePredictions("SVM",svmPredictions, testProfile, svmSim, true, true);

            System.out.println("\n\t==== VALIDATING ADDITIVE PREDICTIONS ====");
            double addSim = NN.getIdentity(testProfile, addNNProfile);
            System.out.println("\tADD: " + testProfile.getName() + "\t" + addNNProfile.getName()+"\t" + addSim);
            ScanResults addResults = validatePredictions("Additive",addPredictions, testProfile, addSim, false, true);

            System.out.println("\n\t==== VALIDATING MDSM PREDICTIONS ====");
            double mdsmSim = NN.getIdentity(testProfile, mdsmNNProfile);
            System.out.println("\tMDSM: " + testProfile.getName() + "\t" +mdsmNNProfile.getName()+"\t"+ mdsmSim);
            ScanResults mdsmResults = validatePredictions("MDSM",mdsmPredictions, testProfile, mdsmSim, false, true);

            System.out.println("\n\t==== VALIDATING PWM TOP % PREDICTIONS ====");
            double pwmTop10Sim = NN.getIdentity(testProfile, pwmTop10NNProfile);
            System.out.println("\tPWM TOP 1: " + testProfile.getName() + "\t" + pwmTop10NNProfile.getName() + "\t"+ pwmTop10Sim);
            ScanResults pwmTop10Results = validatePredictions("PWMTop1",pwmTop10Predictions, testProfile, pwmTop10Sim,false, true);

            System.out.println("\t==== VERIFYING SVM MOUSE PREDICTIONS ====");
            double svmMouseSim = NN.getIdentity(testProfile, svmMouseNNProfile);
            System.out.println("\tSVM Mouse: " + testProfile.getName() + "\t" + svmMouseNNProfile.getName()+"\t" + svmMouseSim);
            ScanResults svmMouseResults = validatePredictions("SVM Mouse",svmMousePredictions, testProfile, svmMouseSim, false, true);

            resultMap.put(svmResults.method, svmResults);
            resultMap.put(addResults.method, addResults);
            resultMap.put(mdsmResults.method, mdsmResults);
            resultMap.put(pwmTop10Results.method, pwmTop10Results);
            resultMap.put(svmMouseResults.method, svmMouseResults);
        }
        else
        {
            System.out.println("\t==== PRINTING SVM PREDICTIONS ====");
            double svmSim = NN.getIdentity(testProfile, svmNNProfile);
            System.out.println("\tSVM: " + testProfile.getName() + "\t" + svmNNProfile.getName()+"\t" + svmSim);
            printPredictions("SVM",svmPredictions, testProfile, svmSim, true, true);

            System.out.println("\n\t==== PRINTING ADDITIVE PREDICTIONS ====");
            double addSim = NN.getIdentity(testProfile, addNNProfile);
            System.out.println("\tADD: " + testProfile.getName() + "\t" + addNNProfile.getName()+"\t" + addSim);
            printPredictions("Additive",addPredictions, testProfile, addSim, false, true);

            System.out.println("\n\t==== PRINTING MDSM PREDICTIONS ====");
            double mdsmSim = NN.getIdentity(testProfile, mdsmNNProfile);
            System.out.println("\tMDSM: " + testProfile.getName() + "\t" +mdsmNNProfile.getName()+"\t"+ mdsmSim);
            printPredictions("MDSM",mdsmPredictions, testProfile, mdsmSim, false, true);

            System.out.println("\n\t==== PRINTING PWM TOP % PREDICTIONS ====");
            double pwmTop10Sim = NN.getIdentity(testProfile, pwmTop10NNProfile);
            System.out.println("\tPWM TOP 10: " + testProfile.getName() + "\t" + pwmTop10NNProfile.getName() + "\t"+ pwmTop10Sim);
            printPredictions("PWMTop10",pwmTop10Predictions, testProfile, pwmTop10Sim,false, true);
        }

        return resultMap;
    }
    /**
     * Describes one validation data set of a domain, as parsed by
     * readScanInfo() from a whitespace-separated "Valid ..." line.
     */
    private class ProfileInfo
    {
        // Column 1 of the "Valid" line; compared with Constants.HUMAN/WORM/FLY in getProfileName().
        String organism;
        // Column 2; experimental method (e.g. Constants.PDZBASE, PHAGE_DISPLAY, PROTEIN_MICROARRAY).
        String expMethod;
        // Column 3; domain name used as file base name or as PDZBase lookup key.
        String localDomainName;
        // Column 4; Constants.CLASS_YES marks positive data, anything else is treated as negative.
        String classIx;
    }

    /**
     * One domain to scan for, as parsed by readScanInfo() from a
     * whitespace-separated entry line (organism, name, number, sequence).
     */
    private class ScanInfo
    {
        // Column 0 of the entry line; short organism identifier.
        public String organism;
        // Column 2; parsed with Integer.parseInt().
        public int domainNum;
        // Column 3; domain sequence passed to PDZSVMUtils.makeProfile().
        public String domainSeq;
        // Column 1; domain name.
        public String domainName;
        // ProfileInfo entries from the "Valid" lines that follow the entry line.
        public List validInfoList = new ArrayList();

    }
    /**
     * Parses a scan description file into a list of ScanInfo entries and sets
     * {@code testType} to the file's base name (no directory, no extension).
     *
     * Line types:
     * - lines starting with "#" are comments and close the current entry;
     * - lines starting with "Valid" add a ProfileInfo (organism, method,
     *   local domain name, class) to the current entry;
     * - any other non-blank line starts a new entry:
     *   organism, domain name, domain number, domain sequence.
     *
     * Every entry is returned exactly once, whether it is followed by a
     * comment, by another entry or by the end of the file. Blank lines and
     * "Valid" lines that precede any entry are ignored. A malformed line is
     * reported on stdout and ends parsing; entries completed before it are
     * still returned.
     *
     * @param scanFile path of the scan description file
     * @return List of ScanInfo, possibly empty
     */
    private List readScanInfo(String scanFile)
    {
        int slashIndex = scanFile.lastIndexOf("/");
        int dotIndex = scanFile.lastIndexOf(".");
        // No extension (or the only dot is in a directory name): use the whole base name.
        if (dotIndex <= slashIndex)
            dotIndex = scanFile.length();
        testType = scanFile.substring(slashIndex+1, dotIndex);

        List scanInfoList= new ArrayList();
        BufferedReader br = null;
        try
        {
            br = new BufferedReader(new FileReader(new File(scanFile)));
            String line;

            // Entry that has been parsed but not yet appended to scanInfoList.
            ScanInfo pending = null;

            while((line=br.readLine())!=null)
            {
                if (line.trim().length() == 0)
                    continue;

                if (line.startsWith("Valid"))
                {
                    if (pending == null)
                    {
                        System.out.println("\tIgnoring validation line without a domain entry: " + line);
                        continue;
                    }
                    String[] splitLine = line.split("\\s+");
                    ProfileInfo validInfo = new ProfileInfo();
                    validInfo.organism = splitLine[1];
                    validInfo.expMethod = splitLine[2];
                    validInfo.localDomainName = splitLine[3];
                    validInfo.classIx = splitLine[4];
                    pending.validInfoList.add(validInfo);
                }
                else if (line.startsWith("#"))
                {
                    // A comment terminates the entry before it; clearing 'pending'
                    // keeps the entry from being added a second time later on.
                    if (pending != null)
                    {
                        scanInfoList.add(pending);
                        pending = null;
                    }
                }
                else
                {
                    // A new entry also terminates an unfinished previous one.
                    if (pending != null)
                    {
                        scanInfoList.add(pending);
                        pending = null;
                    }
                    String[] splitLine = line.split("\\s+");
                    ScanInfo scanInfo = new ScanInfo();
                    scanInfo.organism = splitLine[0];
                    scanInfo.domainName = splitLine[1];
                    scanInfo.domainNum = Integer.parseInt(splitLine[2]);
                    scanInfo.domainSeq = splitLine[3];
                    scanInfo.validInfoList = new ArrayList();
                    pending = scanInfo;
                }
            }
            if (pending != null)
                scanInfoList.add(pending);
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            if (br != null)
            {
                try
                {
                    br.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception: " + e);
                }
            }
        }
        return scanInfoList;
    }

    /**
     * Resolves the peptide file that holds the validation data described by
     * the given profile info.
     *
     * The directory depends on organism, experimental method and class:
     * - human: phage display only (file suffix ".pep.txt");
     * - worm: phage display, otherwise the Chen Pos/Neg directories;
     * - fly: protein microarray only;
     * - anything else: protein microarray from the mouse directories, with a
     *   fixed set of domains read from the "Orphan" sub-directories.
     * Apart from the human phage case, the suffix is ".pep.pos.txt" for
     * Constants.CLASS_YES and ".pep.neg.txt" otherwise.
     *
     * @param profileInfo organism, method, domain name and class of the data set
     * @return directory + domain name + suffix, or "" when no directory applies
     */
    private String getProfileName( ProfileInfo profileInfo)
    {
        final boolean positive = profileInfo.classIx.equals(Constants.CLASS_YES);
        final String method = profileInfo.expMethod;
        final String localName = profileInfo.localDomainName;
        final String species = profileInfo.organism;

        String suffix = positive ? ".pep.pos.txt" : ".pep.neg.txt";
        String folder = "";

        if (species.equals(Constants.HUMAN))
        {
            if (method.equals(Constants.PHAGE_DISPLAY))
            {
                folder = DataFileManager.DATA_ROOT_DIR +"/Data/Human/PDZ/PeptideFiles/data/PDZ/Human/SidhuPhage/";
                suffix = ".pep.txt";
            }
        }
        else if (species.equals(Constants.WORM))
        {
            if (method.equals(Constants.PHAGE_DISPLAY))
                folder = DataFileManager.DATA_ROOT_DIR +"/Data/Worm/PDZ/PeptideFiles/data/PDZ/Worm/SidhuPhage/";
            else
                folder = positive
                        ? DataFileManager.DATA_ROOT_DIR +"/Data/Worm/PDZ/PeptideFiles/data/PDZ/Worm/Chen/Pos/"
                        : DataFileManager.DATA_ROOT_DIR +"/Data/Worm/PDZ/PeptideFiles/data/PDZ/Worm/Chen/Neg/";
        }
        else if (species.equals(Constants.FLY))
        {
            if (method.equals(Constants.PROTEIN_MICROARRAY))
                folder = positive
                        ? DataFileManager.DATA_ROOT_DIR +"/Data/Fly/PDZ/PeptideFiles/data/PDZ/Chen/Pos/"
                        : DataFileManager.DATA_ROOT_DIR +"/Data/Fly/PDZ/PeptideFiles/data/PDZ/Chen/Neg/";
        }
        else if (method.equals(Constants.PROTEIN_MICROARRAY))
        {
            // Domains whose files live in the "Orphan" sub-directories.
            final String[] orphanDomains =
            {
                "DELPHILIN_1", "GOPC1_1", "MAGI-2_3", "MAGI-2_4", "MUPP-1_2",
                "NHERF1-2", "PDZK-7_1", "PDZK1_4", "SCRB-1_4"
            };
            boolean orphan = false;
            for (int k = 0; k < orphanDomains.length && !orphan; k++)
            {
                orphan = localName.equals(orphanDomains[k]);
            }

            if (orphan)
                folder = positive
                        ? DataFileManager.DATA_ROOT_DIR +"/Data/Mouse/PDZ/PeptideFiles/data/PDZ/Orphan/Pos/"
                        : DataFileManager.DATA_ROOT_DIR +"/Data/Mouse/PDZ/PeptideFiles/data/PDZ/Orphan/Neg/";
            else
                folder = positive
                        ? DataFileManager.DATA_ROOT_DIR +"/Data/Mouse/PDZ/PeptideFiles/data/PDZ/Pos/"
                        : DataFileManager.DATA_ROOT_DIR +"/Data/Mouse/PDZ/PeptideFiles/data/PDZ/Neg/";
        }

        // No directory applies to this organism/method combination.
        if (folder.equals(""))
            return "";
        return folder + localName + suffix;
    }


    /**
     * Collects the distinct validation peptides of one data set and, when any
     * were found, stores them in the given map under the experimental method.
     *
     * PDZBase data is looked up in PDZBaseToSeqListMap by the local domain
     * name. Data of any other method is read from the peptide file resolved
     * by getProfileName(); of those peptides only the ones contained in
     * genomeSeqList are kept. A missing PDZBase entry, a missing file name or
     * an empty profile list yields zero peptides rather than an exception.
     *
     * @param domainName name of the scanned domain (used for messages only)
     * @param validMap   receives experimental method -> List of peptide strings
     * @param validInfo  describes the validation data set to load
     */
    private void addValidationData(String domainName, HashMap validMap, ProfileInfo validInfo)
    {
        System.out.println("\tAdding validation data for: " +domainName);
        String expMethod =validInfo.expMethod;

        // Insertion-ordered set: drops duplicates while keeping first-seen order.
        Set distinctSeqs = new LinkedHashSet();

        if (expMethod.equals(Constants.PDZBASE))
        {
            List seqList = null;
            if (PDZBaseToSeqListMap != null)
                seqList = (List)PDZBaseToSeqListMap.get(validInfo.localDomainName);

            // The domain may simply have no PDZBase interactions; this used to raise an uncaught NPE.
            if (seqList == null)
                System.out.println("\tNo PDZBase interactions found for: " + validInfo.localDomainName);
            else
                distinctSeqs.addAll(seqList);
        }
        else
        {
            String profileFileName = getProfileName(validInfo);
            if (!profileFileName.equals(""))
            {
                String codonBiasFile = DataFileManager.NNK_CODON_BIAS_FILENAME;
                List profileList = PDZSVMUtils.readProteinProfileList(profileFileName,codonBiasFile,numPos, true);
                if (profileList == null || profileList.isEmpty())
                {
                    System.out.println("\tNo profile could be read from: " + profileFileName);
                }
                else
                {
                    ProteinProfile theProfile = (ProteinProfile)profileList.get(0);
                    Collection seqCollection = theProfile.getSequenceMap();

                    for (Iterator it = seqCollection.iterator(); it.hasNext();)
                    {
                        Sequence seq = (Sequence)it.next();
                        String seqString = seq.seqString();
                        // Keep only peptides that are present in the genome sequence list.
                        if (genomeSeqList.contains(seqString))
                            distinctSeqs.add(seqString);
                    }
                }
            }

        }

        List validSeqList = new ArrayList(distinctSeqs);
        if (validSeqList.size()>0)
        {
            validMap.put(expMethod, validSeqList);

        }
        System.out.println("\tAdded " + validSeqList.size() + " " +expMethod+" interactions for " + domainName);

        System.out.println();
    }

    /**
     * Predicts with the MDSM predictor, building and training it from the mouse
     * training data the first time it is needed.
     * <p>
     * As a side effect <code>mdsmNNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile
     * (presumably its nearest training neighbour - confirm against NN).
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the MDSM predictor
     */
    private List runMDSM(List posTestProfileList)
    {
        // Lazily construct the predictor so repeated scans reuse the trained model.
        if (mdsm == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseTrain();

            List trainPos = loader.getPosTrainProfileList();
            List trainNeg = loader.getNegTrainProfileList();
            mdsmAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);

            mdsm = new MDSMPredictor(trainPos, trainNeg);
            mdsm.train();
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        mdsmNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, mdsmAllTrainProfileList);

        // No negative test profiles are supplied.
        return mdsm.predict(posTestProfileList, new ArrayList());
    }

    /**
     * Predicts with the optimised PWM predictor, building and training it on first use
     * from the mouse/Chen training data after balancing positives against negatives
     * with <code>PDZSVMUtils.balanceLists</code>.
     * <p>
     * As a side effect <code>pwmOptNNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the predictor
     */
    private List runPWMOpt(List posTestProfileList)
    {
        if (pwmOpt == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseChenTrain();

            // balanceLists returns {positives, negatives}
            List[] balanced = PDZSVMUtils.balanceLists(loader.getPosTrainProfileList(), loader.getNegTrainProfileList());
            List trainPos = balanced[0];
            List trainNeg = balanced[1];

            pwmOptAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);
            pwmOpt = new PWMOptPredictor(trainPos, trainNeg);
            pwmOpt.train();
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        pwmOptNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, pwmOptAllTrainProfileList);

        return pwmOpt.predict(posTestProfileList, new ArrayList());
    }
    /**
     * Predicts with the additive predictor, constructing it on first use from the
     * mouse/Chen training data.
     * <p>
     * As a side effect <code>addNNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the predictor
     */
    private List runAdditive(List posTestProfileList)
    {
        if (additive == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseChenTrain();

            List trainPos = loader.getPosTrainProfileList();
            List trainNeg = loader.getNegTrainProfileList();

            addAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);
            // NOTE(review): unlike the MDSM/PWMOpt/SVM runners no train() call is made here.
            additive = new AdditivePredictor(trainPos, trainNeg);
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        addNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, addAllTrainProfileList);

        return additive.predict(posTestProfileList, new ArrayList());
    }

    /**
     * Predicts with a PWM predictor built on first use from the positive profiles of
     * the Sidhu human training data.
     * <p>
     * As a side effect <code>pwmSidhuProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the predictor
     */
    private List runPWMSidhu(List posTestProfileList)
    {
        if (pwmSidhu == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadSidhuHumanTrain();

            // Positives only: the "all" list is built with an empty negative list.
            List trainPos = loader.getPosTrainProfileList();
            pwmSidhuAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, new ArrayList());

            pwmSidhu = new PWMPredictor(trainPos);
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        pwmSidhuProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, pwmSidhuAllTrainProfileList);

        return pwmSidhu.predict(posTestProfileList, new ArrayList());
    }
    /**
     * Predicts with a PWM predictor whose percent setting is 1, built on first use from
     * the positive profiles of the mouse/Chen and human training data.
     * <p>
     * As a side effect <code>pwmTop10NNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the predictor
     */
    private List runPWMTopPercent(List posTestProfileList)
    {
        if (pwmTop == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseChenTrain();
            // Human data: positive profiles only, no predicted negatives.
            loader.loadHumanTrain(Constants.NONE);

            List trainPos = loader.getPosTrainProfileList();
            pwmTopAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, new ArrayList());

            pwmTop = new PWMPredictor(trainPos);
            pwmTop.setPercent(1);
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        pwmTop10NNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, pwmTopAllTrainProfileList);

        return pwmTop.predict(posTestProfileList, new ArrayList());
    }

    /**
     * Predicts with the contact-map SVM trained on mouse/Chen data only. The predictor
     * is built and trained on first use with C = e^2 and gamma = e^(-ln2 - 4).
     * <p>
     * As a side effect <code>svmMouseNNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the SVM predictor
     */
    private List runSVMMouse(List posTestProfileList)
    {
        if (svmMouse == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseChenTrain();

            svm_parameter params = new svm_parameter();
            params.setDefaults();
            params.C = Math.exp(2);
            params.gamma = Math.exp(-Math.log(2)-4);
            params.data_encoding = svm_parameter.CONTACTMAP2020;

            List trainPos = loader.getPosTrainProfileList();
            List trainNeg = loader.getNegTrainProfileList();
            svmMouseAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);

            System.out.println("\tTraining data type: Mouse Only");
            svmMouse = new ContactMapSVMPredictor(trainPos, trainNeg, params);
            svmMouse.train();
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        svmMouseNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, svmMouseAllTrainProfileList);

        return svmMouse.predict(posTestProfileList, new ArrayList());
    }
    /**
     * Scans the Ensembl proteome of the current organism with the BRAIN predictor,
     * using the Sidhu human training profile that has the same name as the first
     * test profile.
     * <p>
     * The training data is reloaded on every call. As side effects
     * <code>brainAllTrainProfileList</code> and <code>brainNNProfile</code> are updated.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the list returned by <code>BrainPredictor.scan</code>
     */
    private List runBrainPredictor(List posTestProfileList)
    {
        // Proteome of the organism being scanned (field); fly is the fallback.
        String proteomeFasta = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Drosophila_melanogaster.BDGP5.13.56.pep.all.fa";
        if (organism.equals(Constants.WORM))
        {
            proteomeFasta = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Caenorhabditis_elegans.WS200.56.pep.all.fa";
        }
        else if (organism.equals(Constants.HUMAN))
        {
            proteomeFasta = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Homo_sapiens.GRCh37.56.pep.all.fa";
        }
        else if (organism.equals(Constants.MOUSE))
        {
            proteomeFasta = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Mus_musculus.NCBIM37.56.pep.all.fa";
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);

        DataLoader loader = new DataLoader();
        loader.loadSidhuHumanTrain();
        List trainPos = loader.getPosTrainProfileList();
        List trainNeg = loader.getNegTrainProfileList();

        // Look the scanning profile up by name among the positive training profiles.
        // NOTE(review): the lookup result is not checked, so the scan list may hold null.
        HashMap trainPosByName = PDZSVMUtils.profileListToHashMap(trainPos);
        List scanProfiles = new ArrayList();
        scanProfiles.add((ProteinProfile) trainPosByName.get(query.getName()));

        brainAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);

        // Organism of the test profile itself, which may differ from the scanned organism.
        String queryOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        brainNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), queryOrganism, brainAllTrainProfileList);

        BrainPredictor scanner = new BrainPredictor(proteomeFasta);
        return scanner.scan(scanProfiles);
    }
    /**
     * Predicts with the contact-map SVM trained on mouse/Chen plus human data (human
     * data loaded with <code>Constants.PWM</code>). The predictor is built and trained
     * on first use with C = e^2 and gamma = e^(-ln2 - 4).
     * <p>
     * As a side effect <code>svmNNProfile</code> is set to the profile that
     * <code>NN.getNNBindingSiteSeqProfile</code> returns for the first test profile.
     *
     * @param posTestProfileList test profiles; the first element must exist
     * @return the predictions returned by the SVM predictor
     */
    private List runSVM(List posTestProfileList)
    {
        if (svm == null)
        {
            DataLoader loader = new DataLoader();
            loader.loadMouseChenTrain();
            loader.loadHumanTrain(Constants.PWM);

            // Exponents of the kernel parameters; they are also echoed to the log.
            double costExp = 2.0;
            double gammaExp = 4.0;
            svm_parameter params = new svm_parameter();
            params.setDefaults();
            params.C = Math.exp(costExp);
            params.gamma = Math.exp(-Math.log(2)-gammaExp);
            params.data_encoding = svm_parameter.CONTACTMAP2020;
            System.out.println("\tSVM, [g,C] = ["+gammaExp+","+costExp+"])");

            List trainPos = loader.getPosTrainProfileList();
            List trainNeg = loader.getNegTrainProfileList();
            svmAllTrainProfileList = PDZSVMUtils.allProfileList(trainPos, trainNeg);

            svm = new ContactMapSVMPredictor(trainPos, trainNeg, params);
            svm.train();
        }

        ProteinProfile query = (ProteinProfile) posTestProfileList.get(0);
        String shortOrganism = PDZSVMUtils.organismLongToShortForm(query.getOrganism());
        svmNNProfile = NN.getNNBindingSiteSeqProfile(query.getDomainSequence(), shortOrganism, svmAllTrainProfileList);

        return svm.predict(posTestProfileList, new ArrayList());
    }


    /**
     * Loads the Ensembl proteome of the current organism and builds the peptide lookup
     * structures used by the scan:
     * <ul>
     *   <li><code>genomeSeqToGeneNameMap</code> / <code>genomeSeqToTranscriptNameMap</code>
     *       as returned by the corresponding <code>PDZSVMUtils</code> loaders (the
     *       argument 5 is presumably the C-terminal peptide length - confirm);</li>
     *   <li><code>genomeSeqList</code>, the keys of the gene-name map;</li>
     *   <li><code>genomeSeqDB</code>, holding the last <code>numPos</code> residues of
     *       each key as sequence <code>Peptide&lt;index&gt;</code>.</li>
     * </ul>
     * When the <code>bingo</code> flag is set the Bingo analysis is initialised as well.
     *
     * @throws IllegalStateException if no gene-name map is available after the load
     *         attempt (previously this surfaced as a bare NullPointerException)
     */
    public void initGenome()
    {
        String fastaFileName;
        if (organism.equals(Constants.WORM))
            fastaFileName= DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Caenorhabditis_elegans.WS200.56.pep.all.fa";
        else if (organism.equals(Constants.HUMAN))
            fastaFileName = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Homo_sapiens.GRCh37.56.pep.all.fa";
        else if (organism.equals(Constants.MOUSE))
            fastaFileName = DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Mus_musculus.NCBIM37.56.pep.all.fa";
        else
            fastaFileName =DataFileManager.DATA_ROOT_DIR+"/Data/Ensembl/Drosophila_melanogaster.BDGP5.13.56.pep.all.fa";

        Exception loadFailure = null;
        try
        {
            genomeSeqToGeneNameMap=  PDZSVMUtils.genomeSeqToGeneNameMap( fastaFileName, 5);
            genomeSeqToTranscriptNameMap=  PDZSVMUtils.genomeSeqToTranscriptNameMap( fastaFileName, 5);
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
            loadFailure = e;
        }

        // Without a gene-name map nothing below can work; fail with the real reason
        // instead of a NullPointerException on keySet().
        if (genomeSeqToGeneNameMap == null)
        {
            throw new IllegalStateException("Could not load genome sequences from " + fastaFileName, loadFailure);
        }

        Collection keys = genomeSeqToGeneNameMap.keySet();
        genomeSeqList = new ArrayList(keys);

        genomeSeqDB = new HashSequenceDB();
        int numAdded = 0;
        for (int i=0; i < genomeSeqList.size(); i++)
        {
            // A bad entry (e.g. shorter than numPos or not a valid protein string) is
            // reported and skipped so that the remaining peptides are still loaded.
            try
            {
                String seq = (String) genomeSeqList.get(i);
                seq = seq.substring(seq.length()-numPos, seq.length());
                Sequence prot = ProteinTools.createProteinSequence(seq, "Peptide" + i);
                genomeSeqDB.addSequence(prot);
                numAdded = numAdded+1;
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
            }
        }
        System.out.println("\tGenomic seq size: " + numAdded);

        if (bingo)
        {
            bingoAlgorithm = new Bingo(organism,DataFileManager.OUTPUT_ROOT_DIR+"/Bingo/");
            System.out.println("\tBingo initialized...") ;
        }

    }
    /**
     * Returns the peptide strings of all sequences held by a profile.
     *
     * @param profile the profile to read; may be <code>null</code>
     * @return a new list with one string per sequence, in the iteration order of the
     *         profile's sequence collection; empty when <code>profile</code> is null
     */
    private List getSeqList(ProteinProfile profile)
    {
        List peptideStrings = new ArrayList();
        if (profile != null)
        {
            Collection sequences = profile.getSequenceMap();
            for (Iterator it = sequences.iterator(); it.hasNext();)
            {
                Sequence current = (Sequence) it.next();
                peptideStrings.add(current.seqString());
            }
        }
        return peptideStrings;
    }


    /**
     * Orders predictions from the highest to the lowest decision value.
     * <p>
     * The ascending index order comes from <code>weka.core.Utils.sort</code> and is
     * walked backwards, so the input list itself is left unmodified.
     *
     * @param predictions list of {@link Prediction} objects
     * @return a new list containing the same predictions, best score first
     */
    private List sortPredictions(List predictions)
    {
        System.out.println("\tSorting predictions...");

        int count = predictions.size();
        double[] scores = new double[count];
        for (int k = 0; k < count; k++)
        {
            scores[k] = ((Prediction) predictions.get(k)).getDecValue();
        }

        // Indices into 'predictions', lowest score first.
        int[] ascending = Utils.sort(scores);

        List ranked = new ArrayList(count);
        for (int k = ascending.length - 1; k >= 0; k--)
        {
            ranked.add((Prediction) predictions.get(ascending[k]));
        }
        System.out.println("\tFinished sorting predictions...");

        return ranked;
    }

    /**
     * Finds every experimental method whose validated peptide list contains the
     * given peptide.
     *
     * @param validMap experimental method (String) -> List of peptide strings
     * @param binder   the peptide to look for
     * @return a new, modifiable list of the matching method names in the map's
     *         iteration order; empty when nothing matches
     */
    private List lookup(HashMap validMap,  String binder)
    {
        List expMethodList = new ArrayList();
        for (Iterator it = validMap.entrySet().iterator(); it.hasNext();)
        {
            Map.Entry entry = (Map.Entry) it.next();
            String expMethod = (String) entry.getKey();
            List validSeqList = (List) entry.getValue();
            // A method registered without a peptide list cannot match anything.
            if (validSeqList != null && validSeqList.contains(binder))
            {
                expMethodList.add(expMethod);
            }
        }
        return expMethodList;
    }

    /**
     * Ranks the predictions for one domain and reports the positively predicted
     * peptides (no validation is performed).
     * <p>
     * For every positive prediction the transcript and gene names registered for the
     * peptide are collected (prefixed with <code>Ensembl:</code> for human and
     * <code>FLYBASE:</code> for fly). When <code>print</code> is set each positive is
     * written to standard output and to
     * <code>outDir/&lt;domain&gt;.&lt;predictor&gt;.predictions.txt</code> as
     * peptide, decision value, "-" and transcript names, tab separated. For the
     * "SVM" predictor with <code>makeLogo</code> set, the profile built from the
     * positives is additionally saved as <code>.pep.txt</code>.
     *
     * @param predictorName name of the predictor; used in file names
     * @param predictions   list of {@link Prediction} objects
     * @param testProfile   profile of the scanned domain
     * @param sim           not used by this method
     * @param print         whether to print and write the individual predictions
     * @param makeLogo      whether to save the predicted peptide profile (SVM only)
     */
    private void printPredictions(String predictorName, List predictions, ProteinProfile testProfile, double sim, boolean print, boolean makeLogo)
    {
        List sortedPredictions = sortPredictions(predictions);
        List geneNamesList = new ArrayList();
        List transcriptNamesList = new ArrayList();

        String domainName = testProfile.getName();
        HashSequenceDB seqDB= new HashSequenceDB();
        int numPosPredictions = 0;
        int numNegPredictions = 0;

        // Database prefix for reported identifiers; other organisms are left bare.
        String idPrefix = "";
        if (Constants.HUMAN.equals(organism))
            idPrefix = "Ensembl:";
        else if (Constants.FLY.equals(organism))
            idPrefix = "FLYBASE:";

        StringBuffer predictionOutput = new StringBuffer();
        for (int i = 0; i < sortedPredictions.size(); i++)
        {
            Prediction pred = (Prediction)sortedPredictions.get(i);
            if (pred.getPrediction() != 1.0)
            {
                // Don't print negatives out
                numNegPredictions = numNegPredictions + 1;
                continue;
            }

            String binder = pred.peptideSeq;
            String decValueString = Utils.doubleToString(pred.getDecValue(), 3);

            // A peptide unknown to the genome maps contributes no names.
            List geneNameList = (List)genomeSeqToGeneNameMap.get(binder);
            if (geneNameList == null)
                geneNameList = Collections.EMPTY_LIST;
            List transcriptNameList = (List)genomeSeqToTranscriptNameMap.get(binder);
            if (transcriptNameList == null)
                transcriptNameList = Collections.EMPTY_LIST;

            StringBuffer transcriptNames = new StringBuffer();
            for (int j = 0; j < transcriptNameList.size(); j++)
            {
                String transcriptName = (String)transcriptNameList.get(j);
                if (idPrefix.length() > 0)
                    transcriptName = idPrefix + transcriptName;
                transcriptNames.append(transcriptName).append(" ");

                if (!transcriptNamesList.contains(transcriptName))
                    transcriptNamesList.add(transcriptName);
            }

            // Do the same for genes
            for (int j = 0; j < geneNameList.size(); j++)
            {
                String geneName = (String)geneNameList.get(j);
                if (idPrefix.length() > 0)
                    geneName = idPrefix + geneName;

                if (!geneNamesList.contains(geneName))
                    geneNamesList.add(geneName);
            }

            try
            {
                Sequence seq = ProteinTools.createProteinSequence(binder,domainName.toUpperCase() + numPosPredictions);
                seqDB.addSequence(seq);
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
            }

            numPosPredictions = numPosPredictions + 1;
            if (print)
            {
                String line = binder + "\t" + decValueString+ "\t" + "-" + "\t" + transcriptNames;
                System.out.println(line);
                predictionOutput.append(line).append("\n");
            }
        }

        System.out.println();
        System.out.println("\t=== SUMMARY === ");

        if (numPosPredictions == 0)
        {
            System.out.println("\tNo positive predictions made...");
        }
        System.out.println("\tNumber of transcripts: " + transcriptNamesList.size());
        System.out.println("\tNumber of genes: " + geneNamesList.size());

        if (print)
        {
            for (int ii = 0; ii < geneNamesList.size(); ii++)
            {
                System.out.print(geneNamesList.get(ii) + " ");
            }

            BufferedWriter bw = null;
            try
            {
                System.out.println("\tWriting predictions to file...");
                bw = new BufferedWriter(new FileWriter( new File(outDir + "/"+ testProfile.getName()+"."+predictorName+".predictions.txt")));
                bw.write(predictionOutput.toString());
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
            }
            finally
            {
                // Always release the file handle, even when the write failed.
                if (bw != null)
                {
                    try
                    {
                        bw.close();
                    }
                    catch(IOException e)
                    {
                        System.out.println("Exception: " + e);
                    }
                }
            }

            System.out.println();
        }
        ProteinProfile predProfile = PDZSVMUtils.makeProfile(testProfile, seqDB, "",false, false);

        if (predictorName.equals("SVM") && makeLogo && predProfile!=null)
        {
            PDZSVMUtils.saveToProfile(predProfile,outDir + "/" + predProfile.getName()+"."+predictorName+".pep.txt");
        }

        System.out.println("\t # Pos Pred: " + numPosPredictions);
        System.out.println("\t # Neg Pred: " + numNegPredictions);
    }
    /**
     * Ranks the predictions for one domain, reports every positively predicted peptide
     * together with the experimental evidence known for it, and summarises the result.
     * <p>
     * Each positive prediction is tagged as
     * <ul>
     *   <li><code>*</code> - the peptide is in the positive validation profile, or
     *       <code>iRefWeb.lookupEnsemblId</code> returns true for the domain's protein
     *       and at least one of the peptide's transcripts (reported as
     *       <code>IR(n,...)</code>, n being the 1-based transcript position);</li>
     *   <li><code>X</code> - otherwise, the peptide is in the negative validation profile;</li>
     *   <li>blank - neither of the above.</li>
     * </ul>
     * Only membership in the validation profiles counts towards the true/false positive
     * totals; iRefWeb hits are tallied separately. A peptide present in both validation
     * profiles is counted as a true positive only.
     * <p>
     * When <code>print</code> is set the tagged lines are printed and written to
     * <code>outDir/&lt;domain&gt;.&lt;predictor&gt;.predictions.txt</code>, and the Bingo
     * analysis is run on the predicted genes if enabled. The protein name used for
     * iRefWeb is the domain name up to its last "-".
     *
     * @param predictorName name of the predictor; used in file names and the result
     * @param predictions   list of {@link Prediction} objects
     * @param testProfile   profile of the scanned domain
     * @param sim           domain similarity, stored in the result as <code>domainSim</code>
     * @param print         whether to print/write individual predictions and gene names
     * @param makeLogo      whether to save the predicted peptide profile (SVM only)
     * @return the summary counts; when there is no positive prediction only the
     *         validation-set sizes and iRefWeb counts are filled in
     */
    private ScanResults validatePredictions(String predictorName, List predictions, ProteinProfile testProfile, double sim, boolean print, boolean makeLogo)
    {
        // order predictions by decision values
        List sortedPredictions = sortPredictions(predictions);
        int numPosPredictions = 0;
        int numNegPredictions = 0;

        HashSequenceDB seqDB= new HashSequenceDB();
        List geneNamesList = new ArrayList();
        List transcriptNamesList = new ArrayList();

        List posValidSeqList = getSeqList(posValidProfile);
        String domainName = testProfile.getName();
        List negValidSeqList = getSeqList(negValidProfile);
        int numFP = 0;
        int numTP = 0;
        int numPosPredInIRefWeb = 0;

        // Protein name = domain name without its trailing "-<suffix>", if any.
        String proteinName = domainName;
        int dashIx = domainName.lastIndexOf("-");
        if (dashIx > -1)
            proteinName = domainName.substring(0,dashIx);

        // Database prefix for reported identifiers; other organisms are left bare.
        String idPrefix = "";
        if (Constants.HUMAN.equals(organism))
            idPrefix = "Ensembl:";
        else if (Constants.FLY.equals(organism))
            idPrefix = "FLYBASE:";

        StringBuffer predictionOutput = new StringBuffer();
        for (int i = 0; i < sortedPredictions.size(); i++)
        {
            Prediction pred = (Prediction)sortedPredictions.get(i);
            if (pred.getPrediction() != 1.0)
            {
                numNegPredictions = numNegPredictions + 1;
                continue;
            }

            String binder = pred.peptideSeq;
            String decValueString = Utils.doubleToString(pred.getDecValue(), 3);

            // A peptide unknown to the genome maps contributes no names.
            List geneNameList = (List)genomeSeqToGeneNameMap.get(binder);
            if (geneNameList == null)
                geneNameList = Collections.EMPTY_LIST;
            List transcriptNameList = (List)genomeSeqToTranscriptNameMap.get(binder);
            if (transcriptNameList == null)
                transcriptNameList = Collections.EMPTY_LIST;

            StringBuffer transcriptNames = new StringBuffer();
            for (int j = 0; j < transcriptNameList.size(); j++)
            {
                String transcriptName = (String)transcriptNameList.get(j);
                if (idPrefix.length() > 0)
                    transcriptName = idPrefix + transcriptName;
                transcriptNames.append(transcriptName).append(" ");

                if (!transcriptNamesList.contains(transcriptName))
                    transcriptNamesList.add(transcriptName);
            }
            for (int j = 0; j < geneNameList.size(); j++)
            {
                String geneName = (String)geneNameList.get(j);
                if (idPrefix.length() > 0)
                    geneName = idPrefix + geneName;

                if (!geneNamesList.contains(geneName))
                    geneNamesList.add(geneName);
            }

            try
            {
                Sequence seq = ProteinTools.createProteinSequence(binder,domainName.toUpperCase() + numPosPredictions);
                seqDB.addSequence(seq);
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
            }

            List posExpMethodsList = lookup( posValidDataMap,  binder) ;
            List negExpMethodList = lookup( negValidDataMap,  binder) ;

            // Collect the 1-based positions of the transcripts (un-prefixed ids)
            // that iRefWeb links to this protein.
            StringBuffer iRefHitPositions = new StringBuffer();
            for (int ii = 0; ii < transcriptNameList.size(); ii++)
            {
                String ensemblTRSIdB = (String)transcriptNameList.get(ii);
                if (iRefWeb.lookupEnsemblId(proteinName, ensemblTRSIdB))
                {
                    if (iRefHitPositions.length() > 0)
                        iRefHitPositions.append(",");
                    iRefHitPositions.append(ii + 1);
                }
            }
            boolean inIRefWeb = iRefHitPositions.length() > 0;
            String iRefWebString = inIRefWeb ? "IR(" + iRefHitPositions + ")" : "";
            if (inIRefWeb)
                numPosPredInIRefWeb = numPosPredInIRefWeb + 1;

            boolean inPosValid = posValidSeqList.contains(binder);

            // is this peptide in the valid positives (or known to iRefWeb)?
            if (inPosValid || inIRefWeb)
            {
                if (print)
                {
                    if (inIRefWeb)
                        posExpMethodsList.add(iRefWebString);

                    StringBuffer posExpMethodString = new StringBuffer();
                    for (int ii = 0; ii < posExpMethodsList.size(); ii++)
                    {
                        if (ii > 0)
                            posExpMethodString.append(",");
                        posExpMethodString.append(posExpMethodsList.get(ii));
                    }
                    String line = "*\t"+ binder + "\t" + decValueString+ "\t" + posExpMethodString + "\t" + transcriptNames;
                    System.out.println(line);
                    predictionOutput.append(line).append("\n");
                }
                if (inPosValid)
                    numTP = numTP + 1;
            }
            // is this peptide in the negatives? (iRefWeb hits were handled above,
            // so there is never an IR(...) entry to add here)
            else if (negValidSeqList.contains(binder))
            {
                if (print)
                {
                    StringBuffer negExpMethodString = new StringBuffer();
                    for (int ii = 0; ii < negExpMethodList.size(); ii++)
                    {
                        if (ii > 0)
                            negExpMethodString.append(",");
                        negExpMethodString.append(negExpMethodList.get(ii));
                    }
                    String line = "X\t"+ binder + "\t" + decValueString+ "\t" + negExpMethodString + "\t" + transcriptNames;
                    System.out.println(line);
                    predictionOutput.append(line).append("\n");
                }
                numFP = numFP + 1;
            }
            else if (print)
            {
                String line = " \t"+ binder + "\t" + decValueString+ "\t" + "-" + "\t" + transcriptNames;
                System.out.println(line);
                predictionOutput.append(line).append("\n");
            }

            numPosPredictions = numPosPredictions + 1;
        }

        System.out.println();
        System.out.println("\t=== SUMMARY === ");

        if (numPosPredictions == 0)
        {
            System.out.println("\tNo positive predictions made...");
            // NOTE(review): numNegPred and domainSim are left at their defaults on
            // this path, as before - confirm whether downstream tables expect them.
            ScanResults emptyResults = new ScanResults(predictorName, testProfile.getName());
            emptyResults.numPosValid = posValidSeqList.size();
            emptyResults.numNegValid = negValidSeqList.size();
            emptyResults.numPosPredInIRefWeb = numPosPredInIRefWeb;
            emptyResults.numIRefWeb = iRefWeb.numiRefWebInteractions(proteinName);
            return emptyResults;
        }
        System.out.println("\tNumber of transcripts: " + transcriptNamesList.size());
        System.out.println("\tNumber of genes: " + geneNamesList.size());

        if (print)
        {
            System.out.println("\tGene names:");
            StringBuffer geneIds = new StringBuffer();
            for (int ii = 0; ii < geneNamesList.size(); ii++)
            {
                String geneId = (String)geneNamesList.get(ii);
                System.out.print(geneId + " ");
                if (ii > 0)
                    geneIds.append(" ");
                geneIds.append(geneId);
            }
            if (bingo)
            {
                System.out.println("\tPerforming Bingo analysis...");
                bingoAlgorithm.analyze(geneIds.toString(),domainName);
                System.out.println("\tDone Bingo analysis...");
            }
            System.out.println();

            BufferedWriter bw = null;
            try
            {
                System.out.println("\tWriting predictions to file...");
                bw = new BufferedWriter(new FileWriter( new File(outDir + "/"+ testProfile.getName()+"."+predictorName+".predictions.txt")));
                bw.write(predictionOutput.toString());
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
            }
            finally
            {
                // Always release the file handle, even when the write failed.
                if (bw != null)
                {
                    try
                    {
                        bw.close();
                    }
                    catch(IOException e)
                    {
                        System.out.println("Exception: " + e);
                    }
                }
            }
        }
        ProteinProfile predProfile = PDZSVMUtils.makeProfile(testProfile, seqDB, "",false, false);

        // makeProfile may yield no profile (printPredictions guards against this too).
        if (predictorName.equals("SVM") && makeLogo && predProfile != null)
        {
            PDZSVMUtils.saveToProfile(predProfile,outDir + "/"+ predProfile.getName()+"."+predictorName+".pep.txt");
        }

        // Similarity between predicted and validated profiles; stays 0 when either is missing.
        double profileSim = 0;
        if (posValidProfile != null && predProfile != null)
            profileSim =  1.0 - ProteinProfileDistance.calculateDistributionDistance(predProfile, posValidProfile);

        System.out.println("\t # Pos Pred: " + numPosPredictions);

        System.out.println("\t # True Pos: " + numTP + " out of " + posValidSeqList.size());
        System.out.println("\t# False Pos: " + numFP + " out of " + negValidSeqList.size());

        ScanResults results = new ScanResults(predictorName, testProfile.getName());
        results.numFP = numFP;
        results.numTP = numTP;
        results.numPosPred = numPosPredictions;
        results.numNegPred = numNegPredictions;

        results.domainSim = sim;
        results.predProfileSim = profileSim;
        results.numPosValid = posValidSeqList.size();
        results.numNegValid = negValidSeqList.size();

        results.numPosPredInIRefWeb = numPosPredInIRefWeb;
        results.numIRefWeb = iRefWeb.numiRefWebInteractions(proteinName);
        return results;
    }

    /**
     * Command-line entry point: reads the scan description file and runs the scan.
     * <p>
     * Arguments:
     * <ol>
     *   <li>path of the scan info file;</li>
     *   <li>validation switch - <code>N</code> disables validation, anything else enables it;</li>
     *   <li>Bingo switch - <code>N</code> disables the Bingo analysis, anything else enables it.</li>
     * </ol>
     * With fewer than three arguments a usage message is printed and the program
     * exits with status 1.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args)
    {
        if (args == null || args.length < 3)
        {
            System.err.println("Usage: ProteomeScan <scanInfoFile> <validate: Y|N> <bingo: Y|N>");
            System.exit(1);
            return;
        }

        String scanFileName = args[0];
        // Both switches default to "on"; only an explicit "N" turns them off.
        boolean validate = !args[1].equals("N");
        boolean bingo = !args[2].equals("N");

        ProteomeScan g = new ProteomeScan();
        System.out.println("\tScan info file: " + scanFileName + ", validate? " + validate+ ", bingo? " + bingo);
        List scanInfoList = g.readScanInfo(scanFileName);
        g.run(scanInfoList, validate, bingo);
    }
}