package org.baderlab.pdzsvm.utils;

import org.baderlab.brain.*;
import org.baderlab.pdzsvm.data.Data;
import org.baderlab.pdzsvm.data.Datum;
import org.baderlab.pdzsvm.encoding.Features;
import org.baderlab.pdzsvm.encoding.Chen16FeatureEncoding;
import org.baderlab.pdzsvm.evaluation.Prediction;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.db.HashSequenceDB;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.BioException;
import org.biojava.bio.Annotation;

import javax.imageio.ImageIO;
import java.util.*;
import java.util.List;
import java.io.*;
import java.awt.*;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

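/**
 * Static helper methods for PDZSVM: merging and balancing profile lists,
 * writing profiles, logos and matrices to disk, converting organism and
 * method names between their long and short forms, and loading FASTA
 * sequences.
 */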
public class PDZSVMUtils
{

    /**
     * Merges positive and negative profiles into one list.
     * Every positive profile is kept, in its original order; a negative
     * profile is appended only when no positive profile has the same name.
     *
     * @param posProfileList list of positive ProteinProfile objects
     * @param negProfileList list of negative ProteinProfile objects
     * @return a new list with the positives followed by the unmatched negatives
     */
    public static List allProfileList(List posProfileList, List negProfileList)
    {
        // Index the positives by name; this also casts every positive element.
        HashMap positivesByName = PDZSVMUtils.profileListToHashMap(posProfileList);
        List merged = new ArrayList(posProfileList);
        for (Iterator negIt = negProfileList.iterator(); negIt.hasNext();)
        {
            ProteinProfile candidate = (ProteinProfile) negIt.next();
            boolean nameTakenByPositive = positivesByName.containsKey(candidate.getName());
            if (!nameTakenByPositive)
            {
                merged.add(candidate);
            }
        }
        return merged;
    }

    /**
     * Pairs each positive profile with the first negative profile that has
     * the same name, organism and experimental method. Positive profiles
     * without such a partner are dropped.
     *
     * @param posProfileList list of positive ProteinProfile objects
     * @param negProfileList list of negative ProteinProfile objects
     * @return a two-element array: index 0 holds the retained positives,
     *         index 1 the matching negatives, in the same order
     */
    public static List[] balanceLists(List posProfileList, List negProfileList)
    {
        List keptPositives = new ArrayList();
        List keptNegatives = new ArrayList();

        for (Iterator posIt = posProfileList.iterator(); posIt.hasNext();)
        {
            ProteinProfile positive = (ProteinProfile) posIt.next();
            ProteinProfile partner = null;

            Iterator negIt = negProfileList.iterator();
            while (partner == null && negIt.hasNext())
            {
                ProteinProfile negative = (ProteinProfile) negIt.next();
                String negName = negative.getName();
                String posName = positive.getName();
                String posOrganism = positive.getOrganism();
                String negOrganism = negative.getOrganism();
                String posMethod = positive.getExperimentalMethod();
                String negMethod = negative.getExperimentalMethod();

                // Compare in the same order, stopping at the first mismatch.
                if (!negName.equals(posName))
                {
                    continue;
                }
                if (!posOrganism.equals(negOrganism))
                {
                    continue;
                }
                if (!posMethod.equals(negMethod))
                {
                    continue;
                }
                partner = negative;
            }

            if (partner != null)
            {
                keptPositives.add(positive);
                keptNegatives.add(partner);
            }
        }

        return new List[] { keptPositives, keptNegatives };
    }

    /**
     * Renders per-fold predictions as R source text. The output defines two
     * R lists, {@code actual} and {@code dec}, each with one vector per fold
     * holding the actual values and the decision values respectively.
     *
     * @param foldPredictionList list of folds; each fold is a non-empty List
     *                           of Prediction objects
     * @return the {@code actual} definitions, a blank line, then the
     *         {@code dec} definitions
     */
    public static StringBuffer toRString(List foldPredictionList)
    {
        StringBuffer actualBuf = new StringBuffer("actual = vector(mode=\"list\"," + foldPredictionList.size() + ")\n");
        StringBuffer decBuf = new StringBuffer("dec = vector(mode=\"list\"," + foldPredictionList.size() + ")\n");

        for (int fold = 0; fold < foldPredictionList.size(); fold++)
        {
            List foldPredictions = (List) foldPredictionList.get(fold);
            // R lists are 1-based, hence fold + 1.
            String slot = "[[" + (fold + 1) + "]]=c(";

            Prediction current = (Prediction) foldPredictions.get(0);
            actualBuf.append("actual" + slot + current.getActual());
            decBuf.append("dec" + slot + current.getDecValue());

            int next = 1;
            while (next < foldPredictions.size())
            {
                current = (Prediction) foldPredictions.get(next);
                actualBuf.append("," + current.getActual());
                decBuf.append("," + current.getDecValue());
                next++;
            }

            actualBuf.append(")\n");
            decBuf.append(")\n");
        }

        actualBuf.append("\n");
        actualBuf.append(decBuf);
        return actualBuf;
    }

    /**
     * Computes the fraction of positions at which two strings agree.
     * An upper-case {@code X} in either string matches any character.
     * Positions are taken from {@code y1}; {@code y2} must be at least as long.
     *
     * @param y1 first string; its length is the denominator
     * @param y2 second string
     * @return matching positions divided by the length of {@code y1}
     *         (NaN when {@code y1} is empty)
     */
    static public double identity(String y1, String y2)
    {
        final char wildcard = 'X';
        int matches = 0;
        for (int pos = 0; pos < y1.length(); pos++)
        {
            char first = y1.charAt(pos);
            char second = y2.charAt(pos);
            if (first == wildcard || second == wildcard || first == second)
            {
                matches++;
            }
        }
        return matches / (double) y1.length();
    }
    /**
     * For every profile, finds the most similar reference profile, comparing
     * the strings produced by {@link Chen16FeatureEncoding#getFeatures} for
     * each domain sequence with {@link #identity(String, String)}.
     * When several references tie, the last one in {@code refProfileList} wins.
     *
     * @param profileList    list of ProteinProfile objects to look up
     * @param refProfileList list of ProteinProfile objects to search
     * @return map from profile name to a two-element List: the nearest
     *         reference profile's name (String) and the similarity (Double).
     *         With no references the entry is {@code ["", 0.0]}.
     */
    public static HashMap makeNNBindingSiteSimMap(List profileList, List refProfileList)
    {
        HashMap nnSimMap= new HashMap();
        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();

        for (int i=0;i < profileList.size();i++)
        {
            ProteinProfile profile = (ProteinProfile)profileList.get(i);
            String domainSeq1  = profile.getDomainSequence();
            String organism1 = organismLongToShortForm(profile.getOrganism());

            String domain16Seq1 = enc.getFeatures(domainSeq1,organism1);
            // identity() is never negative, so 0.0 is a valid lower bound.
            // Double.MIN_VALUE must not be used here: it is the smallest
            // POSITIVE double, so a similarity of exactly 0.0 would never
            // pass the >= test and no neighbour would be recorded.
            double maxSim = 0.0;
            String nnProfileName = "";
            for (int j=0;j < refProfileList.size();j++)
            {
                ProteinProfile refProfile = (ProteinProfile)refProfileList.get(j);
                String domainSeq2 = refProfile.getDomainSequence();
                String organism2 = organismLongToShortForm(refProfile.getOrganism());

                String domain16Seq2 = enc.getFeatures(domainSeq2,organism2);

                double sim = identity(domain16Seq1, domain16Seq2);
                if (sim >= maxSim)
                {
                    maxSim = sim;
                    nnProfileName = refProfile.getName();
                }
            }
            List nnInfo = new ArrayList();
            nnInfo.add(nnProfileName);
            nnInfo.add(maxSim);
            nnSimMap.put(profile.getName(), nnInfo);

        }
        return nnSimMap;
    }
    /**
     * Filters out profiles that do not have enough sequences.
     *
     * @param profileList list of ProteinProfile objects
     * @param size        threshold; a profile is kept only when its number of
     *                    sequences is strictly greater than this value
     * @return a new list with the retained profiles in their original order
     */
    public static List removeSmallProfiles(List profileList, int size)
    {
        List largeEnough = new ArrayList();
        for (Iterator it = profileList.iterator(); it.hasNext();)
        {
            ProteinProfile candidate = (ProteinProfile) it.next();
            if (candidate.getNumSequences() <= size)
            {
                continue;
            }
            largeEnough.add(candidate);
        }
        return largeEnough;
    }
    /**
     * Converts a -1/+1 label to a 0/1 label.
     *
     * @param negOneToPosOne label expected to be exactly -1 or 1
     * @return 0 for -1, 1 for 1, and -1 for any other value
     */
    public static double zeroToOne(double negOneToPosOne)
    {
        if (negOneToPosOne == 1)
        {
            return 1;
        }
        return (negOneToPosOne == -1) ? 0 : -1;
    }

    /**
     * Indexes profiles by name. When two profiles share a name, the one
     * later in the list replaces the earlier one.
     *
     * @param profileList list of ProteinProfile objects
     * @return map from profile name to ProteinProfile
     */
    public static  HashMap profileListToHashMap(List profileList)
    {
        HashMap byName = new HashMap();
        for (Iterator it = profileList.iterator(); it.hasNext();)
        {
            ProteinProfile current = (ProteinProfile) it.next();
            byName.put(current.getName(), current);
        }
        return byName;
    }

    /**
     * Writes a matrix to a text file, one row per line with values separated
     * by tabs. Each row is written with its own length, so jagged arrays are
     * supported. Any failure is reported on standard output and otherwise
     * ignored; the writer is always closed.
     *
     * @param input    the matrix to write
     * @param filename path of the file to create or overwrite
     */
    public static void printMatrix(double[][] input, String filename)
    {
        BufferedWriter bw = null;
        try
        {
            bw = new BufferedWriter(new FileWriter(new File(filename)));
            System.out.println();
            for (int i = 0; i < input.length; i++)
            {
                double[] row = input[i];
                // Use this row's own length rather than the first row's.
                for (int j = 0; j < row.length; j++)
                {
                    if (j > 0)
                    {
                        bw.write("\t");
                    }
                    bw.write(Double.toString(row[j]));
                }
                bw.write("\n");
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception"+ e);
        }
        finally
        {
            // Close even when writing failed so the file handle is released.
            if (bw != null)
            {
                try
                {
                    bw.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception"+ e);
                }
            }
        }
    }

    /**
     * Writes a string to a file, replacing any existing content. Any failure
     * is reported on standard output and otherwise ignored; the writer is
     * always closed.
     *
     * @param input    the text to write
     * @param filename path of the file to create or overwrite
     */
    public static void printString(String input, String filename)
    {
        BufferedWriter bw = null;
        try
        {
            bw = new BufferedWriter(new FileWriter(new File(filename)));
            bw.write(input);
        }
        catch(Exception e)
        {
            System.out.println("Exception"+ e);
        }
        finally
        {
            // Close even when writing failed so the file handle is released.
            if (bw != null)
            {
                try
                {
                    bw.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception"+ e);
                }
            }
        }
    }

    /**
     * Writes a profile to a tab-separated text file: a block of header rows
     * (gene name, organism, taxonomy id, domain details, technique) followed
     * by one numbered row per sequence, each with a clone frequency of 1.
     * The last row has no trailing newline. Write failures are reported on
     * standard output and otherwise ignored.
     *
     * @param profile  the profile to write
     * @param fileName path of the file to create or overwrite
     */
    public static void saveToProfile(ProteinProfile profile, String fileName)
    {
        System.out.println("\tWriting to " + fileName);
        String organism = profile.getOrganism();
        String taxaID;
        if (organism.equals(Constants.WORM_STRING))
            taxaID = "6239";
        else if (organism.equals(Constants.MOUSE_STRING))
            taxaID = "10090";
        else if (organism.equals(Constants.HUMAN_STRING))
            taxaID = "9606";
        else if (organism.equals(Constants.FLY_STRING))
            taxaID = "7227";
        else
            taxaID = "UNK";

        // The gene name is the profile name up to its last '-', if any.
        String domainName = profile.getName();
        int index = domainName.lastIndexOf("-");
        if (index != -1)
            domainName = domainName.substring(0,index);

        // Build in a StringBuilder; repeated String concatenation in the
        // sequence loop below would be quadratic in the number of sequences.
        StringBuilder output = new StringBuilder();
        output.append("Gene Name\t" + domainName + "\n");
        // NOTE(review): the accession is hard-coded for every profile - confirm this is intended.
        output.append("Accession\tRefseq:NP_004877\n");
        output.append("Organism\t"+ profile.getOrganism() +"\n");
        output.append("NCBITaxonomyID\t"+taxaID+"\n");
        output.append("Domain Number\t"+profile.getDomainNumber() +"\n");
        output.append("Domain Type\tPDZ\n");
        output.append("Interpro ID\tIPR001478\n");
        output.append("Technique\t" + profile.getExperimentalMethod() +"\n");
        output.append("Domain sequence\t" + profile.getDomainSequence() +"\n");
        output.append("PeptideName\tPeptide\tCloneFrequency\n");

        Collection seqCollection = profile.getSequenceMap();
        int rowNumber = 1;
        for (Iterator it = seqCollection.iterator(); it.hasNext();)
        {
            Sequence seq = (Sequence)it.next();
            // Rows are newline-separated, with no newline after the last one.
            if (rowNumber > 1)
                output.append("\n");
            output.append(rowNumber + "\t"+ seq.seqString() + "\t1");
            rowNumber++;
        }

        BufferedWriter bw = null;
        try
        {
            bw = new BufferedWriter(new FileWriter(new File(fileName)));
            bw.write(output.toString());
        }
        catch(Exception e)
        {
            System.out.println("Exception:" + e);
        }
        finally
        {
            // Close even when writing failed so the file handle is released.
            if (bw != null)
            {
                try
                {
                    bw.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception:" + e);
                }
            }
        }
    }

    /**
     * Builds a profile from the sequences in {@code seqDB}, copying the
     * metadata of {@code trainProfile}, without saving anything to disk.
     *
     * @param trainProfile profile whose name and metadata are copied
     * @param seqDB        sequences for the new profile
     * @return whatever the five-argument overload returns when called with
     *         a null save directory and both save flags off
     */
    public static ProteinProfile makeProfile(ProteinProfile trainProfile, HashSequenceDB seqDB)
    {
        final String noSaveDir = null;
        final boolean saveProfile = false;
        final boolean saveLogo = false;
        return makeProfile(trainProfile, seqDB, noSaveDir, saveProfile, saveLogo);
    }

    /**
     * Builds a profile from the sequences in {@code seqDB}, giving it the
     * name, organism, domain sequence, domain number and experimental method
     * of {@code trainProfile}, and optionally saves it as a logo image
     * and/or a profile text file.
     * Exceptions raised while building or saving are printed and swallowed,
     * so the result may be null or only partly populated.
     *
     * @param trainProfile profile whose name and metadata are copied
     * @param seqDB        sequences for the new profile
     * @param saveDir      directory prefix used for saved files
     * @param saveProfile  whether to write the profile under
     *                     {@code saveDir + "Profiles/<organism>/"}
     * @param saveLogo     whether to write a logo image into {@code saveDir}
     * @return the new profile, or null when {@code seqDB} has no sequences
     */
    public static ProteinProfile makeProfile(ProteinProfile trainProfile, HashSequenceDB seqDB, String saveDir, boolean saveProfile, boolean saveLogo)
    {
        boolean dbHasSequences = seqDB.sequenceIterator().hasNext();
        if (!dbHasSequences)
        {
            return null;
        }
        String organismFolder = organismLongToShortForm(trainProfile.getOrganism());

        ProteinProfile built = null;
        try
        {
            built = new ProteinProfile(seqDB.sequenceIterator(), 0.01, trainProfile.getName());
            built.setOrganism(trainProfile.getOrganism());
            built.setDomainSequence(trainProfile.getDomainSequence());
            built.setDomainNumber(trainProfile.getDomainNumber());
            built.setExperimentalMethod(trainProfile.getExperimentalMethod());

            if (saveLogo)
            {
                saveAsLogo(built, saveDir, "");
            }
            if (saveProfile)
            {
                String profileDir = saveDir + "Profiles/" + organismFolder + "/";
                saveToProfile(built, profileDir + trainProfile.getName() + ".pep.txt");
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
        return built;
    }
    /**
     * Saves a sequence logo for the profile, using the profile's own name
     * as the logo name.
     *
     * @param profile  profile to draw
     * @param saveDir  directory prefix for the image file
     * @param theClass class label passed through to the four-argument overload
     */
    public static void saveAsLogo(ProteinProfile profile,  String saveDir, String theClass)
    {
        // A null logo name makes the four-argument overload use the profile name.
        final String useProfileName = null;
        saveAsLogo(profile, useProfileName, saveDir, theClass);
    }
    /**
     * Draws the profile as a sequence logo and writes it as a PNG named
     * {@code saveDir + logoName + "-pos.png"}, or {@code "-neg.png"} when
     * {@code theClass} equals {@code Constants.CLASS_NO}.
     * Failures are reported on standard output and otherwise ignored.
     *
     * @param profile  profile to draw
     * @param logoName base name of the image; when null the profile name is used
     * @param saveDir  directory prefix for the image file
     * @param theClass class label that selects the file name suffix
     */
    public static void saveAsLogo(ProteinProfile profile, String logoName, String saveDir, String theClass)
    {
        try
        {
            String posneg = "-pos";
            if (theClass.equals(Constants.CLASS_NO))
            {
                posneg ="-neg";
            }
            if (logoName == null)
                logoName = profile.getName();
            String logoFileName = saveDir + logoName +posneg+".png";
            ProteinSequenceLogo logo = new ProteinSequenceLogo( profile, 180);
            // ImageIO opens and closes the target file itself. No second
            // stream is opened on the same path: it was never written to,
            // leaked on failure, and could block ImageIO from replacing
            // the file.
            boolean written = ImageIO.write(logo.drawSequenceLogo(), "png", new File(logoFileName));
            // Report success only once the image is actually on disk.
            if (written)
            {
                System.out.println("\tSaved logo to: " + logoFileName);
            }
            else
            {
                System.out.println("Exception: no PNG writer available for " + logoFileName);
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
    }

    /**
     * Builds a profile from the sequences in {@code seqDB} with the given
     * domain details, without saving anything to disk.
     *
     * @param domainName     name for the profile and its domain
     * @param domainNum      domain number
     * @param domainSequence domain sequence
     * @param organism       organism, in long form
     * @param seqDB          sequences for the new profile
     * @return whatever the eight-argument overload returns when called with
     *         a null save directory and both save flags off
     */
    public static ProteinProfile makeProfile(String domainName, int domainNum, String domainSequence, String organism, HashSequenceDB seqDB)
    {
        final String noSaveDir = null;
        final boolean saveProfile = false;
        final boolean saveLogo = false;
        return makeProfile(domainName, domainNum, domainSequence, organism, seqDB, noSaveDir, saveProfile, saveLogo);
    }

    /**
     * Builds a profile from the sequences in {@code seqDB} with the given
     * domain details and the experimental method "PDZBase", and optionally
     * saves it as a logo image and/or a profile text file.
     * Exceptions raised while building or saving are printed and swallowed,
     * so the result may be null or only partly populated.
     *
     * @param domainName     name for the profile and its domain
     * @param domainNum      domain number
     * @param domainSequence domain sequence
     * @param organism       organism, in long form
     * @param seqDB          sequences for the new profile
     * @param saveDir        directory prefix used for saved files
     * @param saveProfile    whether to write the profile under
     *                       {@code saveDir + "Profiles/<organism>/"}
     * @param saveLogo       whether to write a logo image into {@code saveDir}
     * @return the new profile, or null when {@code seqDB} has no sequences
     */
    public static ProteinProfile makeProfile(String domainName, int domainNum, String domainSequence, String organism, HashSequenceDB seqDB, String saveDir, boolean saveProfile, boolean saveLogo)
    {
        boolean dbHasSequences = seqDB.sequenceIterator().hasNext();
        if (!dbHasSequences)
        {
            return null;
        }
        String organismFolder = organismLongToShortForm(organism);

        ProteinProfile built = null;
        try
        {
            built = new ProteinProfile(seqDB.sequenceIterator(), 0.01, domainName);
            built.setOrganism(organism);
            built.setDomainSequence(domainSequence);
            built.setDomainNumber(domainNum);
            built.setExperimentalMethod("PDZBase");
            built.setDomainName(domainName);

            if (saveLogo)
            {
                saveAsLogo(built, saveDir, "");
            }
            if (saveProfile)
            {
                String targetFile = saveDir + "Profiles/" + organismFolder + "/" + domainName + ".pep.txt";
                System.out.println("Saving to " + targetFile);
                saveToProfile(built, targetFile);
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
        return built;
    }


    /**
     * Reads protein profiles from a project file using a length of 5.
     *
     * @param projectFileName path of the project file
     * @param codonFileName   path of the codon file
     * @return list of profiles, as returned by the three-argument overload
     */
    public static List readProteinProfileList(String projectFileName, String codonFileName)
    {
        final int defaultLength = 5;
        return readProteinProfileList(projectFileName, codonFileName, defaultLength);
    }


    /**
     * Reads protein profiles from a project file with unique peptides
     * enabled. Prints a loading message before delegating.
     *
     * @param projectFileName path of the project file
     * @param codonFileName   path of the codon file
     * @param length          length passed through to the reader
     * @return list of profiles, as returned by the four-argument overload
     */
    public static List readProteinProfileList(String projectFileName, String codonFileName, int length)
    {
        System.out.println("\n\tLoading data for: " + projectFileName);

        final boolean uniquePeptides = true;
        return readProteinProfileList(projectFileName, codonFileName, length, uniquePeptides);
    }

    /**
     * Reads protein profiles from a project file by delegating to
     * {@code PeptideToProfileReader.readPeptidesAsProfiles} with the
     * C terminus and a fixed value of 0.01. Prints a loading message first.
     *
     * @param projectFileName path of the project file
     * @param codonFileName   path of the codon file
     * @param length          length passed through to the reader
     * @param uniquePeptides  unique-peptides flag passed through to the reader
     * @return list of profiles produced by the reader
     */
    public static List readProteinProfileList(String projectFileName, String codonFileName, int length, boolean uniquePeptides)
    {
        System.out.println("\n\tLoading data for: " + projectFileName);

        File project = new File(projectFileName);
        File codons = new File(codonFileName);
        return PeptideToProfileReader.readPeptidesAsProfiles(
                project, length, ProteinTerminus.C, 0.01, codons, uniquePeptides, true);
    }


    /**
     * Builds the display name of a method. The base is {@code dbMethod}
     * when it is non-empty, otherwise {@code expMethod}; a non-empty
     * {@code artMethod} is appended after a hyphen.
     *
     * @param expMethod experimental method name, used when dbMethod is empty
     * @param dbMethod  database method name, preferred when non-empty
     * @param artMethod optional suffix; the empty string means none
     * @return the combined display name
     */
    public static String methodToDisplayForm(String expMethod, String dbMethod, String artMethod)
    {
        String base = dbMethod.equals("") ? expMethod : dbMethod;
        if (artMethod.equals(""))
        {
            return base;
        }
        return base + "-" + artMethod;
    }
    /**
     * Converts an organism's long name to its short form using the pairs
     * defined in {@code Constants} (human, mouse, worm, fly, human mutant).
     *
     * @param longForm long organism name
     * @return the matching short form, or the empty string when the name
     *         is not recognised
     */
    public static String organismLongToShortForm(String longForm)
    {
        if (longForm.equals(Constants.HUMAN_STRING))
        {
            return Constants.HUMAN;
        }
        if (longForm.equals(Constants.MOUSE_STRING))
        {
            return Constants.MOUSE;
        }
        if (longForm.equals(Constants.WORM_STRING))
        {
            return Constants.WORM;
        }
        if (longForm.equals(Constants.FLY_STRING))
        {
            return Constants.FLY;
        }
        if (longForm.equals(Constants.HUMAN_MUTANT_STRING))
        {
            return Constants.HUMAN_MUTANT;
        }
        return "";
    }

    /**
     * Converts an organism's short form to its long name using the pairs
     * defined in {@code Constants} (human, mouse, worm, fly, human mutant).
     *
     * @param shortForm short organism name
     * @return the matching long name, or the empty string when the short
     *         form is not recognised
     */
    public static String organismShortToLongForm(String shortForm)
    {
        if (shortForm.equals(Constants.HUMAN))
        {
            return Constants.HUMAN_STRING;
        }
        if (shortForm.equals(Constants.MOUSE))
        {
            return Constants.MOUSE_STRING;
        }
        if (shortForm.equals(Constants.WORM))
        {
            return Constants.WORM_STRING;
        }
        if (shortForm.equals(Constants.FLY))
        {
            return Constants.FLY_STRING;
        }
        if (shortForm.equals(Constants.HUMAN_MUTANT))
        {
            return Constants.HUMAN_MUTANT_STRING;
        }
        return "";
    }

    /**
     * Converts a method's short form to its long name using the pairs
     * defined in {@code Constants}. The phage display short form maps to
     * {@code Constants.PHAGE_DISPLAY_HIGH_STRING}.
     *
     * @param shortForm short method name
     * @return the matching long name, or the empty string when the short
     *         form is not recognised
     */
    public static String methodShortToLongForm(String shortForm)
    {
        if (shortForm.equals(Constants.PROTEIN_MICROARRAY))
        {
            return Constants.PROTEIN_MICROARRAY_STRING;
        }
        if (shortForm.equals(Constants.PDZBASE))
        {
            return Constants.PDZBASE_STRING;
        }
        if (shortForm.equals(Constants.PHAGE_DISPLAY))
        {
            return Constants.PHAGE_DISPLAY_HIGH_STRING;
        }
        if (shortForm.equals(Constants.ENSEMBL))
        {
            return Constants.ENSEMBL_STRING;
        }
        return "";
    }

    /**
     * Converts a method's long name to its short form using the pairs
     * defined in {@code Constants}. Any name that starts with the high or
     * low phage display string maps to {@code Constants.PHAGE_DISPLAY}.
     *
     * @param longForm long method name
     * @return the matching short form, or null when the name is not
     *         recognised
     */
    public static String methodLongToShortForm(String longForm)
    {
        if (longForm.equals(Constants.PROTEIN_MICROARRAY_STRING))
        {
            return Constants.PROTEIN_MICROARRAY;
        }
        if (longForm.equals(Constants.PDZBASE_STRING))
        {
            return Constants.PDZBASE;
        }
        boolean isPhageDisplay = longForm.startsWith(Constants.PHAGE_DISPLAY_HIGH_STRING)
                || longForm.startsWith(Constants.PHAGE_DISPLAY_LOW_STRING);
        if (isPhageDisplay)
        {
            return Constants.PHAGE_DISPLAY;
        }
        if (longForm.equals(Constants.ENSEMBL_STRING))
        {
            return Constants.ENSEMBL;
        }
        return null;
    }

    /**
     * Turns a map of Features to Integer positions into a list in which
     * each key sits at the index given by its value. Entries are also
     * printed to standard output, ordered by {@code MyComparator}.
     * Slots that no entry points at keep the placeholder "".
     *
     * @param theMap map from Features to Integer; each value must be a valid
     *               index into a list of {@code theMap.size()} elements
     * @return list of keys positioned by their mapped value
     */
    public static List sortMap(HashMap theMap)
    {
        // Pre-fill so that set(index, ...) below is valid for every slot.
        List positioned = new ArrayList(theMap.size());
        for (int slot = 0; slot < theMap.size(); slot++)
        {
            positioned.add("");
        }

        ArrayList entries = new ArrayList(theMap.entrySet());
        Collections.sort(entries, new MyComparator());

        for (Iterator entryIt = entries.iterator(); entryIt.hasNext();)
        {
            Map.Entry entry = (Map.Entry) entryIt.next();
            Features feature = (Features) entry.getKey();
            int position = ((Integer) entry.getValue()).intValue();
            System.out.println(position + "\t" + feature);
            positioned.set(position, feature);
        }
        return positioned;
    }

    /**
     * Orders map entries by their Integer value, ascending. Entries with
     * equal values are ordered by the case-insensitive string form of
     * their Features key.
     */
    static class MyComparator implements Comparator
    {
        public int compare(Object obj1, Object obj2)
        {
            Map.Entry left = (Map.Entry) obj1;
            Map.Entry right = (Map.Entry) obj2;

            Integer leftValue = (Integer) left.getValue();
            Integer rightValue = (Integer) right.getValue();

            // Primary key: the mapped integer, smallest first.
            int byValue = leftValue.compareTo(rightValue);
            if (byValue != 0)
            {
                return byValue;
            }

            // Tie-break: alphabetical by key text, ignoring case.
            String leftText = ((Features) left.getKey()).toString();
            String rightText = ((Features) right.getKey()).toString();
            return leftText.compareToIgnoreCase(rightText);
        }

    }


   /**
    * Returns the symbols supplied by
    * {@code ProteinSequenceUtil.get20aaAlphabet()}, leaving out any symbol
    * named "SEC" or "PYL".
    *
    * @return list of Symbol objects
    */
   public static ArrayList get20aaAlphabet() {
        ArrayList standardResidues = new ArrayList(20);
        HashMap symbolsByKey = ProteinSequenceUtil.get20aaAlphabet();
        Iterator symbolIt = symbolsByKey.values().iterator();
        while (symbolIt.hasNext()) {
            Symbol candidate = (Symbol) symbolIt.next();
            boolean excluded = candidate.getName().equals("SEC") || candidate.getName().equals("PYL");
            if (excluded) {
                continue;
            }
            standardResidues.add(candidate);
        }
        return standardResidues;
    }

    /**
     * Returns the BioJava symbol for a one-letter amino acid code
     * (case-insensitive).
     * <p>
     * "X" is replaced by a residue drawn from the 20-letter alphabet. The
     * generator is re-created with the same fixed seed on every call, so
     * the substitute is the same residue each time.
     *
     * @param residueChar one-letter amino acid code, or "X"
     * @return the matching symbol, or null when the code is not one of the
     *         20 standard residues
     */
    public static AtomicSymbol get20aaSymbol(String residueChar)
    {
        if (residueChar.equalsIgnoreCase("X"))
        {
            // pick a residue from the alphabet (fixed seed: see method comment)
            String alphabetString = "ARNDCQEGHILKMFPSTWYV";
            Random rand = new Random( 19580427 );
            int random = rand.nextInt(20);
            residueChar = String.valueOf(alphabetString.charAt(random));
        }

        AtomicSymbol s = null;
        if (residueChar.equalsIgnoreCase("A"))
        {
            s = ProteinTools.a();
        }
        else if (residueChar.equalsIgnoreCase("R"))
        {
            s = ProteinTools.r();
        }
        else if (residueChar.equalsIgnoreCase("N"))
        {
            s = ProteinTools.n();
        }
        else if (residueChar.equalsIgnoreCase("D"))
        {
            s = ProteinTools.d();
        }
        else if (residueChar.equalsIgnoreCase("C"))
        {
            s = ProteinTools.c();
        }
        else if (residueChar.equalsIgnoreCase("Q"))
        {
            s = ProteinTools.q();
        }
        else if (residueChar.equalsIgnoreCase("E"))
        {
            s = ProteinTools.e();
        }
        else if (residueChar.equalsIgnoreCase("G"))
        {
            // Glycine. This branch previously returned r() (arginine).
            s = ProteinTools.g();
        }
        else if (residueChar.equalsIgnoreCase("H"))
        {
            s = ProteinTools.h();
        }
        else if (residueChar.equalsIgnoreCase("I"))
        {
            s = ProteinTools.i();
        }
        else if (residueChar.equalsIgnoreCase("L"))
        {
            s = ProteinTools.l();
        }
        else if (residueChar.equalsIgnoreCase("K"))
        {
            s = ProteinTools.k();
        }
        else if (residueChar.equalsIgnoreCase("M"))
        {
            s = ProteinTools.m();
        }
        else if (residueChar.equalsIgnoreCase("F"))
        {
            s = ProteinTools.f();
        }
        else if (residueChar.equalsIgnoreCase("P"))
        {
            s = ProteinTools.p();
        }
        else if (residueChar.equalsIgnoreCase("S"))
        {
            s = ProteinTools.s();
        }
        else if (residueChar.equalsIgnoreCase("T"))
        {
            s = ProteinTools.t();
        }
        else if (residueChar.equalsIgnoreCase("W"))
        {
            s = ProteinTools.w();
        }
        else if (residueChar.equalsIgnoreCase("Y"))
        {
            s = ProteinTools.y();
        }
        else if (residueChar.equalsIgnoreCase("V"))
        {
            s = ProteinTools.v();
        }
        // No branch for "X": it has already been replaced above.
        return s;
    }

    /**
     * Reads a protein FASTA file into a map keyed by upper-cased sequence
     * name. A later sequence with the same name replaces an earlier one.
     * The file is closed even when reading fails.
     *
     * @param fastaFileName path of the FASTA file
     * @return map from upper-cased sequence name to Sequence
     * @throws IOException  if the file cannot be opened or closed
     * @throws BioException if BioJava fails to read a sequence
     */
    public static HashMap readAlignment(String fastaFileName) throws IOException, BioException {
        File multipleSequenceAlignmentFile = new File(fastaFileName);
        HashMap sequenceNameToSequence = new HashMap();
        BufferedReader brMSA = new BufferedReader(new FileReader(multipleSequenceAlignmentFile));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                sequenceNameToSequence.put(seq.getName().toUpperCase(), seq);
            }
        }
        finally
        {
            // Release the file handle on both the normal and the error path.
            brMSA.close();
        }
        return sequenceNameToSequence;
    }


    /**
     * Reads a protein FASTA file into a sequence database, keeping only the
     * last {@code length} residues of each sequence and only the first
     * sequence seen for each distinct residue string.
     * Sequences shorter than {@code length} are skipped. A {@code length}
     * of -1 keeps whole sequences. The file is closed even when reading
     * fails.
     *
     * @param fastaFileName path of the FASTA file
     * @param length        number of trailing residues to keep, or -1 for all
     * @return database of the retained sequences, keyed by upper-cased name
     * @throws IOException  if the file cannot be opened or closed
     * @throws BioException if BioJava fails to read or create a sequence
     */
    public static HashSequenceDB readAlignmentToDB(String fastaFileName, int length) throws IOException, BioException {
        File multipleSequenceAlignmentFile = new File(fastaFileName);
        HashSequenceDB sequenceNameToSequence = new HashSequenceDB();
        // Residue strings already stored, used to drop duplicates.
        Set seenSequences = new HashSet();
        BufferedReader brMSA = new BufferedReader(new FileReader(multipleSequenceAlignmentFile));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                String seqString =seq.seqString();
                if (seqString.length() < length)
                    continue;
                if (length != -1)
                    seqString = seqString.substring(seq.length()-length, seq.length());

                Sequence seq2 = ProteinTools.createProteinSequence(seqString,seq.getName().toUpperCase());
                if (!seenSequences.contains(seqString))
                {
                    try
                    {
                        sequenceNameToSequence.addSequence(seq2.getName().toUpperCase(), seq2);
                        // Recorded only after a successful add, so a rejected
                        // sequence does not block a later one with the same residues.
                        seenSequences.add(seqString);
                    }
                    catch(Exception ignored)
                    {
                        // Best effort: a sequence the database rejects is skipped.
                        // NOTE(review): presumably this is a duplicate id - confirm.
                    }
                }
            }
        }
        finally
        {
            // Release the file handle on both the normal and the error path.
            brMSA.close();
        }
        return sequenceNameToSequence;
    }

    /**
     * Reads a protein FASTA file and groups transcript ids by sequence.
     * The id is the text after the first ':' in the first
     * whitespace-separated description token that starts with "transcript"
     * and contains a ':'. If there is no such token the id is the empty
     * string.
     * Only the last {@code length} residues of each sequence are used as
     * the key. Sequences shorter than {@code length} are skipped. A
     * {@code length} of -1 uses whole sequences. The file is closed even
     * when reading fails.
     *
     * @param fastaFileName path of the FASTA file
     * @param length        number of trailing residues to keep, or -1 for all
     * @return map from residue string to a List of transcript ids
     * @throws IOException  if the file cannot be opened or closed
     * @throws BioException if BioJava fails to read a sequence
     */
    public static HashMap genomeSeqToTranscriptNameMap(String fastaFileName, int length) throws IOException, BioException {
        File multipleSequenceAlignmentFile = new File(fastaFileName);
        HashMap seqToNameMap = new HashMap();
        BufferedReader brMSA = new BufferedReader(new FileReader(multipleSequenceAlignmentFile));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                Annotation ann = seq.getAnnotation();
                String key = "description";
                String desc = (String)ann.getProperty(key);
                String[] splitdesc = desc.split("\\s");
                String geneId ="";
                for (int i=0; i< splitdesc.length;i++)
                {
                    if (splitdesc[i].startsWith("transcript"))
                    {
                        String[] splitgene = splitdesc[i].split(":");
                        // A token without ':' has no id part; keep looking.
                        if (splitgene.length > 1)
                        {
                            geneId = splitgene[1];
                            break;
                        }
                    }
                }

                String seqString =seq.seqString();
                if (seqString.length() < length)
                    continue;
                if (length != -1)
                    seqString = seqString.substring(seq.length()-length, seq.length());
                List nameList = (List)seqToNameMap.get(seqString);
                if (nameList == null)
                {
                    nameList = new ArrayList();
                    seqToNameMap.put(seqString, nameList);
                }
                nameList.add(geneId);
            }
        }
        finally
        {
            // Release the file handle on both the normal and the error path.
            brMSA.close();
        }
        return seqToNameMap;
    }
    /**
     * Reads a protein FASTA file and groups gene ids by sequence.
     * The id is the text after the first ':' in the first
     * whitespace-separated description token that starts with "gene" and
     * contains a ':'. If there is no such token the id is the empty string.
     * Only the last {@code length} residues of each sequence are used as
     * the key. Sequences shorter than {@code length} are skipped. A
     * {@code length} of -1 uses whole sequences. The file is closed even
     * when reading fails.
     *
     * @param fastaFileName path of the FASTA file
     * @param length        number of trailing residues to keep, or -1 for all
     * @return map from residue string to a List of gene ids
     * @throws IOException  if the file cannot be opened or closed
     * @throws BioException if BioJava fails to read a sequence
     */
    public static HashMap genomeSeqToGeneNameMap(String fastaFileName, int length) throws IOException, BioException {
        File multipleSequenceAlignmentFile = new File(fastaFileName);
        HashMap seqToNameMap = new HashMap();
        BufferedReader brMSA = new BufferedReader(new FileReader(multipleSequenceAlignmentFile));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                Annotation ann = seq.getAnnotation();
                String key = "description";
                String desc = (String)ann.getProperty(key);
                String[] splitdesc = desc.split("\\s");
                String geneId ="";
                for (int i=0; i< splitdesc.length;i++)
                {
                    if (splitdesc[i].startsWith("gene"))
                    {
                        String[] splitgene = splitdesc[i].split(":");
                        // A token without ':' has no id part; keep looking.
                        if (splitgene.length > 1)
                        {
                            geneId = splitgene[1];
                            break;
                        }
                    }
                }

                String seqString =seq.seqString();
                if (seqString.length() < length)
                    continue;
                if (length != -1)
                    seqString = seqString.substring(seq.length()-length, seq.length());
                List nameList = (List)seqToNameMap.get(seqString);
                if (nameList == null)
                {
                    nameList = new ArrayList();
                    seqToNameMap.put(seqString, nameList);
                }
                nameList.add(geneId);
            }
        }
        finally
        {
            // Release the file handle on both the normal and the error path.
            brMSA.close();
        }
        return seqToNameMap;
    }

    /**
     * Reads a protein FASTA file and groups sequence strings by gene id.
     * <p>
     * For each record the "description" annotation is split on whitespace and the
     * first token starting with "gene" is split on ':'; the part after the first
     * colon is used as the gene id (an empty string if no such token exists).
     * Records whose sequence string is shorter than <code>length</code> are skipped.
     * Unless <code>length</code> is -1, only the last <code>length</code> characters
     * of the sequence string are stored.
     *
     * @param fastaFileName path of the FASTA file to read
     * @param length number of trailing characters to keep, or -1 to keep the whole sequence
     * @return map from gene id String to a List of the sequence Strings recorded for it
     * @throws IOException if the file cannot be opened or closed
     * @throws BioException if BioJava fails while reading a sequence
     */
    public static HashMap readEnsemblIdToSeqMap(String fastaFileName, int length) throws IOException, BioException {
        HashMap idToSeqMap = new HashMap();
        BufferedReader brMSA = new BufferedReader(new FileReader(new File(fastaFileName)));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                Annotation ann = seq.getAnnotation();
                String desc = (String)ann.getProperty("description");
                String[] splitdesc = desc.split("\\s");

                // The gene id is taken from the first "gene..." token, e.g. "gene:XYZ" -> "XYZ".
                String geneId = "";
                for (int i = 0; i < splitdesc.length; i++)
                {
                    if (splitdesc[i].startsWith("gene"))
                    {
                        String[] splitgene = splitdesc[i].split(":");
                        geneId = splitgene[1];
                        break;
                    }
                }

                String seqString = seq.seqString();
                if (seqString.length() < length)
                    continue;
                // Keep only the trailing 'length' characters unless the caller asked for everything (-1).
                if (length != -1)
                    seqString = seqString.substring(seq.length()-length, seq.length());

                List seqList = (List)idToSeqMap.get(geneId);
                if (seqList == null)
                    seqList = new ArrayList();
                seqList.add(seqString);
                idToSeqMap.put(geneId, seqList);
            }
        }
        finally
        {
            // Release the file handle even when parsing fails part-way through.
            brMSA.close();
        }
        return idToSeqMap;
    }
    /**
     * Reads a protein FASTA file and returns the accepted records as new protein sequences.
     * <p>
     * A record is considered only when its sequence string length equals
     * <code>length</code>. Records whose (possibly re-cut) sequence string contains
     * 'U' or 'Y' are skipped. Each accepted record is rebuilt with
     * {@link ProteinTools#createProteinSequence} under its upper-cased name.
     *
     * @param fastaFileName path of the FASTA file to read
     * @param length required sequence string length
     * @return List of Sequence objects, in file order
     * @throws IOException if the file cannot be opened or closed
     * @throws BioException if BioJava fails while reading or creating a sequence
     */
    public static List readAlignment(String fastaFileName, int length) throws IOException, BioException {
        List sequenceList = new ArrayList();
        BufferedReader brMSA = new BufferedReader(new FileReader(new File(fastaFileName)));
        try
        {
            SequenceIterator sequenceAlignment = (SequenceIterator) SeqIOTools.fileToBiojava("fasta", "PROTEIN", brMSA);
            while (sequenceAlignment.hasNext())
            {
                Sequence seq = sequenceAlignment.nextSequence();
                String seqString = seq.seqString();
                if (seqString.length() != length)
                    continue;

                // NOTE(review): after the filter above seqString.length() == length, so if
                // seq.length() equals seqString.length() (presumably true for protein
                // sequences - confirm) the begin index below is -1 and substring throws
                // StringIndexOutOfBoundsException. The intended window is unclear, so the
                // expression is left unchanged; verify against callers before relying on it.
                if (length != -1)
                    seqString = seqString.substring(seq.length()-length-1, seq.length()-1);

                if ((seqString.indexOf('U') > -1) || (seqString.indexOf('Y') > -1))
                    continue;

                Sequence seq2 = ProteinTools.createProteinSequence(seqString,seq.getName().toUpperCase());
                sequenceList.add(seq2);
            }
        }
        finally
        {
            // Release the file handle even when parsing fails part-way through.
            brMSA.close();
        }
        return sequenceList;
    }

    /**
     * Builds an array of <code>n</code> colours whose red, green and blue
     * components are each drawn from a fresh {@link Random} in the range 0-255.
     *
     * @param n number of colours to generate
     * @return array of length <code>n</code> filled with random opaque colours
     */
    public static Color[] randomColors(int n)
    {
        Random generator = new Random();
        Color[] palette = new Color[n];

        int filled = 0;
        while (filled < palette.length)
        {
            // Constructor arguments are evaluated left to right: red, green, blue.
            palette[filled] = new Color(generator.nextInt(256), generator.nextInt(256), generator.nextInt(256));
            filled++;
        }
        return palette;
    }


    /**
     * Writes every datum in <code>data</code> to a text file, one line per datum.
     * <p>
     * Each line starts with the datum's <code>posneg</code> value followed by a space,
     * then one entry per encoded feature of the form
     * <code>"  " + index + ":" + value</code>, where the index is 1-based.
     * Any exception raised while writing is reported on standard output and not
     * rethrown; the writer is closed in all cases.
     *
     * @param outputFileName path of the file to create or overwrite
     * @param data data set providing the datum list and the encoded features of each datum
     */
    public static void outputlibSVMFormat(String outputFileName, Data data)
    {
        List dataList = data.getDataList();
        BufferedWriter bw = null;
        try
        {
            bw = new BufferedWriter(new FileWriter(new File(outputFileName)));

            for (int i = 0;i < dataList.size();i++)
            {
                Datum dt = (Datum) dataList.get(i);

                // Look up the encoded pair
                Features encFeatures = data.getEncodedDataPair(dt);
                StringBuilder featureString = new StringBuilder(dt.posneg + " ");
                for (int j = 0; j < encFeatures.numFeatures();j++)
                {
                    double doubleFeature = encFeatures.getFeatureAsDouble(j);
                    // Feature indices in the output are 1-based.
                    featureString.append("  ").append(j+1).append(":").append(doubleFeature);
                }
                featureString.append("\n");
                bw.write(featureString.toString());
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            // Close (and flush) the writer even when writing failed part-way through.
            if (bw != null)
            {
                try
                {
                    bw.close();
                }
                catch(IOException e)
                {
                    System.out.println("Exception: " + e);
                }
            }
        }
    }

}
