/*
 * Decompiled with CFR 0.152.
 */
package org.baderlab.brain;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;
import org.baderlab.brain.ProteinTerminus;
import org.biojava.bio.BioException;
import org.biojava.bio.dist.Distribution;
import org.biojava.bio.dist.DistributionFactory;
import org.biojava.bio.dist.SimpleDistributionTrainerContext;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/**
 * Computes and prints residue statistics over the termini of sequences read
 * from a protein sequence database: single-residue weights
 * ({@link #calcAADistributionSearchDB(int, ProteinTerminus)}) and residue-pair
 * frequencies at a fixed spacing
 * ({@link #calcPairDistributionSearchDB(String, int, ProteinTerminus)}).
 *
 * <p>Results are written to {@code System.out}; diagnostics go to
 * {@code System.err}.
 */
public class ProteinDatabaseDistribution {
    /** Residue letters that index the rows and columns of the pair-count table. */
    private static final String AA_LIST = "ACDEFGHIKLMNPQRSTUVWY";

    /**
     * Sequences parsed from the file given to the constructor. It is consumed by
     * {@link #calcAADistributionSearchDB(int, ProteinTerminus)}.
     */
    SequenceIterator database = null;

    /**
     * Opens a protein sequence database file and prepares it for iteration.
     *
     * @param dbFileName path of the database file to read
     * @param dbFormat   format name handed to {@code SeqIOTools.fileToBiojava}
     * @throws FileNotFoundException if {@code dbFileName} cannot be opened
     * @throws BioException          if the file cannot be set up for parsing
     */
    public ProteinDatabaseDistribution(String dbFileName, String dbFormat) throws FileNotFoundException, BioException {
        BufferedReader br = new BufferedReader(new FileReader(dbFileName));
        boolean parsed = false;
        try {
            this.database = (SequenceIterator)SeqIOTools.fileToBiojava(dbFormat, "PROTEIN", br);
            parsed = true;
        }
        finally {
            // On success the reader is deliberately left open: the iterator is
            // presumed to pull from it on demand (TODO confirm against BioJava).
            // On failure nothing else references it, so release it here.
            if (!parsed) {
                closeQuietly(br);
            }
        }
    }

    /** Closes a reader, reporting (but not propagating) a failure to close. */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a table of counts into a table of frequencies by dividing every
     * cell by {@code totalPairs}.
     *
     * <p>The division is done in floating point, so a zero {@code totalPairs}
     * does not throw; a zero count divided by it yields {@code NaN}.
     *
     * @param countsTable counts to normalise; not modified
     * @param totalPairs  the divisor applied to every cell
     * @return a new table with the same dimensions as {@code countsTable}
     */
    private double[][] countsTableToDistribution(long[][] countsTable, long totalPairs) {
        double[][] distribution = new double[countsTable.length][];
        for (int row = 0; row < countsTable.length; row++) {
            // Size each row from itself so an empty table is not dereferenced.
            distribution[row] = new double[countsTable[row].length];
            for (int col = 0; col < countsTable[row].length; col++) {
                distribution[row][col] = (double)countsTable[row][col] / (double)totalPairs;
            }
        }
        return distribution;
    }

    /**
     * Counts ordered residue pairs separated by exactly {@code numGapsInPair}
     * positions within the terminus of every sequence in {@code searchDB}.
     *
     * <p>Sequences whose terminus contains {@code 'X'} are skipped entirely, as
     * are records that cannot be read. A pair is ignored when either residue is
     * not one of the table's letters.
     *
     * @param searchDB      sequences to scan; consumed by this call
     * @param length        number of terminus positions to consider
     * @param terminus      which terminus to extract from each sequence
     * @param numGapsInPair number of positions between the two residues of a pair
     * @return a three-element array: the {@code long[][]} count table, the total
     *         number of counted pairs as a {@code Long}, and the {@code String}
     *         of residue letters indexing the table
     */
    private Object[] calcOrder2PairCount(SequenceIterator searchDB, int length, ProteinTerminus terminus, int numGapsInPair) {
        long[][] countsTable = new long[AA_LIST.length()][AA_LIST.length()];
        long totalPairs = 0L;
        while (searchDB.hasNext()) {
            Sequence sequenceFromDB;
            try {
                sequenceFromDB = searchDB.nextSequence();
            }
            catch (BioException e) {
                System.err.println("Can't read next sequence from database.");
                // Skip the unreadable record rather than re-counting the previous one.
                continue;
            }
            Sequence sequenceToSearch = ProteinTerminus.getSequenceTerminus(sequenceFromDB, length, terminus);
            if (sequenceToSearch == null) {
                continue;
            }
            String residues = sequenceToSearch.seqString();
            if (residues.indexOf('X') >= 0 || numGapsInPair < 0) {
                continue;
            }
            // Never index past the end when the extracted terminus is shorter than requested.
            int limit = Math.min(length, residues.length());
            // Only positions (first, first + gap + 1) form a pair, so one pass is enough.
            int firstEnd = limit - numGapsInPair - 1;
            for (int first = 0; first < firstEnd; first++) {
                int row = AA_LIST.indexOf(residues.charAt(first));
                int col = AA_LIST.indexOf(residues.charAt(first + numGapsInPair + 1));
                if (row < 0 || col < 0) {
                    // Residue letter outside the table; it cannot be tallied.
                    continue;
                }
                countsTable[row][col]++;
                ++totalPairs;
            }
        }
        return new Object[]{countsTable, Long.valueOf(totalPairs), AA_LIST};
    }

    /**
     * Prints a tab-separated matrix to {@code System.out}: a header line of
     * column letters followed by one line per row, each starting with its row
     * letter. Rows and columns whose letter is {@code 'U'} are omitted.
     *
     * @param distribution values to print, indexed in the order of {@code aaList}
     * @param aaList       letters labelling the rows and columns
     */
    public void printDistribution(double[][] distribution, String aaList) {
        StringBuilder header = new StringBuilder();
        for (int i = 0; i < aaList.length(); i++) {
            if (aaList.charAt(i) != 'U') {
                header.append("\t").append(aaList.charAt(i));
            }
        }
        header.append("\n");
        System.out.print(header);

        for (int row = 0; row < distribution.length; row++) {
            if (aaList.charAt(row) == 'U') {
                continue;
            }
            StringBuilder line = new StringBuilder();
            line.append(aaList.charAt(row)).append("\t");
            boolean firstCell = true;
            for (int col = 0; col < distribution[row].length; col++) {
                if (aaList.charAt(col) == 'U') {
                    continue;
                }
                // Separate printed cells only, so a skipped last column leaves no trailing tab.
                if (!firstCell) {
                    line.append("\t");
                }
                line.append(distribution[row][col]);
                firstCell = false;
            }
            line.append("\n");
            System.out.print(line);
        }
    }

    /**
     * Prints, for every gap size from 0 to {@code length - 2}, the residue-pair
     * frequency table of the termini in a FASTA protein database. The file is
     * re-read for each gap size.
     *
     * <p>If the file cannot be opened or parsed, the stack trace is printed and
     * the method returns without processing further gap sizes.
     *
     * @param fastaDatabaseFileName path of the FASTA file to scan
     * @param length                number of terminus positions to consider
     * @param terminus              which terminus to extract from each sequence
     */
    public void calcPairDistributionSearchDB(String fastaDatabaseFileName, int length, ProteinTerminus terminus) {
        for (int gap = 0; gap <= length - 2; gap++) {
            BufferedReader br;
            try {
                br = new BufferedReader(new FileReader(fastaDatabaseFileName));
            }
            catch (FileNotFoundException e) {
                e.printStackTrace();
                // The same file would be missing for every remaining gap size.
                return;
            }
            try {
                SequenceIterator searchDB;
                try {
                    searchDB = (SequenceIterator)SeqIOTools.fileToBiojava("FASTA", "PROTEIN", br);
                }
                catch (BioException e) {
                    e.printStackTrace();
                    return;
                }
                Object[] returnValue = this.calcOrder2PairCount(searchDB, length, terminus, gap);
                long[][] countsTable = (long[][])returnValue[0];
                long totalPairs = ((Long)returnValue[1]).longValue();
                String aaList = (String)returnValue[2];
                double[][] distribution = this.countsTableToDistribution(countsTable, totalPairs);
                System.out.println("Gap:" + gap);
                this.printDistribution(distribution, aaList);
            }
            finally {
                // Release the file even when counting or printing fails.
                closeQuietly(br);
            }
        }
    }

    /**
     * Trains a distribution over the protein alphabet from the termini of the
     * remaining sequences in {@link #database} and prints one
     * {@code token<TAB>weight} line per symbol to {@code System.out}, omitting
     * the symbol whose token is {@code U}.
     *
     * <p>This call iterates {@link #database} until it reports no more
     * sequences. Unreadable records and symbols that cannot be converted to a
     * token are reported on {@code System.err} and skipped. Any other failure is
     * caught and its stack trace printed.
     *
     * @param length   number of terminus positions to consider
     * @param terminus which terminus to extract from each sequence
     */
    public void calcAADistributionSearchDB(int length, ProteinTerminus terminus) {
        try {
            SimpleDistributionTrainerContext dtc = new SimpleDistributionTrainerContext();
            Distribution proteinDist = DistributionFactory.DEFAULT.createDistribution(ProteinTools.getAlphabet());
            dtc.registerDistribution(proteinDist);
            while (this.database.hasNext()) {
                Sequence sequenceFromDB;
                try {
                    sequenceFromDB = this.database.nextSequence();
                }
                catch (BioException e) {
                    System.err.println("Can't read next sequence from database.");
                    // Skip the unreadable record rather than re-counting the previous one.
                    continue;
                }
                Sequence sequenceToSearch = ProteinTerminus.getSequenceTerminus(sequenceFromDB, length, terminus);
                if (sequenceToSearch == null) {
                    continue;
                }
                // Symbol positions are addressed from 1 up to and including length().
                for (int pos = 1; pos <= sequenceToSearch.length(); pos++) {
                    dtc.addCount(proteinDist, sequenceToSearch.symbolAt(pos), 1.0);
                }
            }
            dtc.train();

            SymbolTokenization st;
            try {
                st = ProteinTools.getAlphabet().getTokenization("token");
            }
            catch (BioException e) {
                System.err.println("Unable to get symboltokenization");
                // Without a tokenization no symbol can be labelled, so nothing can be printed.
                return;
            }
            Iterator iter = ((FiniteAlphabet)proteinDist.getAlphabet()).iterator();
            while (iter.hasNext()) {
                Symbol sym = (Symbol)iter.next();
                String token;
                try {
                    token = st.tokenizeSymbol((AtomicSymbol)sym);
                }
                catch (IllegalSymbolException e) {
                    System.err.println("Unable to convert symbol to token.");
                    // Do not print this weight under the previous symbol's token.
                    continue;
                }
                if (token == null || token.equalsIgnoreCase("U")) {
                    continue;
                }
                System.out.println(token + "\t" + proteinDist.getWeight(sym));
            }
            System.out.println("\n");
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}

