package org.baderlab.brain.correlationlearn;

import mt.MatrixEntry;

/**
 * Copyright (c) 2005 Memorial Sloan-Kettering Cancer Center
 *
 * Code written by: Gary Bader
 * Authors: Gary Bader, Chris Sander
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and
 * Memorial Sloan-Kettering Cancer Center
 * has no obligations to provide maintenance, support,
 * updates, enhancements or modifications.  In no event shall the
 * Memorial Sloan-Kettering Cancer Center
 * be liable to any party for direct, indirect, special,
 * incidental or consequential damages, including lost profits, arising
 * out of the use of this software and its documentation, even if
 * Memorial Sloan-Kettering Cancer Center
 * has been advised of the possibility of such damage.  See
 * the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * User: shirleyhui
 * Date: Jul 16, 2007
 * Time: 1:42:19 PM
 */

/**
 * A utility class that performs operations on residue position pair features stored in a correlation matrix.
 * <p>
 * A feature is an array of {@link ResiduePositionPair}s. Each feature maps to a unique integer matrix index:
 * the set of positions selects a "partition" (numbered with the combinatorial number system) and the residues
 * select an offset inside that partition (a base-{@code aaList.length()} number).
 * All methods are static and keep no state.
 */
public class FeatureUtils {

    /**
     * Reference 20 letter amino acid alphabet
     */
    private static final String aaList = "ACDEFGHIKLMNPQRSTVWY";

    /**
     * Finds the size of the matrix index space for a given sequence length and 'number of positions chosen' value.
     * <p>
     * The value is (totalPositions choose numberOfPositions) * (alphabet size ^ numberOfPositions), i.e. the number
     * of partitions multiplied by the size of one partition. Indices produced by {@link #getFeatureIndex} for
     * features with strictly increasing positions below totalPositions are smaller than this value.
     * <p>
     * If the product does not fit in an int the result saturates at {@link Integer#MAX_VALUE}.
     *
     * @param numberOfPositions The number of positions in the sequence considered
     * @param totalPositions    The total number of positions in the sequence
     * @return The maximum matrix index
     * @throws ArithmeticException if (totalPositions choose numberOfPositions) itself does not fit in an int
     */
    public static int getMaxIndex(int numberOfPositions, int totalPositions) {
        int partitionCount = FeatureUtils.getCombinations(totalPositions, numberOfPositions);
        //use the alphabet length rather than a literal so this stays in sync with the other index methods
        double indexSpace = partitionCount * Math.pow(aaList.length(), numberOfPositions);
        //narrowing a too-large double to int saturates at Integer.MAX_VALUE
        return (int) indexSpace;
    }

    /**
     * Converts an index generated with getFeatureIndex back to a feature
     * <p>
     * The pairs in the given array are updated in place; every element must already be allocated
     * (see {@link #allocateFeature}). If no position below sequenceLength matches the index for some
     * pair, that pair's position is left untouched.
     *
     * @param index          The index to convert
     * @param feature        The allocated feature array to fill
     * @param sequenceLength The total sequence length (maximum feature position+1)
     * @return the calculated feature (the same array instance that was passed in)
     */
    public static ResiduePositionPair[] indexToFeature(int index, ResiduePositionPair[] feature, int sequenceLength) {

        //partitions of length aaList.length()^feature.length
        double partitionSize = Math.pow(aaList.length(), feature.length);

        //dial up positions
        //determine partition number
        int partitionNumber = (int) Math.floor(index / partitionSize);
        int tempPartitionNumber = partitionNumber;
        //decode from the last (largest) position down to the first
        for (int featurePosition = feature.length; featurePosition > 0; featurePosition--)
        {  //featurePosition is 1-based, it addresses feature[featurePosition - 1]
            for (int i = (featurePosition - 1); i < sequenceLength; i++) {
                if (tempPartitionNumber >= getCombinations(i + 1, featurePosition)) {
                    //the position is further to the right
                    continue;
                }
                //i is the largest position whose (i choose featurePosition) still fits in the remainder
                feature[featurePosition - 1].position = i;
                tempPartitionNumber -= getCombinations(i, featurePosition);
                break;
            }
        }

        //dial up residues
        int partitionOffset = partitionNumber * (int) partitionSize;
        int residueOffset = index - partitionOffset;
        //the residue offset is a base-aaList.length() number, most significant digit is the last pair
        for (int i = feature.length - 1; i >= 0; i--) {
            int placeValue = (int) Math.pow(aaList.length(), i);
            feature[i].residue = aaList.charAt(residueOffset / placeValue);
            residueOffset %= placeValue;
        }
        return feature;
    }

    /**
     * Checks if the residues stored in the feature are in the aaList - accounts for gap characters or bad letters
     *
     * @param feature The feature to validate
     * @return true if the feature is valid (every residue is one of the letters in the reference alphabet)
     */
    public static boolean featureValid(ResiduePositionPair[] feature) {
        for (int i = 0; i < feature.length; i++) {
            ResiduePositionPair residuePositionPair = feature[i];
            if (aaList.indexOf(residuePositionPair.residue) < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the index in the matrix for this feature
     * <p>
     * No validation is done here: a residue that is not in the reference alphabet contributes a negative
     * offset and yields a meaningless index, so check the feature with {@link #featureValid} first.
     *
     * @param feature The feature to look up in the matrix. This can be one or more residue/position pairs
     * @return The matrix index for this feature
     */
    public static int getFeatureIndex(ResiduePositionPair[] feature) {

        int partitionNumber = 0;
        int residueOffset = 0;

        //matrix is organized in (n choose k) partitions of (size of aa alphabet ^ number of positions)
        //where n = max number of positions and k is the number of positions in the feature
        //IMPORTANT: the partition formula assumes that pair(i).position < pair(i+1).position
        //note: partition formula is a perfect hashing function based on Pascal's triangle
        for (int i = 0; i < feature.length; i++) {
            //partition number - maxes out at the number of total positions choose feature.length
            partitionNumber += getCombinations(feature[i].position, i + 1);
        }
        //partitions of length aaList.length()^feature.length
        int partitionOffset = partitionNumber * (int) Math.pow(aaList.length(), feature.length);
        //residues form a base-aaList.length() number, pair i is digit i (least significant first)
        for (int i = 0; i < feature.length; i++) {
            residueOffset += aaList.indexOf(feature[i].residue) * Math.pow(aaList.length(), i);
        }
        return partitionOffset + residueOffset;
    }

    /**
     * Helper method to allocate memory for a feature
     * <p>
     * Every pair is created with the placeholder position -1 and placeholder residue 'X'.
     *
     * @param numPositionsPerFeature Number of positions per feature
     * @return A set of residue position pairs
     */
    public static ResiduePositionPair[] allocateFeature(int numPositionsPerFeature) {
        ResiduePositionPair[] feature = new ResiduePositionPair[numPositionsPerFeature];
        for (int i = 0; i < feature.length; i++) {
            feature[i] = new ResiduePositionPair(-1, 'X');
        }
        return feature;
    }


    /**
     * Populates a feature with values
     * <p>
     * Pair i receives positionArray[i] and the character found at that position in the sequence.
     * The pairs are updated in place.
     *
     * @param feature        The feature to populate (previously allocated)
     * @param positionArray  The list of positions to use (needs at least feature.length entries)
     * @param sequenceString The protein sequence
     * @return The populated feature as an array of residue/position pairs (the same array that was passed in)
     */
    public static ResiduePositionPair[] createFeature(ResiduePositionPair[] feature, int[] positionArray, String sequenceString) {
        for (int i = 0; i < feature.length; i++) {
            ResiduePositionPair residuePositionPair = feature[i];
            residuePositionPair.position = positionArray[i];
            residuePositionPair.residue = sequenceString.charAt(positionArray[i]);
        }
        return feature;
    }

    /**
     * Converts a feature to a simple string representation
     * <p>
     * The result is the concatenation of the toString() value of every pair, in array order.
     *
     * @param feature A set of residue position pairs
     * @return the string representation of the residue position pairs
     */
    public static String featureToString(ResiduePositionPair[] feature) {
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < feature.length; i++) {
            sb.append(feature[i].toString());
        }
        return sb.toString();
    }


    /**
     * Checks if a feature is part of a given sequence
     * <p>
     * The feature is in the sequence when, for every pair, the sequence character at the pair's position
     * equals the pair's residue.
     *
     * @param sequenceFeature A set of residue position pairs describing the sequence features
     * @param seqString The sequence string
     * @return true if the feature is in the sequence
     */
    public static boolean isFeatureInSequence(ResiduePositionPair[] sequenceFeature, String seqString) {
        for (int i = 0; i < sequenceFeature.length; i++) {
            ResiduePositionPair residuePositionPair = sequenceFeature[i];
            if (seqString.charAt(residuePositionPair.position) != residuePositionPair.residue) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns n choose k
     * <p>
     * Returns 0 when k is negative or larger than n. For k == 1 the value of n is returned unchanged.
     * The value is built up incrementally in 64 bit arithmetic so that intermediate products cannot
     * overflow as long as the final result fits in an int.
     *
     * @param n The number of elements
     * @param k The number of combinations to choose
     * @return n choose k
     * @throws ArithmeticException if n choose k does not fit in an int
     */
    public static int getCombinations(int n, int k) {
        if (k == 1) {
            return n; //trivial case
        }
        if (k < 0 || k > n) {
            return 0; //nothing to choose
        }
        //(n choose k) == (n choose n-k), iterate over the smaller of the two
        int smaller = Math.min(k, n - k);
        int base = n - smaller;
        long result = 1;
        for (int i = 1; i <= smaller; i++) {
            //result holds (base+i-1 choose i-1), so result*(base+i) is divisible by i
            //and the quotient is exactly (base+i choose i)
            result = result * (base + i) / i;
            //the running value never decreases, so once it leaves the int range the final value will too
            if (result > Integer.MAX_VALUE) {
                throw new ArithmeticException("Combinations overflow an int: " + n + " choose " + k);
            }
        }
        return (int) result;
    }

    /**
     * Returns the mutual information score of the feature
     * <p>
     * The score is log((f * N) / (d * p)) * (f / N), where f is the value of the matrix entry, d the domain
     * frequency, p the peptide frequency and N the multiple sequence alignment length. The natural logarithm
     * is used. A matrix entry value of 0 does not yield 0 but NaN (0 * log 0) for positive d and p.
     *
     * @param matrixEntry Contains the feature (encoded) and the feature frequency
     * @param domainFrequency Contains the frequency of the part of the feature in the domain alignment
     * @param peptideFrequency Contains the frequency of the part of the feature in the peptide alignment
     * @param multipleSequenceAlignmentLength The multiple sequence alignment length
     * @return The mutual information score
     */
    public static double scoreFeature(MatrixEntry matrixEntry, double domainFrequency, double peptideFrequency, double multipleSequenceAlignmentLength) {
        double featureFrequency = matrixEntry.get();
        //this feature's term of the mutual information sum: p(x,y) * log(p(x,y) / (p(x) * p(y)))
        double mutualInformation = (Math.log((featureFrequency * multipleSequenceAlignmentLength) / (domainFrequency * peptideFrequency)) *
                (featureFrequency / multipleSequenceAlignmentLength));
        return mutualInformation;
    }

    /**
     * Generates the next feature position
     * <p>
     * Positions are kept strictly increasing, so successive calls walk through the combinations of
     * positionArray.length positions out of totalPositions in lexicographic order, starting with 0,1,2,...
     * Once the last combination has been reached further calls leave the array unchanged.
     * The array is updated in place.
     *
     * @param positionArray  An already initialized array of positions, where positionArray.length is equal to the
     *                       number of positions in the feature
     * @param totalPositions The total number of positions possible (i.e. the length of the sequence)
     *                       Note: totalPositions must be larger than the length of the position array
     * @param initialize     True only for the first iteration and must be false afterwards
     * @return The positions for the next feature (an updated version of the input position array)
     */
    public static int[] generateFeaturePositions(int[] positionArray, int totalPositions, boolean initialize) {
        if (!initialize) {
            //find the right-most position that can still move to the right
            for (int i = positionArray.length - 1; i >= 0; i--) {
                //position i has to leave room for the (positionArray.length - i - 1) positions to its right
                if (positionArray[i] < (totalPositions - (positionArray.length - i))) {
                    //add one to the current position
                    positionArray[i] += 1;
                    //reset all positions to the right so that they directly follow position i
                    for (int j = i + 1; j < positionArray.length; j++) {
                        positionArray[j] = positionArray[j - 1] + 1;
                    }
                    break;
                }
            }
        } else {
            //true for the first call to this method - start with the first combination 0,1,2,...
            for (int i = 0; i < positionArray.length; i++) {
                positionArray[i] = i;
            }
        }
        return positionArray;
    }
}
