package org.baderlab.pdzsvm.encoding;

import java.util.List;
import java.util.ArrayList;


/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Atchley Factor feature encoding where a residue is represented as a set of
 * five real numbers encoding physicochemical properties.  A sequence is the
 * concatenation of the sets of numbers for each of its residues.
 * Atchley, W. et al. (2005) Solving the protein sequence metric problem,
 *   P Natl. Acad. Sci. USA, 102, 6395-6400.
 */
public class AtchleyFactorFeatureEncoding extends FeatureEncoding {

    /** Number of factors used to describe a single residue. */
    private static final int NUM_FACTORS = 5;

    /** Number of leading letters of {@link #ALPHABET} that have factor values. */
    private static final int NUM_LETTERS = 20;

    /**
     * Residue alphabet.  Only the first {@link #NUM_LETTERS} letters have
     * factor values; the trailing letters ('X' and '-') are encoded as zeros.
     */
    private static final String ALPHABET = "ACDEFGHIKLMNPQRSTVWYX-";

    /**
     * Factor values indexed as FACTORS[factor][letter], where the letter index
     * is the position of the residue in {@link #ALPHABET}.
     */
    private static final double[][] FACTORS = {
        {-0.591,-1.343,1.05,1.357,-1.006,-0.384,0.336,-1.239,1.831,-1.019,-0.663,0.945,0.189,0.931,1.538,-0.228,-0.032,-1.337,-0.595,0.26},
        {-1.302,0.465,0.302,-1.453,-0.59,1.652,-0.417,-0.547,-0.561,-0.987,-1.524,0.828,2.081,-0.179,-0.055,1.399,0.326,-0.279,0.009,0.83},
        {-0.733,-0.862,-3.656,1.477,1.891,1.33,-1.673,2.131,0.533,-1.505,2.219,1.299,-1.628,-3.005,1.502,-4.76,2.213,-0.544,0.672,3.097},
        {1.57,-1.02,-0.259,0.113,-0.397,1.045,-1.474,0.393,-0.277,1.266,-1.005,-0.169,0.421,-0.503,0.44,0.67,0.908,1.242,-2.128,-0.838},
        {-0.146,-0.255,-3.242,-0.837,0.412,2.064,-0.078,0.816,1.648,-0.912,1.212,0.933,-1.392,-1.853,2.897,-2.647,1.313,-1.262,-0.184,1.512}
    };

    /** {min, max} of each row of {@link #FACTORS}, indexed by factor. */
    private static final double[][] MIN_MAX = {
        {-1.3430, 1.8310},
        {-1.5240, 2.0810},
        {-4.7600, 3.0970},
        {-2.1280, 1.5700},
        {-3.2420, 2.8970}
    };

    /**
     * Creates the encoding and registers its name ("FACTOR") with the
     * superclass.
     */
    public AtchleyFactorFeatureEncoding()
    {
        setEncodingName("FACTOR");
    }

    /**
     * @return the number of factors used to encode one residue
     */
    public static int getNumFactors()
    {
        return NUM_FACTORS;
    }

    /**
     * Linearly rescales every feature value into [low, high].  The value at
     * position j of a feature vector is treated as belonging to factor
     * (j mod 5) and is rescaled using that factor's raw minimum and maximum.
     *
     * NOTE(review): encodeFeature() already returns values normalised to
     * [0, 1], whereas this method interprets its input as raw factor values.
     * Confirm that callers do not apply both.
     *
     * @param low lower bound of the target range
     * @param high upper bound of the target range
     * @param featureList list of Features objects to rescale; not modified
     * @return a new list holding one new Features object per input element,
     *         in the same order
     */
    // Raw List types are kept in the signature so that it stays compatible
    // with whatever the superclass and existing callers declare.
    public List scale(double low, double high, List featureList)
    {
        List scaledFeatureList = new ArrayList(featureList.size());
        for (int i = 0; i < featureList.size(); i++)
        {
            Features features = (Features) featureList.get(i);
            double[] featureValues = features.getFeatureValuesAsDoubleArray();

            List scaledFeatureValues = new ArrayList(featureValues.length);
            for (int j = 0; j < featureValues.length; j++)
            {
                // Values are laid out residue by residue, NUM_FACTORS per
                // residue, so the factor index cycles with the position.
                double scaled = scaleValue(low, high, j % NUM_FACTORS, featureValues[j]);
                scaledFeatureValues.add(Double.valueOf(scaled));
            }

            Features scaledFeatures = new Features();
            scaledFeatures.addFeatureValues(scaledFeatureValues);
            scaledFeatureList.add(scaledFeatures);
        }
        return scaledFeatureList;
    }

    /**
     * Maps a raw value of the given factor from that factor's [min, max]
     * range onto [low, high].
     *
     * @param low lower bound of the target range
     * @param high upper bound of the target range
     * @param factor zero-based factor index, 0 to 4
     * @param featureValue raw value to rescale
     * @return the rescaled value
     */
    private double scaleValue(double low, double high, int factor, double featureValue)
    {
        double min = MIN_MAX[factor][0];
        double max = MIN_MAX[factor][1];

        // Return the bounds exactly at the extremes so that floating point
        // rounding cannot push the result just outside [low, high].
        if (featureValue == min)
        {
            return low;
        }
        if (featureValue == max)
        {
            return high;
        }
        return low + (high - low) * (featureValue - min) / (max - min);
    }

    /**
     * Encodes a single residue as five Double values, one per factor, each
     * normalised to [0, 1] using that factor's minimum and maximum.
     *
     * Anything that is not exactly one of the 20 amino acid letters (null,
     * the empty string, multi-character strings, 'X', '-', lower case or
     * unknown characters) is encoded as five zeros.
     *
     * @param residue single-letter, upper-case residue code
     * @return a new list of exactly five Double values
     */
    public List encodeFeature(String residue)
    {
        // String.indexOf(String) is a substring search: "" matches at 0 and
        // "AC" matches at 0, which would silently encode both as 'A'.  Only
        // look up strings that are exactly one character long.
        int index = -1;
        if (residue != null && residue.length() == 1)
        {
            index = ALPHABET.indexOf(residue.charAt(0));
        }
        boolean isAminoAcid = index >= 0 && index < NUM_LETTERS;

        List encodedFeatures = new ArrayList(NUM_FACTORS);
        for (int factor = 0; factor < NUM_FACTORS; factor++)
        {
            double value = 0.0;
            if (isAminoAcid)
            {
                value = scaleValue(0.0, 1.0, factor, FACTORS[factor][index]);
            }
            // Always store a Double so the list is homogeneous (zeros were
            // previously boxed as Integer).
            encodedFeatures.add(Double.valueOf(value));
        }
        return encodedFeatures;
    }

    /**
     * Small manual check: encodes a two-residue input, rescales it to
     * [-1, 1] and prints both the encoded and the rescaled features.
     */
    public static void main(String[] args)
    {
        AtchleyFactorFeatureEncoding a = new AtchleyFactorFeatureEncoding();

        Features f = a.encodeFeatures("A,C");
        List fList = new ArrayList();
        fList.add(f);
        List scaledFeatures = a.scale(-1.0, 1.0, fList);
        System.out.println(f.toString());
        System.out.println(scaledFeatures.toString());
    }

}
