package org.baderlab.pdzsvmstruct.utils;

import org.baderlab.brain.ProteinProfile;
import org.baderlab.pdzsvmstruct.data.manager.DataFileManager;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.db.HashSequenceDB;

import java.util.*;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.File;

/**
 * Copyright (c) 2011 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVMStruct.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVMStruct.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Maps between the gene-based domain names used by this project and the
 * names used in the PDZBase alignment files, and extracts the trailing
 * residues of the sequences recorded for a domain in those files.
 *
 * Both kinds of name have the form "NAME-NUMBER". The alignment files are
 * read as FASTA-like text: a header line starts with '>' and has
 * '&'-separated fields, where the first field (after the '>') and the second
 * field joined by '-' form the PDZBase name; non-header lines are sequence
 * lines.
 */
public class PDZBaseUtils {
    /** Number of trailing characters taken from each sequence line. */
    private static final int C_TERM_LENGTH = 5;

    /** Alignment file locations, relative to DataFileManager.DATA_ROOT_DIR. */
    private static final String HUMAN_ALIGNMENT_FILE =
            "/Data/PDZBase/Human/PDZBase-Human-Alignment-Mar52010.txt";
    private static final String MOUSE_ALIGNMENT_FILE =
            "/Data/PDZBase/Mouse/PDZBase-Mouse-Alignment-Mar142010.txt";

    private final Map<String, String> geneNameToPDZBaseNameMap;
    private final Map<String, String> pdzBaseNameToGeneNameMap;
    private final String organism;

    /**
     * Builds the name maps for the given organism.
     *
     * @param organism Constants.HUMAN or Constants.MOUSE; for any other
     *                 value both maps stay empty. Must not be null.
     */
    public PDZBaseUtils(String organism)
    {
        this.organism = organism;
        geneNameToPDZBaseNameMap = new HashMap<String, String>();
        if (organism.equals(Constants.HUMAN))
        {
            addHumanNames();
        }
        else if (organism.equals(Constants.MOUSE))
        {
            addMouseNames();
        }

        // Reverse lookup; if two gene names shared a PDZBase name, the one
        // iterated last would win.
        pdzBaseNameToGeneNameMap = new HashMap<String, String>();
        for (Map.Entry<String, String> entry : geneNameToPDZBaseNameMap.entrySet())
        {
            pdzBaseNameToGeneNameMap.put(entry.getValue(), entry.getKey());
        }
    }

    /** Registers the human gene name to PDZBase name pairs. */
    private void addHumanNames()
    {
        geneNameToPDZBaseNameMap.put("MLLT4-1","AF6-1");
        geneNameToPDZBaseNameMap.put("CASK-1","CASK-1");
        geneNameToPDZBaseNameMap.put("USH1C-1","Harmonin-1");
        geneNameToPDZBaseNameMap.put("ERBB2IP-1","ERBIN-1");
        geneNameToPDZBaseNameMap.put("GRD2I-1","Delphilin-1");
        geneNameToPDZBaseNameMap.put("GIPC1-1","GIPC-1");
        geneNameToPDZBaseNameMap.put("GOPC-1","GOPC/PIST-1");
        geneNameToPDZBaseNameMap.put("PDZD3-3","IKEPP-3");
        geneNameToPDZBaseNameMap.put("MAGI2-2","MAGI2-2");
        geneNameToPDZBaseNameMap.put("MAGI2-6","MAGI2-6");
        geneNameToPDZBaseNameMap.put("MAGI3-2","MAGI3-2");
        geneNameToPDZBaseNameMap.put("MPP5-1","MP55-1");
        geneNameToPDZBaseNameMap.put("MPDZ-10","MUPP1-10");
        geneNameToPDZBaseNameMap.put("MPDZ-13","MUPP1-13");
        geneNameToPDZBaseNameMap.put("SLC9A3R1-1","NHERF1-1");
        geneNameToPDZBaseNameMap.put("SLC9A3R1-2","NHERF1-2");
        //geneNameToPDZBaseNameMap.put("SLC9A3R2-1","NHERF2-1");
        geneNameToPDZBaseNameMap.put("SLC9A3R2-2","NHERF2-2");
        geneNameToPDZBaseNameMap.put("ARHGEF11-1","PDZRhoGEF-1");
        geneNameToPDZBaseNameMap.put("PDZK1-1","PDZK1-1");
        geneNameToPDZBaseNameMap.put("PICK1-1","PICK1-1");
        geneNameToPDZBaseNameMap.put("DLG2-1","PSD93-1");
        geneNameToPDZBaseNameMap.put("DLG2-2","PSD93-2");
        geneNameToPDZBaseNameMap.put("DLG4-1","PSD95-1");
        geneNameToPDZBaseNameMap.put("DLG4-2","PSD95-2");
        geneNameToPDZBaseNameMap.put("PTPN3-1","PTPH1-1");
        geneNameToPDZBaseNameMap.put("PTPN13-2","PTPL1-2");
        geneNameToPDZBaseNameMap.put("PTPN13-3","PTPL1-3");
        geneNameToPDZBaseNameMap.put("PTPN13-4","PTPL1-4");
        geneNameToPDZBaseNameMap.put("PTPN13-5","PTPL1-5");
        geneNameToPDZBaseNameMap.put("PTPN4-1","PTPaseMEG1-1");
        geneNameToPDZBaseNameMap.put("DLG3-1","SAP102-1");
        geneNameToPDZBaseNameMap.put("DLG3-2","SAP102-2");
        geneNameToPDZBaseNameMap.put("DLG1-1","SAP97-1");
        geneNameToPDZBaseNameMap.put("DLG1-2","SAP97-2");
        //geneNameToPDZBaseNameMap.put("TAX1BP3-1","TIP1-1");
        geneNameToPDZBaseNameMap.put("LIN7A-1","Veli1-1");
        geneNameToPDZBaseNameMap.put("LIN7B-1","Veli2-1");
        geneNameToPDZBaseNameMap.put("LIN7C-1","Veli3-1");
        geneNameToPDZBaseNameMap.put("TJP1-2","ZO1-2");
        geneNameToPDZBaseNameMap.put("SNTA1-1","alpha1syntrophin-1");
        geneNameToPDZBaseNameMap.put("SNTB2-1","beta2syntrophin-1");
        geneNameToPDZBaseNameMap.put("SNTG1-1","gamma1syntrophin-1");
        geneNameToPDZBaseNameMap.put("SNTG2-1","gamma2syntrophin-1");
        geneNameToPDZBaseNameMap.put("INADL-6","hDlt-6");
        geneNameToPDZBaseNameMap.put("INADL-8","hDlt-8");
        geneNameToPDZBaseNameMap.put("MPP1-1","p55-1");
    }

    /** Registers the mouse gene name to PDZBase name pairs. */
    private void addMouseNames()
    {
        geneNameToPDZBaseNameMap.put("DLG4-2","PSD95-2");
        geneNameToPDZBaseNameMap.put("PDZK1-1","PDZK1-1");
        geneNameToPDZBaseNameMap.put("PDZK1-3","PDZK1-3");
        geneNameToPDZBaseNameMap.put("MAGI-2-6","MAGI2-5");
        geneNameToPDZBaseNameMap.put("CIPP-10","Cipp-4");
        geneNameToPDZBaseNameMap.put("CIPP-8","Cipp-2");
        geneNameToPDZBaseNameMap.put("CIPP-9","Cipp-3");
        geneNameToPDZBaseNameMap.put("DVL1-1","DVL1-1");
        geneNameToPDZBaseNameMap.put("ERBIN-1","ERBIN-1");
        geneNameToPDZBaseNameMap.put("MAGI-1-6","MAGI1-5");
        geneNameToPDZBaseNameMap.put("LIN-7A-1","Veli1-1");
        geneNameToPDZBaseNameMap.put("LIN-7C-1","Veli3-1");
        geneNameToPDZBaseNameMap.put("ZO-1-1","ZO1-1");
        geneNameToPDZBaseNameMap.put("ZO-2-1","ZO2-1");
        geneNameToPDZBaseNameMap.put("ZO-3-1","ZO3-1");
        geneNameToPDZBaseNameMap.put("A1-SYNTROPHIN-1","alpha1syntrophin-1");
        geneNameToPDZBaseNameMap.put("NHERF-1-1","NHERF1-1");
        geneNameToPDZBaseNameMap.put("NHERF-2-1","NHERF2-1");
        geneNameToPDZBaseNameMap.put("MUPP1-10","MUPP1-10");
        geneNameToPDZBaseNameMap.put("SAP102-2","SAP102-2");
        geneNameToPDZBaseNameMap.put("SAP102-1","SAP102-1");
        geneNameToPDZBaseNameMap.put("PDZ-RGS3-1","PDZRGS3-1");
        geneNameToPDZBaseNameMap.put("NNOS-1","nNOS-1");
        geneNameToPDZBaseNameMap.put("PDZK1-4","PDZK1-4");
        geneNameToPDZBaseNameMap.put("LNX1-2","lnx1-2");
        geneNameToPDZBaseNameMap.put("DLG4-1","PSD95-1");

        geneNameToPDZBaseNameMap.put("PDLIM5-1","Enigma-1");
        geneNameToPDZBaseNameMap.put("MAGI-1-2","MAGI1-1");
        geneNameToPDZBaseNameMap.put("RHPN1-1","Rhophilin1-1");
        geneNameToPDZBaseNameMap.put("GIPC1-1","GIPC-1");
        geneNameToPDZBaseNameMap.put("GRIP1-7","GRIP-7");
        geneNameToPDZBaseNameMap.put("MUPP1-9","MUPP1-9");
        geneNameToPDZBaseNameMap.put("IL16-1","NIL16-1");
        geneNameToPDZBaseNameMap.put("PICK1-1","PICK1-1");
        // NOTE(review): "CYPHER-1" is registered twice; the later put below
        // ("Cypher1c-1") replaces this value, so "Cypher2s-1" is never
        // reachable through either map. Confirm which mapping is intended.
        geneNameToPDZBaseNameMap.put("CYPHER-1","Cypher2s-1");
        geneNameToPDZBaseNameMap.put("LIN-7B-1","Veli2-1");
        geneNameToPDZBaseNameMap.put("CHAPSYN-110-2","PSD93-2");
        geneNameToPDZBaseNameMap.put("MUPP1-7","MUPP1-7");
        geneNameToPDZBaseNameMap.put("CYPHER-1","Cypher1c-1");
        geneNameToPDZBaseNameMap.put("CHAPSYN-110-1","PSD93-1");
    }

    /**
     * Returns the path of the alignment file for this instance's organism,
     * or an empty string when the organism is neither human nor mouse.
     */
    private String getAlignmentFileName()
    {
        if (organism.equals(Constants.HUMAN))
            return DataFileManager.DATA_ROOT_DIR + HUMAN_ALIGNMENT_FILE;
        if (organism.equals(Constants.MOUSE))
            return DataFileManager.DATA_ROOT_DIR + MOUSE_ALIGNMENT_FILE;
        return "";
    }

    /**
     * Splits a header line (one starting with '>') on '&'.
     *
     * @return a two-element array holding the first field without its
     *         leading '>' and the second field, or null when the header has
     *         fewer than two fields
     */
    private static String[] parseHeader(String headerLine)
    {
        String[] fields = headerLine.split("&");
        if (fields.length < 2)
            return null;
        return new String[] { fields[0].substring(1), fields[1] };
    }

    /**
     * Returns the gene-based domain names known for this organism.
     *
     * @return a new list of String names, in the map's iteration order
     */
    public List getDomainNames()
    {
        return new ArrayList<String>(geneNameToPDZBaseNameMap.keySet());
    }

    /**
     * Collects the distinct trailing five-character strings of the sequence
     * lines recorded for a domain in the organism's alignment file.
     *
     * Sequence lines shorter than five characters and headers with fewer
     * than two '&'-separated fields are skipped. If reading fails, the
     * exception is printed to standard output and whatever was collected up
     * to that point is returned.
     *
     * @param domainName gene-based domain name, e.g. "PTPN13-2"
     * @return a new list of Strings in order of first appearance; empty when
     *         the domain name is unknown or nothing could be read
     */
    public List getPDZBaseIntList(String domainName)
    {
        // A LinkedHashSet de-duplicates while keeping first-seen order.
        Set<String> cTerms = new LinkedHashSet<String>();

        String pdzbaseName = geneNameToPDZBaseNameMap.get(domainName);
        if (pdzbaseName == null)
            return new ArrayList<String>();   // no header can match an unmapped name

        try
        {
            BufferedReader br = new BufferedReader(new FileReader(new File(getAlignmentFileName())));
            try
            {
                boolean addSeq = false;
                String line;
                while ((line = br.readLine()) != null)
                {
                    if (line.startsWith(">"))
                    {
                        // Each header decides whether the lines that follow belong to the domain.
                        String[] header = parseHeader(line);
                        addSeq = header != null
                                && pdzbaseName.equals(header[0] + "-" + header[1]);
                    }
                    else if (addSeq && line.length() >= C_TERM_LENGTH)
                    {
                        cTerms.add(line.substring(line.length() - C_TERM_LENGTH));
                    }
                }
            }
            finally
            {
                // Close the file even when reading or parsing fails.
                br.close();
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        return new ArrayList<String>(cTerms);
    }

    /** Constructs an instance for the human organism; no further action is taken. */
    public static void main(String[] args)
    {
        new PDZBaseUtils(Constants.HUMAN);
    }

    /** Name, number and sequence of one domain as read from an alignment file. */
    private static class DomainInfo
    {
        String domainName;
        String domainNum;
        String domainSeq;

        /** Two instances are equal when all three fields are equal (nulls match nulls). */
        @Override
        public boolean equals(Object obj)
        {
            if (this == obj)
                return true;
            if (!(obj instanceof DomainInfo))
                return false;
            DomainInfo other = (DomainInfo)obj;
            return same(domainName, other.domainName)
                    && same(domainNum, other.domainNum)
                    && same(domainSeq, other.domainSeq);
        }

        /** Hash over the same three fields compared by equals. */
        @Override
        public int hashCode()
        {
            int result = 17;
            result = 31 * result + hash(domainName);
            result = 31 * result + hash(domainNum);
            result = 31 * result + hash(domainSeq);
            return result;
        }

        private static boolean same(String a, String b)
        {
            return a == null ? b == null : a.equals(b);
        }

        private static int hash(String s)
        {
            return s == null ? 0 : s.hashCode();
        }
    }

    /**
     * Reads the organism's alignment file and, for every domain whose
     * PDZBase name has a gene-name mapping, builds a profile from the
     * trailing five characters of its sequence lines and saves it as a logo
     * and as a peptide file under Data/PDZBase/&lt;organism dir&gt;/.
     *
     * The line following each header is taken as that entry's sequence line;
     * the text before the first "//" on it is used as the domain sequence.
     * Malformed headers, entries without a gene-name mapping and sequence
     * lines shorter than five characters are skipped. Any other failure is
     * printed to standard output and ends the method.
     */
    public void saveProfiles()
    {
        try
        {
            String dir = "";
            if (organism.equals(Constants.HUMAN))
                dir = "Human";
            else if (organism.equals(Constants.MOUSE))
                dir = "Mouse";

            Map<String, List<String>> nameToSeqMap = new HashMap<String, List<String>>();
            Map<String, DomainInfo> nameToInfoMap = new HashMap<String, DomainInfo>();

            // Same file selection as getPDZBaseIntList, so mouse data is read
            // from the mouse alignment file.
            BufferedReader br = new BufferedReader(new FileReader(new File(getAlignmentFileName())));
            try
            {
                String line;
                while ((line = br.readLine()) != null)
                {
                    if (!line.startsWith(">"))
                        continue;

                    String[] header = parseHeader(line);
                    if (header == null)
                        continue;

                    line = br.readLine();
                    if (line == null)
                        break;      // header at end of file without a sequence line

                    String geneName = pdzBaseNameToGeneNameMap.get(header[0] + "-" + header[1]);
                    if (geneName == null || line.length() < C_TERM_LENGTH)
                        continue;

                    DomainInfo domainInfo = new DomainInfo();
                    domainInfo.domainName = geneName;
                    domainInfo.domainNum = header[1];
                    domainInfo.domainSeq = line.split("//")[0];
                    // The most recently read entry for a gene name supplies its domain info.
                    nameToInfoMap.put(geneName, domainInfo);

                    List<String> cTermList = nameToSeqMap.get(geneName);
                    if (cTermList == null)
                    {
                        cTermList = new ArrayList<String>();
                        nameToSeqMap.put(geneName, cTermList);
                    }
                    cTermList.add(line.substring(line.length() - C_TERM_LENGTH));
                }
            }
            finally
            {
                // Close the file even when reading or parsing fails.
                br.close();
            }

            for (Map.Entry<String, List<String>> entry : nameToSeqMap.entrySet())
            {
                String domainName = entry.getKey();
                DomainInfo domainInfo = nameToInfoMap.get(domainName);
                HashSequenceDB seqDB = new HashSequenceDB();

                for (String seqString : entry.getValue())
                {
                    try
                    {
                        // The sequence text doubles as the sequence name.
                        Sequence addSeq = ProteinTools.createProteinSequence(seqString,seqString);
                        seqDB.addSequence(addSeq);
                    }
                    catch(Exception e)
                    {
                        // A sequence that cannot be added is reported and skipped.
                        System.out.println("Exception: " + e);
                    }
                }
                ProteinProfile profile = PDZSVMUtils.makeProfile(domainInfo.domainName,
                        Integer.parseInt(domainInfo.domainNum),
                        domainInfo.domainSeq,
                        PDZSVMUtils.organismShortToLongForm(organism),
                        seqDB);
                PDZSVMUtils.saveAsLogo(profile, DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/"+dir+"/Logos/",Constants.CLASS_YES);
                PDZSVMUtils.saveToProfile(profile,DataFileManager.DATA_ROOT_DIR+"/Data/PDZBase/"+dir+"/PeptideFiles/"+domainName+".pep.txt");
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
    }

}
