package org.genemania.dw.db;

import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.TreeMap;
import org.genemania.dw.entity.EnsemblGene;
import org.genemania.dw.entity.EntrezGene;
import org.genemania.dw.entity.ExtResource;
import org.genemania.dw.entity.ExtResourceGene;
import org.genemania.dw.entity.TAIRGene;
import org.genemania.dw.util.DWUtil;
import org.genemania.dw.util.DefParams;
import org.genemania.dw.util.GenUtil;

/**
 * The interface with the local mirror of the Entrez database. It
 * encapsulates all the queries against that database.
 * Example format of a dbXrefs value from the gene_info table:
 * ZFIN:ZDB-GENE-050731-2|Ensembl:ENSDARG00000008396
 * Gene replacement comes in two flavors, replaceGene () and
 * replaceGeneID (), to reduce re-querying if the DB is cached.
 * Genes can be looked up by GI via loadByGI (), but the GIs themselves
 * are not recorded on the returned gene objects.
 *
 * @author rashadbadrawi
 */

public class EntrezMirrorTables {

    //tables of interest
    private static final String GENE_INFO_TABLE = "Gene_Info";
    private static final String GENE2ACCN_TABLE = "Gene2Accession";
    private static final String GENE2REFSEQ_TABLE = "Gene2RefSeq";
    private static final String GENE_HISTORY_TABLE = "Gene_History";
    private static final String GENE2UNIPROT_TABLE = "Gene_Refseq_Uniprotkb_Collab";
    private static final String TAX_NAMES_TABLE = "TaxonomyNames";
    
    //generic columns
    private static final String TAX_ID_COL = "tax_id";    //shared
    private static final String GENE_ID_COL = "GeneID";   //shared
    //gene_info
    private static final String SYMBOL_COL = "Symbol";
    private static final String SYNS_COL = "Synonyms";
    private static final String XREF_COL = "dbXrefs";
    private static final String DESC_COL = "description";
    private static final String TYPE_COL = "type_of_gene";
    //gene2refseq & gene2accession
    private static final String RNA_ACCN_COL = "RNA_nucleotide_accession";
    private static final String PRO_ACCN_COL = "protein_accession";
    private static final String RNA_GI_ACCN_COL = "RNA_nucleotide_gi";
    private static final String PRO_GI_ACCN_COL = "protein_gi";
    private static final String GEN_GI_ACCN_COL = "genomic_nucleotide_gi";
    //gene2uniprot
    private static final String NCBI_ACCN_COL = "NCBI_protein_accession";
    private static final String UNIPROT_COL = "UniProtKB_protein_accession";
    //GeneHistory
    private static final String OLD_GENE_ID_COL = "Discontinued_GeneID";
    //private static final String OLD_GENE_NAME_COL = "Discontinued_Symbol";
    //Taxonomy Names table
    private static final String NAME_COL = "Name";
    private static final String NAME_CLASS_COL = "NameClass";
    //species specific x-refs
    private static final String SPID_MGD = "MGI";

    //current schema name
    private static String DBName;
    private static PrintWriter log = GenUtil.getDefaultLog();

    static {
        DBName = DefParams.getDefaultProp(DefParams.ENTREZ_MIRROR);
    }
    
    private EntrezMirrorTables () {}            //no instances allowed
    
    /**
     * Loads every gene_info row of a species, together with its RefSeq and
     * UniProt accessions.
     * Unlike the other loaders, this method always closes the connection
     * it obtained from DBUtil before returning.
     *
     * @param speciesName one of the DWUtil.SP_* species names; an
     *        unrecognised name is queried with tax ID -1.
     * @return the loaded genes, keyed by Entrez gene ID.
     * @throws SQLException if any of the underlying queries fails.
     */
    public static TreeMap <String, ExtResource> loadAll
            (String speciesName) throws SQLException {
        
        System.out.println ("loading all Entrez genes...");
        TreeMap <String, ExtResource> entGeneMap = new TreeMap <String, ExtResource> ();
        String loadQuery = geneInfoQuery ();
        log.println (DBUtil.LOAD + ": " + loadQuery);

        Connection con = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            con = DBUtil.getConnection ();
            ps = con.prepareStatement (loadQuery);
            int taxID = getTaxID (speciesName);
            ps.setInt (1, taxID);
            rs = ps.executeQuery ();
            int cnt = 0;
            while (rs.next ()) {
                EntrezGene DBEntGene = getResults (rs, speciesName);
                loadRefSeq (taxID, DBEntGene);
                loadUniprot (DBEntGene);
                entGeneMap.put (DBEntGene.getID (), DBEntGene);
                cnt++;
            }
            System.out.println ("Loaded: " + cnt + " Entrez entries, for " +
                    "species: " + speciesName + " (" + taxID + ")");
        } finally {
            //nested so that a failing statement close cannot skip the
            //connection close.
            try {
                closeStatement (rs, ps);
            } finally {
                closeConnection (con);
            }
        }
        
        return entGeneMap;
    }
    
    /**
     * Loads a single gene, together with its RefSeq and UniProt accessions.
     * The connection is closed only if the query fails.
     *
     * @param speciesName one of the DWUtil.SP_* species names.
     * @param geneID the Entrez gene ID to look up.
     * @return the matching gene (the last one, if several rows match), or
     *         null if there is no match.
     * @throws SQLException if any of the underlying queries fails.
     */
    public static ExtResource load (String speciesName, String geneID) 
                                  throws SQLException {
        
        String loadQuery = geneInfoQuery () + " and " + GENE_ID_COL + " = ?"; 
        log.println (DBUtil.LOAD + ": " + loadQuery);

        Connection con = null;
        PreparedStatement loadSingleStmt = null;
        ResultSet rs = null;
        EntrezGene DBEntGene = null;
        try {
            con = DBUtil.getConnection ();
            loadSingleStmt = con.prepareStatement (loadQuery);
            int taxID = getTaxID (speciesName);
            loadSingleStmt.setInt (1, taxID);
            loadSingleStmt.setString (2, geneID);
            rs = loadSingleStmt.executeQuery ();
            int cnt = 0;
            while (rs.next ()) {
                DBEntGene = getResults (rs, speciesName);
                loadRefSeq (taxID, DBEntGene);
                loadUniprot (DBEntGene);
                cnt++;
            }
            if (cnt > 1) {
                System.err.println ("More than one match for Entrez gene ID: " +
                                    geneID + " " + cnt);
            }
        } catch (SQLException e) {
            closeConnection (con);
            throw e;
        } finally {
            closeStatement (rs, loadSingleStmt);
        }
        
        return DBEntGene;
    }

    /**
     * Adds the RefSeq RNA and protein accessions (without version suffix)
     * recorded for the gene in the gene2refseq table.
     *
     * @param taxID the taxonomy ID of the gene's species.
     * @param entGene the gene to look up and to populate.
     * @throws SQLException if the query fails.
     */
    private static void loadRefSeq (int taxID, EntrezGene entGene) 
            throws SQLException {
        
        String loadQuery = "select " + 
            RNA_ACCN_COL + ", " + PRO_ACCN_COL + 
            " from " + DBName +  "." + GENE2REFSEQ_TABLE +
            " where " + TAX_ID_COL + " = ? and " + GENE_ID_COL + " = ?"; 

        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            Connection con = DBUtil.getConnection ();
            ps = con.prepareStatement (loadQuery);
            ps.setInt (1, taxID);
            ps.setString (2, entGene.getID ());
            rs = ps.executeQuery ();
            while (rs.next ()) {
                addRefSeqIDs (rs, entGene);
            }
        } finally {
            closeStatement (rs, ps);
        }
    }
   
    /**
     * Finds the current genes that replaced a discontinued gene, and loads
     * each of them in full.
     *
     * @param oldEntGene the discontinued gene; its species name and ID are
     *        used for the lookup.
     * @return the loaded replacement genes; replacements that can not be
     *         loaded themselves are reported on stdout and left out.
     * @throws SQLException if any of the underlying queries fails.
     */
    public static ArrayList <ExtResourceGene> replaceGene (ExtResourceGene oldEntGene) throws
                              SQLException {

        ArrayList <String> newIDList = fetchReplacementIDs (oldEntGene);
        ArrayList <ExtResourceGene> replacedList = new ArrayList <ExtResourceGene> ();
        for (int i = 0; i < newIDList.size (); i++) {
            String newID = newIDList.get (i);
            ExtResourceGene newEntGene = (ExtResourceGene)EntrezMirrorTables.load (
                                          oldEntGene.getSpeciesName (), newID);
            if (newEntGene == null) {
                System.out.println ("Replacing gene is deprecated: " + newID);
            } else {
                replacedList.add (newEntGene);
            }
        }
        if (newIDList.size () > 1) {
            System.out.println ("More than one replacement for gene ID: " +
                                oldEntGene.getID ());
        }

        return replacedList;
    }

    /**
     * Finds the IDs of the genes that replaced a discontinued gene, without
     * loading the replacement genes.
     *
     * @param oldEntGene the discontinued gene; its species name and ID are
     *        used for the lookup.
     * @return the replacement gene IDs, possibly empty.
     * @throws SQLException if the query fails.
     */
    public static ArrayList <String> replaceGeneID (ExtResourceGene oldEntGene)
                            throws SQLException {

        ArrayList <String> replacedList = fetchReplacementIDs (oldEntGene);
        if (replacedList.size () > 1) {
            System.out.println ("More than one replacement for gene ID: " +
                                oldEntGene.getID ());
        }

        return replacedList;
    }

    /**
     * Queries the gene history table for the IDs that replaced a
     * discontinued gene. Rows with a hyphen (or null) in place of the new
     * ID are skipped.
     */
    private static ArrayList <String> fetchReplacementIDs (ExtResourceGene oldEntGene)
                            throws SQLException {

        String loadQuery = "select " +
            GENE_ID_COL +
            " from " + DBName +  "." + GENE_HISTORY_TABLE +
            " where " + TAX_ID_COL + " = ? and " + OLD_GENE_ID_COL + " = ?";

        PreparedStatement ps = null;
        ResultSet rs = null;
        ArrayList <String> newIDList = new ArrayList <String> ();
        try {
            Connection con = DBUtil.getConnection ();
            ps = con.prepareStatement (loadQuery);
            ps.setInt (1, getTaxID (oldEntGene.getSpeciesName ()));
            ps.setString (2, oldEntGene.getID ());
            rs = ps.executeQuery ();
            while (rs.next ()) {
                String newID = rs.getString (GENE_ID_COL);
                if (isPresent (newID)) {
                    newIDList.add (newID);
                }
            }
        } finally {
            closeStatement (rs, ps);
        }

        return newIDList;
    }

    /**
     * Adds the UniProt accessions mapped to each of the gene's RefSeq
     * protein accessions. Does nothing if the gene has no RefSeq proteins.
     *
     * @param entGene the gene to populate.
     * @throws SQLException if the query fails.
     */
    private static void loadUniprot (EntrezGene entGene) throws SQLException {
        
        ArrayList <String> refSeqProList = entGene.getRefSeqProList ();
        if (refSeqProList.isEmpty ()) {
            return;
        }
        String loadQuery = "select " + UNIPROT_COL + 
            " from " + DBName +  "." + GENE2UNIPROT_TABLE +
            " where " + NCBI_ACCN_COL + " = ?"; 

        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            Connection con = DBUtil.getConnection ();
            ps = con.prepareStatement (loadQuery);
            //the statement is prepared once and re-executed per accession
            for (int i = 0; i < refSeqProList.size (); i++) {
                ps.setString (1, refSeqProList.get (i));
                rs = ps.executeQuery ();
                while (rs.next ()) {
                    entGene.addUniprotID (rs.getString (UNIPROT_COL));
                }
            }
        } finally {
            closeStatement (rs, ps);
        }
    }
       
    /**
     * Builds a gene from the current gene_info row: ID, symbol, type,
     * description (unless it is a hyphen), synonyms and x-refs.
     *
     * @param rs a result set positioned on a gene_info row.
     * @param speciesName the species name to record on the gene.
     * @return the newly created gene.
     * @throws SQLException if a column can not be read.
     */
    private static EntrezGene getResults (ResultSet rs, String speciesName)
            throws SQLException {
        
        EntrezGene entGene = new EntrezGene ();
        entGene.setID (rs.getString (GENE_ID_COL));
        entGene.setName (rs.getString (SYMBOL_COL));
        entGene.setType (rs.getString (TYPE_COL));
        String desc = rs.getString (DESC_COL);
        if (!GenUtil.HYPHEN.equals (desc)) {
            entGene.setDefinition (desc);     
        } 
        entGene.setSpeciesName (speciesName);
        addSyns (entGene, rs.getString (SYNS_COL));
        addXRefs (entGene, rs.getString (XREF_COL), speciesName);
        
        return entGene;
    }

    /**
     * Adds every entry of a pipe-separated synonyms value, ignoring hyphen
     * entries. A null value is ignored.
     */
    private static void addSyns (EntrezGene entGene, String syns) {

        if (syns == null) {
            return;
        }
        String [] synArr = syns.split ("\\" + GenUtil.PIPE);
        for (int i = 0; i < synArr.length; i++) {
            if (!synArr [i].equals (GenUtil.HYPHEN)) {
                entGene.addSyn (synArr [i]);
            }
        }
    }

    /**
     * Records the Ensembl, TAIR and MGI entries of a pipe-separated dbXrefs
     * value; entries of other resources are ignored, as are a null value
     * and Ensembl/TAIR entries without an ID after the colon.
     */
    private static void addXRefs (EntrezGene entGene, String xrefs,
            String speciesName) {

        if (xrefs == null) {
            return;
        }
        String [] xrefArr = xrefs.split ("\\" + GenUtil.PIPE);
        for (int i = 0; i < xrefArr.length; i++) {
            String xref = xrefArr [i];
            if (xref.equals (GenUtil.HYPHEN)) {
                continue;
            }
            if (xref.startsWith (ExtResourceGene.RES_ENSEMBL)) {
                String ensID = getXRefID (xref);
                if (ensID != null) {
                    EnsemblGene ensGene = new EnsemblGene ();
                    ensGene.setID (ensID);
                    ensGene.setSpeciesName (speciesName);
                    entGene.addXRef (ExtResourceGene.RES_ENSEMBL, ensGene);
                }
            } else if (xref.startsWith (ExtResourceGene.RES_TAIR)) {
                String tairID = getXRefID (xref);
                if (tairID != null) {
                    TAIRGene tGene = new TAIRGene ();
                    tGene.setID (tairID);
                    entGene.addXRef (ExtResourceGene.RES_TAIR, tGene);
                }
            } else if (xref.startsWith (SPID_MGD)) {
                //the MGI entry is stored whole, prefix included
                entGene.addMGDID (xref);
            }
        }
    }

    /**
     * Returns the second colon-separated field of an x-ref entry, or null
     * if the entry has no such field.
     */
    private static String getXRefID (String xref) {

        String [] fieldArr = xref.split (GenUtil.COLON);

        return fieldArr.length > 1 ? fieldArr [1] : null;
    }
    
    /**
     * Maps a species name to its taxonomy ID.
     *
     * @param speciesName one of the DWUtil.SP_* species names.
     * @return the matching DWUtil.TAXID_* value, or -1 if the name is not
     *         one of the supported species.
     */
    private static int getTaxID (String speciesName) {
        
        int taxID = -1;
        
        if (DWUtil.SP_CRESS.equals (speciesName)) {
            taxID = DWUtil.TAXID_AT;
        } else if (DWUtil.SP_WORM.equals (speciesName)) {
            taxID = DWUtil.TAXID_CE;
        } else if (DWUtil.SP_MOUSE.equals (speciesName)) {
            taxID = DWUtil.TAXID_MM;
        } else if (DWUtil.SP_RAT.equals (speciesName)) {
            taxID = DWUtil.TAXID_RN;
        } else if (DWUtil.SP_YEAST.equals (speciesName)) {
            taxID = DWUtil.TAXID_SC;
        } else if (DWUtil.SP_HUMAN.equals (speciesName)) {
            taxID = DWUtil.TAXID_HS;
        } else if (DWUtil.SP_FLY.equals (speciesName)) {
            taxID = DWUtil.TAXID_DM;
        } else if (DWUtil.SP_ECOLI.equals (speciesName)) {
            taxID = DWUtil.TAXID_EC;
        }
        
        return taxID;
    }

    /**
     * Finds the gene whose gene2refseq entry carries the given GI as its
     * RNA, protein or genomic GI. Only the gene ID, the RefSeq accessions
     * of the matching row and the derived UniProt accessions are set.
     * The connection is closed only if the query fails.
     *
     * @param speciesName species to restrict the search to, or null to
     *        search all species (the species name is then left unset).
     * @param GI the GI to look up.
     * @return the matching gene (the last one, if several rows match), or
     *         null if there is no match.
     * @throws SQLException if any of the underlying queries fails.
     */
    public static ExtResource loadByGI (String speciesName, String GI)
                                                         throws SQLException {

        String loadQuery = refSeqQuery (speciesName) +
                     "(" + RNA_GI_ACCN_COL + " = ? or " +
                           PRO_GI_ACCN_COL + " = ? or " +
                           GEN_GI_ACCN_COL + " = ?)";
        log.println (DBUtil.LOAD + ": " + loadQuery);

        Connection con = null;
        PreparedStatement loadStmt = null;
        ResultSet rs = null;
        EntrezGene DBEntGene = null;
        try {
            con = DBUtil.getConnection ();
            loadStmt = con.prepareStatement (loadQuery);
            int index = 1;
            if (speciesName != null) {
                loadStmt.setInt (index++, getTaxID (speciesName));
            }
            loadStmt.setString (index++, GI);
            loadStmt.setString (index++, GI);
            loadStmt.setString (index++, GI);
            rs = loadStmt.executeQuery ();
            DBEntGene = readRefSeqMatches (rs, speciesName, GI);
        } catch (SQLException e) {
            closeConnection (con);
            throw e;
        } finally {
            closeStatement (rs, loadStmt);
        }

        return DBEntGene;
    }

    /**
     * Finds the gene whose gene2refseq entry carries the given RefSeq
     * accession. An accession without a version suffix also matches any
     * version of that accession. Not currently used.
     * The connection is closed only if the query fails.
     *
     * @param speciesName species to restrict the search to, or null to
     *        search all species (the species name is then left unset).
     * @param accnType ExtResource.LIST_REFSEQ_RNA or
     *        ExtResource.LIST_REFSEQ_PRO.
     * @param accnID the accession, with or without version suffix.
     * @return the matching gene (the last one, if several rows match), or
     *         null if there is no match.
     * @throws SQLException if the accession type is not supported, or any
     *         of the underlying queries fails.
     */
    public static ExtResource loadByAccn (String speciesName, String accnType,
                                         String accnID) throws SQLException {

        String accnCol;
        if (ExtResource.LIST_REFSEQ_RNA.equals (accnType)) {
            accnCol = RNA_ACCN_COL;
        } else if (ExtResource.LIST_REFSEQ_PRO.equals (accnType)) {
            accnCol = PRO_ACCN_COL;
        } else {
            //no accession predicate can be built; fail before touching the DB
            throw new SQLException ("Unsupported accession type: " + accnType);
        }
        boolean wildFlag = !accnID.contains (GenUtil.DOT);
        //the predicate is parenthesized so that the species restriction
        //applies to both of its arms ('and' binds tighter than 'or').
        String loadQuery = refSeqQuery (speciesName) + "(";
        if (wildFlag) {
            loadQuery += accnCol + " like (?) or ";
        }
        loadQuery += accnCol + " = ?)";
        log.println (DBUtil.LOAD + ": " + loadQuery);

        Connection con = null;
        PreparedStatement loadStmt = null;
        ResultSet rs = null;
        EntrezGene DBEntGene = null;
        try {
            con = DBUtil.getConnection ();
            loadStmt = con.prepareStatement (loadQuery);
            int index = 1;
            if (speciesName != null) {
                loadStmt.setInt (index++, getTaxID (speciesName));
            }
            if (wildFlag) {
                loadStmt.setString (index++, accnID + ".%");
            } 
            loadStmt.setString (index++, accnID);
            rs = loadStmt.executeQuery ();
            DBEntGene = readRefSeqMatches (rs, speciesName, accnID);
        } catch (SQLException e) {
            closeConnection (con);
            throw e;
        } finally {
            closeStatement (rs, loadStmt);
        }

        return DBEntGene;
    }

    /**
     * Returns the common names recorded for a species.
     *
     * @param taxID the taxonomy ID, must be positive.
     * @return the matching names, possibly empty.
     * @throws SQLException if the query fails.
     */
    public static ArrayList <String> getSpeciesCommonName (int taxID) throws SQLException {

        return getSpeciesName (DWUtil.SP_NAME_COMMON, taxID);
    }

    /**
     * Returns the scientific names recorded for a species; more than one
     * is reported as an error on stderr.
     *
     * @param taxID the taxonomy ID, must be positive.
     * @return the matching names, possibly empty.
     * @throws SQLException if the query fails.
     */
    public static ArrayList <String> getSpeciesScientificName (int taxID) throws SQLException {

        return getSpeciesName (DWUtil.SP_NAME_SCIENTIFIC, taxID);
    }

    /**
     * Returns the names of the given class recorded for a species in the
     * taxonomy names table. The connection is closed only if the query
     * fails.
     *
     * @param speciesNameClass the name class to select.
     * @param taxID the taxonomy ID, must be positive.
     * @return the matching names, possibly empty.
     * @throws IllegalArgumentException if taxID is less than 1.
     * @throws SQLException if the query fails.
     */
    private static ArrayList <String> getSpeciesName (String speciesNameClass, 
                   int taxID) throws SQLException {

        if (taxID < 1) {
            throw new IllegalArgumentException ("Invalid species ID: " + taxID);
        }
        String speciesNameQuery = "select * " +
            " from " + DBName +  "." + TAX_NAMES_TABLE +
            " where " + TAX_ID_COL + " = ? and " + NAME_CLASS_COL + " = ? ";

        Connection con = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        ArrayList <String> speciesNameList = new ArrayList <String> ();
        try {
            con = DBUtil.getConnection ();
            stmt = con.prepareStatement (speciesNameQuery);
            stmt.setInt (1, taxID);
            stmt.setString (2, speciesNameClass);
            rs = stmt.executeQuery ();
            while (rs.next ()) {
                speciesNameList.add (rs.getString (NAME_COL));
            }
            if (speciesNameList.size () > 1 &&
                speciesNameClass.equals (DWUtil.SP_NAME_SCIENTIFIC)) {
                System.err.println ("Error: Duplicate scientific names for species" +
                                    taxID);
            }
        } catch (SQLException e) {
            closeConnection (con);
            throw e;
        } finally {
            closeStatement (rs, stmt);
        }

        return speciesNameList;
    }

    /**
     * Returns the gene_info select shared by loadAll () and load (),
     * restricted to a single tax ID parameter.
     */
    private static String geneInfoQuery () {

        return "select " + 
            GENE_ID_COL + ", " + SYMBOL_COL + ", " + SYNS_COL + ", " + 
            XREF_COL + ", " + DESC_COL  + ", " + TYPE_COL +
            " from " + DBName +  "." + GENE_INFO_TABLE +
            " where " + TAX_ID_COL + " = ?";
    }

    /**
     * Returns the gene2refseq select shared by loadByGI () and
     * loadByAccn (), up to and including the optional tax ID restriction;
     * the caller appends its own predicate.
     */
    private static String refSeqQuery (String speciesName) {

        String loadQuery = "select " +
            TAX_ID_COL + ", " + GENE_ID_COL  + ", " + RNA_ACCN_COL + ", " +
            PRO_ACCN_COL +
            " from " + DBName +  "." + GENE2REFSEQ_TABLE +
            " where ";
        if (speciesName != null) {
            loadQuery += TAX_ID_COL + " = ? and ";
        }

        return loadQuery;
    }

    /**
     * Turns each gene2refseq row of the result set into a gene carrying
     * its ID, RefSeq and UniProt accessions, and returns the last one.
     * More than one row is reported on stderr, quoting the search key.
     */
    private static EntrezGene readRefSeqMatches (ResultSet rs, 
            String speciesName, String searchKey) throws SQLException {

        EntrezGene DBEntGene = null;
        int cnt = 0;
        while (rs.next ()) {
            DBEntGene = new EntrezGene ();
            if (speciesName != null) {              //taxID not used.
                DBEntGene.setSpeciesName (speciesName);
            }
            DBEntGene.setID (rs.getString (GENE_ID_COL));
            addRefSeqIDs (rs, DBEntGene);
            loadUniprot (DBEntGene);
            cnt++;
        }
        if (cnt > 1) {
            System.err.println ("More than one match for Entrez gene ID: " +
                                searchKey + " " + cnt);
        }

        return DBEntGene;
    }

    /**
     * Adds the RNA and protein accessions of the current gene2refseq row
     * to the gene, each to its own list and without version suffix.
     */
    private static void addRefSeqIDs (ResultSet rs, EntrezGene entGene)
            throws SQLException {

        String rnaAccn = rs.getString (RNA_ACCN_COL);
        if (isPresent (rnaAccn)) {
            entGene.addRefSeqRNAID (stripVersion (rnaAccn));
        }
        String proAccn = rs.getString (PRO_ACCN_COL);
        if (isPresent (proAccn)) {
            entGene.addRefSeqProID (stripVersion (proAccn));
        }
    }

    /** Tells whether a column value is neither null nor a hyphen. */
    private static boolean isPresent (String value) {

        return value != null && !GenUtil.HYPHEN.equals (value);
    }

    /** Returns the part of an accession that precedes its first dot. */
    private static String stripVersion (String accn) {

        String [] partArr = accn.split ("\\" + GenUtil.DOT);

        return partArr.length > 0 ? partArr [0] : accn;
    }

    /**
     * Closes the result set and then the statement, skipping nulls; the
     * statement is closed even if closing the result set fails.
     */
    private static void closeStatement (ResultSet rs, PreparedStatement ps)
            throws SQLException {

        try {
            if (rs != null) {
                rs.close ();
            }
        } finally {
            if (ps != null) {
                ps.close ();
            }
        }
    }

    /** Closes the connection, unless it is null or already closed. */
    private static void closeConnection (Connection con) throws SQLException {

        if (con != null && !con.isClosed ()) {
            con.close ();
        }
    }
}
