/*
 * Decompiled with CFR 0.152.
 */
package jannovar.io;

import jannovar.common.Constants;
import jannovar.exception.KGParseException;
import jannovar.io.TranscriptDataParser;
import jannovar.reference.TranscriptModel;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

public class UCSCKGParser
extends TranscriptDataParser
implements Constants {
    public static final int NFIELDS = 12;

    public UCSCKGParser(String path) {
        super(path);
    }

    private String addPrefixAndGzipSuffix(String path, String base) {
        return String.format("%s%s.gz", path, base);
    }

    private boolean parseGzipUCSCFiles() {
        String knownGene = this.addPrefixAndGzipSuffix(this.directory_path, "knownGene.txt");
        String knownGeneMrna = this.addPrefixAndGzipSuffix(this.directory_path, "knownGeneMrna.txt");
        String kgXref = this.addPrefixAndGzipSuffix(this.directory_path, "kgXref.txt");
        String known2locus = this.addPrefixAndGzipSuffix(this.directory_path, "knownToLocusLink.txt");
        File f = new File(knownGene);
        if (!f.exists()) {
            System.err.println(String.format("Error: Could not find \"%s\"", f.getName()));
            return false;
        }
        f = new File(knownGeneMrna);
        if (!f.exists()) {
            System.err.println("Error: Could not find knownGeneMrna.txt.gz");
            return false;
        }
        f = new File(kgXref);
        if (!f.exists()) {
            System.err.println("Error: Could not find knownGeneMrnakgXref.txt.gz");
            return false;
        }
        f = new File(known2locus);
        if (!f.exists()) {
            System.err.println("Error: Could not find known2locus.txt.gz");
            return false;
        }
        try {
            this.parseKnownGeneFile(knownGene, true);
            this.parseKnownGeneMrna(knownGeneMrna, true);
            this.parseKnownGeneXref(kgXref, true);
            this.parseKnown2Locus(known2locus, true);
        }
        catch (KGParseException e) {
            System.err.println("[Jannovar] Error parsing UCSC Transcript Definition Files: " + e.toString());
            System.exit(1);
        }
        return true;
    }

    public void parseUCSCFiles() {
        boolean success = this.parseGzipUCSCFiles();
        if (success) {
            return;
        }
        String knownGene = String.format("%s%s", this.directory_path, "knownGene.txt");
        String knownGeneMrna = String.format("%s%s", this.directory_path, "knownGeneMrna.txt");
        String kgXref = String.format("%s%s", this.directory_path, "kgXref.txt");
        String known2locus = String.format("%s%s", this.directory_path, "knownToLocusLink.txt");
        try {
            this.parseKnownGeneFile(knownGene, false);
            this.parseKnownGeneMrna(knownGeneMrna, false);
            this.parseKnownGeneXref(kgXref, false);
            this.parseKnown2Locus(known2locus, false);
        }
        catch (KGParseException kge) {
            System.out.println("UCSCKGParser.java: Error with file input");
            System.out.println(kge.toString());
            System.exit(1);
        }
    }

    public TranscriptModel parseTranscriptModelFromLine(String line) throws KGParseException {
        int i;
        int exonCount;
        int cdsEnd;
        int cdsStart;
        int txEnd;
        int txStart;
        int chromosome;
        TranscriptModel model = TranscriptModel.createTranscriptModel();
        String[] A = line.split("\t");
        if (A.length != 12) {
            String error = String.format("Malformed line in UCSC knownGene.txt file:\n%s\nExpected %d fields but there were %d", line, 12, A.length);
            throw new KGParseException(error);
        }
        model.setAccessionNumber(A[0]);
        try {
            if (A[1].equals("chrX")) {
                chromosome = 23;
            } else if (A[1].equals("chrY")) {
                chromosome = 24;
            } else if (A[1].equals("chrM")) {
                chromosome = 25;
            } else {
                String tmp = A[1].substring(3);
                chromosome = Byte.parseByte(tmp);
            }
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse chromosome field: " + A[1]);
        }
        model.setChromosome((byte)chromosome);
        char strand = A[2].charAt(0);
        if (strand != '+' && strand != '-') {
            throw new KGParseException("Malformed strand: " + A[2]);
        }
        model.setStrand(strand);
        try {
            txStart = Integer.parseInt(A[3]) + 1;
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse txStart:" + A[3]);
        }
        model.setTranscriptionStart(txStart);
        try {
            txEnd = Integer.parseInt(A[4]);
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse txEnd:" + A[4]);
        }
        model.setTranscriptionEnd(txEnd);
        try {
            cdsStart = Integer.parseInt(A[5]) + 1;
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse cdsStart:" + A[5]);
        }
        model.setCdsStart(cdsStart);
        try {
            cdsEnd = Integer.parseInt(A[6]);
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse cdsEnd:" + A[6]);
        }
        model.setCdsEnd(cdsEnd);
        try {
            exonCount = Short.parseShort(A[7]);
        }
        catch (NumberFormatException e) {
            throw new KGParseException("Could not parse exonCount:" + A[7]);
        }
        model.setExonCount((short)exonCount);
        int[] exonStarts = new int[exonCount];
        int[] exonEnds = new int[exonCount];
        String starts = A[8];
        String ends = A[9];
        String[] B = starts.split(",");
        if (B.length != exonCount) {
            String error = String.format("[UCSCKGParser] Malformed exonStarts list: found %d but I expected %d exons", B.length, (short)exonCount);
            error = String.format("%s. This should never happen, the knownGene.txt file may be corrupted", error);
            throw new KGParseException(error);
        }
        for (i = 0; i < exonCount; ++i) {
            try {
                exonStarts[i] = Integer.parseInt(B[i]) + 1;
                continue;
            }
            catch (NumberFormatException e) {
                String error = String.format("[UCSCKGParser] Malformed exon start at position %d of line %s", i, starts);
                error = String.format("%s. This should never happen, the knownGene.txt file may be corrupted", error);
                throw new KGParseException(error);
            }
        }
        B = ends.split(",");
        for (i = 0; i < exonCount; ++i) {
            try {
                exonEnds[i] = Integer.parseInt(B[i]);
                continue;
            }
            catch (NumberFormatException e) {
                String error = String.format("[UCSCKGParser] Malformed exon end at position %d of line %s", i, ends);
                error = String.format("%s. This should never happen, the knownGene.txt file may be corrupted", error);
                throw new KGParseException(error);
            }
        }
        model.setExonStartsAndEnds(exonStarts, exonEnds);
        model.initialize();
        return model;
    }

    public void parseKnownGeneFile(String kgpath, boolean isGzip) throws KGParseException {
        try {
            String line;
            BufferedReader br = this.getBufferedReaderFromFilePath(kgpath, isGzip);
            while ((line = br.readLine()) != null) {
                try {
                    TranscriptModel kg = this.parseTranscriptModelFromLine(line);
                    String id = kg.getAccessionNumber();
                    this.knownGeneMap.put(id, kg);
                }
                catch (KGParseException e) {}
            }
        }
        catch (FileNotFoundException fnfe) {
            String s = String.format("[Jannovar/USCSKGParser] Could not find KnownGene.txt file: %s\n%s", kgpath, fnfe.toString());
            throw new KGParseException(s);
        }
        catch (IOException e) {
            String s = String.format("[Jannovar/USCSKGParser] Exception while parsing UCSC KnownGene file at \"%s\"\n%s", kgpath, e.toString());
            throw new KGParseException(s);
        }
    }

    private void parseKnown2Locus(String locuspath, boolean isGzip) throws KGParseException {
        try {
            String line;
            BufferedReader br = this.getBufferedReaderFromFilePath(locuspath, isGzip);
            int foundID = 0;
            int notFoundID = 0;
            while ((line = br.readLine()) != null) {
                String[] A = line.split("\t");
                if (A.length != 2) {
                    System.err.println("[ERROR] Bad format for UCSC KnownToLocusLink.txt file:\n" + line);
                    System.err.println("[ERROR] Got " + A.length + " fields instead of the expected 2");
                    System.err.println("[ERROR] Fix problem in UCSC file before continuing");
                    System.exit(1);
                }
                String id = A[0];
                Integer geneID = Integer.parseInt(A[1]);
                TranscriptModel kg = (TranscriptModel)this.knownGeneMap.get(id);
                if (kg == null) {
                    ++notFoundID;
                    continue;
                }
                ++foundID;
                kg.setGeneID(geneID);
            }
            br.close();
            String msg = String.format("[INFO] knownToLocusLink contained ids for %d knownGenes (no ids available for %d)", foundID, notFoundID);
            System.out.println(msg);
        }
        catch (FileNotFoundException fnfe) {
            String s = String.format("Exception while parsing UCSC  knownToLocusLink file at \"%s\"\n%s", locuspath, fnfe.toString());
            throw new KGParseException(s);
        }
        catch (IOException e) {
            String s = String.format("Exception while parsing UCSC KnownToLocusfile at \"%s\"\n%s", locuspath, e.toString());
            throw new KGParseException(s);
        }
    }

    private void parseKnownGeneMrna(String mrnapath, boolean isGzip) throws KGParseException {
        try {
            String line;
            BufferedReader br = this.getBufferedReaderFromFilePath(mrnapath, isGzip);
            int kgWithNoSequence = 0;
            int foundSequence = 0;
            while ((line = br.readLine()) != null) {
                String[] A = line.split("\t");
                if (A.length != 2) {
                    System.err.println("[ERROR] Bad format for UCSC KnownGeneMrna.txt file:\n" + line);
                    System.err.println("[ERROR] Got " + A.length + " fields instead of the expected 2");
                    System.err.println("[ERROR] Fix problem in UCSC file before continueing");
                    System.exit(1);
                }
                String id = A[0];
                String seq = A[1].toUpperCase();
                TranscriptModel kg = (TranscriptModel)this.knownGeneMap.get(id);
                if (kg == null) {
                    ++kgWithNoSequence;
                    continue;
                }
                ++foundSequence;
                kg.setSequence(seq);
            }
            br.close();
            System.out.println(String.format("[INFO] Found %d transcript models from UCSC KnownGenes resource, %d of which had sequences", foundSequence, foundSequence - kgWithNoSequence));
        }
        catch (FileNotFoundException fnfe) {
            String s = String.format("Could not find file: %s\n%s", mrnapath, fnfe.toString());
            throw new KGParseException(s);
        }
        catch (IOException ioe) {
            String s = String.format("Exception while parsing UCSC KnownGene FASTA file at \"%s\"\n%s", mrnapath, ioe.toString());
            throw new KGParseException(s);
        }
    }

    private void parseKnownGeneXref(String xrefpath, boolean isGzip) throws KGParseException {
        try {
            String line;
            BufferedReader br = this.getBufferedReaderFromFilePath(xrefpath, isGzip);
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#")) continue;
                String[] A = line.split("\t");
                if (A.length < 8) {
                    String err = String.format("Error, malformed ucsc xref line: %s\nExpected 8 fields but got %d", line, A.length);
                    throw new KGParseException(err);
                }
                String id = A[0];
                String geneSymbol = A[4];
                TranscriptModel kg = (TranscriptModel)this.knownGeneMap.get(id);
                if (kg == null) continue;
                kg.setGeneSymbol(geneSymbol);
            }
            br.close();
        }
        catch (FileNotFoundException fnfe) {
            String err = String.format("Could not find file: %s\n%s", xrefpath, fnfe.toString());
            throw new KGParseException(err);
        }
        catch (IOException e) {
            String err = String.format("Exception while parsing UCSC KnownGene xref file at \"%s\"\n%s", xrefpath, e.toString());
            throw new KGParseException(err);
        }
    }
}

