/*
 * Decompiled with CFR 0.152.
 */
package org.ut.biolab.medsavant.server.vcf;

import com.google.code.externalsorting.ExternalSort;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.samtools.util.BlockCompressedInputStream;
import org.apache.commons.lang.NumberUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ut.biolab.medsavant.server.MedSavantServerEngine;
import org.ut.biolab.medsavant.server.serverapi.LogManager;
import org.ut.biolab.medsavant.server.vcf.VCFHeader;
import org.ut.biolab.medsavant.shared.format.BasicVariantColumns;
import org.ut.biolab.medsavant.shared.model.MedSavantServerJobProgress;
import org.ut.biolab.medsavant.shared.model.SessionExpiredException;
import org.ut.biolab.medsavant.shared.serverapi.LogManagerAdapter;
import org.ut.biolab.medsavant.shared.util.IOUtils;
import org.ut.biolab.medsavant.shared.util.MiscUtils;
import org.ut.biolab.medsavant.shared.vcf.VariantRecord;

public class VCFParser {
    private static final Log LOG = LogFactory.getLog(VCFParser.class);
    private static final String HEADER_CHARS = "#";
    private static final String COMMENT_CHARS = "##";
    private static final Pattern VCF_FORMAT_REGEX = Pattern.compile("^##fileformat=VCFv([\\d+.]+)");
    private static final int VCF_START_INDEX = 1;
    private static final int VCF_ID_INDEX = 2;
    private static final int VCF_REF_INDEX = 3;
    private static final int VCF_ALT_INDEX = 4;
    private static final int VCF_QUALITY_INDEX = 5;
    private static final int VCF_FILTER_INDEX = 6;
    private static final int VCF_INFO_INDEX = 7;
    private static final int VCF_FORMAT_INDEX = 8;
    private static final int VCF_SAMPLE_START_INDEX = 9;
    private static final Pattern VCF_BADREF_REGEX = Pattern.compile("[^ACGTNacgtn]");
    private static final Pattern VCF_SNP_REGEX = Pattern.compile("^[ACGTNacgtn]");
    private static final Pattern VCF_BADALT_REGEX = Pattern.compile("[^ACGTNacgtn:\\d\\[\\]]");
    private static final Pattern VCF_ALT_OLD_1000G_REGEX = Pattern.compile("^<.+>$");
    private static final int LINES_PER_PROGRESSREPORT = 50000;
    private static final Pattern VCF_GT_REGEX = Pattern.compile("([\\d.])(([/|])([\\d.]))*");
    private int lineNumber = 0;
    private int numInvalidRef = 0;
    private int numInvalidAlt = 0;
    private int numSnp = 0;
    private int numTi = 0;
    private int numTv = 0;
    private int numIndels = 0;
    private int numSnp1 = 0;
    private int numTi1 = 0;
    private int numTv1 = 0;
    private int numIndels1 = 0;
    private int numInvalidGT = 0;
    private int numHom = 0;
    private int numHet = 0;
    private int numVariants = 0;
    private String sessID;
    private File vcfFile;
    private MedSavantServerJobProgress jobProgress;
    private static final int EXTERNALSORT_MAX_TMPFILES = 1024;
    private static final Charset EXTERNALSORT_CHARSET = Charset.defaultCharset();
    private static final int TDF_INDEX_OF_CHROM = 4;
    private static final int TDF_INDEX_OF_STARTPOS = 5;
    private static final long MAX_WARNINGS = 1000L;
    private long warningsEmitted = 0L;
    private static final boolean TOOLONG_REFS_GIVE_WARNING = true;
    private static final boolean UNRECOGNIZED_REFS_GIVE_WARNING = true;
    private static final boolean TOOLONG_ALTS_GIVE_WARNING = true;
    private static final boolean UNRECOGNIZED_ALTS_GIVE_WARNING = true;

    public VCFParser(String sessID, File vcfFile, MedSavantServerJobProgress jobProgress) {
        this.sessID = sessID;
        this.vcfFile = vcfFile;
        this.jobProgress = jobProgress;
    }

    public int getNumInvalidRef() {
        return this.numInvalidRef;
    }

    public int getNumInvalidAlt() {
        return this.numInvalidAlt;
    }

    public int getNumSnp() {
        return this.numSnp;
    }

    public int getNumTi() {
        return this.numTi;
    }

    public int getNumTv() {
        return this.numTv;
    }

    public int getNumIndels() {
        return this.numIndels;
    }

    public int getNumSnp1() {
        return this.numSnp1;
    }

    public int getNumTi1() {
        return this.numTi1;
    }

    public int getNumTv1() {
        return this.numTv1;
    }

    public int getNumIndels1() {
        return this.numIndels1;
    }

    public int getNumInvalidGT() {
        return this.numInvalidGT;
    }

    public int getNumHom() {
        return this.numHom;
    }

    public int getNumHet() {
        return this.numHet;
    }

    public int parseVariantsFromReader(BufferedReader r, File outfile, int updateId, int fileId) throws IOException {
        return this.parseVariantsFromReader(r, outfile, updateId, fileId, false);
    }

    public int parseVariantsFromReader(BufferedReader r, File outfile, int updateId, int fileId, boolean includeHomoRef) throws IOException {
        VCFHeader header = null;
        int numRecords = 0;
        String outOfOrderFilename = outfile.getAbsolutePath() + "_ooo";
        BufferedWriter outOfOrderHandle = new BufferedWriter(new FileWriter(outOfOrderFilename, true));
        int variantId = 0;
        int numLinesWritten = 0;
        while (true) {
            String[] nextLine;
            String nextLineString;
            if ((nextLineString = r.readLine()) == null) break;
            ++this.lineNumber;
            if ((nextLineString = nextLineString.trim()).length() == 0) continue;
            String s = "Processed " + numRecords + " lines (" + numLinesWritten + " variants) so far...";
            this.jobProgress.setMessage(this.vcfFile.getName() + " - " + s);
            if (numRecords % 100000 == 0 && numRecords != 0) {
                LOG.info((Object)s);
            }
            if ((nextLine = nextLineString.split("\t"))[0].startsWith(COMMENT_CHARS)) {
                String ver;
                Matcher vcf_format_matcher = VCF_FORMAT_REGEX.matcher(nextLineString);
                if (!vcf_format_matcher.find() || !((double)Float.parseFloat(ver = vcf_format_matcher.group(1)) < 4.0)) continue;
                throw new IllegalArgumentException("VCF version (" + ver + ") is older than version 4");
            }
            if (nextLine[0].startsWith(HEADER_CHARS)) {
                header = this.parseHeader(nextLine);
                continue;
            }
            if (header == null) {
                throw new IOException("Cannot parse headless VCF file");
            }
            List<VariantRecord> records = null;
            try {
                records = this.parseRecord(nextLine, header);
                if (records == null) {
                    continue;
                }
            }
            catch (Exception ex) {
                LOG.error((Object)("Erroneous line: " + nextLineString));
                throw new IOException(ex);
            }
            for (VariantRecord v : records) {
                if (!includeHomoRef && v.getZygosity() == VariantRecord.Zygosity.HomoRef) continue;
                outOfOrderHandle.write(v.toTabString(updateId, fileId, variantId));
                outOfOrderHandle.write("\r\n");
                ++numLinesWritten;
                ++variantId;
            }
            ++numRecords;
        }
        LOG.info((Object)("Reader returned null after " + numLinesWritten + " lines."));
        outOfOrderHandle.close();
        this.jobProgress.setMessage("Sorting variants...");
        LOG.info((Object)"sorting out of order handle");
        VCFParser.sortTDF(outOfOrderFilename, outfile);
        return numLinesWritten;
    }

    static void sortTDF(String unsortedTDF, File sortedTDF) throws IOException {
        boolean eliminateDuplicateRows = false;
        boolean numHeaderLinesToExcludeFromSort = false;
        boolean useGzipForTmpFiles = false;
        Comparator<String> comparator = new Comparator<String>(){

            @Override
            public int compare(String o1, String o2) {
                int c2;
                String[] tokens1 = o1.split("\t");
                String[] tokens2 = o2.split("\t");
                String chr1 = tokens1[4].toLowerCase();
                String chr2 = tokens2[4].toLowerCase();
                chr1 = chr1.substring(1, chr1.length() - 1);
                chr2 = chr2.substring(1, chr2.length() - 1);
                if (chr1.startsWith("chr")) {
                    chr1 = chr1.substring(3);
                }
                if (chr2.startsWith("chr")) {
                    chr2 = chr2.substring(3);
                }
                if (chr1.equals(chr2)) {
                    long pos2;
                    long pos1 = Long.parseLong(tokens1[5].substring(1, tokens1[5].length() - 1));
                    if (pos1 < (pos2 = Long.parseLong(tokens2[5].substring(1, tokens2[5].length() - 1)))) {
                        return -1;
                    }
                    if (pos1 > pos2) {
                        return 1;
                    }
                    return 0;
                }
                int c1 = NumberUtils.isDigits((String)chr1) ? Integer.parseInt(chr1) : (int)chr1.charAt(0);
                int n = c2 = NumberUtils.isDigits((String)chr2) ? Integer.parseInt(chr2) : (int)chr2.charAt(0);
                return c1 < c2 ? -1 : (c1 > c2 ? 1 : 0);
            }
        };
        File uf = new File(unsortedTDF);
        BufferedReader fbr = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(uf), EXTERNALSORT_CHARSET));
        long maxMem = (long)(0.3 * (double)Runtime.getRuntime().maxMemory() / (double)MedSavantServerEngine.getMaxThreads());
        long availMem = ExternalSort.estimateAvailableMemory();
        if (0.5 * (double)availMem < (double)maxMem) {
            maxMem = 0L * availMem;
            LOG.info((Object)("WARNING: Memory is low for sorting, sorting with reduced memory of " + (maxMem >> 20) + " M"));
        } else {
            LOG.info((Object)("Sorting using " + (maxMem >> 20) + "M of memory"));
        }
        List batch = ExternalSort.sortInBatch((BufferedReader)fbr, (long)uf.length(), (Comparator)comparator, (int)1024, (long)maxMem, (Charset)EXTERNALSORT_CHARSET, (File)new File(sortedTDF.getParent()), (boolean)false, (int)0, (boolean)false);
        String finalOutputFileName = sortedTDF.getCanonicalPath();
        File outputFile = new File(finalOutputFileName + "_MERGED");
        ExternalSort.mergeSortedFiles((List)batch, (File)outputFile, (Comparator)comparator, (Charset)EXTERNALSORT_CHARSET, (boolean)false, (boolean)false, (boolean)false);
        if (!IOUtils.moveFile(outputFile, sortedTDF)) {
            throw new IOException("Can't rename merged file " + outputFile.getCanonicalPath() + " to " + sortedTDF.getCanonicalPath());
        }
        LOG.info((Object)("Outputted sorted TDF file to " + sortedTDF));
    }

    static BufferedReader openFile(File vcf) throws FileNotFoundException, IOException {
        if (vcf.getAbsolutePath().endsWith(".gz")) {
            return new BufferedReader(new InputStreamReader((InputStream)new BlockCompressedInputStream(vcf)));
        }
        return new BufferedReader(new FileReader(vcf));
    }

    private VCFHeader parseHeader(String[] headerLine) {
        VCFHeader result = new VCFHeader();
        if (headerLine.length > VCFHeader.getNumMandatoryFields()) {
            for (int i = VCFHeader.getNumMandatoryFields() + 1; i < headerLine.length; ++i) {
                if (headerLine[i] == null || headerLine[i].length() == 0) continue;
                result.addGenotypeLabel(headerLine[i]);
            }
        }
        return result;
    }

    private void messageToUser(LogManagerAdapter.LogType logtype, String msg) {
        try {
            LogManager.getInstance().addServerLog(this.sessID, logtype, msg);
            LOG.info((Object)("sessId=" + this.sessID + " " + msg));
        }
        catch (RemoteException re) {
            LOG.error((Object)re);
            LOG.error((Object)("WARNING: Couldn't log warning due to RemoteException.  Warning: " + msg));
        }
        catch (SessionExpiredException see) {
            LOG.error((Object)see);
            LOG.error((Object)("WARNING: Couldn't log warning due to SessionExpiredException.  Warning: " + msg));
        }
    }

    private void vcf_warning(String msg) {
        if (this.warningsEmitted < 1000L) {
            String warning = this.vcfFile.getName() + ": WARNING (line " + this.lineNumber + "): " + msg;
            this.messageToUser(LogManagerAdapter.LogType.WARNING, warning);
        } else if (this.warningsEmitted == 1000L) {
            String warning = this.vcfFile.getName() + ": Further warnings have been truncated.";
            this.messageToUser(LogManagerAdapter.LogType.WARNING, warning);
        }
        ++this.warningsEmitted;
    }

    private List<VariantRecord> parseRecord(String[] line, VCFHeader h) {
        List<String> ids;
        int numMandatoryFields = VCFHeader.getNumMandatoryFields();
        if (line[7].startsWith("\"") && line[7].endsWith("\"")) {
            line[7] = line[7].substring(1, line[7].length() - 1);
        }
        ArrayList<String> infos = new ArrayList<String>();
        for (int i = numMandatoryFields; i < line.length; ++i) {
            infos.add(line[i]);
        }
        if (infos.isEmpty()) {
            infos.add(".");
            ids = new ArrayList<String>();
            ids.add(".");
        } else {
            ids = h.getGenotypeLabels();
        }
        ArrayList<VariantRecord> records = new ArrayList<VariantRecord>();
        int triedIndex = 0;
        try {
            String ref = line[3].toUpperCase();
            String altStr = line[4].toUpperCase();
            long start = 0L;
            try {
                start = Long.parseLong(line[1]);
            }
            catch (NumberFormatException nex) {
                this.vcf_warning("Invalid (non-numeric) start position detected in VCF4 file: " + line[1]);
                return null;
            }
            if (altStr.equals(".")) {
                return null;
            }
            boolean badRef = false;
            if (ref.length() > BasicVariantColumns.REF.getColumnLength()) {
                this.vcf_warning("Detected reference allele with too many characters (maximum is " + BasicVariantColumns.REF.getColumnLength() + ", " + ref.length() + " detected).  Setting ref=0");
                badRef = true;
                ref = "0";
            } else if (VCF_BADREF_REGEX.matcher(ref).find()) {
                badRef = true;
                this.vcf_warning("Unrecognized reference allele found in VCF4 file (ACGT expected, found " + ref + ") Storing anyway.");
            }
            String[] allAlt = altStr.split(",");
            int altNumber = 0;
            for (String alt : allAlt) {
                long newEnd;
                String newAlt;
                String newRef;
                ++altNumber;
                if (badRef) {
                    ++this.numInvalidRef;
                }
                boolean complex = alt.contains("[") || alt.contains("]");
                boolean snp = false;
                VariantRecord.VariantType variantType = VariantRecord.VariantType.Unknown;
                long newStart = start;
                if (complex) {
                    newRef = ref;
                    newAlt = alt;
                    newEnd = newStart;
                    if (newRef.length() == 1) {
                        variantType = VariantRecord.VariantType.Complex;
                    } else {
                        this.vcf_warning("Unrecognized complex rearrangement detected (ref length expected to be 1, found ref=" + newRef + ".  Storing anyway.");
                    }
                } else {
                    String prefix = StringUtils.getCommonPrefix((String[])new String[]{ref, alt});
                    newRef = ref.substring(prefix.length());
                    newAlt = alt.substring(prefix.length());
                    newEnd = newStart += (long)prefix.length();
                    if (newAlt.length() > BasicVariantColumns.ALT.getColumnLength()) {
                        this.vcf_warning("Skipping alternate allele with too many characters (maximum is " + BasicVariantColumns.ALT.getColumnLength() + ", " + newAlt.length() + " detected).  MedSavant does not yet support sequences of this size.  Storing first 10 characters");
                        newAlt = newAlt.substring(0, Math.min(10, BasicVariantColumns.ALT.getColumnLength()));
                        ++this.numInvalidAlt;
                    }
                    if (newRef.length() == newAlt.length() && VCF_SNP_REGEX.matcher(newRef).matches() && VCF_SNP_REGEX.matcher(newAlt).matches()) {
                        snp = true;
                        ++this.numSnp;
                        if (newRef.equals("A") && newAlt.equals("G") || newRef.equals("G") && newAlt.equals("A") || newRef.equals("C") && newAlt.equals("T") || newRef.equals("T") && newAlt.equals("C")) {
                            ++this.numTi;
                        } else {
                            ++this.numTv;
                        }
                    } else {
                        ++this.numIndels;
                    }
                    if (newAlt.equals("<DEL>")) {
                        newEnd = newStart + (long)newRef.length() - 1L;
                        newAlt = "-";
                        variantType = VariantRecord.VariantType.Deletion;
                    } else if (newAlt.equals(".")) {
                        newAlt = newRef;
                        newEnd = newStart + (long)newRef.length() - 1L;
                        variantType = VariantRecord.VariantType.HomoRef;
                    } else if (snp) {
                        newEnd = newStart + (long)newRef.length() - 1L;
                        variantType = VariantRecord.VariantType.SNP;
                    } else if (!badRef && newRef.length() >= newAlt.length()) {
                        String head = newRef.substring(0, newAlt.length());
                        if (head.equals(newAlt)) {
                            newEnd = newStart + (long)newRef.length() - 1L;
                            newStart += (long)head.length();
                            newRef = newRef.substring(newAlt.length());
                            newAlt = "-";
                            variantType = VariantRecord.VariantType.Deletion;
                        } else {
                            newEnd = newStart + (long)newRef.length() - 1L;
                            variantType = VariantRecord.VariantType.InDel;
                        }
                    } else if (VCF_BADALT_REGEX.matcher(newAlt).find()) {
                        this.vcf_warning("Unrecognized ALT allele detected (ACGTN]:[ expected, found " + alt + ").  Storing anyway.");
                        ++this.numInvalidAlt;
                    } else if (!badRef) {
                        if (newRef.length() == 0) {
                            newRef = "-";
                            --newStart;
                            --newEnd;
                            variantType = VariantRecord.VariantType.Insertion;
                        } else {
                            newEnd = newStart + (long)newRef.length() - 1L;
                            variantType = VariantRecord.VariantType.InDel;
                        }
                    }
                }
                VariantRecord variantRecordTemplate = new VariantRecord(line, newStart, newEnd, newRef, newAlt, altNumber, variantType);
                int indexGT = VCFParser.getIndexGT(line);
                for (int i = 0; i < ids.size(); ++i) {
                    VariantRecord sampleVariantRecord = new VariantRecord(variantRecordTemplate);
                    if (indexGT >= 0) {
                        String chunk = line[numMandatoryFields + i + 1];
                        String gt = chunk.split(":")[indexGT];
                        Matcher gtMatcher = VCF_GT_REGEX.matcher(gt);
                        if (!gtMatcher.find() || indexGT < 0) {
                            this.vcf_warning("SKIPPED VARIANT. Invalid GT field (" + gt.substring(0, Math.min(10, gt.length())) + (gt.length() < 10 ? "" : "...") + ") found in VCF file. cannot determine genotype.");
                            ++this.numInvalidGT;
                            continue;
                        }
                        sampleVariantRecord.setGenotype(gt);
                        try {
                            sampleVariantRecord.setZygosity(VCFParser.calculateZygosity(sampleVariantRecord));
                        }
                        catch (IllegalArgumentException iex) {
                            this.vcf_warning("SKIPPED VARIANT. " + iex.getMessage());
                            continue;
                        }
                        if (sampleVariantRecord.getZygosity() == VariantRecord.Zygosity.Hetero) {
                            ++this.numHet;
                        } else if (sampleVariantRecord.getZygosity() == VariantRecord.Zygosity.HomoAlt) {
                            ++this.numHom;
                        }
                    }
                    if (snp) {
                        ++this.numSnp1;
                        if (ref.equals("A") && alt.equals("G") || ref.equals("G") && alt.equals("A") || ref.equals("C") && alt.equals("T") || ref.equals("T") && alt.equals("C")) {
                            ++this.numTi1;
                        } else {
                            ++this.numTv1;
                        }
                    } else {
                        ++this.numIndels1;
                    }
                    triedIndex = 0;
                    String id = ids.get(i);
                    sampleVariantRecord.setDnaID(id);
                    try {
                        String format = line[VCFHeader.getNumMandatoryFields()].trim();
                        String sampleInfo = line[numMandatoryFields + i + 1];
                        sampleInfo = sampleInfo.replace(";", ",");
                        sampleVariantRecord.setSampleInformation(format, sampleInfo);
                    }
                    catch (Exception e) {
                        // empty catch block
                    }
                    records.add(sampleVariantRecord);
                }
            }
        }
        catch (IllegalArgumentException ex) {
            this.vcf_warning(ex.getMessage());
            return null;
        }
        catch (Exception ex) {
            String lStr = "";
            for (int i = 0; i < line.length; ++i) {
                lStr = lStr + line[i] + "\t";
            }
            LOG.info((Object)("Tried index " + triedIndex + " of line with " + line.length + " entries"));
            String badString = lStr.length() > 300 ? lStr.substring(0, 299) + "..." : lStr;
            LOG.error((Object)("Error parsing line " + badString + ": " + ex.getClass() + " " + MiscUtils.getMessage(ex)));
            ex.printStackTrace();
        }
        if (this.lineNumber % 50000 == 0) {
            this.messageToUser(LogManagerAdapter.LogType.INFO, this.vcfFile.getName() + ": Loaded " + this.lineNumber + " variants...");
        }
        return records;
    }

    private static int getIndexGT(String[] line) {
        if (line.length >= VCFHeader.getNumMandatoryFields() + 1) {
            String[] list = line[VCFHeader.getNumMandatoryFields()].trim().split(":");
            for (int i = 0; i < list.length; ++i) {
                if (!list[i].equals("GT")) continue;
                return i;
            }
        }
        return -1;
    }

    private static VariantRecord.Zygosity calculateZygosity(VariantRecord vr) throws IllegalArgumentException {
        boolean homoRef = vr.getRef().equals(vr.getAlt());
        String gt = vr.getGenotype();
        String[] split = gt.split("/|\\\\|\\|");
        if (split.length < 2 || split[0] == null || split[1] == null || split[0].length() == 0 || split[1].length() == 0) {
            throw new IllegalArgumentException("Invalid genotype field: " + gt);
        }
        try {
            if (split[0].equals(".") || split[1].equals(".")) {
                if (homoRef) {
                    return VariantRecord.Zygosity.HomoRef;
                }
                return VariantRecord.Zygosity.Missing;
            }
            int a = Integer.parseInt(split[0]);
            int b = Integer.parseInt(split[1]);
            if (a == 0 && b == 0) {
                return VariantRecord.Zygosity.HomoRef;
            }
            if (!homoRef) {
                if (a == b) {
                    return VariantRecord.Zygosity.HomoAlt;
                }
                if (a == 0 || b == 0) {
                    return VariantRecord.Zygosity.Hetero;
                }
                return VariantRecord.Zygosity.HeteroTriallelic;
            }
            throw new IllegalArgumentException("Ref and Alt field are equal or Alt=., indicicating HomoRef variant, but genotype (" + gt + ") is invalid or indicates differently.");
        }
        catch (NumberFormatException e) {
            throw new IllegalArgumentException("Invalid Genotype " + gt);
        }
    }
}

