diff --git a/snpmatch/__init__.py b/snpmatch/__init__.py
index 6260270..ac19083 100644
--- a/snpmatch/__init__.py
+++ b/snpmatch/__init__.py
@@ -128,7 +128,7 @@ def snpmatch_parser(args):
     if os.path.isfile(args['inFile'] + ".snpmatch.npz"):
       os.remove(args['inFile'] + ".snpmatch.npz")
   from snpmatch.core import parsers
-  parsers.parseInput(inFile = args['inFile'], logDebug =  args['logDebug'], outFile = args['outFile'])
+  parsers.potatoParser(inFile = args['inFile'], logDebug =  args['logDebug'], outFile = args['outFile'])
 
 def genotype_cross(args):
   #checkARGs(args)
diff --git a/snpmatch/core/csmatch.py b/snpmatch/core/csmatch.py
index 6779a5b..ef90750 100644
--- a/snpmatch/core/csmatch.py
+++ b/snpmatch/core/csmatch.py
@@ -5,10 +5,9 @@
 import numpy.ma
 import scipy.stats as st
 from pygwas.core import genotype
-import pandas
+import pandas as pd
 import logging
 import os
-import StringIO
 import snpmatch
 import parsers
 import json
@@ -18,225 +17,242 @@
 
 # Arabidopsis chromosome length
 chrlen = np.array((30427671, 19698289, 23459830, 18585056, 26975502, 154478, 154478))
+tair_chrs = ['1', '2', '3', '4', '5']
 
-def getBins(g, binLen):
-  binLen = int(binLen)
-  ChrIndex = np.zeros(0,dtype="int8")
-  LenBins = np.zeros(0, dtype="int16")
-  for i in range(len(g.chrs)):
-    start = g.chr_regions[i][0]
-    end = g.chr_regions[i][1]
-    chr_pos = g.positions[start:end]
-    for j in range(0, chrlen[i], binLen):
-      bin_pos = len(np.where((chr_pos >= j) & (chr_pos < j + binLen))[0])
-      LenBins = np.append(LenBins, bin_pos)
-      ChrIndex = np.append(ChrIndex, g.chrs[i])
-  return(ChrIndex, LenBins)
+def get_bins_echr(real_chrlen, chr_pos, binLen, rel_ix):
+    ind = 0
+    for t in range(1, real_chrlen, binLen):
+        result = []
+        bin_bed = [int(t), int(t) + binLen - 1]
+        for epos in chr_pos[ind:]:
+            if epos >= bin_bed[0]:
+                if epos <= bin_bed[1]:
+                    result.append(ind + rel_ix)
+                elif epos > bin_bed[1] :
+                    yield((bin_bed, result))
+                    break
+                ind = ind + 1
 
-def getBinsSNPs(g_chrs, g_snppos, binLen):
+def get_bins_genome(g, binLen):
     binLen = int(binLen)
-    ChrIndex = np.zeros(0,dtype="int8")
-    LenBins = np.zeros(0, dtype="int16")
-    chrs = np.unique(g_chrs)
-    for i in range(len(chrs)):
-        chr_pos = g_snppos[np.where(g_chrs == chrs[i])[0]]
-        for j in range(0, chrlen[i], binLen):
-            bin_pos = len(np.where((chr_pos >= j) & (chr_pos < j + binLen))[0])
-            LenBins = np.append(LenBins, bin_pos)
-            ChrIndex = np.append(ChrIndex, chrs[i])
-    return(ChrIndex, LenBins)
+    g_chrs = np.char.replace(np.core.defchararray.lower(np.array(g.chrs[0:len(tair_chrs)], dtype="string")), "chr", "")
+    if len(g_chrs) > 7:
+        snpmatch.die("Please change the genome sizes in csmatch module")
+    if not np.array_equal(g_chrs, np.array(tair_chrs)):
+        snpmatch.die("Please change the genome sizes in csmatch module")
+    for chr_ix in range(len(g_chrs)):
+        start = g.chr_regions[chr_ix][0]
+        end = g.chr_regions[chr_ix][1]
+        chr_pos = g.positions[start:end]
+        echr_bins = get_bins_echr(chrlen[chr_ix], chr_pos, binLen, start)
+        for e_bin in echr_bins:
+            yield((chr_ix, e_bin[0], e_bin[1]))
 
-def writeBinData(out_file, i, GenotypeData, ScoreList, NumInfoSites):
-  num_lines = len(GenotypeData.accessions)
-  (likeliScore, likeliHoodRatio) = snpmatch.calculate_likelihoods(ScoreList, NumInfoSites)
-  if len(likeliScore) > 0:
-    NumAmb = np.where(likeliHoodRatio < snpmatch.lr_thres)[0]
-    if len(NumAmb) >= 1 and len(NumAmb) < num_lines:
-      try:
-        nextLikeli = np.nanmin(likeliHoodRatio[np.where(likeliHoodRatio > snpmatch.lr_thres)[0]])
-      except:
-        nextLikeli = 1
-      for k in NumAmb:
-        score = float(ScoreList[k])/NumInfoSites[k]
-        out_file.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (GenotypeData.accessions[k], int(ScoreList[k]), NumInfoSites[k], score, likeliScore[k], nextLikeli, len(NumAmb), i+1))
+def get_bins_arrays(g_chrs, g_snppos, binLen):
+    g_chrs = np.char.replace(np.core.defchararray.lower(np.array(g_chrs, dtype="string")), "chr", "")
+    g_chrs_uq = np.unique(g_chrs)
+    g_chrs_uq = np.char.replace(np.core.defchararray.lower(np.array(g_chrs_uq[0:len(tair_chrs)], dtype="string")), "chr", "")
+    if len(g_chrs_uq) > 7:
+        snpmatch.die("Please change the genome sizes in csmatch module")
+    if not np.array_equal(g_chrs_uq, np.array(tair_chrs)):
+        snpmatch.die("Please change the genome sizes in csmatch module")
+    for chr_ix in range(len(g_chrs_uq)):
+        chr_pos_ix = np.where(g_chrs == g_chrs_uq[chr_ix])[0]
+        echr_bins = get_bins_echr(chrlen[chr_ix], g_snppos[chr_pos_ix], binLen, chr_pos_ix[0])
+        for e_bin in echr_bins:
+            yield((chr_ix, e_bin[0], e_bin[1]))
 
-def crossWindower(binLen, snpCHR, snpPOS, snpWEI, DPmean, GenotypeData, outFile):
-  NumSNPs = len(snpCHR)
-  num_lines = len(GenotypeData.accessions)
-  NumMatSNPs = 0
-  chunk_size = 1000
-  TotScoreList = np.zeros(num_lines, dtype="uint32")
-  TotNumInfoSites = np.zeros(num_lines, dtype="uint32")
-  (ChrBins, PosBins) = getBins(GenotypeData, binLen)
-  out_file = open(outFile, 'w')
-  for i in range(len(PosBins)):
-    start = np.sum(PosBins[0:i])
-    end = start + PosBins[i]
-    pos = GenotypeData.positions[start:end]
-    perchrTarPos = np.where(snpCHR == ChrBins[i])[0]
-    perchrtarSNPpos = snpPOS[perchrTarPos]
-    matchedAccInd = np.where(np.in1d(pos, perchrtarSNPpos))[0] + start
-    matchedTarInd = np.where(np.in1d(perchrtarSNPpos, pos))[0]
-    matchedTarWei = snpWEI[perchrTarPos[matchedTarInd],]
-    TarGTs0 = np.zeros(len(matchedTarInd), dtype="int8")
-    TarGTs1 = np.ones(len(matchedTarInd), dtype="int8") + 1
-    TarGTs2 = np.ones(len(matchedTarInd), dtype="int8")
-    NumMatSNPs = NumMatSNPs + len(matchedAccInd)
-    ScoreList = np.zeros(num_lines, dtype="uint32")
-    NumInfoSites = np.zeros(num_lines, dtype="uint32")
-    for j in range(0, len(matchedAccInd), chunk_size):
-      t1001SNPs = GenotypeData.snps[matchedAccInd[j:j+chunk_size],:]
-      samSNPs0 = np.reshape(np.repeat(TarGTs0[j:j+chunk_size], num_lines), (len(TarGTs0[j:j+chunk_size]),num_lines))
-      samSNPs1 = np.reshape(np.repeat(TarGTs1[j:j+chunk_size], num_lines), (len(TarGTs1[j:j+chunk_size]),num_lines))
-      samSNPs2 = np.reshape(np.repeat(TarGTs2[j:j+chunk_size], num_lines), (len(TarGTs2[j:j+chunk_size]),num_lines))
-      tempScore0 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs0, dtype=int).T, matchedTarWei[j:j+chunk_size,0]).T, axis=0)
-      tempScore1 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs1, dtype=int).T, matchedTarWei[j:j+chunk_size,1]).T, axis=0)
-      tempScore2 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs2, dtype=int).T, matchedTarWei[j:j+chunk_size,2]).T, axis=0)
-      ScoreList = ScoreList + tempScore0 + tempScore1 + tempScore2
-      if(len(TarGTs0[j:j+chunk_size]) >= 1):
-        NumInfoSites = NumInfoSites + len(TarGTs0[j:j+chunk_size]) - np.sum(numpy.ma.masked_less(t1001SNPs, 0).mask.astype(int), axis = 0)
-    TotScoreList = TotScoreList + ScoreList
-    TotNumInfoSites = TotNumInfoSites + NumInfoSites
-    writeBinData(out_file, i, GenotypeData, ScoreList, NumInfoSites)
-    if i % 50 == 0:
-      log.info("Done analysing %s positions", NumMatSNPs)
-  out_file.close()
-  return (TotScoreList, TotNumInfoSites, NumMatSNPs)
+
+def writeBinData(out_file, e_g, GenotypeData, ScoreList, NumInfoSites):
+    num_lines = len(GenotypeData.accessions)
+    (likeliScore, likeliHoodRatio) = snpmatch.GenotyperOutput.calculate_likelihoods(ScoreList, NumInfoSites)
+    bin_bed = str(GenotypeData.chrs[e_g[0]]) + ',' + str(e_g[1][0]) + ',' + str(e_g[1][1])
+    if len(likeliScore) > 0:
+        NumAmb = np.where(likeliHoodRatio < snpmatch.lr_thres)[0]
+        if len(NumAmb) >= 1 and len(NumAmb) < num_lines:
+            try:
+                nextLikeli = np.nanmin(likeliHoodRatio[np.where(likeliHoodRatio > snpmatch.lr_thres)[0]])
+            except:
+                nextLikeli = 1
+            for k in NumAmb:
+                score = float(ScoreList[k])/NumInfoSites[k]
+                out_file.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (GenotypeData.accessions[k], int(ScoreList[k]), NumInfoSites[k], score, likeliScore[k], nextLikeli, len(NumAmb), bin_bed))
+
+def crossWindower(inputs, GenotypeData, binLen, outFile):
+    inputs.filter_chr_names(GenotypeData)
+    num_lines = len(GenotypeData.accessions)
+    NumMatSNPs = 0
+    chunk_size = 1000
+    TotScoreList = np.zeros(num_lines, dtype="uint32")
+    TotNumInfoSites = np.zeros(num_lines, dtype="uint32")
+    iter_bins_genome = get_bins_genome(GenotypeData, binLen)
+    iter_bins_snps = get_bins_arrays(inputs.chrs, inputs.pos, binLen)
+    out_file = open(outFile, 'w')
+    bin_inds = 0
+    winds_chrs = np.zeros(0, dtype = GenotypeData.chrs.dtype)
+    for e_g, e_s in zip(iter_bins_genome, iter_bins_snps):
+        g_bin_pos = GenotypeData.positions[e_g[2]]
+        perchrtarSNPpos = inputs.pos[e_s[2]]
+        matchedAccInd = np.array(e_g[2], dtype=int)[np.where(np.in1d(g_bin_pos, perchrtarSNPpos))[0]]
+        matchedTarInd = np.array(e_s[2], dtype=int)[np.where(np.in1d(perchrtarSNPpos, g_bin_pos))[0]]
+        matchedTarWei = inputs.wei[matchedTarInd,]
+        TarGTs0 = np.zeros(len(matchedTarInd), dtype="int8")
+        TarGTs1 = np.ones(len(matchedTarInd), dtype="int8") + 1
+        TarGTs2 = np.ones(len(matchedTarInd), dtype="int8")
+        NumMatSNPs = NumMatSNPs + len(matchedAccInd)
+        ScoreList = np.zeros(num_lines, dtype="uint32")
+        NumInfoSites = np.zeros(num_lines, dtype="uint32")
+        for j in range(0, len(matchedAccInd), chunk_size):
+            t1001SNPs = GenotypeData.snps[matchedAccInd[j:j+chunk_size],:]
+            samSNPs0 = np.reshape(np.repeat(TarGTs0[j:j+chunk_size], num_lines), (len(TarGTs0[j:j+chunk_size]),num_lines))
+            samSNPs1 = np.reshape(np.repeat(TarGTs1[j:j+chunk_size], num_lines), (len(TarGTs1[j:j+chunk_size]),num_lines))
+            samSNPs2 = np.reshape(np.repeat(TarGTs2[j:j+chunk_size], num_lines), (len(TarGTs2[j:j+chunk_size]),num_lines))
+            tempScore0 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs0, dtype=int).T, matchedTarWei[j:j+chunk_size,0]).T, axis=0)
+            tempScore1 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs1, dtype=int).T, matchedTarWei[j:j+chunk_size,1]).T, axis=0)
+            tempScore2 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs2, dtype=int).T, matchedTarWei[j:j+chunk_size,2]).T, axis=0)
+            ScoreList = ScoreList + tempScore0 + tempScore1 + tempScore2
+            if(len(TarGTs0[j:j+chunk_size]) >= 1):
+                NumInfoSites = NumInfoSites + len(TarGTs0[j:j+chunk_size]) - np.sum(numpy.ma.masked_less(t1001SNPs, 0).mask.astype(int), axis = 0)
+        TotScoreList = TotScoreList + ScoreList
+        TotNumInfoSites = TotNumInfoSites + NumInfoSites
+        writeBinData(out_file, e_g, GenotypeData, ScoreList, NumInfoSites)
+        bin_inds += 1
+        winds_chrs = np.append( winds_chrs, inputs.chr_list[e_g[0]] )
+        if bin_inds % 50 == 0:
+            log.info("Done analysing %s positions", NumMatSNPs)
+    out_file.close()
+    overlap = float(NumMatSNPs)/len(inputs.filter_inds_ix)
+    result = snpmatch.GenotyperOutput(GenotypeData.accessions, TotScoreList, TotNumInfoSites, overlap, NumMatSNPs, inputs.dp)
+    result.winds_chrs = winds_chrs
+    return(result)
 
 def getHomoWindows(likeLiwind):
-  snp_thres_wind = np.nanmean(likeLiwind[2]) - np.std(likeLiwind[2])
-  x_info = np.unique(likeLiwind[7])
-  homo_wind = np.zeros(0, dtype = "int")
-  for i in x_info:
-    eWinds = likeLiwind.iloc[np.where(likeLiwind[7] == i)[0]]
-    if np.nanmean(eWinds[3]) > snpmatch.prob_thres and np.nanmean(eWinds[2]) > snp_thres_wind:
-      homo_wind = np.append(homo_wind, i)
-  return homo_wind
+    snp_thres_wind = np.nanmean(likeLiwind[2]) - np.std(likeLiwind[2])
+    x_info = np.unique(likeLiwind[7])
+    homo_wind = np.zeros(0, dtype = "int")
+    for i in x_info:
+        eWinds = likeLiwind.iloc[np.where(likeLiwind[7] == i)[0]]
+        if np.nanmean(eWinds[3]) > snpmatch.prob_thres and np.nanmean(eWinds[2]) > snp_thres_wind:
+            homo_wind = np.append(homo_wind, i)
+    return(homo_wind)
 
-def crossInterpreter(GenotypeData, binLen, outID):
-  ## ScoreFile should be one from crossF1genotyper
-  ## Output file is from the crossIdentifier
-  cs_thres = 0.9
-  outFile = outID + '.windowscore.txt'
-  scoreFile = outID + '.scores.txt'
-  log.info("running cross interpreter!")
-  num_lines = len(GenotypeData.accessions)
-  likeLiwind = pandas.read_table(outFile, header=None)
-  ScoreAcc = pandas.read_table(scoreFile, header=None)
-  topHitsDict = json.load(open(scoreFile + ".matches.json", 'r'))
-  if topHitsDict['interpretation']['case'] == 3:
-    homo_wind = getHomoWindows(likeLiwind)
-    homo_acc = np.unique(likeLiwind[0][np.where(np.in1d(likeLiwind[7], homo_wind))[0]],return_counts=True)
-    matches_dict = [(homo_acc[0][i].astype("string"), homo_acc[1][i]) for i in np.argsort(-homo_acc[1])]
-    topHitsDict['matches'] = matches_dict
-    f1matches = ScoreAcc.iloc[~np.in1d(ScoreAcc[0], GenotypeData.accessions)].reset_index()
-    topMatch = np.argsort(f1matches[5])[0]  ## top F1 match sorted based on likelihood
-    if f1matches[3][topMatch] > cs_thres:
-      mother = f1matches[0][topMatch].split("x")[0]
-      father = f1matches[0][topMatch].split("x")[1]
-      topHitsDict['interpretation']['text'] = "Sample may be a F1! or a contamination!"
-      topHitsDict['interpretation']['case'] = 5
-      topHitsDict['parents'] = {'mother': [mother,1], 'father': [father,1]}
-      topHitsDict['genotype_windows'] = {'chr_bins': None, 'coordinates': {'x': None, 'y': None}}
-    else:
-      (ChrBins, PosBins) = getBins(GenotypeData, binLen)
-      ## Get exactly the homozygous windows with one count
-      clean = np.unique(likeLiwind[0][np.where(likeLiwind[6] == 1)[0]], return_counts = True)
-      if len(clean[0]) > 0:  ## Check if there are atlease one homozygous window
-        parents = clean[0][np.argsort(-clean[1])[0:2]].astype("string")
-        parents_counts = clean[1][np.argsort(-clean[1])[0:2]].astype("int")
-        xdict = np.array(np.unique(likeLiwind[7]), dtype="int")
-        ydict = np.repeat("NA", len(xdict)).astype("a25")
-        if len(parents) == 1:
-          topHitsDict['interpretation']['text'] = "Sample may be a F2! but only one parent found!"
-          topHitsDict['interpretation']['case'] = 6
-          topHitsDict['parents'] = {'mother': [parents[0], parents_counts[0]], 'father': ["NA", "NA"]}
-          par1_ind = likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[0]) & np.in1d(likeLiwind[7], homo_wind))[0]]
-          ydict[np.where(np.in1d(xdict,par1_ind))[0]] = parents[0]
+def crossInterpreter(snpmatch_result, GenotypeData, binLen, outID):
+    ## ScoreFile should be one from crossF1genotyper
+    ## Output file is from the crossIdentifier
+    cs_thres = 0.9
+    outFile = outID + '.windowscore.txt'
+    scoreFile = outID + '.scores.txt'
+    log.info("running cross interpreter!")
+    likeLiwind = pandas.read_table(outFile, header=None)
+    ScoreAcc = pandas.read_table(scoreFile, header=None)
+    topHitsDict = json.load(open(scoreFile + ".matches.json", 'r'))
+    if topHitsDict['interpretation']['case'] == 3:
+        homo_wind = getHomoWindows(likeLiwind)
+        homo_acc = np.unique(likeLiwind[0][np.where(np.in1d(likeLiwind[7], homo_wind))[0]],return_counts=True)
+        matches_dict = [(homo_acc[0][i].astype("string"), homo_acc[1][i]) for i in np.argsort(-homo_acc[1])]
+        topHitsDict['matches'] = matches_dict
+        f1matches = ScoreAcc.iloc[~np.in1d(ScoreAcc[0], GenotypeData.accessions)].reset_index()
+        topMatch = np.argsort(f1matches[5])[0]  ## top F1 match sorted based on likelihood
+        if f1matches[3][topMatch] > cs_thres:
+            mother = f1matches[0][topMatch].split("x")[0]
+            father = f1matches[0][topMatch].split("x")[1]
+            topHitsDict['interpretation']['text'] = "Sample may be a F1! or a contamination!"
+            topHitsDict['interpretation']['case'] = 5
+            topHitsDict['parents'] = {'mother': [mother,1], 'father': [father,1]}
+            topHitsDict['genotype_windows'] = {'chr_bins': None, 'coordinates': {'x': None, 'y': None}}
         else:
-          topHitsDict['interpretation']['text'] = "Sample may be a F2!"
-          topHitsDict['interpretation']['case'] = 6
-          topHitsDict['parents'] = {'mother': [parents[0], parents_counts[0]], 'father': [parents[1], parents_counts[1]]}
-          NumChrs = np.unique(ChrBins, return_counts=True)
-          chr_bins = dict(('Chr%s' % NumChrs[0][i], NumChrs[1][i]) for i in range(len(NumChrs[0])))
-          par1_ind = np.array(likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[0]) & np.in1d(likeLiwind[7], homo_wind))[0]])
-          par2_ind = np.array(likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[1]) & np.in1d(likeLiwind[7], homo_wind))[0]])
-          ydict[np.where(np.in1d(xdict,par1_ind))[0]] = parents[0]
-          ydict[np.where(np.in1d(xdict,par2_ind))[0]] = parents[1]
-        xdict = xdict.tolist()
-        ydict = ydict.tolist()
-        topHitsDict['genotype_windows'] = {'chr_bins': chr_bins, 'coordinates': {'x': xdict, 'y': ydict}}
-      else:   ## No homozygous window found!
-        topHitsDict['interpretation']['case'] = 7
-        topHitsDict['interpretation']['text'] = "Sample may just be contamination!"
-        topHitsDict['genotype_windows'] = {'chr_bins': None, 'coordinates': {'x': None, 'y': None}}
-        topHitsDict['parents'] = {'mother': [None,0], 'father': [None,1]}
-    with open(outID + ".matches.json", "w") as out_stats:
-      out_stats.write(json.dumps(topHitsDict))
+            ## Get exactly the homozygous windows with one count
+            clean = np.unique(likeLiwind[0][np.where(likeLiwind[6] == 1)[0]], return_counts = True)
+            if len(clean[0]) > 0:  ## Check if there are atlease one homozygous window
+                parents = clean[0][np.argsort(-clean[1])[0:2]].astype("string")
+                parents_counts = clean[1][np.argsort(-clean[1])[0:2]].astype("int")
+                xdict = np.array(np.unique(likeLiwind[7]), dtype="int")
+                ydict = np.repeat("NA", len(xdict)).astype("a25")
+                if len(parents) == 1:
+                    topHitsDict['interpretation']['text'] = "Sample may be a F2! but only one parent found!"
+                    topHitsDict['interpretation']['case'] = 6
+                    topHitsDict['parents'] = {'mother': [parents[0], parents_counts[0]], 'father': ["NA", "NA"]}
+                    par1_ind = likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[0]) & np.in1d(likeLiwind[7], homo_wind))[0]]
+                    ydict[np.where(np.in1d(xdict,par1_ind))[0]] = parents[0]
+                else:
+                    topHitsDict['interpretation']['text'] = "Sample may be a F2!"
+                    topHitsDict['interpretation']['case'] = 6
+                    topHitsDict['parents'] = {'mother': [parents[0], parents_counts[0]], 'father': [parents[1], parents_counts[1]]}
+                    NumChrs = np.unique(snpmatch_result.winds_chrs, return_counts=True)
+                    chr_bins = dict(( NumChrs[0][i], NumChrs[1][i]) for i in range(len(NumChrs[0])))
+                    par1_ind = np.array(likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[0]) & np.in1d(likeLiwind[7], homo_wind))[0]])
+                    par2_ind = np.array(likeLiwind[7][np.where((likeLiwind[0].astype("string") == parents[1]) & np.in1d(likeLiwind[7], homo_wind))[0]])
+                    ydict[np.where(np.in1d(xdict,par1_ind))[0]] = parents[0]
+                    ydict[np.where(np.in1d(xdict,par2_ind))[0]] = parents[1]
+                xdict = xdict.tolist()
+                ydict = ydict.tolist()
+                topHitsDict['genotype_windows'] = {'chr_bins': chr_bins, 'coordinates': {'x': xdict, 'y': ydict}}
+            else:   ## No homozygous window found!
+                topHitsDict['interpretation']['case'] = 7
+                topHitsDict['interpretation']['text'] = "Sample may just be contamination!"
+                topHitsDict['genotype_windows'] = {'chr_bins': None, 'coordinates': {'x': None, 'y': None}}
+                topHitsDict['parents'] = {'mother': [None,0], 'father': [None,1]}
+        with open(outID + ".matches.json", "w") as out_stats:
+            out_stats.write(json.dumps(topHitsDict))
 
-def crossIdentifier(binLen, snpCHR, snpPOS, snpWEI, DPmean, GenotypeData, GenotypeData_acc, outID):
-  ## Get tophit accessions
-  # sorting based on the final scores
-  if not outID:
-      outID = "cross.identifier"
-  outFile = outID + '.windowscore.txt'
-  scoreFile = outID + '.scores.txt'
-  NumSNPs = len(snpCHR)
-  num_lines = len(GenotypeData.accessions)
-  (ScoreList, NumInfoSites, NumMatSNPs) = crossWindower(binLen, snpCHR, snpPOS, snpWEI, DPmean, GenotypeData, outFile)
-  probScore = [float(ScoreList[i])/NumInfoSites[i] for i in range(num_lines)]
-  probScore = np.array(probScore, dtype = float)
-  snpmatch.print_topHits(scoreFile + ".matches.json", GenotypeData.accessions, ScoreList, NumInfoSites, float(NumMatSNPs)/NumSNPs, NumMatSNPs)
-  log.info("simulating F1s for top 10 accessions")
-  Accessions = np.copy(GenotypeData.accessions)
-  TopHitAccs = np.argsort(-probScore)[0:10]
-  for (i, j) in itertools.combinations(TopHitAccs, 2):
-    p1 = GenotypeData_acc.snps[:,i]
-    p2 = GenotypeData_acc.snps[:,j]
-    score = 0
-    numinfo = 0
-    NumMatSNPs = 0
-    for ind,echr in enumerate(parsers.parseChrName(GenotypeData.chrs)[0]):
-      perchrTarPos = np.where(snpCHR == echr)[0]
-      perchrtarSNPpos = snpPOS[perchrTarPos]
-      start = GenotypeData.chr_regions[ind][0]
-      end = GenotypeData.chr_regions[ind][1]
-      chrpositions = GenotypeData.positions[start:end]
-      matchedAccInd = np.where(np.in1d(chrpositions, perchrtarSNPpos))[0] + start
-      matchedTarInd = np.where(np.in1d(perchrtarSNPpos, chrpositions))[0]
-      NumMatSNPs = NumMatSNPs + len(matchedTarInd)
-      gtp1 = p1[matchedAccInd]
-      gtp2 = p2[matchedAccInd]
-      matchedTarWEI = snpWEI[perchrTarPos[matchedTarInd],]
-      homalt = np.where((gtp1 == 1) & (gtp2 == 1))[0]
-      homref = np.where((gtp1 == 0) & (gtp2 == 0))[0]
-      het = np.where((gtp1 != -1) & (gtp2 != -1) & (gtp1 != gtp2))[0]
-      score = score + np.sum(matchedTarWEI[homalt, 2]) + np.sum(matchedTarWEI[homref, 0]) + np.sum(matchedTarWEI[het, 1])
-      numinfo = numinfo + len(homalt) + len(homref) + len(het)
-    ScoreList = np.append(ScoreList, score)
-    NumInfoSites = np.append(NumInfoSites, numinfo)
-    acc = GenotypeData.accessions[i] + "x" + GenotypeData.accessions[j]
-    Accessions = np.append(Accessions, acc)
-  log.info("writing output!")
-  snpmatch.print_out_table(scoreFile, Accessions, ScoreList, NumInfoSites, NumMatSNPs, DPmean)
-  crossInterpreter(GenotypeData, binLen, outID)
+def crossIdentifier(inputs, GenotypeData, GenotypeData_acc, binLen, outID):
+    ## Get tophit accessions
+    # sorting based on the final scores
+    inputs.filter_chr_names(GenotypeData)
+    if not outID:
+        outID = "cross.identifier"
+    outFile = outID + '.windowscore.txt'
+    scoreFile = outID + '.scores.txt'
+    snpmatch_result = crossWindower(inputs, GenotypeData, binLen, outFile)
+    snpmatch_result.print_json_output( scoreFile + ".matches.json" )
+    log.info("simulating F1s for top 10 accessions")
+    TopHitAccs = np.argsort(-snpmatch_result.probabilies)[0:10]
+    for (i, j) in itertools.combinations(TopHitAccs, 2):
+        p1 = GenotypeData_acc.snps[:,i]
+        p2 = GenotypeData_acc.snps[:,j]
+        score = 0
+        numinfo = 0
+        NumMatSNPs = 0
+        for ind,echr in enumerate(inputs.chr_list):
+            perchrTarPos = np.where(inputs.chrs_nochr == echr)[0]
+            perchrtarSNPpos = inputs.pos[perchrTarPos]
+            start = GenotypeData.chr_regions[ind][0]
+            end = GenotypeData.chr_regions[ind][1]
+            chrpositions = GenotypeData.positions[start:end]
+            matchedAccInd = np.where(np.in1d(chrpositions, perchrtarSNPpos))[0] + start
+            matchedTarInd = np.where(np.in1d(perchrtarSNPpos, chrpositions))[0]
+            NumMatSNPs = NumMatSNPs + len(matchedTarInd)
+            gtp1 = p1[matchedAccInd]
+            gtp2 = p2[matchedAccInd]
+            matchedTarWEI = inputs.wei[perchrTarPos[matchedTarInd],]
+            homalt = np.where((gtp1 == 1) & (gtp2 == 1))[0]
+            homref = np.where((gtp1 == 0) & (gtp2 == 0))[0]
+            het = np.where((gtp1 != -1) & (gtp2 != -1) & (gtp1 != gtp2))[0]
+            score = score + np.sum(matchedTarWEI[homalt, 2]) + np.sum(matchedTarWEI[homref, 0]) + np.sum(matchedTarWEI[het, 1])
+            numinfo = numinfo + len(homalt) + len(homref) + len(het)
+        snpmatch_result.scores = np.append(snpmatch_result.scores, score)
+        snpmatch_result.ninfo = np.append(snpmatch_result.ninfo, numinfo)
+        snpmatch_result.accs = np.append( snpmatch_result.accs, GenotypeData.accessions[i] + "x" + GenotypeData.accessions[j] )
+    log.info("writing output!")
+    snpmatch_result.print_out_table( scoreFile )
+    crossInterpreter(snpmatch_result, GenotypeData, binLen, outID)
 
 def potatoCrossIdentifier(args):
-  (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = parsers.parseInput(inFile = args['inFile'], logDebug = args['logDebug'])
-  log.info("loading genotype files!")
-  GenotypeData = genotype.load_hdf5_genotype_data(args['hdf5File'])
-  GenotypeData_acc = genotype.load_hdf5_genotype_data(args['hdf5accFile'])
-  log.info("done!")
-  log.info("running cross identifier!")
-  crossIdentifier(args['binLen'],snpCHR, snpPOS, snpWEI, DPmean, GenotypeData, GenotypeData_acc, args['outFile'])
-  log.info("finished!")
+    inputs = parsers.ParseInputs(inFile = args['inFile'], logDebug = args['logDebug'])
+    log.info("loading genotype files!")
+    GenotypeData = genotype.load_hdf5_genotype_data(args['hdf5File'])
+    GenotypeData_acc = genotype.load_hdf5_genotype_data(args['hdf5accFile'])
+    log.info("done!")
+    log.info("running cross identifier!")
+    crossIdentifier(inputs, GenotypeData, GenotypeData_acc, args['binLen'], args['outFile'])
+    log.info("finished!")
 
 def getWindowGenotype(matchedP1, totalMarkers, lr_thres = 2.706):
     ### Choose lr_thres as 2.706 which is at 0.1 alpha level with 1 degree of freedom
     pval = 'NA'
     geno = 'NA'
     if totalMarkers > 0:
-        likelihood = snpmatch.calculate_likelihoods([matchedP1, totalMarkers - matchedP1], [totalMarkers, totalMarkers])
+        likelihood = snpmatch.GenotyperOutput.calculate_likelihoods([matchedP1, totalMarkers - matchedP1], [totalMarkers, totalMarkers])
         if np.max(likelihood[1]) > lr_thres:
             if likelihood[1][0] == 1:
                 geno = 0
@@ -247,40 +263,39 @@ def getWindowGenotype(matchedP1, totalMarkers, lr_thres = 2.706):
         else:
             geno = 0.5
             pval = np.max(likelihood[1])
-    return (geno, pval)
+    return(geno, pval)
 
 def crossGenotypeWindows(commonSNPsCHR, commonSNPsPOS, snpsP1, snpsP2, inFile, binLen, outFile, logDebug = True):
     ## inFile are the SNPs of the sample
-    (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = parsers.parseInput(inFile = inFile, logDebug = logDebug)
+    inputs = parsers.ParseInputs(inFile = inFile, logDebug = logDebug)
     # identifying the segregating SNPs between the accessions
     # only selecting 0 or 1
     segSNPsind = np.where((snpsP1 != snpsP2) & (snpsP1 >= 0) & (snpsP2 >= 0) & (snpsP1 < 2) & (snpsP2 < 2))[0]
     log.info("number of segregating snps between parents: %s", len(segSNPsind))
-    (ChrBins, PosBins) = getBinsSNPs(commonSNPsCHR, commonSNPsPOS, binLen)
-    log.info("number of bins: %s", len(ChrBins))
+    iter_bins_genome = get_bins_arrays(commonSNPsCHR, commonSNPsPOS, binLen)
+    iter_bins_snps = get_bins_arrays(inputs.chrs, inputs.pos, binLen)
+    bin_inds = 0
     outfile = open(outFile, 'w')
-    for i in range(len(PosBins)):
-      start = np.sum(PosBins[0:i])
-      end = start + PosBins[i]
-      # first snp positions which are segregating and are in this window
-      reqPOSind = segSNPsind[np.where((segSNPsind < end) & (segSNPsind >= start))[0]]
-      reqPOS = commonSNPsPOS[reqPOSind]
-      perchrTarPosind = np.where(snpCHR == ChrBins[i])[0]
-      perchrTarPos = snpPOS[perchrTarPosind]
-      matchedAccInd = reqPOSind[np.where(np.in1d(reqPOS, perchrTarPos))[0]]
-      matchedTarInd = perchrTarPosind[np.where(np.in1d(perchrTarPos, reqPOS))[0]]
-      matchedTarGTs = snpGT[matchedTarInd]
-      try:
-        TarGTBinary = parsers.parseGT(matchedTarGTs)
-        TarGTBinary[np.where(TarGTBinary == 2)[0]] = 4
-        genP1 = np.subtract(TarGTBinary, snpsP1[matchedAccInd])
-        genP1no = len(np.where(genP1 == 0)[0])
-        (geno, pval) = getWindowGenotype(genP1no, len(genP1))
-        outfile.write("%s\t%s\t%s\t%s\t%s\n" % (i+1, genP1no, len(genP1), geno, pval))
-      except:
-        outfile.write("%s\tNA\tNA\tNA\tNA\n" % (i+1))
-      if i % 40 == 0:
-        log.info("progress: %s windows", i+10)
+    for e_b in iter_bins_snps:
+        # first snp positions which are segregating and are in this window
+        reqPOSind = segSNPsind[ np.where( np.in1d(segSNPsind, e_b[2]) )[0] ]
+        reqPOS = commonSNPsPOS[reqPOSind]
+        perchrTarPos = inputs.pos[e_s[2]]
+        matchedAccInd = reqPOSind[ np.where( np.in1d(reqPOS, perchrTarPos) )[0] ]
+        matchedTarInd = np.array(e_s[2], dtype=int)[ np.where( np.in1d(perchrTarPos, reqPOS) )[0] ]
+        matchedTarGTs = inputs.gt[matchedTarInd]
+        try:
+            TarGTBinary = parsers.parseGT(matchedTarGTs)
+            TarGTBinary[np.where(TarGTBinary == 2)[0]] = 4
+            genP1 = np.subtract(TarGTBinary, snpsP1[matchedAccInd])
+            genP1no = len(np.where(genP1 == 0)[0])
+            (geno, pval) = getWindowGenotype(genP1no, len(genP1))
+            outfile.write("%s\t%s\t%s\t%s\t%s\n" % (bin_inds+1, genP1no, len(genP1), geno, pval))
+        except:
+            outfile.write("%s\tNA\tNA\tNA\tNA\n" % (bin_inds+1))
+        bin_inds += 1
+        if bin_inds % 40 == 0:
+            log.info("progress: %s windows", bin_inds)
     log.info("done!")
     outfile.close()
 
@@ -298,25 +313,25 @@ def crossGenotyper(args):
         log.info("input files: %s and %s" % (args['parents'], args['father']))
         if not os.path.isfile(args['parents']) and os.path.isfile(args['father']):
             die("either of the input files do not exists, please provide VCF/BED file for parent genotype information")
-        (p1snpCHR, p1snpPOS, p1snpGT, p1snpWEI, p1DPmean) = parsers.parseInput(inFile = args['parents'], logDebug = args['logDebug'])
-        (p2snpCHR, p2snpPOS, p2snpGT, p2snpWEI, p2DPmean) = parsers.parseInput(inFile = args['father'], logDebug = args['logDebug'])
-        commonCHRs_ids = np.union1d(p1snpCHR, p2snpCHR)
+        p1_snps = parsers.ParseInputs(inFile = args['parents'], logDebug = args['logDebug'])
+        p2_snps = parsers.ParseInputs(inFile = args['father'], logDebug = args['logDebug'])
+        commonCHRs_ids = np.union1d(p1_snps.chrs, p2_snps.chrs)
         commonSNPsCHR = np.zeros(0, dtype=commonCHRs_ids.dtype)
         commonSNPsPOS = np.zeros(0, dtype=int)
         snpsP1 = np.zeros(0, dtype='int8')
         snpsP2 = np.zeros(0, dtype='int8')
         for i in commonCHRs_ids:
-            perchrP1inds = np.where(p1snpCHR == i)[0]
-            perchrP2inds = np.where(p2snpCHR == i)[0]
-            perchrPositions = np.union1d(p1snpPOS[perchrP1inds], p2snpPOS[perchrP2inds])
+            perchrP1inds = np.where(p1_snps.chrs == i)[0]
+            perchrP2inds = np.where(p2_snps.chrs == i)[0]
+            perchrPositions = np.union1d(p1_snps.pos[perchrP1inds], p2_snps.pos[perchrP2inds])
             commonSNPsCHR = np.append(commonSNPsCHR, np.repeat(i, len(perchrPositions)))
             commonSNPsPOS = np.append(commonSNPsPOS, perchrPositions)
             perchrsnpsP1 = np.repeat(-1, len(perchrPositions)).astype('int8')
             perchrsnpsP2 = np.repeat(-1, len(perchrPositions)).astype('int8')
-            perchrsnpsP1_inds = np.where(np.in1d(p1snpPOS[perchrP1inds], perchrPositions))[0]
-            perchrsnpsP2_inds = np.where(np.in1d(p2snpPOS[perchrP2inds], perchrPositions))[0]
-            snpsP1 = np.append(snpsP1, parsers.parseGT(p1snpGT[perchrsnpsP1_inds]))
-            snpsP2 = np.append(snpsP2, parsers.parseGT(p2snpGT[perchrsnpsP2_inds]))
+            perchrsnpsP1_inds = np.where(np.in1d(p1_snps.pos[perchrP1inds], perchrPositions))[0]
+            perchrsnpsP2_inds = np.where(np.in1d(p2_snps.pos[perchrP2inds], perchrPositions))[0]
+            snpsP1 = np.append(snpsP1, parsers.parseGT(p1_snps.gt[perchrsnpsP1_inds]))
+            snpsP2 = np.append(snpsP2, parsers.parseGT(p2_snps.gt[perchrsnpsP2_inds]))
         log.info("done!")
     else:
         parents = args['parents']
diff --git a/snpmatch/core/parsers.py b/snpmatch/core/parsers.py
index 14c4560..53516b6 100644
--- a/snpmatch/core/parsers.py
+++ b/snpmatch/core/parsers.py
@@ -31,98 +31,116 @@ def parseGT(snpGT):
     snpBinary[np.where(snpGT == nocall)[0]] = -1
     return snpBinary
 
-def parseChrName(targetCHR):
-    snpCHROM = np.char.replace(np.core.defchararray.lower(np.array(targetCHR, dtype="string")), "chr", "")
-    snpsREQ = np.where(np.char.isdigit(snpCHROM))[0]   ## Filtering positions from mitochondrial and chloroplast
-    snpCHR = snpCHROM[snpsREQ]
-    return (snpCHR, snpsREQ)
+class ParseInputs(object):
+    ## class object for parsing input files for SNPmatch
 
-def readBED(inFile, logDebug):
-    log.info("reading the position file")
-    targetSNPs = pd.read_table(inFile, header=None, usecols=[0,1,2])
-    (snpCHR, snpsREQ) = parseChrName(targetSNPs[0])
-    snpPOS = np.array(targetSNPs[1], dtype=int)[snpsREQ]
-    snpGT = np.array(targetSNPs[2])[snpsREQ]
-    snpBinary = parseGT(snpGT)
-    snpWEI = np.ones((len(snpCHR), 3))  ## for homo and het
-    snpWEI[np.where(snpBinary != 0),0] = 0
-    snpWEI[np.where(snpBinary != 1),2] = 0
-    snpWEI[np.where(snpBinary != 2),1] = 0
-    return (snpCHR, snpPOS, snpGT, snpWEI)
+    def __init__(self, inFile, logDebug, outFile = "parser" ):
+        if outFile == "parser" or not outFile:
+            outFile = inFile + ".snpmatch"
+        if os.path.isfile(inFile + ".snpmatch.npz"):
+            log.info("snpmatch parser dump found! loading %s", inFile + ".snpmatch.npz")
+            snps = np.load(inFile + ".snpmatch.npz")
+            self.load_snp_info(snps['chr'], snps['pos'], snps['gt'], snps['wei'], snps['dp'])
+        else:
+            _,inType = os.path.splitext(inFile)
+            if inType == '.npz':
+                log.info("loading snpmatch parser file! %s", inFile)
+                snps = np.load(inFile)
+                self.load_snp_info(snps['chr'], snps['pos'], snps['gt'], snps['wei'], snps['dp'])
+            else:
+                log.info('running snpmatch parser!')
+                if inType == '.vcf':
+                    (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = self.read_vcf(inFile, logDebug)
 
-def readVcf(inFile, logDebug):
-    log.info("reading the VCF file")
-    ## We read only one sample from the VCF file
-    if logDebug:
-        vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
-    else:
-        import StringIO
-        import sys
-        sys.stderr = StringIO.StringIO()
-        vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
-        #vcf = vcfnp.variants(inFile, cache=False).view(np.recarray)
-        #vcfD = vcfnp.calldata_2d(inFile, cache=False).view(np.recarray)
-        sys.stderr = sys.__stderr__
-    (snpCHR, snpsREQ) = parseChrName(vcf['variants/CHROM'])
-    try:
-        snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
-    except AttributeError:
-        snpmatch.die("input VCF file doesnt have required GT field")
-    snpsREQ = snpsREQ[np.where(snpGT != './.')[0]]
-    snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
-    if 'calldata/PL' in sorted(vcf.keys()):
-        snpWEI = np.copy(vcf['calldata/PL'][snpsREQ, 0]).astype('float')
-        snpWEI = snpWEI/(-10)
-        snpWEI = np.exp(snpWEI)
-    else:
+                elif inType == '.bed':
+                    (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = self.read_bed(inFile, logDebug)
+                else:
+                    snpmatch.die("input file type %s not supported" % inType)
+                self.load_snp_info(snpCHR, snpPOS, snpGT, snpWEI, DPmean)
+                self.save_snp_info(outFile)
+                self.case_interpret_inputs(outFile + ".stats.json")
+        log.info("done!")
+
+    def load_snp_info(self, snpCHR, snpPOS, snpGT, snpWEI, DPmean):
+        self.chrs = snpCHR
+        self.pos = snpPOS
+        self.gt = snpGT
+        self.wei = snpWEI
+        self.dp = DPmean
+
+    def save_snp_info(self, outFile):
+        log.info("creating snpmatch parser file: %s", outFile + '.npz')
+        np.savez(outFile, chr = self.chrs, pos = self.pos, gt = self.gt, wei = self.wei, dp = self.dp)
+
+    def case_interpret_inputs(self, outFile):
+        NumSNPs = len(self.chrs)
+        case = 0
+        note = "Sufficient number of SNPs"
+        if NumSNPs < snpmatch.snp_thres:
+            note = "Attention: low number of SNPs provided"
+            case = 1
+        snpst = np.unique(self.chrs, return_counts=True)
+        snpdict = dict(('%s' % snpst[0][i], snpst[1][i]) for i in range(len(snpst[0])))
+        statdict = {"interpretation": {"case": case, "text": note}, "snps": snpdict, "num_of_snps": NumSNPs, "depth": self.dp}
+        statdict['percent_heterozygosity'] = snpmatch.getHeterozygosity(self.gt)
+        with open(outFile , "w") as out_stats:
+            out_stats.write(json.dumps(statdict))
+
+    @staticmethod
+    def read_bed(inFile, logDebug):
+        log.info("reading the position file")
+        targetSNPs = pd.read_table(inFile, header=None, usecols=[0,1,2])
+        snpCHR = np.array(targetSNPs[0], dtype="string")
+        snpPOS = np.array(targetSNPs[1], dtype=int)
+        snpGT = np.array(targetSNPs[2])
         snpBinary = parseGT(snpGT)
-        snpWEI = np.ones((len(snpsREQ), 3))  ## for homo and het
+        snpWEI = np.ones((len(snpCHR), 3))  ## for homo and het
         snpWEI[np.where(snpBinary != 0),0] = 0
         snpWEI[np.where(snpBinary != 1),2] = 0
         snpWEI[np.where(snpBinary != 2),1] = 0
-    snpCHR = snpCHR[snpsREQ]
-    if 'calldata/DP' in sorted(vcf.keys()):
-        DPmean = np.mean(vcf['calldata/DP'][snpsREQ,0])
-    else:
-        DPmean = "NA"
-    snpPOS = np.array(vcf['variants/POS'][snpsREQ])
-    return (DPmean, snpCHR, snpPOS, snpGT, snpWEI)
+        return((snpCHR, snpPOS, snpGT, snpWEI, "NA"))
 
-def parseInput(inFile, logDebug, outFile = "parser"):
-    if outFile == "parser" or not outFile:
-        outFile = inFile + ".snpmatch"
-    if os.path.isfile(inFile + ".snpmatch.npz"):
-        log.info("snpmatch parser dump found! loading %s", inFile + ".snpmatch.npz")
-        snps = np.load(inFile + ".snpmatch.npz")
-        (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = (snps['chr'], snps['pos'], snps['gt'], snps['wei'], snps['dp'])
-    else:
-        _,inType = os.path.splitext(inFile)
-        if inType == '.npz':
-            log.info("loading snpmatch parser file! %s", inFile)
-            snps = np.load(inFile)
-            (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = (snps['chr'], snps['pos'], snps['gt'], snps['wei'], snps['dp'])
+    @staticmethod
+    def read_vcf(inFile, logDebug):
+        if logDebug:
+            vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
         else:
-            log.info('running snpmatch parser!')
-            if inType == '.vcf':
-                (DPmean, snpCHR, snpPOS, snpGT, snpWEI) = readVcf(inFile, logDebug)
-            elif inType == '.bed':
-                (snpCHR, snpPOS, snpGT, snpWEI) = readBED(inFile, logDebug)
-                DPmean = "NA"
-            else:
-                snpmatch.die("input file type %s not supported" % inType)
-            log.info("creating snpmatch parser file: %s", outFile + '.npz')
-            np.savez(outFile, chr = snpCHR, pos = snpPOS, gt = snpGT, wei = snpWEI, dp = DPmean)
-            NumSNPs = len(snpCHR)
-            case = 0
-            note = "Sufficient number of SNPs"
-            if NumSNPs < snpmatch.snp_thres:
-                note = "Attention: low number of SNPs provided"
-                case = 1
-            snpst = np.unique(snpCHR, return_counts=True)
-            snpdict = dict(('Chr%s' % snpst[0][i], snpst[1][i]) for i in range(len(snpst[0])))
-            statdict = {"interpretation": {"case": case, "text": note}, "snps": snpdict, "num_of_snps": NumSNPs, "depth": DPmean}
-            statdict['percent_heterozygosity'] = snpmatch.getHeterozygosity(snpGT)
-            with open(outFile + ".stats.json", "w") as out_stats:
-                out_stats.write(json.dumps(statdict))
-    log.info("done!")
-    return (snpCHR, snpPOS, snpGT, snpWEI, DPmean)
+            import StringIO
+            import sys
+            sys.stderr = StringIO.StringIO()
+            vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
+            sys.stderr = sys.__stderr__
+        try:
+            snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[:, 0]
+        except AttributeError:
+            snpmatch.die("input VCF file doesnt have required GT field")
+        snpsREQ = np.where((snpGT != './.') & (snpGT != '.|.'))[0]
+        snpGT = snpGT[snpsREQ]
+        if 'calldata/PL' in sorted(vcf.keys()):
+            snpWEI = np.copy(vcf['calldata/PL'][snpsREQ, 0]).astype('float')
+            snpWEI = snpWEI/(-10)
+            snpWEI = np.exp(snpWEI)
+        else:
+            snpBinary = parseGT(snpGT)
+            snpWEI = np.ones((len(snpsREQ), 3))  ## for homo and het
+            snpWEI[np.where(snpBinary != 0),0] = 0
+            snpWEI[np.where(snpBinary != 1),2] = 0
+            snpWEI[np.where(snpBinary != 2),1] = 0
+        snpCHR = np.array(vcf['variants/CHROM'][snpsREQ], dtype="string")
+        if 'calldata/DP' in sorted(vcf.keys()):
+            DPmean = np.mean(vcf['calldata/DP'][snpsREQ,0])
+        else:
+            DPmean = "NA"
+        snpPOS = np.array(vcf['variants/POS'][snpsREQ])
+        return((snpCHR, snpPOS, snpGT, snpWEI, DPmean))
+
+    def filter_chr_names(self, g):
+        ## provide genotypedata (pygwas genotype object)
+        self.chr_list = np.array(pd.Series(g.chrs).str.replace("chr", "", case=False), dtype="string")
+        self.chrs_nochr = np.array(pd.Series(self.chrs).str.replace("chr", "", case=False), dtype="string")
+        self.filter_inds_ix = np.where( np.in1d( self.chrs_nochr, self.chr_list ) )[0]
+
+
+def potatoParser(inFile, logDebug, outFile = "parser"):
+    inputs = ParseInputs(inFile, logDebug, outFile)
+    return(inputs.chrs, inputs.pos, inputs.gt, inputs.wei, inputs.dp)
diff --git a/snpmatch/core/snpmatch.py b/snpmatch/core/snpmatch.py
index 23f701d..beeff21 100644
--- a/snpmatch/core/snpmatch.py
+++ b/snpmatch/core/snpmatch.py
@@ -33,62 +33,79 @@ def likeliTest(n, y):
   else:
     return np.nan
 
-def calculate_likelihoods(ScoreList, NumInfoSites):
-  num_lines = len(ScoreList)
-  LikeLiHoods = [likeliTest(NumInfoSites[i], int(ScoreList[i])) for i in range(num_lines)]
-  LikeLiHoods = np.array(LikeLiHoods).astype("float")
-  TopHit = np.amin(LikeLiHoods)
-  LikeLiHoodRatios = [LikeLiHoods[i]/TopHit for i in range(num_lines)]
-  LikeLiHoodRatios = np.array(LikeLiHoodRatios).astype("float")
-  return (LikeLiHoods, LikeLiHoodRatios)
-
-def print_out_table(outFile, AccList, ScoreList, NumInfoSites, NumMatSNPs, DPmean):
-  (LikeLiHoods, LikeLiHoodRatios) = calculate_likelihoods(ScoreList, NumInfoSites)
-  if outFile:
-    out = open(outFile, 'w')
-    for i in range(len(AccList)):
-      score = float(ScoreList[i])/NumInfoSites[i]
-      out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (AccList[i], int(ScoreList[i]), NumInfoSites[i], score, LikeLiHoods[i], LikeLiHoodRatios[i], NumMatSNPs, DPmean))
-    out.close()
-  else:
-    for i in range(len(AccList)):
-      score = float(ScoreList[i])/NumInfoSites[i]
-      sys.stdout.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (AccList[i], int(ScoreList[i]), NumInfoSites[i], score, LikeLiHoods[i], LikeLiHoodRatios[i], NumMatSNPs, DPmean))
-
-def CaseInterpreter(overlap, NumSNPs, topHits, probScore):
-  overlap_thres = 0.5
-  num_lines = len(probScore)
-  case = 10
-  if len(topHits) == 1:
-    case = 0
-    note = "Unique hit"
-  elif np.nanmean(probScore[topHits]) > prob_thres:
-    case = 2
-    note = "Ambiguous sample: Accessions in top hits can be really close"
-  elif overlap > overlap_thres:
-    case = 3
-    note = "Ambiguous sample: Sample might contain mixture of DNA or contamination"
-  elif overlap < overlap_thres:
-    case = 4
-    note = "Ambiguous sample: Overlap of SNPs is very low, sample may not be in database"
-  if case > 4:
-    case = 1
-    note = "Ambiguous sample"
-  return (case, note)
-
-def print_topHits(outFile, AccList, ScoreList, NumInfoSites, overlap, NumMatSNPs):
-    num_lines = len(ScoreList)
-    (LikeLiHoods, LikeLiHoodRatios) = calculate_likelihoods(ScoreList, NumInfoSites)
-    topHits = np.where(LikeLiHoodRatios < lr_thres)[0]
-    probScore = [float(ScoreList[i])/NumInfoSites[i] for i in range(num_lines)]
-    overlapScore = [float(NumInfoSites[i])/NumMatSNPs for i in range(num_lines)]
-    probScore = np.array(probScore, dtype = float)
-    sorted_order = topHits[np.argsort(-probScore[topHits])]
-    (case, note) = CaseInterpreter(overlap, NumMatSNPs, topHits, probScore)
-    matches_dict = [(str(AccList[i]), probScore[i], int(NumInfoSites[i]), int(overlapScore[i])) for i in sorted_order]
-    topHitsDict = {'overlap': [overlap, NumMatSNPs], 'matches': matches_dict, 'interpretation':{'case': case, 'text': note}}
-    with open(outFile, "w") as out_stats:
-        out_stats.write(json.dumps(topHitsDict))
+class GenotyperOutput(object):
+    ## class object for main SNPmatch output
+
+    def __init__(self, AccList, ScoreList, NumInfoSites, overlap, NumMatSNPs, DPmean ):
+        self.accs = np.array(AccList, dtype="string")
+        self.scores = np.array(ScoreList, dtype="int")
+        self.ninfo = np.array(NumInfoSites, dtype="int")
+        self.overlap = overlap
+        self.num_snps = NumMatSNPs
+        self.dp = DPmean
+
+    def get_probabilities(self):
+        probs = [float(self.scores[i])/self.ninfo[i] for i in range(len(self.accs))]
+        self.probabilies = np.array(probs, dtype="float")
+
+    @staticmethod
+    def calculate_likelihoods(scores, ninfo, amin = "calc"):
+        num_lines = len(scores)
+        LikeLiHoods = [likeliTest(ninfo[i], int(scores[i])) for i in range(num_lines)]
+        LikeLiHoods = np.array(LikeLiHoods, dtype = "float")
+        if amin == "calc":
+            TopHit = np.amin(LikeLiHoods)
+        else:
+            TopHit = float(amin)
+        LikeLiHoodRatios = [LikeLiHoods[i]/TopHit for i in range(num_lines)]
+        LikeLiHoodRatios = np.array(LikeLiHoodRatios, dtype="float")
+        return((LikeLiHoods, LikeLiHoodRatios))
+
+    def get_likelihoods(self, amin = "calc"):
+        (self.likelis, self.lrts) = self.calculate_likelihoods(self.scores, self.ninfo, amin)
+
+    def print_out_table(self, outFile):
+        self.get_likelihoods()
+        self.get_probabilities()
+        if outFile:
+            out = open(outFile, 'w')
+            for i in range(len(self.accs)):
+                out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.accs[i], self.scores[i], self.ninfo[i], self.probabilies[i], self.likelis[i], self.lrts[i], self.num_snps, self.dp))
+            out.close()
+        else:
+            for i in range(len(self.accs)):
+                sys.stdout.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.accs[i], self.scores[i], self.ninfo[i], self.probabilies[i], self.likelis[i], self.lrts[i], self.num_snps, self.dp))
+
+    def print_json_output(self, outFile):
+        self.get_likelihoods()
+        self.get_probabilities()
+        topHits = np.where(self.lrts < lr_thres)[0]
+        overlapScore = [float(self.ninfo[i])/self.num_snps for i in range(len(self.accs))]
+        sorted_order = topHits[np.argsort(-self.probabilies[topHits])]
+        (case, note) = self.case_interpreter(topHits)
+        matches_dict = [(self.accs[i], self.probabilies[i], self.ninfo[i], overlapScore[i]) for i in sorted_order]
+        topHitsDict = {'overlap': [self.overlap, self.num_snps], 'matches': matches_dict, 'interpretation':{'case': case, 'text': note}}
+        with open(outFile, "w") as out_stats:
+            out_stats.write(json.dumps(topHitsDict))
+
+    def case_interpreter(self, topHits):
+        overlap_thres = 0.5
+        case = 1
+        note = "Ambiguous sample"
+        if len(topHits) == 1:
+            case = 0
+            note = "Unique hit"
+        elif np.nanmean(self.probabilies[topHits]) > prob_thres:
+            case = 2
+            note = "Ambiguous sample: Accessions in top hits can be really close"
+        elif self.overlap > overlap_thres:
+            case = 3
+            note = "Ambiguous sample: Sample might contain mixture of DNA or contamination"
+        elif self.overlap < overlap_thres:
+            case = 4
+            note = "Ambiguous sample: Overlap of SNPs is very low, sample may not be in database"
+        return(case, note)
+
 
 def getHeterozygosity(snpGT, outFile='default'):
     snpBinary = parsers.parseGT(snpGT)
@@ -101,79 +118,80 @@ def getHeterozygosity(snpGT, outFile='default'):
           out_stats.write(json.dumps(topHitsDict))
     return float(numHets)/len(snpGT)
 
-def genotyper(snpCHR, snpPOS, snpGT, snpWEI, DPmean, hdf5File, hdf5accFile, outFile):
-  NumSNPs = len(snpCHR)
-  log.info("loading database files")
-  GenotypeData = genotype.load_hdf5_genotype_data(hdf5File)
-  GenotypeData_acc = genotype.load_hdf5_genotype_data(hdf5accFile)
-  log.info("done!")
-  num_lines = len(GenotypeData.accessions)
-  ScoreList = np.zeros(num_lines, dtype="float")
-  NumInfoSites = np.zeros(len(GenotypeData.accessions), dtype="uint32")
-  NumMatSNPs = 0
-  overlappedInds = np.zeros(0, dtype=int)
-  chunk_size = 1000
-  for ind,echr in enumerate(parsers.parseChrName(GenotypeData.chrs)[0]):
-    perchrTarPos = np.where(snpCHR == echr)[0]
-    perchrtarSNPpos = snpPOS[perchrTarPos]
-    log.info("Analysing positions in chromosome %s", echr)
-    start = GenotypeData.chr_regions[ind][0]
-    end = GenotypeData.chr_regions[ind][1]
-    chrpositions = GenotypeData.positions[start:end]
-    matchedAccInd = np.where(np.in1d(chrpositions, perchrtarSNPpos))[0] + start
-    matchedTarInd = np.where(np.in1d(perchrtarSNPpos, chrpositions))[0]
-    matchedTarWei = snpWEI[perchrTarPos[matchedTarInd],]
-    TarGTs0 = np.zeros(len(matchedTarInd), dtype="int8")
-    TarGTs1 = np.ones(len(matchedTarInd), dtype="int8") + 1
-    TarGTs2 = np.ones(len(matchedTarInd), dtype="int8")
-    overlappedInds = np.append(overlappedInds, perchrTarPos[matchedTarInd])
-    NumMatSNPs = NumMatSNPs + len(matchedAccInd)
-    for j in range(0, len(matchedAccInd), chunk_size):
-      t1001SNPs = GenotypeData.snps[matchedAccInd[j:j+chunk_size],:]
-      samSNPs0 = np.reshape(np.repeat(TarGTs0[j:j+chunk_size], num_lines), (len(TarGTs0[j:j+chunk_size]),num_lines))
-      samSNPs1 = np.reshape(np.repeat(TarGTs1[j:j+chunk_size], num_lines), (len(TarGTs1[j:j+chunk_size]),num_lines))
-      samSNPs2 = np.reshape(np.repeat(TarGTs2[j:j+chunk_size], num_lines), (len(TarGTs2[j:j+chunk_size]),num_lines))
-      tempScore0 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs0, dtype=int).T, matchedTarWei[j:j+chunk_size,0]).T, axis=0)
-      tempScore1 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs1, dtype=int).T, matchedTarWei[j:j+chunk_size,1]).T, axis=0)
-      tempScore2 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs2, dtype=int).T, matchedTarWei[j:j+chunk_size,2]).T, axis=0)
-      ScoreList = ScoreList + tempScore0 + tempScore1 + tempScore2
-      if(len(TarGTs0[j:j+chunk_size]) >= 1):
-        NumInfoSites = NumInfoSites + len(TarGTs0[j:j+chunk_size]) - np.sum(numpy.ma.masked_less(t1001SNPs, 0).mask.astype(int), axis = 0) # Number of informative sites
-    log.info("Done analysing %s positions", NumMatSNPs)
-  log.info("writing score file!")
-  overlap = float(NumMatSNPs)/NumSNPs
-  print_out_table(outFile + '.scores.txt',GenotypeData.accessions, ScoreList, NumInfoSites, NumMatSNPs, DPmean)
-  if not outFile:
-    outFile = "genotyper"
-  print_topHits(outFile + ".matches.json", GenotypeData.accessions, ScoreList, NumInfoSites, overlap, NumMatSNPs)
-  getHeterozygosity(snpGT[overlappedInds], outFile + ".matches.json")
-  return (ScoreList, NumInfoSites)
+def genotyper(inputs, hdf5File, hdf5accFile, outFile):
+    log.info("loading database files")
+    GenotypeData = genotype.load_hdf5_genotype_data(hdf5File)
+    GenotypeData_acc = genotype.load_hdf5_genotype_data(hdf5accFile)
+    log.info("done!")
+    inputs.filter_chr_names(GenotypeData)
+    num_lines = len(GenotypeData.accessions)
+    ScoreList = np.zeros(num_lines, dtype="float")
+    NumInfoSites = np.zeros(len(GenotypeData.accessions), dtype="uint32")
+    NumMatSNPs = 0
+    overlappedInds = np.zeros(0, dtype=int)
+    chunk_size = 1000
+    for ind,echr in enumerate(inputs.chr_list):
+        perchrTarPos = np.where(inputs.chrs_nochr == echr)[0]
+        perchrtarSNPpos = inputs.pos[perchrTarPos]
+        log.info("Analysing positions in chromosome %s", echr)
+        start = GenotypeData.chr_regions[ind][0]
+        end = GenotypeData.chr_regions[ind][1]
+        chrpositions = GenotypeData.positions[start:end]
+        matchedAccInd = np.where(np.in1d(chrpositions, perchrtarSNPpos))[0] + start
+        matchedTarInd = np.where(np.in1d(perchrtarSNPpos, chrpositions))[0]
+        matchedTarWei = inputs.wei[perchrTarPos[matchedTarInd],]
+        TarGTs0 = np.zeros(len(matchedTarInd), dtype="int8")
+        TarGTs1 = np.ones(len(matchedTarInd), dtype="int8") + 1
+        TarGTs2 = np.ones(len(matchedTarInd), dtype="int8")
+        overlappedInds = np.append(overlappedInds, perchrTarPos[matchedTarInd])
+        NumMatSNPs = NumMatSNPs + len(matchedAccInd)
+        for j in range(0, len(matchedAccInd), chunk_size):
+            t1001SNPs = GenotypeData.snps[matchedAccInd[j:j+chunk_size],:]
+            samSNPs0 = np.reshape(np.repeat(TarGTs0[j:j+chunk_size], num_lines), (len(TarGTs0[j:j+chunk_size]),num_lines))
+            samSNPs1 = np.reshape(np.repeat(TarGTs1[j:j+chunk_size], num_lines), (len(TarGTs1[j:j+chunk_size]),num_lines))
+            samSNPs2 = np.reshape(np.repeat(TarGTs2[j:j+chunk_size], num_lines), (len(TarGTs2[j:j+chunk_size]),num_lines))
+            tempScore0 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs0, dtype=int).T, matchedTarWei[j:j+chunk_size,0]).T, axis=0)
+            tempScore1 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs1, dtype=int).T, matchedTarWei[j:j+chunk_size,1]).T, axis=0)
+            tempScore2 = np.sum(np.multiply(np.array(t1001SNPs == samSNPs2, dtype=int).T, matchedTarWei[j:j+chunk_size,2]).T, axis=0)
+            ScoreList = ScoreList + tempScore0 + tempScore1 + tempScore2
+            if(len(TarGTs0[j:j+chunk_size]) >= 1):
+                NumInfoSites = NumInfoSites + len(TarGTs0[j:j+chunk_size]) - np.sum(numpy.ma.masked_less(t1001SNPs, 0).mask.astype(int), axis = 0) # Number of informative sites
+        log.info("Done analysing %s positions", NumMatSNPs)
+    log.info("writing score file!")
+    overlap = float(NumMatSNPs)/len(inputs.filter_inds_ix)
+    result = GenotyperOutput(GenotypeData.accessions, ScoreList, NumInfoSites, overlap, NumMatSNPs, inputs.dp)
+    result.print_out_table( outFile + '.scores.txt' )
+    if not outFile:
+        outFile = "genotyper"
+    result.print_json_output( outFile + ".matches.json" )
+    getHeterozygosity(inputs.gt[overlappedInds], outFile + ".matches.json")
+    return(result)
 
 def potatoGenotyper(args):
-  (snpCHR, snpPOS, snpGT, snpWEI, DPmean) = parsers.parseInput(inFile = args['inFile'], logDebug = args['logDebug'])
-  log.info("running genotyper!")
-  (ScoreList, NumInfoSites) = genotyper(snpCHR, snpPOS, snpGT, snpWEI, DPmean, args['hdf5File'], args['hdf5accFile'], args['outFile'])
-  log.info("finished!")
+    inputs = parsers.ParseInputs(inFile = args['inFile'], logDebug = args['logDebug'])
+    log.info("running genotyper!")
+    result = genotyper(inputs, args['hdf5File'], args['hdf5accFile'], args['outFile'])
+    log.info("finished!")
 
 def pairwiseScore(inFile_1, inFile_2, logDebug, outFile):
-    (snpCHR1, snpPOS1, snpGT1, snpWEI1, DPmean1) = parsers.parseInput(inFile = inFile_1, logDebug = logDebug)
-    (snpCHR2, snpPOS2, snpGT2, snpWEI2, DPmean2) = parsers.parseInput(inFile = inFile_2, logDebug = logDebug)
+    inputs_1 = parsers.ParseInputs(inFile = inFile_1, logDebug = logDebug)
+    inputs_2 = parsers.ParseInputs(inFile = inFile_2, logDebug = logDebug)
     snpmatch_stats = {}
     unique_1, unique_2, common, scores = 0, 0, 0, 0
-    chrs = np.union1d(snpCHR1, snpCHR2)
-    for i in chrs:
-        perchrTarPosInd1 = np.where(snpCHR1 == i)[0]
-        perchrTarPosInd2 = np.where(snpCHR2 == i)[0]
+    common_chrs = np.intersect1d(np.unique(inputs_1.chrs), np.unique(inputs_2.chrs))
+    for i in common_chrs:
+        perchrTarPosInd1 = np.where(inputs_1.chrs == i)[0]
+        perchrTarPosInd2 = np.where(inputs_2.chrs == i)[0]
         log.info("Analysing chromosome %s positions", i)
-        perchrtarSNPpos1 = snpPOS1[perchrTarPosInd1]
-        perchrtarSNPpos2 = snpPOS2[perchrTarPosInd2]
+        perchrtarSNPpos1 = inputs_1.pos[perchrTarPosInd1]
+        perchrtarSNPpos2 = inputs_2.pos[perchrTarPosInd2]
         matchedAccInd1 = np.where(np.in1d(perchrtarSNPpos1, perchrtarSNPpos2))[0]
         matchedAccInd2 = np.where(np.in1d(perchrtarSNPpos2, perchrtarSNPpos1))[0]
         unique_1 = unique_1 + len(perchrTarPosInd1) - len(matchedAccInd1)
         unique_2 = unique_2 + len(perchrTarPosInd2) - len(matchedAccInd2)
         common = common + len(matchedAccInd1)
-        scores = scores + np.sum(np.array(snpGT1[matchedAccInd1] == snpGT2[matchedAccInd2], dtype = int))
-    snpmatch_stats['unique'] = {"%s" % os.path.basename(inFile_1): [float(unique_1)/len(snpCHR1), len(snpCHR1)], "%s" % os.path.basename(inFile_2): [float(unique_2)/len(snpCHR2), len(snpCHR2)]}
+        scores = scores + np.sum(np.array(inputs_1.gt[matchedAccInd1] == inputs_2.gt[matchedAccInd2], dtype = int))
+    snpmatch_stats['unique'] = {"%s" % os.path.basename(inFile_1): [float(unique_1)/len(inputs_1.chrs), len(inputs_1.chrs)], "%s" % os.path.basename(inFile_2): [float(unique_2)/len(inputs_2.chrs), len(inputs_2.chrs)]}
     snpmatch_stats['matches'] = [float(scores)/common, common]
     if not outFile:
         outFile = "genotyper"