diff --git a/.idea/BioPython-Convert.iml b/.idea/BioPython-Convert.iml
index 83413fb..ad8c5d0 100644
--- a/.idea/BioPython-Convert.iml
+++ b/.idea/BioPython-Convert.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 2c9b542..a9c5106 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/biopython_convert/JMESPathGen.py b/biopython_convert/JMESPathGen.py
index 6f05c4c..2e4284a 100644
--- a/biopython_convert/JMESPathGen.py
+++ b/biopython_convert/JMESPathGen.py
@@ -29,6 +29,12 @@
# and https://github.com/jmespath/jmespath.py/issues/159
+class Options(jmespath.Options):
+    """jmespath.Options extended with a ``custom_slice_types`` setting.
+
+    ``custom_slice_types`` is stored exactly as given. ``visit_slice`` hands it
+    to ``isinstance``, so it should be a type or a tuple of types; None (the
+    default) leaves slicing on the ``itertools.islice`` path.
+    """
+
+    def __init__(self, dict_cls=None, custom_functions=None, custom_slice_types=None):
+        # dict_cls and custom_functions are forwarded to the base class untouched.
+        super().__init__(dict_cls=dict_cls, custom_functions=custom_functions)
+        self.custom_slice_types = custom_slice_types
+
+
def compile(expression):
return Parser().parse(expression)
@@ -178,6 +184,8 @@ def visit_index(self, node, value, **kwargs):
return super().visit_index(node, value)
def visit_slice(self, node, value, **kwargs):
+        """Evaluate a slice node against ``value``.
+
+        A value whose type is listed in the options' ``custom_slice_types`` is
+        sliced through its own ``__getitem__`` (``value[start:stop:step]``);
+        any other value is sliced lazily with ``itertools.islice``.
+        """
+        # Read the setting defensively: an options object that does not define
+        # custom_slice_types (e.g. a plain jmespath.Options) must fall through
+        # to the islice path rather than raise AttributeError.
+        custom_slice_types = getattr(self._options, 'custom_slice_types', None)
+        if custom_slice_types is not None and isinstance(value, custom_slice_types):
+            return value[slice(*node['children'])]
return itertools.islice(value, *node['children'])
def visit_multi_select_list(self, node, value, **kwargs):
diff --git a/biopython_convert/__init__.py b/biopython_convert/__init__.py
index eec2874..2a12886 100644
--- a/biopython_convert/__init__.py
+++ b/biopython_convert/__init__.py
@@ -27,6 +27,8 @@
stat_annotations = ['molecule_type', 'topology', 'data_file_division', 'date', 'accessions', 'sequence_version', 'gi',
'keywords', 'source', 'organism']
+# Options handed to JMESPathGen.search when a JMESPath query is supplied:
+# the extended function set, plus SeqRecord registered as a custom slice type.
+JMESPathGenOptions = JMESPathGen.Options(custom_functions=JMESPathGen.ExtendedFunctions(), custom_slice_types=(SeqIO.SeqRecord,))
+
usage = """\
Use: biopython.convert [-s] [-v] [-i] [-q JMESPath] input_file input_type output_file output_type
\t-s Split records into seperate files
@@ -192,7 +194,7 @@ def gentype(x):
# Wrap input in JMESPath selector if provided
if jpath:
- input_records = JMESPathGen.search(jpath, gentype(input_records))
+ input_records = JMESPathGen.search(jpath, gentype(input_records), JMESPathGenOptions)
# Apply xform to both entire return value
input_records = xform(input_records)
diff --git a/test-data/outputs/jpath_slice b/test-data/outputs/jpath_slice
new file mode 100644
index 0000000..7de779a
--- /dev/null
+++ b/test-data/outputs/jpath_slice
@@ -0,0 +1,88 @@
+LOCUS NC_008563 2800 bp DNA UNK 01-JAN-1980
+DEFINITION Escherichia coli APEC O1, complete genome.
+ACCESSION NC_008563
+VERSION NC_008563.1
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ gene 117..2579
+ /locus_tag="APECO1_RS00010"
+ /old_locus_tag="APECO1_1976"
+ CDS 117..2579
+ /locus_tag="APECO1_RS00010"
+ /old_locus_tag="APECO1_1976"
+ /inference="COORDINATES: similar to AA
+ sequence:RefSeq:WP_005124053.1"
+ /note="Derived by automated computational analysis using
+ gene prediction method: Protein Homology."
+ /codon_start=1
+ /transl_table=11
+ /product="bifunctional aspartokinase I/homoserine
+ dehydrogenase I"
+ /protein_id="WP_001264707.1"
+ /translation="MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITN
+ HLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHV
+ LHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLES
+ TVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADC
+ CEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCL
+ IKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMS
+ RARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAII
+ SVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQM
+ LFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLN
+ LENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVT
+ PNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGI
+ LSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGREL
+ ELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDG
+ VCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLR
+ TLSWKLGV"
+ORIGIN
+ 1 accatcacca ttaccacagg taacggtgcg ggctgacgcg tacaggaaac acagaaaaaa
+ 61 gcccgcacct gacagtgcgg gctttttttt cgaccaaagg taacgaggta acaaccatgc
+ 121 gagtgttgaa gttcggcggt acatcagtgg caaatgcaga acgttttctg cgggttgccg
+ 181 atattctgga aagcaatgcc aggcaggggc aggtggcgac cgtcctctct gcccccgcca
+ 241 aaattaccaa ccatctggta gcgatgattg aaaaaaccat tagcggccaa gatgctttac
+ 301 ccaatatcag cgatgccgaa cgtatttttg ccgaacttct gacgggactc gccgccgccc
+ 361 agccgggatt tccgctggca caattgaaaa ctttcgtcga ccaggaattt gcccaaataa
+ 421 aacatgtcct gcatggcatt agtttgttgg ggcagtgccc ggatagcatc aacgctgcgc
+ 481 tgatttgccg tggcgagaaa atgtcgatcg ccattatggc cggcgtgtta gaagcgcgtg
+ 541 gtcacaacgt taccgttatc gatccggtcg aaaaactgct tgcagtgggg cattacctcg
+ 601 aatctaccgt tgatattgct gagtccaccc gccgtattgc ggcaagccgc attccggctg
+ 661 accacatggt gctgatggct ggtttcactg ccggtaatga aaaaggcgag ctggtggttc
+ 721 tgggacgcaa cggttccgac tactccgctg cggtgctggc ggcctgttta cgcgccgatt
+ 781 gttgcgagat ctggacggat gttgacggtg tttatacctg cgatccgcgt caggtgcccg
+ 841 atgcgaggtt gttgaagtcg atgtcctatc aggaagcgat ggagctttct tacttcggcg
+ 901 ctaaagttct tcacccccgc accatcaccc ccatcgccca gtttcagatc ccttgcctga
+ 961 ttaaaaatac cggaaatcct caagctccag gtacgctcat tggtgccagc cgtgatgaag
+ 1021 acgaattacc ggtcaagggc atttccaatc tgaataacat ggcaatgttc agcgtttccg
+ 1081 gcccggggat gaaagggatg gttggcatgg cggcgcgcgt ctttgcagcg atgtcacgcg
+ 1141 cccgtatttc cgtggtgctg attacgcaat catcttccga atacagtatc agtttctgcg
+ 1201 ttccgcaaag cgactgtgtg cgagctgaac gggcaatgca ggaagagttc tacctggaac
+ 1261 tgaaagaagg cttactggag ccgttggcgg tgacggaacg gctggccatt atctcggtgg
+ 1321 taggtgatgg tatgcgcacc ttacgtggga tctcggcgaa attctttgcc gcgctggccc
+ 1381 gcgccaatat caacattgtc gccattgctc agggatcttc tgaacgctca atctctgtcg
+ 1441 tggtcaataa cgatgatgcg accactggcg tgcgcgttac tcatcagatg ctgttcaata
+ 1501 ccgatcaggt tatcgaagtg tttgtgattg gcgtcggtgg cgttggcggt gcgctgctgg
+ 1561 agcaactgaa gcgtcagcaa agctggttga agaataaaca tatcgactta cgtgtctgcg
+ 1621 gtgttgctaa ctcgaaggca ctgctcacca atgtacatgg ccttaatctg gaaaactggc
+ 1681 aggaagaact ggcgcaagcc aaagagccgt ttaatctcgg gcgcttaatt cgcctcgtga
+ 1741 aagaatatca tctgctgaac ccggtcattg ttgactgtac ttccagccag gcagtggcgg
+ 1801 atcaatatgc cgacttcctg cgcgaaggtt tccacgttgt tacgccgaac aaaaaggcca
+ 1861 acacctcgtc gatggattac taccatcagt tgcgttatgc ggcggaaaaa tcgcggcgta
+ 1921 aattcctcta tgacaccaac gttggggctg gattaccggt tatcgagaac ctgcaaaatc
+ 1981 tgctcaatgc tggtgatgaa ttgatgaagt tctccggcat tctttcaggt tcgctttctt
+ 2041 atatcttcgg caagttagac gaaggcatga gtttctccga ggcgaccaca ctggcgcggg
+ 2101 aaatgggtta taccgaaccg gacccgcgag atgatctttc tggtatggat gtggcgcgta
+ 2161 agctattgat tctcgctcgt gaaacgggac gtgaactgga gctggcggat attgaaattg
+ 2221 aacctgtgct gcccgcagag tttaacgccg agggtgatgt cgccgctttt atggcgaatc
+ 2281 tgtcacagct cgacgatctc tttgccgcgc gtgtggcgaa ggcccgtgat gaaggaaaag
+ 2341 ttttgcgcta tgttggcaat attgatgaag atggcgtctg ccgcgtgaag attgccgaag
+ 2401 tggatggtaa tgatccgctg ttcaaagtga aaaatggcga aaacgccctg gccttctata
+ 2461 gccactatta tcagccgctg ccgttggtac tgcgcggata tggtgcgggc aatgacgtta
+ 2521 cagctgccgg tgtctttgct gatctgctac gtaccctctc atggaagtta ggagtctgac
+ 2581 atggttaaag tttatgcccc ggcttccagt gccaatatga gcgtcgggtt tgatgtgctc
+ 2641 ggggcggcgg tgacacctgt tgatggtgca ttgctcggag atgtagtcac ggttgaggcg
+ 2701 gcagagacat tcagtctcaa caacctcgga cgctttgccg ataagctgcc gtcagagcca
+ 2761 cgggaaaata tcgtttatca gtgctgggag cgtttttgcc
+//
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 0b9b3cb..ee291d6 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -172,4 +172,12 @@ def test_creation2(self):
seq: extract(seq, @),
description: desc})
""")
- self.compare_files(Path.joinpath(self.output_path, 'ffn'), output_path)
\ No newline at end of file
+ self.compare_files(Path.joinpath(self.output_path, 'ffn'), output_path)
+
+    def test_jpath_slice(self):
+        """
+        Slice a SeqRecord through a JMESPath query and compare the GenBank
+        output with the stored 'jpath_slice' fixture
+        """
+        result_file = Path(self.workdir.name) / 'jpath_slice'
+        expected_file = self.output_path.joinpath('jpath_slice')
+        convert(self.input_path, self.input_type, result_file, 'genbank', jpath='[[0][200:3000]]')
+        self.compare_files(expected_file, result_file)
\ No newline at end of file