Skip to content

Commit

Permalink
Add support for API version 3
Browse files Browse the repository at this point in the history
  • Loading branch information
wragge committed Aug 26, 2023
1 parent 27f6c53 commit 386d8c0
Show file tree
Hide file tree
Showing 8 changed files with 1,146 additions and 160 deletions.
573 changes: 501 additions & 72 deletions 00_parser.ipynb

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,26 @@

* Construct a search in the Trove 'Newspapers and Gazettes' category.
* Copy the search url.
* Feed the url to the `parse_query` function.
* Feed the url to the `parse_query` function.

The optional second parameter to `parse_query` is the Trove API version number (`2` or `3`). The default is `2` for backwards compatibility.

```python
from trove_query_parser.parser import parse_query

parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')
parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)
```




{'q': 'wragge',
'zone': 'newspaper',
'l-artType': 'newspapers',
'l-state': ['Queensland'],
'l-category': ['Article'],
'l-illustrated': 'true',
'l-illtype': ['Cartoon']}
'l-illtype': ['Cartoon'],
'category': 'newspaper'}



Expand Down
12 changes: 7 additions & 5 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ <h2 id="How-to-use">How to use<a class="anchor-link" href="#How-to-use"> </a></h
<ul>
<li>Construct a search in the Trove 'Newspapers and Gazettes' category.</li>
<li>Copy the search url.</li>
<li>Feed the url to the <a href="/trove_query_parser/parser.html#parse_query"><code>parse_query</code></a> function.</li>
<li>Feed the url to the <a href="/trove_query_parser/parser.html#parse_query"><code>parse_query</code></a> function.</li>
</ul>
<p>The optional second parameter to <a href="/trove_query_parser/parser.html#parse_query"><code>parse_query</code></a> is the Trove API version number (<code>2</code> or <code>3</code>). The default is <code>2</code> for backwards compatibility.</p>

</div>
</div>
Expand All @@ -62,7 +63,7 @@ <h2 id="How-to-use">How to use<a class="anchor-link" href="#How-to-use"> </a></h
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">trove_query_parser.parser</span> <span class="kn">import</span> <span class="n">parse_query</span>

<span class="n">parse_query</span><span class="p">(</span><span class="s1">&#39;https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&amp;l-artType=newspapers&amp;l-state=Queensland&amp;l-category=Article&amp;l-illustrationType=Cartoon&#39;</span><span class="p">)</span>
<span class="n">parse_query</span><span class="p">(</span><span class="s1">&#39;https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&amp;l-artType=newspapers&amp;l-state=Queensland&amp;l-category=Article&amp;l-illustrationType=Cartoon&#39;</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
</pre></div>

</div>
Expand All @@ -78,11 +79,12 @@ <h2 id="How-to-use">How to use<a class="anchor-link" href="#How-to-use"> </a></h

<div class="output_text output_subarea output_execute_result">
<pre>{&#39;q&#39;: &#39;wragge&#39;,
&#39;zone&#39;: &#39;newspaper&#39;,
&#39;l-artType&#39;: &#39;newspapers&#39;,
&#39;l-state&#39;: [&#39;Queensland&#39;],
&#39;l-category&#39;: [&#39;Article&#39;],
&#39;l-illustrated&#39;: &#39;true&#39;,
&#39;l-illtype&#39;: [&#39;Cartoon&#39;]}</pre>
&#39;l-illtype&#39;: [&#39;Cartoon&#39;],
&#39;category&#39;: &#39;newspaper&#39;}</pre>
</div>

</div>
Expand Down Expand Up @@ -116,5 +118,5 @@ <h2 id="Limitations">Limitations<a class="anchor-link" href="#Limitations"> </a>
</div>
</div>
</div>


674 changes: 608 additions & 66 deletions docs/parser.html

Large diffs are not rendered by default.

13 changes: 8 additions & 5 deletions index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
"source": [
"* Construct a search in the Trove 'Newspapers and Gazettes' category.\n",
"* Copy the search url.\n",
"* Feed the url to the `parse_query` function."
"* Feed the url to the `parse_query` function\n",
"\n",
"The second parameter to `parse_query` is the Trove API version number. The default is `2` for backwards compatibility."
]
},
{
Expand All @@ -43,11 +45,12 @@
"data": {
"text/plain": [
"{'q': 'wragge',\n",
" 'zone': 'newspaper',\n",
" 'l-artType': 'newspapers',\n",
" 'l-state': ['Queensland'],\n",
" 'l-category': ['Article'],\n",
" 'l-illustrated': 'true',\n",
" 'l-illtype': ['Cartoon']}"
" 'l-illtype': ['Cartoon'],\n",
" 'category': 'newspaper'}"
]
},
"execution_count": null,
Expand All @@ -58,7 +61,7 @@
"source": [
"from trove_query_parser.parser import parse_query\n",
"\n",
"parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon')"
"parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)"
]
},
{
Expand Down Expand Up @@ -89,7 +92,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ author = Tim Sherratt
author_email = [email protected]
copyright = Tim Sherratt
branch = master
version = 0.1.1
version = 0.2.0
min_python = 3.6
audience = Developers
language = English
Expand Down
2 changes: 1 addition & 1 deletion trove_query_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.1"
__version__ = "0.2.0"
19 changes: 13 additions & 6 deletions trove_query_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# Cell

import os
from urllib.parse import urlparse, parse_qsl, parse_qs
import requests
import arrow
Expand All @@ -21,12 +22,13 @@ def format_date(date, start=False):
date = '{}Z'.format(date_obj.format('YYYY-MM-DDT00:00:00'))
return date

def parse_query(query):
def parse_query(query, api_version=2):
'''
Converts the parameters of a search using the Trove web interface into a form the API will understand.
Parameters:
* `query` – the url of a search in the Trove newspapers & gazettes category
* `api_version` – Trove API version (default is 2)
Returns:
* a dict containing the parameters (multiple values will be in a list)
Expand Down Expand Up @@ -85,10 +87,13 @@ def parse_query(query):
elif key == 'keyword.any':
keywords.append('({})'.format(' OR '.join(value.split())))
elif key in ['l-ArtType', 'l-advArtType', 'l-artType']:
if value == 'newspapers':
new_params['zone'] = 'newspaper'
elif value == 'gazette':
new_params['zone'] = 'gazette'
if api_version == 2:
if value == 'newspapers':
new_params['zone'] = 'newspaper'
elif value == 'gazette':
new_params['zone'] = 'gazette'
elif api_version == 3:
new_params['l-artType'] = value
if keywords:
if 'q' in new_params:
new_params['q'] += ' AND {}'.format(' AND '.join(keywords))
Expand All @@ -106,7 +111,9 @@ def parse_query(query):
new_params['q'] = date_query
if 'q' not in new_params:
new_params['q'] = ' '
if 'zone' not in new_params:
if api_version == 2 and 'zone' not in new_params:
new_params['zone'] = 'newspaper,gazette'
if api_version == 3 and 'category' not in new_params:
new_params['category'] = 'newspaper'
# return '{}?{}'.format('https://api.trove.nla.gov.au/v2/result', urlencode(new_params, doseq=True))
return new_params

0 comments on commit 386d8c0

Please sign in to comment.