Skip to content

Commit

Permalink
Fix issue #5: Add --timeout option to change or turn off httpx request timeouts.
Browse files Browse the repository at this point in the history
  • Loading branch information
yaph committed Jul 27, 2023
1 parent 93eee71 commit 3705e79
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ test-cli:
rm -rf tests/data/
wikidata_dl/cli.py --cache-dir tests/data --format json tests/queries/continents-on-earth.sparql
wikidata_dl/cli.py --cache-dir tests/data --items tests/queries/continents-on-earth.sparql
wikidata_dl/cli.py --cache-dir tests/data --timeout 0 tests/queries/nevada-events.sparql


# Call example: make release version=2022.11.04
Expand Down
21 changes: 21 additions & 0 deletions tests/queries/nevada-events.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#title: Recent events
SELECT ?event ?eventLabel ?date ?location
WITH {
SELECT DISTINCT ?event ?date ?location
WHERE {
# find events
wd:Q1227 wdt:P625 ?nevada.
?event wdt:P31/wdt:P279* wd:Q1190554.
# wdt:P17 wd:Q30;
# with a point in time or start date
OPTIONAL { ?event wdt:P585 ?date. }
OPTIONAL { ?event wdt:P580 ?date. }
?event wdt:P625 ?location.
FILTER(geof:distance(?location, ?nevada) < 400).
}
LIMIT 1000
} AS %i
WHERE {
INCLUDE %i
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
8 changes: 7 additions & 1 deletion wikidata_dl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ def main():
help='Get item results in this language. Enter a language code used by Wikimedia.')
parser.add_argument('--sleep', '-s', type=int, default=1,
help='Sleep time between file downloads in seconds.')
parser.add_argument('--timeout', type=float, default=5.0,
help='Set the timeout for fetching content (in seconds). Use 0 for no timeout.')

argv = parser.parse_args()

argv.cache_dir.mkdir(exist_ok=True, parents=True)

# httpx takes a positive number of seconds, or None to disable the timeout, so the CLI value 0 (or any non-positive value) is mapped to None.
timeout = argv.timeout if argv.timeout > 0 else None

# Get and save result.
result = wikidata.get(argv.query_file.read_text(), argv.format)
result = wikidata.get(argv.query_file.read_text(), argv.format, timeout)
file = argv.cache_dir.joinpath(f'{argv.query_file.stem}.{argv.format}')
file.write_text(result)
print(f'Saved query result in {file}\n')
Expand Down
17 changes: 11 additions & 6 deletions wikidata_dl/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def download(wikibase_id: str, root: Path, lifetime: int, language: str) -> str:
return f'Saved item data in {file}'


def get(query: str, format: str) -> str:
def get(query: str, format: str, timeout: float) -> str:
"""
Return a set of Wikibase IDs for given query from Wikidata.
Expand All @@ -89,12 +89,17 @@ def get(query: str, format: str) -> str:

params = {'query': query}
headers = {'Accept': formats[format]}
resp = httpx.get(api_endpoint, params=params, headers=headers)

if resp.is_success:
return resp.text

raise Exception('Data could not be fetched.')
try:
resp = httpx.get(api_endpoint, params=params, headers=headers, timeout=timeout)
except httpx.ReadTimeout:
print('Timeout error: Use the --timeout option to increase the timeout or set it to 0 to turn timeouts off.')
else:
if resp.is_success:
return resp.text

print('Data could not be fetched.')
quit()


def is_current(mtime: float, data: dict) -> bool:
Expand Down

0 comments on commit 3705e79

Please sign in to comment.