Commit
chore: Add ruff lint configuration
jpmckinney committed Sep 15, 2024
1 parent 606cbe5 commit a9c3e12
Showing 6 changed files with 38 additions and 30 deletions.
24 changes: 10 additions & 14 deletions ocdsindex/__main__.py
@@ -33,8 +33,8 @@ def main():
@click.argument("base-url")
def sphinx(directory, base_url):
"""
- Crawls the DIRECTORY of the Sphinx build of the OCDS documentation, generates documents to index, assigns documents
- unique URLs from the BASE_URL, and prints the base URL, timestamp, and documents as JSON.
+ Crawl the DIRECTORY of the Sphinx build of the OCDS documentation, generate documents to index, assign documents
+ unique URLs from the BASE_URL, and print the base URL, timestamp, and documents as JSON.
"""
documents = Crawler(directory, base_url, extract_sphinx, allow=allow_sphinx).get_documents_by_language()
json.dump({"base_url": base_url, "created_at": int(time.time()), "documents": documents}, sys.stdout)
@@ -44,8 +44,8 @@ def sphinx(directory, base_url):
@click.argument("file", type=click.File())
def extension_explorer(file):
"""
- Crawls the Extension Explorer's `extensions.json` file, generates documents to index, assigns documents unique
- URLs, and prints the base URL, timestamp, and documents as JSON.
+ Crawl the Extension Explorer's `extensions.json` file, generate documents to index, assign documents unique
+ URLs, and print the base URL, timestamp, and documents as JSON.
"""
"https://extensions.open-contracting.org"

@@ -55,15 +55,15 @@ def sphinx(directory, base_url):
@click.argument("file", type=click.File())
def index(file, host):
"""
- Adds documents to Elasticsearch indices.
+ Add documents to Elasticsearch indices.
- Reads a JSON file in which the "base_url" key is the remote URL at which the documents will be accessible, and the
+ Read a JSON file in which the "base_url" key is the remote URL at which the documents will be accessible, and the
"documents" key is an object in which the key is a language code and the value is the documents to index.
The `sphinx` and `extension-explorer` commands create such files.
- Connects to Elasticsearch at HOST and, for each language, creates an `ocdsindex_XX` index, deletes existing
- documents matching the base URL, and indexes the new documents in that language.
+ Connect to Elasticsearch at HOST and, for each language, create an `ocdsindex_XX` index, delete existing
+ documents matching the base URL, and index the new documents in that language.
"""
language_map = {
"en": "english",
@@ -117,9 +117,7 @@ def index(file, host):
@click.argument("source")
@click.argument("destination")
def copy(host, source, destination):
"""
Adds a document with a DESTINATION base URL for each document with a SOURCE base URL.
"""
"""Add a document with a DESTINATION base URL for each document with a SOURCE base URL."""
with connect(host) as es:
body = []

@@ -143,9 +141,7 @@ def copy(host, source, destination):
"--exclude-file", type=click.File(), help="exclude any document whose base URL is equal to a line in this file"
)
def expire(host, exclude_file):
"""
Deletes documents from Elasticsearch indices that were crawled more than 180 days ago.
"""
"""Delete documents from Elasticsearch indices that were crawled more than 180 days ago."""
threshold = int(time.time()) - 15552000 # 180 days

base_urls = [line.strip() for line in exclude_file] if exclude_file else []
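Taken together, these commands form a pipeline: `sphinx` (or `extension-explorer`) writes JSON to standard output, and `index` loads a file of that JSON into Elasticsearch. A minimal sketch of driving the `sphinx` command through click's test runner, in which the build directory and base URL are hypothetical:

    from click.testing import CliRunner

    from ocdsindex.__main__ import main

    runner = CliRunner()
    # Crawl a hypothetical Sphinx build directory and capture the JSON output.
    result = runner.invoke(main, ["sphinx", "_build/html", "https://standard.open-contracting.org"])
    # result.output holds JSON with "base_url", "created_at" and "documents" keys,
    # which can be saved to a file and passed to the `index` command.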
8 changes: 3 additions & 5 deletions ocdsindex/allow.py
@@ -1,13 +1,11 @@
"""
``allow_`` methods that return whether to crawl a file.
"""
"""``allow_`` methods that return whether to crawl a file."""

import os


- def allow_sphinx(root, file):
+ def allow_sphinx(root, _file):
"""
- Allows all files, except the 404 page.
+ Allow all files, except the 404 page.
:param str root: a directory path
:param str file: a file basename
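The rename from `file` to `_file` satisfies ruff's ARG001 check (unused function argument), enabled by the new `select = ["ALL"]` configuration below: a leading underscore marks a parameter that the callback signature requires but the body never uses. A generic sketch of the convention, with a hypothetical filter body:

    def allow_markdown(_root, file):
        """Allow only Markdown files (hypothetical callback)."""
        # _root is part of the expected (root, file) signature but is
        # intentionally unused; the underscore prefix silences ARG001.
        return file.endswith(".md")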
10 changes: 4 additions & 6 deletions ocdsindex/crawler.py
@@ -5,14 +5,12 @@
import lxml.html


- def true(root, file):
+ def true(_root, _file):
return True


class Crawler:
"""
Crawls a directory for documents to index.
"""
"""Crawl a directory for documents to index."""

def __init__(self, directory, base_url, extract, *, allow=true):
"""
@@ -30,7 +28,7 @@ def __init__(self, directory, base_url, extract, *, allow=true):

def get_documents_by_language(self):
"""
- Returns the documents to index for each language.
+ Return the documents to index for each language.
:returns: a dict in which the key is a language code and the value is the documents to index
:rtype: dict
@@ -51,7 +49,7 @@ def get_documents_by_language(self):

def get_documents_from_file(self, path):
"""
- Parses the file's HTML contents, calculates its remote URL, and returns the documents to index from the file.
+ Parse the file's HTML contents, calculate its remote URL, and return the documents to index from the file.
:param str path: a file path
:returns: the documents to index
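The `sphinx` command shown earlier is the canonical use of this class: build a `Crawler` from a directory, a base URL, an extractor, and an `allow` filter, then request the documents grouped by language. A short sketch, with a hypothetical build directory and base URL:

    from ocdsindex.allow import allow_sphinx
    from ocdsindex.crawler import Crawler
    from ocdsindex.extract import extract_sphinx

    crawler = Crawler("_build/html", "https://standard.open-contracting.org", extract_sphinx, allow=allow_sphinx)
    # A dict in which the key is a language code and the value is the documents to index.
    documents = crawler.get_documents_by_language()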
4 changes: 2 additions & 2 deletions ocdsindex/exceptions.py
@@ -1,6 +1,6 @@
class OCDSIndexError(Exception):
"""Base class for exceptions from within this package"""
"""Base class for exceptions from within this package."""


class MissingHeadingError(OCDSIndexError, IndexError):
"""Raised when a section is missing a heading"""
"""Raised when a section is missing a heading."""
6 changes: 4 additions & 2 deletions ocdsindex/extract.py
@@ -1,5 +1,7 @@
"""
- ``extract_`` methods that return the documents to index as a list of dicts. Each dict sets these keys:
+ ``extract_`` methods that return the documents to index as a list of dicts.
+ Each dict sets these keys:
url
The remote URL of the document, which might include a fragment identifier
@@ -44,7 +46,7 @@ def _select_div_by_class(tree, class_name):

def extract_sphinx(url, tree):
"""
- Extracts one document per section of the page.
+ Extract one document per section of the page.
:param str url: the file's remote URL
:param tree: the file's root HTML element
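Per its docstring and parameters, `extract_sphinx` takes a file's remote URL and its root HTML element and returns one document per section of the page. A hedged sketch of a call, using hypothetical markup (the structure Sphinx actually emits may differ):

    import lxml.html

    from ocdsindex.extract import extract_sphinx

    html = "<div class='section' id='intro'><h1>Introduction</h1><p>Some text.</p></div>"
    documents = extract_sphinx("https://example.org/en/", lxml.html.fromstring(html))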
16 changes: 15 additions & 1 deletion pyproject.toml
@@ -45,4 +45,18 @@ line-length = 119
target-version = "py39"

[tool.ruff.lint]
- select = ["E", "C4", "F", "I", "W"]
+ select = ["ALL"]
+ ignore = [
+     "ANN", "C901", "COM812", "D203", "D212", "D415", "EM", "PERF203", "PLR091", "Q000",
+     "PLR2004", "PLW2901", "D100", "D103", "D104", "D205",
+     "PTH",
+ ]
+
+ [tool.ruff.lint.flake8-builtins]
+ builtins-ignorelist = ["copyright"]
+
+ [tool.ruff.lint.per-file-ignores]
+ "docs/conf.py" = ["D100", "INP001"]
+ "tests/*" = [
+     "ARG001", "D", "FBT003", "INP001", "PLR2004", "S", "TRY003",
+ ]
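Most of the docstring edits in this commit follow from `select = ["ALL"]`, which enables pydocstyle rules such as D401 (first line should be in imperative mood) and D400 (first line should end with a period), while D203, D205, D212, and D415 are explicitly ignored above. A minimal before/after illustration, using hypothetical functions:

    def crawl_before(directory):
        """Crawls the directory."""  # D401: summary is not in the imperative mood


    def crawl_after(directory):
        """Crawl the directory."""  # satisfies D401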
