Skip to content

Commit

Permalink
Merge branch 'scribe-org:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Jk40git authored Mar 20, 2024
2 parents 945929f + 039e073 commit fdf30bf
Show file tree
Hide file tree
Showing 33 changed files with 171 additions and 53 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/pr_ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# CI workflow: runs the Ruff formatting check and the Ruff lint check on
# pushes to main and on pull requests that target main.
name: pr_ci
on:
  push:
    branches: [main]
  pull_request:
    branches:
      - main
    types: [opened, reopened, synchronize]

jobs:
  format_check:
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
        python-version:
          - "3.9"

    runs-on: ${{ matrix.os }}

    name: Run PR Check

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade uv
          uv venv
          uv pip install -r requirements.txt

      # Each step runs in a fresh shell, so the PATH as modified by the
      # activate script is written to GITHUB_ENV for the following steps.
      - name: Activate virtualenv
        run: |
          . .venv/bin/activate
          echo PATH=$PATH >> $GITHUB_ENV

      # `ruff format --check` reports files that would be reformatted without
      # rewriting them; `ruff check` alone only lints and never verifies
      # formatting.
      # NOTE(review): assumes ruff is installed via requirements.txt - confirm.
      - name: Run ruff format - Formatting check
        run: ruff format --check .

      # always() runs the lint check even when the formatting check failed,
      # so both results are reported in one run.
      - name: Run ruff - Linting and import sorting check
        if: always()
        run: ruff check .
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/).

### ♻️ Code Refactoring

- Code formatting was shifted from [black](https://github.com/psf/black) to [Ruff](https://github.com/astral-sh/ruff).
- A Ruff based GitHub workflow was added to check the code formatting and lint the codebase on each pull request ([#109](https://github.com/scribe-org/Scribe-Data/issues/109)).
- The `_update_files` directory was renamed `update_files` as these files are used in non-internal manners now ([#57](https://github.com/scribe-org/Scribe-Data/issues/57)).
- A common function has been created to map Wikidata ids to noun genders ([#69](https://github.com/scribe-org/Scribe-Data/issues/69)).
- Code formatting was shifted from [black](https://github.com/psf/black) to [Ruff](https://github.com/astral-sh/ruff).

## Scribe-Data 3.2.2

Expand Down
Binary file added docs/source/_static/ScribeFavicon.ico
Binary file not shown.
3 changes: 3 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@
"display_version": True,
}

# Adding favicon to the docs.
html_favicon = "_static/ScribeFavicon.ico"

# Importing custom css for theme customization.
html_css_files = [
"custom.css",
Expand Down
5 changes: 2 additions & 3 deletions src/scribe_data/checkquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"""

import argparse
import contextlib
import math
import os
import subprocess
Expand Down Expand Up @@ -304,12 +305,10 @@ def check_positive_int(value: str, err_msg: str) -> int:
------
argparse.ArgumentTypeError
"""
try:
with contextlib.suppress(ValueError):
number = int(value)
if number >= 1:
return number
except ValueError:
pass

raise argparse.ArgumentTypeError(err_msg)

Expand Down
8 changes: 4 additions & 4 deletions src/scribe_data/extract_transform/extract_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,9 @@ def parse_to_ndjson(
print(f"Making {output_dir} directory for the output")
os.makedirs(output_dir)

# Compare against the bool singletons by identity rather than truthiness.
# A plain `if multicore:` / `elif not multicore:` pair covers every possible
# value, which makes the isinstance branch unreachable and sends an explicit
# core count such as 4 to os.cpu_count() instead of honouring it.
if multicore is True:
    num_cores = os.cpu_count()
elif multicore is False:
    num_cores = 1
elif isinstance(multicore, int):
    # An explicit integer is used as the number of cores.
    num_cores = multicore
Expand Down Expand Up @@ -372,8 +372,8 @@ def read_and_combine_json(file_path):
data = []

with open(file_path, "r", encoding="utf-8") as f:
for l in f:
data.append(json.loads(l))
for line in f:
data.append(json.loads(line))

return data

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import translate_to_other_languages
from scribe_data.utils import translate_to_other_languages # noqa: E402

SRC_LANG = "English"
translate_script_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

"""
Translates the French words queried from Wikidata to all other Scribe languages.
Expand All @@ -15,7 +14,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import translate_to_other_languages
from scribe_data.utils import translate_to_other_languages # noqa: E402

SRC_LANG = "French"
translate_script_dir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -39,4 +38,4 @@
word_list=word_list,
translations=translations,
batch_size=100,
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Translates the Portuguese words queried from Wikidata to all other Scribe languages.
Example
-------
python3 src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py
"""

import json
import os
import sys

# Put the Scribe-Data "src" directory at the front of sys.path before the
# scribe_data import below, which is why that import carries a noqa for E402.
PATH_TO_SCRIBE_ORG = os.path.dirname(sys.path[0]).split("Scribe-Data")[0]
PATH_TO_SCRIBE_DATA_SRC = PATH_TO_SCRIBE_ORG + "Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import translate_to_other_languages  # noqa: E402

SRC_LANG = "Portuguese"

# All data files are resolved relative to the directory holding this script.
translate_script_dir = os.path.dirname(os.path.abspath(__file__))
words_to_translate_path = os.path.join(translate_script_dir, "words_to_translate.json")
translated_words_path = os.path.join(
    translate_script_dir, "../formatted_data/translated_words.json"
)

# Read the source entries and keep only the value stored under "word".
with open(words_to_translate_path, "r", encoding="utf-8") as words_file:
    json_data = json.load(words_file)

word_list = [entry["word"] for entry in json_data]

# Start from the translations already saved on disk when that file exists,
# otherwise from an empty mapping.
if os.path.exists(translated_words_path):
    with open(translated_words_path, "r", encoding="utf-8") as translations_file:
        translations = json.load(translations_file)
else:
    translations = {}

translate_to_other_languages(
    source_language=SRC_LANG,
    word_list=word_list,
    translations=translations,
    batch_size=100,
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import translate_to_other_languages
from scribe_data.utils import translate_to_other_languages # noqa: E402

SRC_LANG = "Russian"
translate_script_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data, map_genders
from scribe_data.utils import ( # noqa: E402
export_formatted_data,
load_queried_data,
map_genders,
)

file_path = sys.argv[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import export_formatted_data, load_queried_data
from scribe_data.utils import export_formatted_data, load_queried_data # noqa: E402

file_path = sys.argv[0]

Expand Down
Empty file.
Loading

0 comments on commit fdf30bf

Please sign in to comment.