diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index faeb770..8b31f5a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ permissions: packages: read jobs: - test-check: + common-test-check: runs-on: ubuntu-latest services: postgres: @@ -22,15 +22,56 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - name: Setup python 3.12 + - name: Setup python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.12 + python-version: "3.10" cache: pip - cache-dependency-path: pyproject.toml + cache-dependency-path: common/pyproject.toml - name: dependency install - run: pip install -e ".[dev]" + run: pip install -e "./common[dev]" - name: copy env - run: cp .env.example .env + run: cp .env.example common/.env - name: test + working-directory: common + run: tox + + api-test-check: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: pip + cache-dependency-path: api/pyproject.toml + - name: dependency install + run: pip install -e "./api[dev]" + - name: copy env + run: cp .env.example api/.env + - name: test + working-directory: api + run: tox + + etl-test-check: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: pip + cache-dependency-path: etl/pyproject.toml + - name: dependency install + run: pip install -e "./etl[dev]" + - name: copy env + run: cp .env.example etl/.env + - name: test + working-directory: etl run: tox diff --git a/.vscode/settings.json.example b/.vscode/settings.json.example index b018960..0aaaa5b 100644 --- a/.vscode/settings.json.example +++ b/.vscode/settings.json.example @@ -14,7 +14,7 @@ "editor.formatOnSave": true, "[python]": { "editor.codeActionsOnSave": { - 
"source.organizeImports": true + "source.organizeImports": "explicit" }, "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true @@ -23,9 +23,21 @@ ".venv/lib/python3.10/site-packages" ], "python.defaultInterpreterPath": ".venv/bin/python", - "python.analysis.stubPath": "stubs", + "python.analysis.stubPath": "common/stubs", + "black-formatter.args": [ + "--config", + "common/pyproject.toml" + ], "flake8.path": [ ".venv/bin/pflake8" ], - "python.formatting.provider": "none", -} + "flake8.args": [ + "--config", + "common/pyproject.toml" + ], + "isort.args": [ + "--settings-path", + "common/pyproject.toml" + ], + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/birdxplorer/__init__.py b/api/birdxplorer_api/__init__.py similarity index 100% rename from birdxplorer/__init__.py rename to api/birdxplorer_api/__init__.py diff --git a/birdxplorer/app.py b/api/birdxplorer_api/app.py similarity index 91% rename from birdxplorer/app.py rename to api/birdxplorer_api/app.py index 8c93ba7..38a6e86 100644 --- a/birdxplorer/app.py +++ b/api/birdxplorer_api/app.py @@ -7,11 +7,12 @@ from pydantic.alias_generators import to_snake from starlette.types import ASGIApp, Receive, Scope, Send -from .logger import get_logger +from birdxplorer_common.logger import get_logger +from birdxplorer_common.settings import GlobalSettings +from birdxplorer_common.storage import gen_storage + from .routers.data import gen_router as gen_data_router from .routers.system import gen_router as gen_system_router -from .settings import GlobalSettings -from .storage import gen_storage class QueryStringFlatteningMiddleware: diff --git a/birdxplorer/main.py b/api/birdxplorer_api/main.py similarity index 71% rename from birdxplorer/main.py rename to api/birdxplorer_api/main.py index baa7c77..0cd201d 100644 --- a/birdxplorer/main.py +++ b/api/birdxplorer_api/main.py @@ -1,7 +1,8 @@ from fastapi import FastAPI +from birdxplorer_common.settings import 
GlobalSettings + from .app import gen_app -from .settings import GlobalSettings def main() -> FastAPI: diff --git a/birdxplorer/py.typed b/api/birdxplorer_api/py.typed similarity index 100% rename from birdxplorer/py.typed rename to api/birdxplorer_api/py.typed diff --git a/birdxplorer/routers/__init__.py b/api/birdxplorer_api/routers/__init__.py similarity index 100% rename from birdxplorer/routers/__init__.py rename to api/birdxplorer_api/routers/__init__.py diff --git a/birdxplorer/routers/data.py b/api/birdxplorer_api/routers/data.py similarity index 95% rename from birdxplorer/routers/data.py rename to api/birdxplorer_api/routers/data.py index df95afd..2398df8 100644 --- a/birdxplorer/routers/data.py +++ b/api/birdxplorer_api/routers/data.py @@ -4,7 +4,7 @@ from dateutil.parser import parse as dateutil_parse from fastapi import APIRouter, HTTPException, Query -from ..models import ( +from birdxplorer_common.models import ( BaseModel, LanguageIdentifier, Note, @@ -18,7 +18,7 @@ TwitterTimestamp, UserEnrollment, ) -from ..storage import Storage +from birdxplorer_common.storage import Storage class TopicListResponse(BaseModel): @@ -55,7 +55,9 @@ def gen_router(storage: Storage) -> APIRouter: router = APIRouter() @router.get("/user-enrollments/{participant_id}", response_model=UserEnrollment) - def get_user_enrollment_by_participant_id(participant_id: ParticipantId) -> UserEnrollment: + def get_user_enrollment_by_participant_id( + participant_id: ParticipantId, + ) -> UserEnrollment: res = storage.get_user_enrollment_by_participant_id(participant_id=participant_id) if res is None: raise ValueError(f"participant_id={participant_id} not found") diff --git a/birdxplorer/routers/system.py b/api/birdxplorer_api/routers/system.py similarity index 83% rename from birdxplorer/routers/system.py rename to api/birdxplorer_api/routers/system.py index 3108d4a..00d8394 100644 --- a/birdxplorer/routers/system.py +++ b/api/birdxplorer_api/routers/system.py @@ -1,6 +1,6 @@ from 
fastapi import APIRouter -from ..models import Message +from birdxplorer_common.models import Message def gen_router() -> APIRouter: diff --git a/api/pyproject.toml b/api/pyproject.toml new file mode 100644 index 0000000..7503043 --- /dev/null +++ b/api/pyproject.toml @@ -0,0 +1,114 @@ +[build-system] +build-backend = "flit_core.buildapi" +requires = ["flit_core >=3.8.0,<4"] + + +[project] +name = "birdxplorer_api" +description = "The Web API for BirdXplorer project." +authors = [ + {name = "osoken"}, +] +dynamic = [ + "version", +] +readme = "../README.md" +license = {file = "../LICENSE"} +requires-python = ">=3.10" + +classifiers = [ + "Development Status :: 3 - Alpha", + "Natural Language :: Japanese", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", +] + +dependencies = [ + "birdxplorer_common @ git+https://github.com/codeforjapan/BirdXplorer.git@feature/issue-53-divide-python-packages#subdirectory=common", + "fastapi", + "python-dateutil", + "pydantic", + "starlette", + "python-dotenv", +] + +[project.urls] +Source = "https://github.com/codeforjapan/BirdXplorer" + +[tool.setuptools] +packages=["birdxplorer"] + +[tool.setuptools.package-data] +birdxplorer = ["py.typed"] + +[project.optional-dependencies] +dev=[ + "black", + "flake8", + "pyproject-flake8", + "pytest", + "mypy", + "tox", + "isort", + "pytest-mock", + "pytest-cov", + "freezegun", + "types-python-dateutil", + "psycopg2-binary", + "factory_boy", + "uvicorn", + "polyfactory", + "httpx", +] +prod=[ +] + +[tool.pytest.ini_options] +addopts = ["-sv", "--doctest-modules", "--ignore-glob=birdxplorer_api/main.py", "--cov=birdxplorer_api", "--cov-report=xml", "--cov-report=term-missing"] +testpaths = ["tests", "birdxplorer_api"] +filterwarnings = [ + "error", + "ignore:The \\'app\\' shortcut is now deprecated. 
Use the explicit style \\'transport=WSGITransport\\(app=\\.\\.\\.\\)\\' instead\\.", +] + +[tool.black] +line-length = 120 +target-version = ['py310'] + +[tool.flake8] +max-line-length = 120 +extend-ignore = "E203,E701" + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true +plugins = ["pydantic.mypy"] + +[tool.pydantic.mypy] +init_typed = true + +[tool.isort] +profile = "black" +known_first_party = "birdxplorer_api,birdxplorer_common,birdxplorer_etl" + +[tool.tox] +legacy_tox_ini = """ + [tox] + skipsdist = true + envlist = py310 + + [testenv] + setenv = + VIRTUALENV_PIP = 24.0 + deps = + -e .[dev] + commands = + black birdxplorer_api tests + isort birdxplorer_api tests + pytest + pflake8 birdxplorer_api/ tests/ + mypy birdxplorer_api --strict + mypy tests --strict +""" diff --git a/api/tests/conftest.py b/api/tests/conftest.py new file mode 100644 index 0000000..093d718 --- /dev/null +++ b/api/tests/conftest.py @@ -0,0 +1,348 @@ +import os +import random +from collections.abc import Generator +from typing import List, Type, Union +from unittest.mock import MagicMock, patch + +from dotenv import load_dotenv +from fastapi.testclient import TestClient +from polyfactory import Use +from polyfactory.factories.pydantic_factory import ModelFactory +from polyfactory.pytest_plugin import register_fixture +from pytest import fixture + +from birdxplorer_common.exceptions import UserEnrollmentNotFoundError +from birdxplorer_common.models import ( + LanguageIdentifier, + Note, + NoteId, + ParticipantId, + Post, + PostId, + Topic, + TopicId, + TweetId, + TwitterTimestamp, + UserEnrollment, + XUser, +) +from birdxplorer_common.settings import GlobalSettings, PostgresStorageSettings +from birdxplorer_common.storage import Storage + + +def gen_random_twitter_timestamp() -> int: + return random.randint(TwitterTimestamp.min_value(), TwitterTimestamp.max_value()) + + +@register_fixture(name="user_enrollment_factory") +class 
UserEnrollmentFactory(ModelFactory[UserEnrollment]): + __model__ = UserEnrollment + + participant_id = Use(lambda: "".join(random.choices("0123456789ABCDEF", k=64))) + timestamp_of_last_state_change = Use(gen_random_twitter_timestamp) + timestamp_of_last_earn_out = Use(gen_random_twitter_timestamp) + + +@register_fixture(name="note_factory") +class NoteFactory(ModelFactory[Note]): + __model__ = Note + + +@register_fixture(name="topic_factory") +class TopicFactory(ModelFactory[Topic]): + __model__ = Topic + + +@register_fixture(name="x_user_factory") +class XUserFactory(ModelFactory[XUser]): + __model__ = XUser + + +@register_fixture(name="post_factory") +class PostFactory(ModelFactory[Post]): + __model__ = Post + + +@fixture +def user_enrollment_samples( + user_enrollment_factory: UserEnrollmentFactory, +) -> Generator[List[UserEnrollment], None, None]: + yield [user_enrollment_factory.build() for _ in range(3)] + + +@fixture +def topic_samples(topic_factory: TopicFactory) -> Generator[List[Topic], None, None]: + topics = [ + topic_factory.build(topic_id=0, label={"en": "topic0", "ja": "トピック0"}, reference_count=3), + topic_factory.build(topic_id=1, label={"en": "topic1", "ja": "トピック1"}, reference_count=2), + topic_factory.build(topic_id=2, label={"en": "topic2", "ja": "トピック2"}, reference_count=1), + topic_factory.build(topic_id=3, label={"en": "topic3", "ja": "トピック3"}, reference_count=0), + ] + yield topics + + +@fixture +def note_samples(note_factory: NoteFactory, topic_samples: List[Topic]) -> Generator[List[Note], None, None]: + notes = [ + note_factory.build( + note_id="1234567890123456781", + post_id="2234567890123456781", + topics=[topic_samples[0]], + language="ja", + summary="要約文1", + created_at=1152921600000, + ), + note_factory.build( + note_id="1234567890123456782", + post_id="2234567890123456782", + topics=[], + language="en", + summary="summary2", + created_at=1152921601000, + ), + note_factory.build( + note_id="1234567890123456783", + 
post_id="2234567890123456783", + topics=[topic_samples[1]], + language="en", + summary="summary3", + created_at=1152921602000, + ), + note_factory.build( + note_id="1234567890123456784", + post_id="2234567890123456784", + topics=[topic_samples[0], topic_samples[1], topic_samples[2]], + language="en", + summary="summary4", + created_at=1152921603000, + ), + note_factory.build( + note_id="1234567890123456785", + post_id="2234567890123456785", + topics=[topic_samples[0]], + language="en", + summary="summary5", + created_at=1152921604000, + ), + ] + yield notes + + +@fixture +def x_user_samples(x_user_factory: XUserFactory) -> Generator[List[XUser], None, None]: + x_users = [ + x_user_factory.build( + user_id="1234567890123456781", + name="User1", + profile_image_url="https://pbs.twimg.com/profile_images/1468001914302390XXX/xxxxXXXX_normal.jpg", + followers_count=100, + following_count=200, + ), + x_user_factory.build( + user_id="1234567890123456782", + name="User2", + profile_image_url="https://pbs.twimg.com/profile_images/1468001914302390YYY/yyyyYYYY_normal.jpg", + followers_count=300, + following_count=400, + ), + x_user_factory.build( + user_id="1234567890123456783", + name="User3", + profile_image_url="https://pbs.twimg.com/profile_images/1468001914302390ZZZ/zzzzZZZZ_normal.jpg", + followers_count=300, + following_count=400, + ), + ] + yield x_users + + +@fixture +def post_samples(post_factory: PostFactory, x_user_samples: List[XUser]) -> Generator[List[Post], None, None]: + posts = [ + post_factory.build( + post_id="2234567890123456781", + x_user_id="1234567890123456781", + x_user=x_user_samples[0], + text="text11", + media_details=None, + created_at=1152921600000, + like_count=10, + repost_count=20, + impression_count=30, + ), + post_factory.build( + post_id="2234567890123456791", + x_user_id="1234567890123456781", + x_user=x_user_samples[0], + text="text12", + media_details=None, + created_at=1153921700000, + like_count=10, + repost_count=20, + 
impression_count=30, + ), + post_factory.build( + post_id="2234567890123456801", + x_user_id="1234567890123456782", + x_user=x_user_samples[1], + text="text21", + media_details=None, + created_at=1154921800000, + like_count=10, + repost_count=20, + impression_count=30, + ), + ] + yield posts + + +@fixture +def mock_storage( + user_enrollment_samples: List[UserEnrollment], + topic_samples: List[Topic], + post_samples: List[Post], + note_samples: List[Note], +) -> Generator[MagicMock, None, None]: + mock = MagicMock(spec=Storage) + + def _get_user_enrollment_by_participant_id( + participant_id: ParticipantId, + ) -> UserEnrollment: + x = {d.participant_id: d for d in user_enrollment_samples}.get(participant_id) + if x is None: + raise UserEnrollmentNotFoundError(participant_id=participant_id) + return x + + mock.get_user_enrollment_by_participant_id.side_effect = _get_user_enrollment_by_participant_id + + def _get_topics() -> Generator[Topic, None, None]: + yield from topic_samples + + def _get_notes( + note_ids: Union[List[NoteId], None] = None, + created_at_from: Union[None, TwitterTimestamp] = None, + created_at_to: Union[None, TwitterTimestamp] = None, + topic_ids: Union[List[TopicId], None] = None, + post_ids: Union[List[TweetId], None] = None, + language: Union[LanguageIdentifier, None] = None, + ) -> Generator[Note, None, None]: + for note in note_samples: + if note_ids is not None and note.note_id not in note_ids: + continue + if created_at_from is not None and note.created_at < created_at_from: + continue + if created_at_to is not None and note.created_at > created_at_to: + continue + if topic_ids is not None and not set(topic_ids).issubset({topic.topic_id for topic in note.topics}): + continue + if post_ids is not None and note.post_id not in post_ids: + continue + if language is not None and note.language != language: + continue + yield note + + mock.get_topics.side_effect = _get_topics + mock.get_notes.side_effect = _get_notes + + def _get_posts() -> 
Generator[Post, None, None]: + yield from post_samples + + mock.get_posts.side_effect = _get_posts + + def _get_posts_by_ids(post_ids: List[PostId]) -> Generator[Post, None, None]: + for i in post_ids: + for post in post_samples: + if post.post_id == i: + yield post + break + + mock.get_posts_by_ids.side_effect = _get_posts_by_ids + + def _get_posts_by_created_at_range(start: TwitterTimestamp, end: TwitterTimestamp) -> Generator[Post, None, None]: + for post in post_samples: + if start <= post.created_at < end: + yield post + + mock.get_posts_by_created_at_range.side_effect = _get_posts_by_created_at_range + + def _get_posts_by_created_at_start( + start: TwitterTimestamp, + ) -> Generator[Post, None, None]: + for post in post_samples: + if start <= post.created_at: + yield post + + mock.get_posts_by_created_at_start.side_effect = _get_posts_by_created_at_start + + def _get_posts_by_created_at_end( + end: TwitterTimestamp, + ) -> Generator[Post, None, None]: + for post in post_samples: + if post.created_at < end: + yield post + + mock.get_posts_by_created_at_end.side_effect = _get_posts_by_created_at_end + + yield mock + + +TEST_DATABASE_NAME = "bx_test" + + +@fixture +def load_dotenv_fixture() -> None: + load_dotenv() + + +@fixture +def postgres_storage_settings_factory( + load_dotenv_fixture: None, +) -> Type[ModelFactory[PostgresStorageSettings]]: + class PostgresStorageSettingsFactory(ModelFactory[PostgresStorageSettings]): + __model__ = PostgresStorageSettings + + host = "localhost" + username = "postgres" + port = 5432 + database = "postgres" + password = os.environ["BX_STORAGE_SETTINGS__PASSWORD"] + + return PostgresStorageSettingsFactory + + +@fixture +def global_settings_factory( + postgres_storage_settings_factory: Type[ModelFactory[PostgresStorageSettings]], +) -> Type[ModelFactory[GlobalSettings]]: + class GlobalSettingsFactory(ModelFactory[GlobalSettings]): + __model__ = GlobalSettings + + storage_settings = postgres_storage_settings_factory.build() + + 
return GlobalSettingsFactory + + +@fixture +def settings_for_test( + global_settings_factory: Type[ModelFactory[GlobalSettings]], + postgres_storage_settings_factory: Type[ModelFactory[PostgresStorageSettings]], +) -> Generator[GlobalSettings, None, None]: + yield global_settings_factory.build( + storage_settings=postgres_storage_settings_factory.build(database=TEST_DATABASE_NAME) + ) + + +@fixture +def client(settings_for_test: GlobalSettings, mock_storage: MagicMock) -> Generator[TestClient, None, None]: + from birdxplorer_api.app import gen_app + + with patch("birdxplorer_api.app.gen_storage", return_value=mock_storage): + app = gen_app(settings=settings_for_test) + yield TestClient(app) + + +@fixture +def default_settings( + global_settings_factory: Type[ModelFactory[GlobalSettings]], +) -> Generator[GlobalSettings, None, None]: + yield global_settings_factory.build() diff --git a/tests/routers/__init__.py b/api/tests/routers/__init__.py similarity index 100% rename from tests/routers/__init__.py rename to api/tests/routers/__init__.py diff --git a/tests/routers/test_data.py b/api/tests/routers/test_data.py similarity index 93% rename from tests/routers/test_data.py rename to api/tests/routers/test_data.py index 8f719f7..8bf271c 100644 --- a/tests/routers/test_data.py +++ b/api/tests/routers/test_data.py @@ -3,7 +3,7 @@ from fastapi.testclient import TestClient -from birdxplorer.models import Note, Post, Topic, UserEnrollment +from birdxplorer_common.models import Note, Post, Topic, UserEnrollment def test_user_enrollments_get(client: TestClient, user_enrollment_samples: List[UserEnrollment]) -> None: @@ -32,7 +32,10 @@ def test_posts_get_has_post_id_filter(client: TestClient, post_samples: List[Pos assert response.status_code == 200 res_json = response.json() assert res_json == { - "data": [json.loads(post_samples[0].model_dump_json()), json.loads(post_samples[2].model_dump_json())] + "data": [ + json.loads(post_samples[0].model_dump_json()), + 
json.loads(post_samples[2].model_dump_json()), + ] } @@ -95,7 +98,10 @@ def test_notes_get_has_note_id_filter(client: TestClient, note_samples: List[Not assert response.status_code == 200 res_json = response.json() assert res_json == { - "data": [json.loads(note_samples[0].model_dump_json()), json.loads(note_samples[2].model_dump_json())] + "data": [ + json.loads(note_samples[0].model_dump_json()), + json.loads(note_samples[2].model_dump_json()), + ] } diff --git a/tests/routers/test_system.py b/api/tests/routers/test_system.py similarity index 100% rename from tests/routers/test_system.py rename to api/tests/routers/test_system.py diff --git a/tests/test_app.py b/api/tests/test_app.py similarity index 59% rename from tests/test_app.py rename to api/tests/test_app.py index e87e423..c54e8c1 100644 --- a/tests/test_app.py +++ b/api/tests/test_app.py @@ -1,12 +1,12 @@ from pytest_mock import MockerFixture -from birdxplorer.app import gen_app -from birdxplorer.settings import GlobalSettings +from birdxplorer_api.app import gen_app +from birdxplorer_common.settings import GlobalSettings def test_gen_app(mocker: MockerFixture, default_settings: GlobalSettings) -> None: - FastAPI = mocker.patch("birdxplorer.app.FastAPI") - get_logger = mocker.patch("birdxplorer.app.get_logger") + FastAPI = mocker.patch("birdxplorer_api.app.FastAPI") + get_logger = mocker.patch("birdxplorer_api.app.get_logger") expected = FastAPI.return_value actual = gen_app(settings=default_settings) diff --git a/tests/test_main.py b/api/tests/test_main.py similarity index 63% rename from tests/test_main.py rename to api/tests/test_main.py index 87dc313..0443346 100644 --- a/tests/test_main.py +++ b/api/tests/test_main.py @@ -2,9 +2,9 @@ def test_main_returns_app(mocker: MockerFixture) -> None: - gen_app = mocker.patch("birdxplorer.main.gen_app") - GlobalSettings = mocker.patch("birdxplorer.main.GlobalSettings") - from birdxplorer.main import main + gen_app = mocker.patch("birdxplorer_api.main.gen_app") 
+ GlobalSettings = mocker.patch("birdxplorer_api.main.GlobalSettings") + from birdxplorer_api.main import main expected = gen_app.return_value actual = main() diff --git a/api/tests/test_package.py b/api/tests/test_package.py new file mode 100644 index 0000000..43ef207 --- /dev/null +++ b/api/tests/test_package.py @@ -0,0 +1,9 @@ +import re + +import birdxplorer_api + + +def test_package_has_version() -> None: + assert hasattr(birdxplorer_api, "__version__") + assert isinstance(birdxplorer_api.__version__, str) + assert re.match(r"^\d+\.\d+\.\d+$", birdxplorer_api.__version__) diff --git a/common/birdxplorer_common/__init__.py b/common/birdxplorer_common/__init__.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/common/birdxplorer_common/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/birdxplorer/exceptions.py b/common/birdxplorer_common/exceptions.py similarity index 100% rename from birdxplorer/exceptions.py rename to common/birdxplorer_common/exceptions.py diff --git a/birdxplorer/logger.py b/common/birdxplorer_common/logger.py similarity index 100% rename from birdxplorer/logger.py rename to common/birdxplorer_common/logger.py diff --git a/birdxplorer/models.py b/common/birdxplorer_common/models.py similarity index 100% rename from birdxplorer/models.py rename to common/birdxplorer_common/models.py diff --git a/common/birdxplorer_common/py.typed b/common/birdxplorer_common/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/birdxplorer/settings.py b/common/birdxplorer_common/settings.py similarity index 100% rename from birdxplorer/settings.py rename to common/birdxplorer_common/settings.py diff --git a/birdxplorer/storage.py b/common/birdxplorer_common/storage.py similarity index 100% rename from birdxplorer/storage.py rename to common/birdxplorer_common/storage.py diff --git a/pyproject.toml b/common/pyproject.toml similarity index 71% rename from pyproject.toml rename to common/pyproject.toml index 
abcf924..f29efbc 100644 --- a/pyproject.toml +++ b/common/pyproject.toml @@ -4,16 +4,16 @@ requires = ["flit_core >=3.8.0,<4"] [project] -name = "birdxplorer" -description = "birdxplorer is a tool to help you read and get insights from your documents." +name = "birdxplorer_common" +description = "Common library for BirdXplorer" authors = [ {name = "osoken"}, ] dynamic = [ "version", ] -readme = "README.md" -license = {file = "LICENSE"} +readme = "../README.md" +license = {file = "../LICENSE"} requires-python = ">=3.10" classifiers = [ @@ -29,16 +29,14 @@ dependencies = [ "python-dateutil", "sqlalchemy", "pydantic_settings", - "fastapi", "JSON-log-formatter", - "openai", ] [project.urls] Source = "https://github.com/codeforjapan/BirdXplorer" [tool.setuptools] -packages=["birdxplorer"] +packages=["birdxplorer_common"] [tool.setuptools.package-data] birdxplorer = ["py.typed"] @@ -58,9 +56,7 @@ dev=[ "types-python-dateutil", "psycopg2-binary", "factory_boy", - "uvicorn", "polyfactory", - "httpx", "types-psycopg2", ] prod=[ @@ -69,11 +65,10 @@ prod=[ [tool.pytest.ini_options] -addopts = ["-sv", "--doctest-modules", "--cov=birdxplorer", "--cov-report=xml", "--cov-report=term-missing"] -testpaths = ["tests", "birdxplorer"] +addopts = ["-sv", "--doctest-modules", "--cov=birdxplorer_common", "--cov-report=xml", "--cov-report=term-missing"] +testpaths = ["tests", "birdxplorer_common"] filterwarnings = [ "error", - "ignore:The \\'app\\' shortcut is now deprecated. 
Use the explicit style \\'transport=WSGITransport\\(app=\\.\\.\\.\\)\\' instead\\.", ] [tool.black] @@ -96,6 +91,7 @@ init_typed = true [tool.isort] profile = "black" +known_first_party = "birdxplorer_api,birdxplorer_common,birdxplorer_etl" [tool.tox] legacy_tox_ini = """ @@ -110,10 +106,10 @@ legacy_tox_ini = """ deps = -e .[dev] commands = - black birdxplorer tests - isort birdxplorer tests + black birdxplorer_common tests + isort birdxplorer_common tests pytest - pflake8 birdxplorer/ tests/ stubs/ - mypy birdxplorer --strict + pflake8 birdxplorer_common/ tests/ stubs/ + mypy birdxplorer_common --strict mypy tests --strict """ diff --git a/stubs/json_log_formatter.pyi b/common/stubs/json_log_formatter.pyi similarity index 100% rename from stubs/json_log_formatter.pyi rename to common/stubs/json_log_formatter.pyi diff --git a/tests/conftest.py b/common/tests/conftest.py similarity index 73% rename from tests/conftest.py rename to common/tests/conftest.py index 29f0f09..a2b1094 100644 --- a/tests/conftest.py +++ b/common/tests/conftest.py @@ -1,11 +1,9 @@ import os import random from collections.abc import Generator -from typing import List, Type, Union -from unittest.mock import MagicMock, patch +from typing import List, Type from dotenv import load_dotenv -from fastapi.testclient import TestClient from polyfactory import Use from polyfactory.factories.pydantic_factory import ModelFactory from polyfactory.pytest_plugin import register_fixture @@ -16,28 +14,20 @@ from sqlalchemy.orm import Session from sqlalchemy.sql import text -from birdxplorer.exceptions import UserEnrollmentNotFoundError -from birdxplorer.models import ( - LanguageIdentifier, +from birdxplorer_common.models import ( Note, - NoteId, - ParticipantId, Post, - PostId, Topic, - TopicId, - TweetId, TwitterTimestamp, UserEnrollment, XUser, ) -from birdxplorer.settings import GlobalSettings, PostgresStorageSettings -from birdxplorer.storage import ( +from birdxplorer_common.settings import 
GlobalSettings, PostgresStorageSettings +from birdxplorer_common.storage import ( Base, NoteRecord, NoteTopicAssociation, PostRecord, - Storage, TopicRecord, XUserRecord, ) @@ -116,99 +106,6 @@ def user_enrollment_samples( yield [user_enrollment_factory.build() for _ in range(3)] -@fixture -def mock_storage( - user_enrollment_samples: List[UserEnrollment], - topic_samples: List[Topic], - post_samples: List[Post], - note_samples: List[Note], -) -> Generator[MagicMock, None, None]: - mock = MagicMock(spec=Storage) - - def _get_user_enrollment_by_participant_id(participant_id: ParticipantId) -> UserEnrollment: - x = {d.participant_id: d for d in user_enrollment_samples}.get(participant_id) - if x is None: - raise UserEnrollmentNotFoundError(participant_id=participant_id) - return x - - mock.get_user_enrollment_by_participant_id.side_effect = _get_user_enrollment_by_participant_id - - def _get_topics() -> Generator[Topic, None, None]: - yield from topic_samples - - def _get_notes( - note_ids: Union[List[NoteId], None] = None, - created_at_from: Union[None, TwitterTimestamp] = None, - created_at_to: Union[None, TwitterTimestamp] = None, - topic_ids: Union[List[TopicId], None] = None, - post_ids: Union[List[TweetId], None] = None, - language: Union[LanguageIdentifier, None] = None, - ) -> Generator[Note, None, None]: - for note in note_samples: - if note_ids is not None and note.note_id not in note_ids: - continue - if created_at_from is not None and note.created_at < created_at_from: - continue - if created_at_to is not None and note.created_at > created_at_to: - continue - if topic_ids is not None and not set(topic_ids).issubset({topic.topic_id for topic in note.topics}): - continue - if post_ids is not None and note.post_id not in post_ids: - continue - if language is not None and note.language != language: - continue - yield note - - mock.get_topics.side_effect = _get_topics - mock.get_notes.side_effect = _get_notes - - def _get_posts() -> Generator[Post, None, 
None]: - yield from post_samples - - mock.get_posts.side_effect = _get_posts - - def _get_posts_by_ids(post_ids: List[PostId]) -> Generator[Post, None, None]: - for i in post_ids: - for post in post_samples: - if post.post_id == i: - yield post - break - - mock.get_posts_by_ids.side_effect = _get_posts_by_ids - - def _get_posts_by_created_at_range(start: TwitterTimestamp, end: TwitterTimestamp) -> Generator[Post, None, None]: - for post in post_samples: - if start <= post.created_at < end: - yield post - - mock.get_posts_by_created_at_range.side_effect = _get_posts_by_created_at_range - - def _get_posts_by_created_at_start(start: TwitterTimestamp) -> Generator[Post, None, None]: - for post in post_samples: - if start <= post.created_at: - yield post - - mock.get_posts_by_created_at_start.side_effect = _get_posts_by_created_at_start - - def _get_posts_by_created_at_end(end: TwitterTimestamp) -> Generator[Post, None, None]: - for post in post_samples: - if post.created_at < end: - yield post - - mock.get_posts_by_created_at_end.side_effect = _get_posts_by_created_at_end - - yield mock - - -@fixture -def client(settings_for_test: GlobalSettings, mock_storage: MagicMock) -> Generator[TestClient, None, None]: - from birdxplorer.app import gen_app - - with patch("birdxplorer.app.gen_storage", return_value=mock_storage): - app = gen_app(settings=settings_for_test) - yield TestClient(app) - - @fixture def topic_samples(topic_factory: TopicFactory) -> Generator[List[Topic], None, None]: topics = [ diff --git a/tests/test_data_model.py b/common/tests/test_data_model.py similarity index 96% rename from tests/test_data_model.py rename to common/tests/test_data_model.py index a57e765..d3f77f0 100644 --- a/tests/test_data_model.py +++ b/common/tests/test_data_model.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod from collections.abc import Mapping -from birdxplorer.models import NoteData, UserEnrollment +from birdxplorer_common.models import NoteData, UserEnrollment class 
BaseDataModelTester(ABC): diff --git a/tests/test_logger.py b/common/tests/test_logger.py similarity index 71% rename from tests/test_logger.py rename to common/tests/test_logger.py index 126931c..6922e75 100644 --- a/tests/test_logger.py +++ b/common/tests/test_logger.py @@ -1,6 +1,6 @@ from pytest import LogCaptureFixture -from birdxplorer.logger import get_logger +from birdxplorer_common.logger import get_logger def test_logger_is_a_child_of_root_logger(caplog: LogCaptureFixture) -> None: @@ -8,5 +8,5 @@ def test_logger_is_a_child_of_root_logger(caplog: LogCaptureFixture) -> None: with caplog.at_level("INFO"): logger.info("test") assert len(caplog.records) == 1 - assert caplog.records[0].name == "birdxplorer.logger" + assert caplog.records[0].name == "birdxplorer_common.logger" assert caplog.records[0].message == "test" diff --git a/common/tests/test_settings.py b/common/tests/test_settings.py new file mode 100644 index 0000000..41424f4 --- /dev/null +++ b/common/tests/test_settings.py @@ -0,0 +1,25 @@ +import os + +from pytest_mock import MockerFixture + +from birdxplorer_common.settings import GlobalSettings + + +def test_settings_read_from_env_var(mocker: MockerFixture) -> None: + mocker.patch.dict( + os.environ, + {"BX_LOGGER_SETTINGS__LEVEL": "99", "BX_STORAGE_SETTINGS__PASSWORD": "s0m6S+ron9P@55w0rd"}, + clear=True, + ) + settings = GlobalSettings() + assert settings.logger_settings.level == 99 + + +def test_settings_default(mocker: MockerFixture) -> None: + mocker.patch.dict( + os.environ, + {"BX_STORAGE_SETTINGS__PASSWORD": "s0m6S+ron9P@55w0rd"}, + ) + + settings = GlobalSettings() + assert settings.logger_settings.level == 20 diff --git a/tests/test_storage.py b/common/tests/test_storage.py similarity index 95% rename from tests/test_storage.py rename to common/tests/test_storage.py index 5cbcf20..3b74975 100644 --- a/tests/test_storage.py +++ b/common/tests/test_storage.py @@ -2,7 +2,7 @@ from sqlalchemy.engine import Engine -from birdxplorer.models 
import ( +from birdxplorer_common.models import ( LanguageIdentifier, Note, NoteId, @@ -13,7 +13,7 @@ TweetId, TwitterTimestamp, ) -from birdxplorer.storage import NoteRecord, PostRecord, Storage, TopicRecord +from birdxplorer_common.storage import NoteRecord, PostRecord, Storage, TopicRecord def test_get_topic_list( @@ -181,7 +181,10 @@ def test_get_notes_by_topic_ids( storage = Storage(engine=engine_for_test) topics = note_samples[0].topics topic_ids: List[TopicId] = [TopicId.from_int(0)] - expected = sorted([note for note in note_samples if note.topics == topics], key=lambda note: note.note_id) + expected = sorted( + [note for note in note_samples if note.topics == topics], + key=lambda note: note.note_id, + ) actual = sorted(list(storage.get_notes(topic_ids=topic_ids)), key=lambda note: note.note_id) assert expected == actual @@ -204,7 +207,10 @@ def test_get_notes_by_post_ids( note_records_sample: List[NoteRecord], ) -> None: storage = Storage(engine=engine_for_test) - post_ids = [TweetId.from_str("2234567890123456781"), TweetId.from_str("2234567890123456782")] + post_ids = [ + TweetId.from_str("2234567890123456781"), + TweetId.from_str("2234567890123456782"), + ] expected = [note for note in note_samples if note.post_id in post_ids] actual = list(storage.get_notes(post_ids=post_ids)) assert expected == actual diff --git a/etl/birdxplorer_etl/__init__.py b/etl/birdxplorer_etl/__init__.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/etl/birdxplorer_etl/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/etl/birdxplorer_etl/py.typed b/etl/birdxplorer_etl/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/etl/pyproject.toml b/etl/pyproject.toml new file mode 100644 index 0000000..602c45b --- /dev/null +++ b/etl/pyproject.toml @@ -0,0 +1,111 @@ +[build-system] +build-backend = "flit_core.buildapi" +requires = ["flit_core >=3.8.0,<4"] + + +[project] +name = "birdxplorer_etl" +description = "ETL module for BirdXplorer" 
+authors = [ + {name = "osoken"}, +] +dynamic = [ + "version", +] +readme = "../README.md" +license = {file = "../LICENSE"} +requires-python = ">=3.10" + +classifiers = [ + "Development Status :: 3 - Alpha", + "Natural Language :: Japanese", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", +] + +dependencies = [ + "birdxplorer_common @ git+https://github.com/codeforjapan/BirdXplorer.git@feature/issue-53-divide-python-packages#subdirectory=common", +] + +[project.urls] +Source = "https://github.com/codeforjapan/BirdXplorer" + +[tool.setuptools] +packages=["birdxplorer_etl"] + +[tool.setuptools.package-data] +birdxplorer_etl = ["py.typed"] + +[project.optional-dependencies] +dev=[ + "black", + "flake8", + "pyproject-flake8", + "pytest", + "mypy", + "tox", + "isort", + "pytest-mock", + "pytest-cov", + "freezegun", + "types-python-dateutil", + "psycopg2-binary", + "factory_boy", + "polyfactory", + "types-psycopg2", +] +prod=[ + "psycopg2" +] + + +[tool.pytest.ini_options] +addopts = ["-sv", "--doctest-modules", "--cov=birdxplorer_etl", "--cov-report=xml", "--cov-report=term-missing"] +testpaths = ["tests", "birdxplorer_etl"] +filterwarnings = [ + "error", +] + +[tool.black] +line-length = 120 +target-version = ['py310'] + +[tool.flake8] +max-line-length = 120 +extend-ignore = "E203,E701" + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true +plugins = ["pydantic.mypy"] +mypy_path = "stubs/" + +[tool.pydantic.mypy] +init_typed = true + +[tool.isort] +profile = "black" +known_first_party = "birdxplorer_api,birdxplorer_common,birdxplorer_etl" + +[tool.tox] +legacy_tox_ini = """ + [tox] + skipsdist = true + envlist = py310 + + [testenv] + setenv = + VIRTUALENV_PIP = 24.0 + DATA_DIR = {env:BX_DATA_DIR} + deps = + -e .[dev] + commands = + black birdxplorer_etl tests + isort birdxplorer_etl tests + pytest + pflake8 birdxplorer_etl/ tests/ + mypy birdxplorer_etl --strict +
mypy tests --strict +""" diff --git a/etl/tests/test_package.py b/etl/tests/test_package.py new file mode 100644 index 0000000..2caeadc --- /dev/null +++ b/etl/tests/test_package.py @@ -0,0 +1,9 @@ +import re + +import birdxplorer_etl + + +def test_birdxplorer_etl_has_version() -> None: + assert hasattr(birdxplorer_etl, "__version__") + assert isinstance(birdxplorer_etl.__version__, str) + assert re.match(r"^\d+\.\d+\.\d+$", birdxplorer_etl.__version__) diff --git a/scripts/migrations/migrate_all.py b/scripts/migrations/migrate_all.py index 140182d..8d6b879 100644 --- a/scripts/migrations/migrate_all.py +++ b/scripts/migrations/migrate_all.py @@ -6,9 +6,9 @@ from dotenv import load_dotenv from sqlalchemy.orm import Session -from birdxplorer.logger import get_logger -from birdxplorer.settings import GlobalSettings -from birdxplorer.storage import ( +from birdxplorer_common.logger import get_logger +from birdxplorer_common.settings import GlobalSettings +from birdxplorer_common.storage import ( Base, NoteRecord, NoteTopicAssociation, @@ -57,7 +57,11 @@ ) ) sess.commit() - with open(os.path.join(args.data_dir, args.notes_topics_association_file_name), "r", encoding="utf-8") as fin: + with open( + os.path.join(args.data_dir, args.notes_topics_association_file_name), + "r", + encoding="utf-8", + ) as fin: for d in csv.DictReader(fin): if ( sess.query(NoteTopicAssociation) @@ -109,7 +113,7 @@ post_id=d["post_id"], user_id=d["user_id"], text=d["text"], - media_details=json.loads(d["media_details"]) if len(d["media_details"]) > 0 else None, + media_details=(json.loads(d["media_details"]) if len(d["media_details"]) > 0 else None), created_at=d["created_at"], like_count=d["like_count"], repost_count=d["repost_count"], diff --git a/tests/test_birdxplorer.py b/tests/test_birdxplorer.py deleted file mode 100644 index 8e6dd87..0000000 --- a/tests/test_birdxplorer.py +++ /dev/null @@ -1,5 +0,0 @@ -import birdxplorer - - -def test_birdxplorer_has_version() -> None: - assert 
birdxplorer.__version__ is not None diff --git a/tests/test_settings.py b/tests/test_settings.py deleted file mode 100644 index da77138..0000000 --- a/tests/test_settings.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - -from pytest_mock import MockerFixture - -from birdxplorer.settings import GlobalSettings - - -def test_settings_read_from_env_var(mocker: MockerFixture) -> None: - mocker.patch.dict(os.environ, {"BX_LOGGER_SETTINGS__LEVEL": "99"}, clear=True) - settings = GlobalSettings() - assert settings.logger_settings.level == 99 - - -def test_settings_default() -> None: - settings = GlobalSettings() - assert settings.logger_settings.level == 20