Skip to content

Commit

Permalink
use postgresql for user
Browse files: browse the repository at this point in the history
  • Loading branch information
yu23ki14 committed Oct 11, 2024
1 parent af2b3d9 commit afc8027
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion common/birdxplorer_common/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ class RowUserRecord(Base):
followers_count: Mapped[NonNegativeInt] = mapped_column(nullable=False)
following_count: Mapped[NonNegativeInt] = mapped_column(nullable=False)
tweet_count: Mapped[NonNegativeInt] = mapped_column(nullable=False)
verified: Mapped[BinaryBool] = mapped_column(nullable=False)
verified: Mapped[bool] = mapped_column(nullable=False)
verified_type: Mapped[String] = mapped_column(nullable=False)
location: Mapped[String] = mapped_column(nullable=False)
url: Mapped[String] = mapped_column(nullable=False)
Expand Down
1 change: 0 additions & 1 deletion compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ services:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
app:
depends_on:
db:
Expand Down
3 changes: 2 additions & 1 deletion etl/src/birdxplorer_etl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def extract_data(sqlite: Session, postgresql: Session):
.filter(RowNoteRecord.created_at_millis <= settings.TARGET_TWITTER_POST_END_UNIX_MILLISECOND)
.all()
)
logging.info("Target notes: ", len(postExtract_targetNotes))
logging.info(f"Target notes: {len(postExtract_targetNotes)}")
for note in postExtract_targetNotes:
tweet_id = note.tweet_id

Expand Down Expand Up @@ -168,6 +168,7 @@ def extract_data(sqlite: Session, postgresql: Session):
lang=post["data"]["lang"],
)
postgresql.add(row_post)
postgresql.commit()

media_recs = [
RowPostMediaRecord(
Expand Down
4 changes: 2 additions & 2 deletions etl/src/birdxplorer_etl/lib/sqlite/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def init_sqlite():
def init_postgresql():
db_host = os.getenv("DB_HOST", "localhost")
db_port = os.getenv("DB_PORT", "5432")
db_user = os.getenv("DB_USER", "birdxplorer")
db_user = os.getenv("DB_USER", "postgres")
db_pass = os.getenv("DB_PASS", "birdxplorer")
db_name = os.getenv("DB_NAME", "etl")
db_name = os.getenv("DB_NAME", "postgres")

logging.info(f"Initializing database at {db_host}:{db_port}/{db_name}")
engine = create_engine(f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}")
Expand Down
3 changes: 3 additions & 0 deletions etl/src/birdxplorer_etl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from extract import extract_data
from load import load_data
from transform import transform_data
import logging

logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
sqlite = init_sqlite()
Expand Down
6 changes: 3 additions & 3 deletions etl/src/birdxplorer_etl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import Generator

from sqlalchemy import Integer, and_, func, select
from sqlalchemy import Integer, Numeric, and_, func, select
from sqlalchemy.orm import Session

from birdxplorer_common.storage import (
Expand Down Expand Up @@ -97,7 +97,7 @@ def transform_data(sqlite: Session, postgresql: Session):
RowPostRecord.post_id,
RowPostRecord.author_id.label("user_id"),
RowPostRecord.text,
func.cast(RowPostRecord.created_at, Integer).label("created_at"),
func.cast(RowPostRecord.created_at, Numeric).label("created_at"),
func.cast(RowPostRecord.like_count, Integer).label("like_count"),
func.cast(RowPostRecord.repost_count, Integer).label("repost_count"),
func.cast(RowPostRecord.impression_count, Integer).label("impression_count"),
Expand Down Expand Up @@ -256,7 +256,7 @@ def generate_post_link(postgresql: Session):

for link in links:
random.seed(link.unwound_url)
link_id = uuid.UUID(int=random.getransqliteits(128))
link_id = uuid.UUID(int=random.getrandbits(128))
is_link_exist = next((record for record in records if record["link_id"] == link_id), None)
if is_link_exist is None:
with open(link_csv_file_path, "a", newline="", encoding="utf-8") as file:
Expand Down
1 change: 0 additions & 1 deletion init.sql

This file was deleted.

0 comments on commit afc8027

Please sign in to comment.