Skip to content

Commit

Permalink
feat: XからPostを取得する際にMedia情報を保存する
Browse files Browse the repository at this point in the history
  • Loading branch information
sushichan044 committed Oct 10, 2024
1 parent 22da0ca commit 56758a4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
12 changes: 12 additions & 0 deletions common/birdxplorer_common/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,18 @@ class RowPostRecord(Base):
user: Mapped["RowUserRecord"] = relationship("RowUserRecord", back_populates="row_post")


# ORM record for the "row_post_media" table: each row is identified by its media_key
# and references the post it is attached to through post_id.
class RowPostMediaRecord(Base):
__tablename__ = "row_post_media"

# Primary key of the table.
media_key: Mapped[str] = mapped_column(primary_key=True)

# Required (NOT NULL) attributes describing the media item.
# NOTE(review): `url` is annotated with `String` while `media_key` uses `str` —
# confirm that `String` is resolvable through Base's type annotation map.
url: Mapped[String] = mapped_column(nullable=False)
type: Mapped[MediaType] = mapped_column(nullable=False)
width: Mapped[NonNegativeInt] = mapped_column(nullable=False)
height: Mapped[NonNegativeInt] = mapped_column(nullable=False)

# Required foreign key to row_posts.post_id (the owning post).
post_id: Mapped[PostId] = mapped_column(ForeignKey("row_posts.post_id"), nullable=False)

class RowPostEmbedURLRecord(Base):
__tablename__ = "row_post_embed_urls"

Expand Down
23 changes: 19 additions & 4 deletions etl/src/birdxplorer_etl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from lib.x.postlookup import lookup
from birdxplorer_common.storage import (
RowNoteRecord,
RowPostMediaRecord,
RowPostRecord,
RowUserRecord,
RowNoteStatusRecord,
Expand Down Expand Up @@ -145,16 +146,17 @@ def extract_data(db: Session):
db.add(db_user)

media_data = (
post["includes"]["media"][0]
post["includes"]["media"]
if "includes" in post and "media" in post["includes"] and len(post["includes"]["media"]) > 0
else {}
else [{}]
)

db_post = RowPostRecord(
post_id=post["data"]["id"],
author_id=post["data"]["author_id"],
text=post["data"]["text"],
media_type=media_data.get("type", ""),
media_url=media_data.get("url", ""),
media_type=media_data[0].get("type", ""),
media_url=media_data[0].get("url", ""),
created_at=created_at_millis,
like_count=post["data"]["public_metrics"]["like_count"],
repost_count=post["data"]["public_metrics"]["retweet_count"],
Expand All @@ -166,6 +168,19 @@ def extract_data(db: Session):
)
db.add(db_post)

media_recs = [
RowPostMediaRecord(
media_key=m["media_key"],
type=m["type"],
url=m["url"],
width=m["width"],
height=m["height"],
post_id=post["data"]["id"],
)
for m in media_data
]
db.add_all(media_recs)

if "entities" in post["data"] and "urls" in post["data"]["entities"]:
for url in post["data"]["entities"]["urls"]:
if "unwound_url" in url:
Expand Down

0 comments on commit 56758a4

Please sign in to comment.