From 56758a4f4871fefeba75f61643ab4edb2bb4677e Mon Sep 17 00:00:00 2001
From: sushichan044 <mail@sushichan.live>
Date: Thu, 10 Oct 2024 15:12:57 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20X=E3=81=8B=E3=82=89Post=E3=82=92?=
 =?UTF-8?q?=E5=8F=96=E5=BE=97=E3=81=99=E3=82=8B=E9=9A=9B=E3=81=ABMedia?=
 =?UTF-8?q?=E6=83=85=E5=A0=B1=E3=82=92=E4=BF=9D=E5=AD=98=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 common/birdxplorer_common/storage.py | 12 ++++++++++++
 etl/src/birdxplorer_etl/extract.py   | 23 +++++++++++++++++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/common/birdxplorer_common/storage.py b/common/birdxplorer_common/storage.py
index 6eee4af..b087e2b 100644
--- a/common/birdxplorer_common/storage.py
+++ b/common/birdxplorer_common/storage.py
@@ -223,6 +223,18 @@ class RowPostRecord(Base):
     user: Mapped["RowUserRecord"] = relationship("RowUserRecord", back_populates="row_post")
 
 
+class RowPostMediaRecord(Base):
+    # One row per media object attached to a post; post_id references row_posts.post_id.
+    __tablename__ = "row_post_media"
+
+    media_key: Mapped[str] = mapped_column(primary_key=True)  # NOTE(review): str here, String on url; confirm
+    url: Mapped[String] = mapped_column(nullable=False)
+    type: Mapped[MediaType] = mapped_column(nullable=False)
+    width: Mapped[NonNegativeInt] = mapped_column(nullable=False)
+    height: Mapped[NonNegativeInt] = mapped_column(nullable=False)
+    post_id: Mapped[PostId] = mapped_column(ForeignKey("row_posts.post_id"), nullable=False)
+
+
 class RowPostEmbedURLRecord(Base):
     __tablename__ = "row_post_embed_urls"
 
diff --git a/etl/src/birdxplorer_etl/extract.py b/etl/src/birdxplorer_etl/extract.py
index b29c5de..b268b9c 100644
--- a/etl/src/birdxplorer_etl/extract.py
+++ b/etl/src/birdxplorer_etl/extract.py
@@ -7,6 +7,7 @@
 from lib.x.postlookup import lookup
 from birdxplorer_common.storage import (
     RowNoteRecord,
+    RowPostMediaRecord,
     RowPostRecord,
     RowUserRecord,
     RowNoteStatusRecord,
@@ -145,16 +146,17 @@ def extract_data(db: Session):
             db.add(db_user)
 
         media_data = (
-            post["includes"]["media"][0]
+            post["includes"]["media"]
             if "includes" in post and "media" in post["includes"] and len(post["includes"]["media"]) > 0
-            else {}
+            else [{}]
         )
+        # Always a non-empty list: the post's media objects, or the [{}] placeholder when there are none.
         db_post = RowPostRecord(
             post_id=post["data"]["id"],
             author_id=post["data"]["author_id"],
             text=post["data"]["text"],
-            media_type=media_data.get("type", ""),
-            media_url=media_data.get("url", ""),
+            media_type=media_data[0].get("type", ""),
+            media_url=media_data[0].get("url", ""),
             created_at=created_at_millis,
             like_count=post["data"]["public_metrics"]["like_count"],
             repost_count=post["data"]["public_metrics"]["retweet_count"],
@@ -166,6 +168,19 @@ def extract_data(db: Session):
         )
         db.add(db_post)
 
+        db.add_all(
+            RowPostMediaRecord(
+                media_key=m["media_key"],
+                type=m["type"],
+                url=m.get("url", ""),  # same fallback as media_url on the post row
+                width=m["width"],
+                height=m["height"],
+                post_id=post["data"]["id"],
+            )
+            for m in media_data
+            if "media_key" in m  # skip the [{}] placeholder used for posts without media
+        )
+
         if "entities" in post["data"] and "urls" in post["data"]["entities"]:
             for url in post["data"]["entities"]["urls"]:
                 if "unwound_url" in url: