-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
89 additions
and
95 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ downloads/ | |
eggs/ | ||
.eggs/ | ||
lib/ | ||
!etl/src/lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,45 @@ | ||
import birdxplorer_common.models | ||
from prefect import get_run_logger | ||
import requests | ||
from datetime import datetime, timedelta | ||
import csv | ||
import birdxplorer_common | ||
from typing import List | ||
import stringcase | ||
import settings | ||
from lib.sqlite.init import init_db | ||
|
||
def extract_data(max_lookback_days: int = 30, request_timeout: float = 30.0) -> None:
    """Download the newest Community Notes snapshot and stage its rows in the DB.

    Starting from today's date, the public notes URL is probed one day at a
    time, going backwards, until a snapshot responds with HTTP 200. The TSV
    header is converted to snake_case and every row is added to the session
    returned by ``init_db()``, which is then committed.

    Args:
        max_lookback_days: How many days before today to probe before giving
            up. Bounds the search so an outage or URL change cannot make the
            loop run forever.
        request_timeout: Timeout in seconds passed to every ``requests.get``
            call, so a stalled connection cannot hang the run.

    Raises:
        RuntimeError: If no snapshot is found within the lookback window.
    """
    # Function-scope import: only the TSV parsing below needs it.
    import io

    logger = get_run_logger()
    logger.info("Hello")
    # NOTE(review): this fixed-date download to ./data/notes.tsv looks like a
    # leftover from an earlier revision (the dated loop below supersedes it);
    # confirm whether anything still reads this file before removing it.
    url = 'https://ton.twimg.com/birdwatch-public-data/2024/04/22/notes/notes-00000.tsv'
    res = requests.get(url, timeout=request_timeout)
    # Explicit encoding so the write does not depend on the platform default.
    with open('./data/notes.tsv', 'w', encoding='utf-8') as f:
        f.write(res.content.decode('utf-8'))
    logger.info("Downloading community notes data")

    db = init_db()

    # Fetch the most recent notes data.
    date = datetime.now()
    for _ in range(max_lookback_days + 1):
        url = f'https://ton.twimg.com/birdwatch-public-data/{date.strftime("%Y/%m/%d")}/notes/notes-00000.tsv'
        logger.info(url)
        res = requests.get(url, timeout=request_timeout)
        if res.status_code == 200:
            # Load the response body into the DB's Note table.
            # Parse from a file-like object opened with newline='' instead of
            # str.splitlines(): splitlines() also breaks on characters such as
            # U+2028 or form feed inside note text and drops newlines embedded
            # in quoted fields, which corrupts rows.
            tsv_data = io.StringIO(res.content.decode('utf-8'), newline='')
            reader = csv.DictReader(tsv_data, delimiter='\t')
            # fieldnames is None for an empty body; fall back to no columns.
            reader.fieldnames = [stringcase.snakecase(field) for field in (reader.fieldnames or [])]

            for row in reader:
                # NOTE(review): `row` is a plain dict, but Session.add() only
                # accepts instances of mapped classes. Confirm which model
                # these rows should be wrapped in before adding them.
                db.add(row)
            break
        date = date - timedelta(days=1)
    else:
        raise RuntimeError(
            f'No community notes snapshot found in the last {max_lookback_days} days'
        )

    db.commit()

    # Result is unused; kept from the original, where it presumably serves as
    # a post-commit check that the Note table is queryable (confirm).
    db.query(birdxplorer_common.models.Note).first()

    # # Fetch the tweet data associated with each note
    # for note in notes_data:
    #     note_created_at = note.created_at_millis.serialize()
    #     if note_created_at >= settings.TARGET_TWITTER_POST_START_UNIX_MILLISECOND and note_created_at <= settings.TARGET_TWITTER_POST_END_UNIX_MILLISECOND:
    #         tweet_id = note.tweet_id.serialize()
    #         continue
    return
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Create Note table for sqlite with columns: id, title, content, created_at, updated_at by sqlalchemy | ||
from sqlalchemy import create_engine | ||
from sqlalchemy.orm import sessionmaker | ||
import os | ||
from prefect import get_run_logger | ||
from birdxplorer_common.storage import Row | ||
|
||
def init_db():
    """Create the local SQLite database if needed and return a new session.

    The database file is ``data/note.db``, resolved three directories above
    this module's directory. Tables are created via ``create_all`` before a
    session bound to the engine is returned.

    Returns:
        A new SQLAlchemy session bound to the SQLite engine.
    """
    logger = get_run_logger()

    db_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'data', 'note.db'))
    logger.info(f'Initializing database at {db_path}')
    # SQLite creates the database file on demand but not its parent
    # directory, so make sure the directory exists first.
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    engine = create_engine('sqlite:///' + db_path)
    # The original referenced `Base`, which is neither defined nor imported in
    # this module and raised NameError. Use the imported model's metadata.
    # NOTE(review): assumes `Row` is a declarative model whose metadata covers
    # the tables this ETL needs — confirm against birdxplorer_common.storage.
    Row.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    return Session()
File renamed without changes.
File renamed without changes.