Skip to content

Commit

Permalink
update for rate limit
Browse files Browse the repository at this point in the history
  • Loading branch information
yu23ki14 committed Aug 7, 2024
1 parent fc702a3 commit 736c272
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 18 deletions.
11 changes: 1 addition & 10 deletions etl/src/birdxplorer_etl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from lib.x.postlookup import lookup
from birdxplorer_common.storage import RowNoteRecord, RowPostRecord, RowUserRecord
import settings
import time


def extract_data(db: Session):
Expand Down Expand Up @@ -51,13 +50,6 @@ def extract_data(db: Session):

db.commit()

# post = lookup()
# created_at = datetime.strptime(post["data"]["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
# created_at_millis = int(created_at.timestamp() * 1000)
# db_post = RowPostRecord(post_id=post["data"]["id"], author_id=post["data"]["author_id"], text=post["data"]["text"], created_at=created_at_millis,like_count=post["data"]["public_metrics"]["like_count"],repost_count=post["data"]["public_metrics"]["retweet_count"],bookmark_count=post["data"]["public_metrics"]["bookmark_count"],impression_count=post["data"]["public_metrics"]["impression_count"],quote_count=post["data"]["public_metrics"]["quote_count"],reply_count=post["data"]["public_metrics"]["reply_count"],lang=post["data"]["lang"])
# db.add(db_post)
# db.commit()

# Noteに紐づくtweetデータを取得
postExtract_targetNotes = (
db.query(RowNoteRecord)
Expand All @@ -79,7 +71,7 @@ def extract_data(db: Session):
logger.info(tweet_id)
post = lookup(tweet_id)

if "data" not in post:
if post == None or "data" not in post:
continue

created_at = datetime.strptime(post["data"]["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
Expand Down Expand Up @@ -131,7 +123,6 @@ def extract_data(db: Session):
)
db.add(db_post)
note.row_post_id = tweet_id
time.sleep(90)
db.commit()
continue

Expand Down
28 changes: 20 additions & 8 deletions etl/src/birdxplorer_etl/lib/x/postlookup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import requests
import json
import settings
from prefect import get_run_logger
import time


def create_url(id):
logger = get_run_logger()
expansions = "expansions=attachments.poll_ids,attachments.media_keys,author_id,edit_history_tweet_ids,entities.mentions.username,geo.place_id,in_reply_to_user_id,referenced_tweets.id,referenced_tweets.id.author_id"
tweet_fields = "tweet.fields=attachments,author_id,context_annotations,conversation_id,created_at,edit_controls,entities,geo,id,in_reply_to_user_id,lang,public_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld"
media_fields = (
Expand All @@ -17,7 +16,6 @@ def create_url(id):
url = "https://api.twitter.com/2/tweets/{}?{}&{}&{}&{}&{}".format(
id, tweet_fields, expansions, media_fields, place_fields, user_fields
)
logger.info(url)
return url


Expand All @@ -32,17 +30,31 @@ def bearer_oauth(r):


def connect_to_endpoint(url):
    """Send an authenticated GET request to ``url`` and return the parsed JSON body.

    When the API answers with HTTP 429 (rate limited), the function waits
    until the moment announced in the ``x-rate-limit-reset`` response header
    and then retries the same request. The header value is compared against
    ``time.time()``, i.e. it is treated as a Unix timestamp in seconds.

    Args:
        url: Fully built request URL.

    Returns:
        The decoded JSON payload of the first response with status 200.

    Raises:
        Exception: If the response status is neither 200 nor 429.
        KeyError: If a 429 response carries no ``x-rate-limit-reset`` header.
    """
    logger = get_run_logger()
    # Retry in a loop rather than recursively so that a long run of 429
    # responses cannot exhaust the interpreter's recursion limit.
    while True:
        response = requests.request("GET", url, auth=bearer_oauth)
        if response.status_code == 200:
            return response.json()
        if response.status_code != 429:
            raise Exception("Request returned an error: {} {}".format(response.status_code, response.text))

        limit = response.headers["x-rate-limit-reset"]
        logger.info("Waiting for rate limit reset...")
        # One extra second of slack past the reset time; clamp at zero because
        # time.sleep() raises ValueError for a negative duration, which would
        # happen whenever the reset timestamp is already in the past.
        time.sleep(max(int(limit) - int(time.time()) + 1, 0))


def check_existence(id):
    """Report whether the public oEmbed endpoint answers 200 for the given post id.

    Args:
        id: Identifier of the post, interpolated into the status URL.

    Returns:
        True when the oEmbed request comes back with HTTP 200, False for any
        other status code.
    """
    oembed_url = (
        "https://publish.twitter.com/oembed?url=https://x.com/CommunityNotes/status/{}&partner=&hide_thread=false"
    ).format(id)
    response = requests.get(oembed_url)
    return response.status_code == 200


def lookup(id):
    """Fetch the API payload for a post, or None when the post is not reachable.

    The id is first checked with ``check_existence``; only if that succeeds is
    the lookup URL built with ``create_url`` and requested through
    ``connect_to_endpoint``.

    Args:
        id: Identifier of the post to look up.

    Returns:
        The JSON response returned by ``connect_to_endpoint``, or None when
        ``check_existence`` reports the post as missing.
    """
    if check_existence(id):
        return connect_to_endpoint(create_url(id))
    return None


# https://oauth-playground.glitch.me/?id=findTweetsById&params=%28%27query%21%28%27C%27*B%29%7Ebody%21%28%29%7Epath%21%28%29*B%7EFAG%27%7EuserADfile_image_url%2CiNcreated_at%2CconnectK_statuHurlMublic_JtricHuserDtecteNentitieHdescriptK%27%7ECG%2Creferenced_Fs.id-keys-source_F%27%7EOAtype%2Curl%27%29*%7EidL146E37035677698IE43339741184I-%2CattachJnts.O_A.fieldLBE46120540165%27CexpansKLDnaJMroE03237FtweetGauthor_idHs%2CI%2C146JmeKionLs%21%27M%2CpNd%2COJdia%01ONMLKJIHGFEDCBA-*_

0 comments on commit 736c272

Please sign in to comment.