Skip to content

Commit

Permalink
add proxy_type as parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
m-p-esser committed Oct 7, 2023
1 parent 1efbbfb commit b8c6072
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
9 changes: 7 additions & 2 deletions src/prefect/ingest_photos_expanded_napi_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import asyncio
import datetime
import faulthandler
from typing import Literal

from prefect_gcp.bigquery import bigquery_insert_stream, bigquery_query
from prefect_gcp.credentials import GcpCredentials
Expand Down Expand Up @@ -114,7 +115,10 @@ def write_request_log_to_bigquery(
@flow(timeout_seconds=120) # Main Flow (1st level)
@timer
def ingest_photos_expanded_napi_bigquery(
gcp_credential_block_name: str, batch_size: int = 30, total_record_size: int = 300
gcp_credential_block_name: str,
proxy_type: Literal["datacenter", "residential"],
batch_size: int = 30,
total_record_size: int = 300,
):
"""Flow to load editorial photo metadata from Unsplash and store them in Bigquery"""

Expand Down Expand Up @@ -152,7 +156,7 @@ def ingest_photos_expanded_napi_bigquery(
)

# Prepare Proxy and Useragent
proxies = prepare_proxy_adresses("residential")
proxies = prepare_proxy_adresses(proxy_type)
proxies["http://"] = proxies["http"]
proxies["https://"] = proxies["https"]
proxies.pop("http")
Expand Down Expand Up @@ -250,6 +254,7 @@ def ingest_photos_expanded_napi_bigquery(
faulthandler.dump_traceback_later(60)
ingest_photos_expanded_napi_bigquery(
gcp_credential_block_name="unsplash-photo-trends-deployment-sa",
proxy_type="datacenter",
batch_size=30,
total_record_size=300,
)
5 changes: 4 additions & 1 deletion src/prefect/ingest_photos_napi_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import timedelta
from pprint import pformat
from random import randint
from typing import Literal

from google.cloud import storage
from prefect_gcp.bigquery import bigquery_query
Expand Down Expand Up @@ -202,6 +203,7 @@ def write_request_log_to_bigquery(
def ingest_photos_napi_gcs(
gcp_credential_block_name: str,
per_page: int,
proxy_type: Literal["datacenter", "residential"],
):
"""Flow to load Editorial photos from Unsplash and store them in a Google Cloud Storage Bucket"""

Expand Down Expand Up @@ -249,7 +251,7 @@ def ingest_photos_napi_gcs(
time.sleep(randint(1, 3))

# Prepare Proxy and Useragent
proxies = prepare_proxy_adresses("residential")
proxies = prepare_proxy_adresses(proxy_type)
useragent_string = create_random_ua_string()
logger.info(f"Will be using '{useragent_string}' to make next requests")
headers = {"User-Agent": useragent_string} # Overwrite Useragent
Expand Down Expand Up @@ -295,4 +297,5 @@ def ingest_photos_napi_gcs(
ingest_photos_napi_gcs(
gcp_credential_block_name="unsplash-photo-trends-deployment-sa",
per_page=30,
proxy_type="datacenter",
)

0 comments on commit b8c6072

Please sign in to comment.