-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #333 from basedosdados/staging/update_metadata
[feat] temporal_coverage_updater
- Loading branch information
Showing
9 changed files
with
1,178 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# -*- coding: utf-8 -*-
"""
Constant values for the datasets projects
"""

from enum import Enum


class constants(Enum):  # pylint: disable=c0103
    """
    Constant values for the temporal_coverage_updater project.

    The class name is intentionally lowercase (hence the pylint waiver).
    Read a constant through its ``.value`` attribute, e.g.
    ``constants.EMAIL.value``.
    """

    # NOTE(review): these look like lookup keys for API credentials rather
    # than the credentials themselves -- confirm against the code that reads
    # them.
    EMAIL = "email"
    PASSWORD = "pass"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# -*- coding: utf-8 -*-
"""
Flows for temporal_coverage_updater
"""

from prefect import Parameter
from prefect.run_configs import KubernetesRun
from prefect.storage import GCS

from pipelines.constants import constants

# from pipelines.datasets.temporal_coverage_updater.schedules import every_two_weeks
from pipelines.utils.decorators import Flow
from pipelines.utils.tasks import (
    update_django_metadata,
)

# from pipelines.utils.utils import log

# Flow that runs the `update_django_metadata` task for a single table, using
# an explicitly supplied last date instead of the BigQuery last-update time.
with Flow(
    name="update_temporal_coverage_teste",
    code_owners=[
        "arthurfg",
    ],
) as temporal_coverage_updater_flow:
    # NOTE(review): in Prefect 1.x a Parameter declared with `required=True`
    # ignores its `default`, so these defaults are likely never used --
    # confirm which of the two settings is intended.
    dataset_id = Parameter("dataset_id", default="test_dataset", required=True)
    table_id = Parameter("table_id", default="test_laura_student", required=True)

    # `bq_last_update=False` makes the task use `_last_date` as the end of
    # the temporal coverage; the date below is hard-coded.
    update_django_metadata(
        dataset_id,
        table_id,
        metadata_type="DateTimeRange",
        bq_last_update=False,
        _last_date="2030-01-01",
    )
    # Earlier multi-task version of this flow, kept commented out:
    # (email, password) = get_credentials(secret_path="api_user_prod")
    # ids = find_ids(
    #     dataset_id, table_id, email, password, upstream_tasks=[email, password]
    # )
    # last_date = extract_last_update(
    #     dataset_id, table_id, upstream_tasks=[ids, email, password]
    # )
    # first_date = get_first_date(
    #     ids, email, password, upstream_tasks=[ids, last_date, email, password]
    # )
    # update_temporal_coverage(
    #     ids,
    #     first_date,
    #     last_date,
    #     email,
    #     password,
    #     upstream_tasks=[ids, last_date, first_date, email, password],
    # )


# Flow code is stored in the project's GCS bucket and executed as a
# Kubernetes job using the project's Docker image.
temporal_coverage_updater_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value)
temporal_coverage_updater_flow.run_config = KubernetesRun(
    image=constants.DOCKER_IMAGE.value
)
# NOTE(review): no schedule is attached. The line below refers to `flow`,
# which is not defined in this module; it would need to target
# `temporal_coverage_updater_flow` if re-enabled.
# flow.schedule = every_two_weeks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# -*- coding: utf-8 -*-
"""
Schedules for temporal_coverage_updater
"""

from datetime import datetime, timedelta

from prefect.schedules import Schedule
from prefect.schedules.clocks import IntervalClock

from pipelines.constants import constants

# Schedule with a single clock that ticks every two weeks, counted from
# 2021-01-01, and tags runs with the datasets agent label.
# NOTE(review): `start_date` is a naive datetime (no tzinfo) -- confirm the
# timezone the schedule is expected to run in.
every_two_weeks = Schedule(
    clocks=[
        IntervalClock(
            interval=timedelta(weeks=2),
            start_date=datetime(2021, 1, 1),
            labels=[
                constants.DATASETS_AGENT_LABEL.value,
            ],
        ),
    ]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# -*- coding: utf-8 -*-
"""
Tasks for temporal_coverage_updater
"""

from datetime import datetime
from typing import Tuple

# from basedosdados.upload.base import Base
import basedosdados as bd
from prefect import task

from pipelines.utils.temporal_coverage_updater.utils import (
    find_ids,
    parse_temporal_coverage,
    get_credentials,
    create_update,
    extract_last_update,
    get_first_date,
)
from pipelines.utils.utils import log, get_credentials_from_secret


## TODO: Turn the flow into a task (DONE)
## TODO: Add an argument to update_temporal_coverage that selects the "type" (bool) of first date and last date (DONE)
## TODO: Move to utils.tasks
## TODO: Check inside the parser whether the value follows the BD standard format and warn the user when it does not (DONE)
@task
def update_django_metadata(
    dataset_id: str,
    table_id: str,
    metadata_type: str,
    _last_date=None,
    bq_last_update: bool = True,
):
    """
    Updates Django metadata.

    Only the ``"DateTimeRange"`` metadata type is handled: the table's
    temporal coverage is rebuilt from a first date and a last date and sent
    through ``create_update`` as a ``CreateUpdateDateTimeRange`` mutation.

    Args:
        dataset_id (str): The ID of the dataset.
        table_id (str): The ID of the table.
        metadata_type (str): The type of metadata to update.
        _last_date (optional): The last date for metadata update if
            `bq_last_update` is False. Defaults to None.
        bq_last_update (bool, optional): Flag indicating whether to use
            BigQuery's last update date for metadata. If True, `_last_date`
            is ignored. Defaults to True.

    Returns:
        None

    Raises:
        ValueError: If the metadata_type is not supported, or if
            `bq_last_update` is False and no `_last_date` was given.
    """
    # Validate arguments before fetching credentials or calling the API, so
    # a bad call fails fast instead of silently doing nothing.
    if metadata_type != "DateTimeRange":
        raise ValueError(
            f"Unsupported metadata_type {metadata_type!r}; "
            'only "DateTimeRange" is supported.'
        )
    if not bq_last_update and _last_date is None:
        # Without this guard the text "None" would be formatted into the
        # temporal coverage string below.
        raise ValueError("_last_date must be provided when bq_last_update is False.")

    (email, password) = get_credentials(secret_path="api_user_prod")

    ids = find_ids(
        dataset_id,
        table_id,
        email,
        password,
    )

    # Pick the end of the coverage interval: looked up from the table, or the
    # value given by the caller.
    if bq_last_update:
        last_date = extract_last_update(
            dataset_id,
            table_id,
        )
    else:
        last_date = _last_date
        log(f"Última data {last_date}")

    first_date = get_first_date(
        ids,
        email,
        password,
    )

    # The parser receives the first and last date joined with no separator.
    # NOTE(review): presumably `first_date` already ends with the interval
    # marker expected by `parse_temporal_coverage` -- confirm.
    resource_to_temporal_coverage = parse_temporal_coverage(f"{first_date}{last_date}")
    coverage_id = ids.get("coverage_id")
    resource_to_temporal_coverage["coverage"] = coverage_id
    log(f"Mutation parameters: {resource_to_temporal_coverage}")

    create_update(
        query_class="allDatetimerange",
        query_parameters={"$coverage_Id: ID": coverage_id},
        mutation_class="CreateUpdateDateTimeRange",
        mutation_parameters=resource_to_temporal_coverage,
        update=True,
        email=email,
        password=password,
    )
Oops, something went wrong.