Skip to content

Commit

Permalink
Merge pull request #282 from basedosdados/refactor/search_new_coverages
Browse files Browse the repository at this point in the history
Refactor/search new coverages
  • Loading branch information
mfagundes authored Jul 28, 2023
2 parents ed0ee83 + d0ac690 commit c9426a4
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 44 deletions.
167 changes: 127 additions & 40 deletions basedosdados_api/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,36 @@ def image_path_and_rename(instance, filename):
return os.path.join(upload_to, filename)


def get_date_time(date_times):
    """Return a DateTimeRange spanning the earliest start and the latest end.

    Every item of ``date_times`` is read for its ``start_year``,
    ``start_month``, ``start_day``, ``end_year``, ``end_month`` and
    ``end_day`` attributes. Starts (and ends) are compared as whole
    ``(year, month, day)`` dates, with a missing month or day counting as 1,
    so the components of each bound always come from a single range rather
    than being picked field by field.

    Items without a start year are ignored for the start bound, and items
    without an end year are ignored for the end bound. Components that end up
    unset are passed to ``DateTimeRange`` as ``False``, which callers test by
    truthiness.
    """
    start_year, start_month, start_day = False, False, False
    end_year, end_month, end_day = False, False, False
    earliest_start, latest_end = None, None

    for date_time in date_times:
        if date_time.start_year:
            start_key = (
                date_time.start_year,
                date_time.start_month or 1,
                date_time.start_day or 1,
            )
            # Tuples compare element by element, i.e. chronologically.
            if earliest_start is None or start_key < earliest_start:
                earliest_start = start_key
                start_year = date_time.start_year
                start_month = date_time.start_month or False
                start_day = date_time.start_day or False

        if date_time.end_year:
            end_key = (
                date_time.end_year,
                date_time.end_month or 1,
                date_time.end_day or 1,
            )
            if latest_end is None or end_key > latest_end:
                latest_end = end_key
                end_year = date_time.end_year
                end_month = date_time.end_month or False
                end_day = date_time.end_day or False

    return DateTimeRange(
        start_year=start_year,
        start_month=start_month,
        start_day=start_day,
        end_year=end_year,
        end_month=end_month,
        end_day=end_day,
    )


class UUIDHIddenIdForm(forms.ModelForm):
id = forms.UUIDField(widget=forms.HiddenInput(), required=False)

Expand Down Expand Up @@ -466,104 +496,122 @@ def coverage(self):
raw_data_sources = self.raw_data_sources.all()
information_requests = self.information_requests.all()
start_year, start_month, start_day = False, False, False
# start_semester, star_quarter = False, False
# start_hour, start_minute, start_second = False, False, False
end_year, end_month, end_day = False, False, False
# end_semester, end_quarter = False, False
# end_hour, end_minute, end_second = False, False, False

start_date, end_date = datetime(3000, 1, 1, 0, 0, 0), datetime(1, 1, 1, 0, 0, 0)
start_date = datetime(3000, 12, 31, 0, 0, 0)
end_date = datetime(1, 1, 1, 0, 0, 0)

# TODO: refactor this to use a function
for table in tables:
for coverage in table.coverages.all():
try:
date_time = DateTimeRange.objects.get(coverage=coverage.pk)
except DateTimeRange.DoesNotExist:
date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
if len(date_times) == 0:
continue
start_year = date_time.start_year is not None or start_year
start_month = date_time.start_month is not None or start_month
start_day = date_time.start_day is not None or start_day
end_year = date_time.end_year is not None or end_year
end_month = date_time.end_month is not None or end_month
end_day = date_time.end_day is not None or end_day
date_time = get_date_time(date_times)

start_year = (
date_time.start_year if date_time.start_year else start_year
)
start_month = (
date_time.start_month if date_time.start_month else start_month
)
start_day = date_time.start_day if date_time.start_day else start_day
end_year = date_time.end_year if date_time.end_year else end_year
end_month = date_time.end_month if date_time.end_month else end_month
end_day = date_time.end_day if date_time.end_day else end_day

new_start_date = datetime(
date_time.start_year,
date_time.start_year or 3000,
date_time.start_month or 1,
date_time.start_day or 1,
)
start_date = (
new_start_date if new_start_date < start_date else start_date
)
new_end_date = datetime(
date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
date_time.end_year or 1,
date_time.end_month or 1,
date_time.end_day or 1,
)
end_date = new_end_date if new_end_date > end_date else end_date

for raw_data_source in raw_data_sources:
for coverage in raw_data_source.coverages.all():
try:
date_time = DateTimeRange.objects.get(coverage=coverage.pk)
except DateTimeRange.DoesNotExist:
date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
if len(date_times) == 0:
continue
start_year = date_time.start_year is not None or start_year
start_month = date_time.start_month is not None or start_month
start_day = date_time.start_day is not None or start_day
end_year = date_time.end_year is not None or end_year
end_month = date_time.end_month is not None or end_month
end_day = date_time.end_day is not None or end_day
date_time = get_date_time(date_times)

start_year = (
date_time.start_year if date_time.start_year else start_year
)
start_month = (
date_time.start_month if date_time.start_month else start_month
)
start_day = date_time.start_day if date_time.start_day else start_day
end_year = date_time.end_year if date_time.end_year else end_year
end_month = date_time.end_month if date_time.end_month else end_month
end_day = date_time.end_day if date_time.end_day else end_day

new_start_date = datetime(
date_time.start_year,
date_time.start_year or 3000,
date_time.start_month or 1,
date_time.start_day or 1,
)
start_date = (
new_start_date if new_start_date < start_date else start_date
)
new_end_date = datetime(
date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
date_time.end_year or 1,
date_time.end_month or 1,
date_time.end_day or 1,
)
end_date = new_end_date if new_end_date > end_date else end_date

for information_request in information_requests:
for coverage in information_request.coverages.all():
try:
date_time = DateTimeRange.objects.get(coverage=coverage.pk)
except DateTimeRange.DoesNotExist:
date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
if len(date_times) == 0:
continue
start_year = date_time.start_year is not None or start_year
start_month = date_time.start_month is not None or start_month
start_day = date_time.start_day is not None or start_day
end_year = date_time.end_year is not None or end_year
end_month = date_time.end_month is not None or end_month
end_day = date_time.end_day is not None or end_day
date_time = get_date_time(date_times)

start_year = (
date_time.start_year if date_time.start_year else start_year
)
start_month = (
date_time.start_month if date_time.start_month else start_month
)
start_day = date_time.start_day if date_time.start_day else start_day
end_year = date_time.end_year if date_time.end_year else end_year
end_month = date_time.end_month if date_time.end_month else end_month
end_day = date_time.end_day if date_time.end_day else end_day

new_start_date = datetime(
date_time.start_year,
date_time.start_year or 3000,
date_time.start_month or 1,
date_time.start_day or 1,
)
start_date = (
new_start_date if new_start_date < start_date else start_date
)
new_end_date = datetime(
date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
date_time.end_year or 1,
date_time.end_month or 1,
date_time.end_day or 1,
)
end_date = new_end_date if new_end_date > end_date else end_date

start = []
end = []

if start_year and start_date.year:
if start_year < 3000 and start_date.year:
start.append(str(start_date.year))
if start_month and start_date.month:
start.append(str(start_date.month).zfill(2))
if start_day and start_date.day:
start.append(str(start_date.day).zfill(2))

if end_year and end_date.year:
if end_year > 1 and end_date.year:
end.append(str(end_date.year))
if end_month and end_date.month:
end.append(str(end_date.month).zfill(2))
Expand All @@ -584,6 +632,27 @@ def contains_tables(self):
def get_graphql_contains_tables(self):
return self.contains_tables

@property
def contains_closed_data(self):
    """Return True if any table has a closed coverage or a closed column.

    Stops at the first table that qualifies, so no further tables are
    queried once the answer is known.
    """
    for table in self.tables.all():
        # exists() asks the database for a yes/no instead of loading rows.
        if table.coverages.filter(is_closed=True).exists():
            return True
        # in the future it will be column.coverages
        if any(column.is_closed for column in table.columns.all()):
            return True

    return False

@property
def get_graphql_contains_closed_data(self):
    """Return ``contains_closed_data`` under a ``get_graphql_``-prefixed name.

    NOTE(review): presumably this alias is what the GraphQL layer reads;
    confirm against the schema code.
    """
    return self.contains_closed_data

@property
def contains_closed_tables(self):
closed_tables = self.tables.all().filter(is_closed=True)
Expand Down Expand Up @@ -773,6 +842,24 @@ def partitions(self):
def get_graphql_partitions(self):
return self.partitions

@property
def contains_closed_data(self):
    """Return True if this table has a closed coverage or a closed column.

    Columns are only inspected when no closed coverage is found.
    """
    # exists() asks the database for a yes/no instead of loading rows.
    if self.coverages.filter(is_closed=True).exists():
        return True

    # in the future it will be column.coverages
    return any(column.is_closed for column in self.columns.all())

@property
def get_graphql_contains_closed_data(self):
    """Return ``contains_closed_data`` under a ``get_graphql_``-prefixed name.

    NOTE(review): presumably this alias is what the GraphQL layer reads;
    confirm against the schema code.
    """
    return self.contains_closed_data

def clean(self):
errors = {}
"""Coverages must not overlap"""
Expand Down
5 changes: 5 additions & 0 deletions basedosdados_api/api/v1/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
)
is_closed = indexes.BooleanField(model_attr="is_closed")
contains_tables = indexes.BooleanField(model_attr="contains_tables")
contains_closed_data = indexes.BooleanField(model_attr="contains_closed_data")
contains_open_tables = indexes.BooleanField(model_attr="contains_open_tables")
contains_closed_tables = indexes.BooleanField(model_attr="contains_closed_tables")
contains_raw_data_sources = indexes.BooleanField(
Expand Down Expand Up @@ -204,6 +205,10 @@ def prepare(self, obj):
contains_tables = data.get("contains_tables", False)
data["contains_tables"] = contains_tables

# Contains closed data
contains_closed_data = data.get("contains_closed_data", False)
data["contains_closed_data"] = contains_closed_data

# Contains open tables
contains_open_tables = data.get("contains_open_tables", False)
data["contains_open_tables"] = contains_open_tables
Expand Down
35 changes: 31 additions & 4 deletions basedosdados_api/api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,18 @@ def get(self, request, *args, **kwargs):

if "datasets_with" in req_args:
options = req_args.getlist("datasets_with")
if "open_tables" in options:
all_filters.append({"match": {"contains_open_tables": True}})
if "closed_tables" in options:
all_filters.append({"match": {"contains_closed_tables": True}})
if "tables" in options:
all_filters.append({"match": {"contains_tables": True}})
if "closed_data" in options:
all_filters.append({"match": {"contains_closed_data": True}})
if "raw_data_sources" in options:
all_filters.append({"match": {"contains_raw_data_sources": True}})
if "information_requests" in options:
all_filters.append({"match": {"contains_information_requests": True}})
if "open_tables" in options:
all_filters.append({"match": {"contains_open_tables": True}})
if "closed_tables" in options:
all_filters.append({"match": {"contains_closed_tables": True}})

raw_query = {
"from": (page - 1) * page_size,
Expand Down Expand Up @@ -170,6 +174,12 @@ def get(self, request, *args, **kwargs):
"size": agg_page_size,
}
},
"contains_closed_data_counts": {
"terms": {
"field": "contains_closed_data",
"size": agg_page_size,
}
},
"contains_open_tables_counts": {
"terms": {
"field": "contains_open_tables",
Expand Down Expand Up @@ -329,6 +339,9 @@ def get(self, request, *args, **kwargs):
# boolean fields
cleaned_results["is_closed"] = r.get("is_closed", False)
cleaned_results["contains_tables"] = r.get("contains_tables", False)
cleaned_results["contains_closed_data"] = r.get(
"contains_closed_data", False
)
cleaned_results["contains_closed_tables"] = r.get(
"contains_closed_tables", False
)
Expand All @@ -345,6 +358,7 @@ def get(self, request, *args, **kwargs):
observation_levels_counts = agg["observation_levels_counts"]["buckets"]
is_closed_counts = agg["is_closed_counts"]["buckets"]
contains_tables_counts = agg["contains_tables_counts"]["buckets"]
contains_closed_data_counts = agg["contains_closed_data_counts"]["buckets"]
contains_open_tables_counts = agg["contains_open_tables_counts"]["buckets"]
contains_closed_tables_counts = agg["contains_closed_tables_counts"]["buckets"]
contains_information_requests_counts = agg[
Expand Down Expand Up @@ -443,6 +457,19 @@ def get(self, request, *args, **kwargs):
]
aggregations["contains_tables"] = agg_contains_tables

if contains_closed_data_counts:
agg_contains_closed_data = [
{
"key": contains_closed_data["key"],
"count": contains_closed_data["doc_count"],
"name": "dados fechados"
if contains_closed_data["key"] == 1
else "sem dados fechados",
}
for idx, contains_closed_data in enumerate(contains_closed_data_counts)
]
aggregations["contains_closed_data"] = agg_contains_closed_data

if contains_open_tables_counts:
agg_contains_open_tables = [
{
Expand Down
2 changes: 2 additions & 0 deletions basedosdados_api/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,5 @@
]

# Do not set the HttpOnly flag on the CSRF cookie.
# NOTE(review): presumably so client-side JavaScript can read the token; confirm.
CSRF_COOKIE_HTTPONLY = False

# Cap on the number of request parameters Django will parse per request.
# NOTE(review): presumably raised for large admin forms; confirm the reason.
DATA_UPLOAD_MAX_NUMBER_FIELDS = 10000

0 comments on commit c9426a4

Please sign in to comment.