diff --git a/basedosdados_api/api/v1/models.py b/basedosdados_api/api/v1/models.py
index f81b66c2..dcb9445d 100644
--- a/basedosdados_api/api/v1/models.py
+++ b/basedosdados_api/api/v1/models.py
@@ -33,6 +33,47 @@ def image_path_and_rename(instance, filename):
     return os.path.join(upload_to, filename)
 
 
+def get_date_time(date_times):
+    """Return a DateTimeRange spanning every range in ``date_times``.
+
+    The earliest start and the latest end are picked by comparing whole
+    (year, month, day) dates, so the parts of each bound always come from
+    a single range. A missing month or day counts as 1 when comparing, and
+    a bound without a year is ignored. Fields that no range provides are
+    left as None.
+    """
+    start, start_key = None, None
+    end, end_key = None, None
+
+    for date_time in date_times:
+        if date_time.start_year:
+            key = (
+                date_time.start_year,
+                date_time.start_month or 1,
+                date_time.start_day or 1,
+            )
+            if start_key is None or key < start_key:
+                start, start_key = date_time, key
+        if date_time.end_year:
+            key = (
+                date_time.end_year,
+                date_time.end_month or 1,
+                date_time.end_day or 1,
+            )
+            if end_key is None or key > end_key:
+                end, end_key = date_time, key
+
+    # getattr(None, ..., None) keeps a field unset when no bound was found
+    return DateTimeRange(
+        start_year=getattr(start, "start_year", None),
+        start_month=getattr(start, "start_month", None),
+        start_day=getattr(start, "start_day", None),
+        end_year=getattr(end, "end_year", None),
+        end_month=getattr(end, "end_month", None),
+        end_day=getattr(end, "end_day", None),
+    )
+
+
 class UUIDHIddenIdForm(forms.ModelForm):
     id = forms.UUIDField(widget=forms.HiddenInput(), required=False)
 
@@ -466,30 +507,32 @@ def coverage(self):
         raw_data_sources = self.raw_data_sources.all()
         information_requests = self.information_requests.all()
         start_year, start_month, start_day = False, False, False
-        # start_semester, star_quarter = False, False
-        # start_hour, start_minute, start_second = False, False, False
         end_year, end_month, end_day = False, False, False
-        # end_semester, end_quarter = False, False
-        # end_hour, end_minute, end_second = False, False, False
-        start_date, end_date = datetime(3000, 1, 1, 0, 0, 0), datetime(1, 1, 1, 0, 0, 0)
+        start_date = datetime(3000, 12, 31, 0, 0, 0)
+        end_date = datetime(1, 1, 1, 0, 0, 0)
 
         # TODO: refactor this to use a function
         for table in tables:
             for coverage in table.coverages.all():
-                try:
-                    date_time = DateTimeRange.objects.get(coverage=coverage.pk)
-                except DateTimeRange.DoesNotExist:
+                date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
+                if len(date_times) == 0:
                     continue
 
-                start_year = date_time.start_year is not None or start_year
-                start_month = date_time.start_month is not None or start_month
-                start_day = date_time.start_day is not None or start_day
-                end_year = date_time.end_year is not None or end_year
-                end_month = date_time.end_month is not None or end_month
-                end_day = date_time.end_day is not None or end_day
+                date_time = get_date_time(date_times)
+
+                start_year = (
+                    date_time.start_year if date_time.start_year else start_year
+                )
+                start_month = (
+                    date_time.start_month if date_time.start_month else start_month
+                )
+                start_day = date_time.start_day if date_time.start_day else start_day
+                end_year = date_time.end_year if date_time.end_year else end_year
+                end_month = date_time.end_month if date_time.end_month else end_month
+                end_day = date_time.end_day if date_time.end_day else end_day
 
                 new_start_date = datetime(
-                    date_time.start_year,
+                    date_time.start_year or 3000,
                     date_time.start_month or 1,
                     date_time.start_day or 1,
                 )
@@ -497,25 +540,32 @@ def coverage(self):
                     new_start_date if new_start_date < start_date else start_date
                 )
                 new_end_date = datetime(
-                    date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
+                    date_time.end_year or 1,
+                    date_time.end_month or 1,
+                    date_time.end_day or 1,
                 )
                 end_date = new_end_date if new_end_date > end_date else end_date
         for raw_data_source in raw_data_sources:
             for coverage in raw_data_source.coverages.all():
-                try:
-                    date_time = DateTimeRange.objects.get(coverage=coverage.pk)
-                except DateTimeRange.DoesNotExist:
+                date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
+                if len(date_times) == 0:
                     continue
 
-                start_year = date_time.start_year is not None or start_year
-                start_month = date_time.start_month is not None or start_month
-                start_day = date_time.start_day is not None or start_day
-                end_year = date_time.end_year is not None or end_year
-                end_month = date_time.end_month is not None or end_month
-                end_day = date_time.end_day is not None or end_day
+                date_time = get_date_time(date_times)
+
+                start_year = (
+                    date_time.start_year if date_time.start_year else start_year
+                )
+                start_month = (
+                    date_time.start_month if date_time.start_month else start_month
+                )
+                start_day = date_time.start_day if date_time.start_day else start_day
+                end_year = date_time.end_year if date_time.end_year else end_year
+                end_month = date_time.end_month if date_time.end_month else end_month
+                end_day = date_time.end_day if date_time.end_day else end_day
 
                 new_start_date = datetime(
-                    date_time.start_year,
+                    date_time.start_year or 3000,
                     date_time.start_month or 1,
                     date_time.start_day or 1,
                 )
@@ -523,25 +573,32 @@ def coverage(self):
                     new_start_date if new_start_date < start_date else start_date
                 )
                 new_end_date = datetime(
-                    date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
+                    date_time.end_year or 1,
+                    date_time.end_month or 1,
+                    date_time.end_day or 1,
                 )
                 end_date = new_end_date if new_end_date > end_date else end_date
         for information_request in information_requests:
             for coverage in information_request.coverages.all():
-                try:
-                    date_time = DateTimeRange.objects.get(coverage=coverage.pk)
-                except DateTimeRange.DoesNotExist:
+                date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
+                if len(date_times) == 0:
                     continue
 
-                start_year = date_time.start_year is not None or start_year
-                start_month = date_time.start_month is not None or start_month
-                start_day = date_time.start_day is not None or start_day
-                end_year = date_time.end_year is not None or end_year
-                end_month = date_time.end_month is not None or end_month
-                end_day = date_time.end_day is not None or end_day
+                date_time = get_date_time(date_times)
+
+                start_year = (
+                    date_time.start_year if date_time.start_year else start_year
+                )
+                start_month = (
+                    date_time.start_month if date_time.start_month else start_month
+                )
+                start_day = date_time.start_day if date_time.start_day else start_day
+                end_year = date_time.end_year if date_time.end_year else end_year
+                end_month = date_time.end_month if date_time.end_month else end_month
+                end_day = date_time.end_day if date_time.end_day else end_day
 
                 new_start_date = datetime(
-                    date_time.start_year,
+                    date_time.start_year or 3000,
                     date_time.start_month or 1,
                     date_time.start_day or 1,
                 )
@@ -549,21 +606,23 @@ def coverage(self):
                     new_start_date if new_start_date < start_date else start_date
                 )
                 new_end_date = datetime(
-                    date_time.end_year, date_time.end_month or 1, date_time.end_day or 1
+                    date_time.end_year or 1,
+                    date_time.end_month or 1,
+                    date_time.end_day or 1,
                 )
                 end_date = new_end_date if new_end_date > end_date else end_date
 
         start = []
         end = []
 
-        if start_year and start_date.year:
+        if start_year and start_year < 3000 and start_date.year:
             start.append(str(start_date.year))
             if start_month and start_date.month:
                 start.append(str(start_date.month).zfill(2))
                 if start_day and start_date.day:
                     start.append(str(start_date.day).zfill(2))
 
-        if end_year and end_date.year:
+        if end_year > 1 and end_date.year:
             end.append(str(end_date.year))
             if end_month and end_date.month:
                 end.append(str(end_date.month).zfill(2))
@@ -584,6 +643,27 @@ def contains_tables(self):
     def get_graphql_contains_tables(self):
         return self.contains_tables
 
+    @property
+    def contains_closed_data(self):
+        """Returns true if there are tables or columns with closed coverages"""
+        closed_data = False
+        tables = self.tables.all()
+        for table in tables:
+            table_coverages = table.coverages.filter(is_closed=True)
+            if table_coverages:
+                closed_data = True
+                break
+            for column in table.columns.all():
+                if column.is_closed:  # in the future it will be column.coverages
+                    closed_data = True
+                    break
+
+        return closed_data
+
+    @property
+    def get_graphql_contains_closed_data(self):
+        return self.contains_closed_data
+
     @property
     def contains_closed_tables(self):
         closed_tables = self.tables.all().filter(is_closed=True)
@@ -773,6 +853,24 @@ def partitions(self):
     def get_graphql_partitions(self):
         return self.partitions
 
+    @property
+    def contains_closed_data(self):
+        """Returns true if there are columns with closed coverages"""
+        closed_data = False
+        table_coverages = self.coverages.filter(is_closed=True)
+        if table_coverages:
+            closed_data = True
+        for column in self.columns.all():  # in the future it will be column.coverages
+            if column.is_closed:
+                closed_data = True
+                break
+
+        return closed_data
+
+    @property
+    def get_graphql_contains_closed_data(self):
+        return self.contains_closed_data
+
     def clean(self):
         errors = {}
         """Coverages must not overlap"""
diff --git a/basedosdados_api/api/v1/search_indexes.py b/basedosdados_api/api/v1/search_indexes.py
index b1a1e64a..435c10da 100644
--- a/basedosdados_api/api/v1/search_indexes.py
+++ b/basedosdados_api/api/v1/search_indexes.py
@@ -61,6 +61,7 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
     )
     is_closed = indexes.BooleanField(model_attr="is_closed")
     contains_tables = indexes.BooleanField(model_attr="contains_tables")
+    contains_closed_data = indexes.BooleanField(model_attr="contains_closed_data")
     contains_open_tables = indexes.BooleanField(model_attr="contains_open_tables")
     contains_closed_tables = indexes.BooleanField(model_attr="contains_closed_tables")
     contains_raw_data_sources = indexes.BooleanField(
@@ -204,6 +205,10 @@ def prepare(self, obj):
         contains_tables = data.get("contains_tables", False)
         data["contains_tables"] = contains_tables
 
+        # Contains closed data
+        contains_closed_data = data.get("contains_closed_data", False)
+        data["contains_closed_data"] = contains_closed_data
+
         # Contains open tables
         contains_open_tables = data.get("contains_open_tables", False)
         data["contains_open_tables"] = contains_open_tables
diff --git a/basedosdados_api/api/v1/views.py b/basedosdados_api/api/v1/views.py
index 57a4c5ac..61a1f9db 100644
--- a/basedosdados_api/api/v1/views.py
+++ b/basedosdados_api/api/v1/views.py
@@ -90,14 +90,18 @@ def get(self, request, *args, **kwargs):
 
         if "datasets_with" in req_args:
             options = req_args.getlist("datasets_with")
-            if "open_tables" in options:
-                all_filters.append({"match": {"contains_open_tables": True}})
-            if "closed_tables" in options:
-                all_filters.append({"match": {"contains_closed_tables": True}})
+            if "tables" in options:
+                all_filters.append({"match": {"contains_tables": True}})
+            if "closed_data" in options:
+                all_filters.append({"match": {"contains_closed_data": True}})
             if "raw_data_sources" in options:
                 all_filters.append({"match": {"contains_raw_data_sources": True}})
             if "information_requests" in options:
                 all_filters.append({"match": {"contains_information_requests": True}})
+            if "open_tables" in options:
+                all_filters.append({"match": {"contains_open_tables": True}})
+            if "closed_tables" in options:
+                all_filters.append({"match": {"contains_closed_tables": True}})
 
         raw_query = {
             "from": (page - 1) * page_size,
@@ -170,6 +174,12 @@ def get(self, request, *args, **kwargs):
                         "size": agg_page_size,
                     }
                 },
+                "contains_closed_data_counts": {
+                    "terms": {
+                        "field": "contains_closed_data",
+                        "size": agg_page_size,
+                    }
+                },
                 "contains_open_tables_counts": {
                     "terms": {
                         "field": "contains_open_tables",
@@ -329,6 +339,9 @@ def get(self, request, *args, **kwargs):
             # boolean fields
             cleaned_results["is_closed"] = r.get("is_closed", False)
             cleaned_results["contains_tables"] = r.get("contains_tables", False)
+            cleaned_results["contains_closed_data"] = r.get(
+                "contains_closed_data", False
+            )
             cleaned_results["contains_closed_tables"] = r.get(
                 "contains_closed_tables", False
             )
@@ -345,6 +358,7 @@ def get(self, request, *args, **kwargs):
         observation_levels_counts = agg["observation_levels_counts"]["buckets"]
         is_closed_counts = agg["is_closed_counts"]["buckets"]
         contains_tables_counts = agg["contains_tables_counts"]["buckets"]
+        contains_closed_data_counts = agg["contains_closed_data_counts"]["buckets"]
         contains_open_tables_counts = agg["contains_open_tables_counts"]["buckets"]
         contains_closed_tables_counts = agg["contains_closed_tables_counts"]["buckets"]
         contains_information_requests_counts = agg[
@@ -443,6 +457,19 @@ def get(self, request, *args, **kwargs):
             ]
             aggregations["contains_tables"] = agg_contains_tables
 
+        if contains_closed_data_counts:
+            agg_contains_closed_data = [
+                {
+                    "key": contains_closed_data["key"],
+                    "count": contains_closed_data["doc_count"],
+                    "name": "dados fechados"
+                    if contains_closed_data["key"] == 1
+                    else "sem dados fechados",
+                }
+                for idx, contains_closed_data in enumerate(contains_closed_data_counts)
+            ]
+            aggregations["contains_closed_data"] = agg_contains_closed_data
+
         if contains_open_tables_counts:
             agg_contains_open_tables = [
                 {
diff --git a/basedosdados_api/settings/base.py b/basedosdados_api/settings/base.py
index 244bb148..096e35f5 100644
--- a/basedosdados_api/settings/base.py
+++ b/basedosdados_api/settings/base.py
@@ -453,3 +453,5 @@
 ]
 
 CSRF_COOKIE_HTTPONLY = False
+
+DATA_UPLOAD_MAX_NUMBER_FIELDS = 10000