Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: forbid split of tables without content to split #381

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion core/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.conf import settings
from rest_framework import status

from core.models import Table
from core.tests.utils import create_data_selection


Expand Down Expand Up @@ -122,6 +123,11 @@ def test_table_split_preview(self):
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/"
).json()

for table in tables:
_table = Table.objects.get(id=table["id"])
_table.should_split = True
_table.save()

response = self.client.patch(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
data={"split": True},
Expand Down Expand Up @@ -158,6 +164,11 @@ def test_table_split_include_preview(self):
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/"
).json()

for table in tables:
_table = Table.objects.get(id=table["id"])
_table.should_split = True
_table.save()

response = self.client.patch(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
data={"split": True},
Expand Down Expand Up @@ -198,14 +209,20 @@ def test_table_split_include_preview(self):

def test_table_split_no_left_space(self):
selection = create_data_selection(self.client, self.validated_datasource, self.url_prefix)

tables = self.client.get(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/"
).json()

_table = Table.objects.get(id=tables[1]["id"])
_table.should_split = True
_table.save()

mocked_split = self.mocker.patch("core.views.store_preview_csv")
mocked_split.side_effect = OSError(errno.ENOSPC, "No left space.")

response = self.client.patch(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/{tables[1]['id']}/",
data={"split": True},
content_type="application/json",
)
Expand All @@ -217,6 +234,10 @@ def test_table_split_preview_no_left_space(self):
tables = self.client.get(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/"
).json()
for table in tables:
_table = Table.objects.get(id=table["id"])
_table.should_split = True
_table.save()

response = self.client.patch(
f"{self.url_prefix}{self.validated_datasource.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
Expand Down
9 changes: 8 additions & 1 deletion core/tests/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django.core.files import File
from django.test import override_settings

from core.models import Url
from core.models import Table, Url
from core.serializers import UrlSerializer
from core.utils import dataregistry_path_formatter, dataregistry_path_resolver

Expand Down Expand Up @@ -172,6 +172,9 @@ def test_table_split_preview(self, client, url_obj_w_files):
selection = create_data_selection(client, url_obj_w_files, self.url_prefix)
tables = client.get(f"{self.url_prefix}{url_obj_w_files.id}/selections/{selection['id']}/tables/").json()

_table = Table.objects.get(id=tables[0]["id"])
_table.should_split = True
_table.save()
response = client.patch(
f"{self.url_prefix}{url_obj_w_files.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
data={"split": True},
Expand Down Expand Up @@ -205,6 +208,10 @@ def test_table_split_preview(self, client, url_obj_w_files):
def test_table_split_include_preview(self, client, url_obj_w_files):
selection = create_data_selection(client, url_obj_w_files, self.url_prefix)
tables = client.get(f"{self.url_prefix}{url_obj_w_files.id}/selections/{selection['id']}/tables/").json()
for table in tables:
_table = Table.objects.get(id=table["id"])
_table.should_split = True
_table.save()

response = client.patch(
f"{self.url_prefix}{url_obj_w_files.id}/selections/{selection['id']}/tables/{tables[0]['id']}/",
Expand Down
22 changes: 18 additions & 4 deletions core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,11 @@ def list(self, request, *args, **kwargs):

def update(self, request, *args, **kwargs):
try:
if "url_id" in kwargs:
datasource = Url.objects.get(id=kwargs["url_id"])
elif "upload_id" in kwargs:
datasource = Upload.objects.get(id=kwargs["upload_id"])
datasource = (
Url.objects.get(id=kwargs["url_id"])
if "url_id" in kwargs
else Upload.objects.get(id=kwargs["upload_id"])
)
table = Table.objects.get(id=kwargs["id"])
spec = DataPreprocessor.restore(datasource.analyzed_file.path)
update_fields = []
Expand All @@ -428,6 +429,7 @@ def update(self, request, *args, **kwargs):
if array_table.parent == table.name:
array_table.include = False
array_table.save()
# Forbid merge of table if any of child arrays is unmergeable
if (
key == "split"
and request.data[key] is False
Expand All @@ -439,6 +441,16 @@ def update(self, request, *args, **kwargs):
{"detail": _(f"Cannot merge '{table.name}' - child arrays are too large")},
status=status.HTTP_400_BAD_REQUEST,
)
# Forbid split when arrays for split are absent in table
if key == "split" and request.data[key] is True and table.should_split is False:
return Response(
{
"detail": _(
f"Cannot split '{table.name}' - there are no arrays to split, or present arrays are not large enough for split"
)
},
status=status.HTTP_400_BAD_REQUEST,
)

update_fields.append(key)
if update_fields:
Expand Down Expand Up @@ -476,6 +488,8 @@ def update(self, request, *args, **kwargs):
)

def _split_table(self, table, analyzed_tables, datasource, child_tables):
if table.should_split is False:
return
datasource_dir = os.path.dirname(datasource.files.all()[0].file.path)
for child_table_key in child_tables:
analyzed_child_table = analyzed_tables.get(child_table_key, {})
Expand Down