Skip to content

Commit

Permalink
feat: adapt data model to avoid complexity
Browse files Browse the repository at this point in the history
  • Loading branch information
vncsna committed Mar 21, 2024
1 parent 49c93fd commit 0f2240b
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 25 deletions.
50 changes: 50 additions & 0 deletions bd_api/apps/api/v1/graphql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-

from graphene import UUID, Boolean, Float, List, ObjectType, String
from graphene_django import DjangoObjectType

from bd_api.apps.api.v1.models import TableNeighbor
from bd_api.custom.graphql_base import PlainTextNode


class TableNeighborNode(DjangoObjectType):
    """Similar tables and columns with filters"""

    # GraphQL view of one TableNeighbor row. The extra fields below describe
    # the neighbouring table of the pair (``table_b``) and its dataset, plus
    # the pair's score.
    table_id = String()
    table_name = String()
    dataset_id = String()
    dataset_name = String()
    score = Float()

    class Meta:
        model = TableNeighbor
        fields = ("id",)
        filter_fields = ("id",)
        interfaces = (PlainTextNode,)

    # Graphene binds a resolver to a field by the exact method name
    # ``resolve_<field_name>``. The former ``resolve__<field_name>`` spelling
    # (double underscore) was never picked up, so these fields fell back to
    # the default attribute lookup on the model instance.

    def resolve_table_id(root, info):
        # Primary key of the neighbouring table.
        return root.table_b.pk

    def resolve_table_name(root, info):
        # Name of the neighbouring table.
        return root.table_b.name

    def resolve_dataset_id(root, info):
        # Primary key of the dataset that owns the neighbouring table.
        return root.table_b.dataset.pk

    def resolve_dataset_name(root, info):
        # Name of the dataset that owns the neighbouring table.
        return root.table_b.dataset.name

    def resolve_score(root, info):
        # Delegates to the ``score`` attribute of the TableNeighbor row.
        return root.score


class APIQuery(ObjectType):
    # Extra query exposing the neighbors of a table, looked up by the
    # table's UUID.
    get_table_neighbor = List(
        TableNeighborNode,
        table_id=UUID(required=True),
        theme=String(),
        share_theme=Boolean(),
    )

    def resolve_get_table_neighbor(root, info, table_id, **kwargs):
        # ``theme`` and ``share_theme`` arrive through ``kwargs`` and are not
        # used by this resolver: only ``table_id`` narrows the result.
        neighbors_of_table = TableNeighbor.objects.filter(table_a__pk=table_id)
        return neighbors_of_table
6 changes: 5 additions & 1 deletion bd_api/apps/api/v1/migrations/0028_tableneighbor_and_more.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Generated by Django 4.2.10 on 2024-03-15 18:55
# Generated by Django 4.2.10 on 2024-03-20 11:53

import django.db.models.deletion
from django.db import migrations, models
Expand Down Expand Up @@ -27,6 +27,7 @@ class Migration(migrations.Migration):
("similarity_of_area", models.FloatField(default=0)),
("similarity_of_datetime", models.FloatField(default=0)),
("similarity_of_directory", models.FloatField(default=0)),
("similarity_of_popularity", models.FloatField(default=0)),
(
"table_a",
models.ForeignKey(
Expand All @@ -44,6 +45,9 @@ class Migration(migrations.Migration):
),
),
],
options={
"db_table": "table_neighbor",
},
),
migrations.AddConstraint(
model_name="tableneighbor",
Expand Down
42 changes: 19 additions & 23 deletions bd_api/apps/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import Q
from django.urls import reverse
from ordered_model.models import OrderedModel

Expand Down Expand Up @@ -1026,24 +1025,9 @@ def full_coverage(self) -> str:
@property
def neighbors(self) -> list[dict]:
"""Similar tables and columns without filters"""
# NOTE(review): this is a diff view with indentation stripped. The loop-based
# body from here down to the first `return` appears to be the pre-commit
# implementation being removed; confirm against the repository.
# It looks up pairs where this table is on either side (table_a or table_b)
# and reports the opposite table of each pair.
all_neighbors = []
for neighbor in TableNeighbor.objects.filter(Q(table_a=self) | Q(table_b=self)).all():
if neighbor.table_a == self:
table = neighbor.table_b
if neighbor.table_b == self:
table = neighbor.table_a
similarity_of_directory = neighbor.similarity_of_directory
similarity_of_popularity = table.dataset.popularity
all_neighbors.append(
{
"table_id": str(table.pk),
"table_name": table.name,
"dataset_id": str(table.dataset.id),
"dataset_name": table.dataset.name,
"score": round(similarity_of_directory, 2) + similarity_of_popularity,
}
)
return sorted(all_neighbors, key=lambda item: item["score"])[::-1]
# NOTE(review): the three lines below appear to be the replacement body.
# It builds one dict per TableNeighbor row whose table_a is this table
# (through the row's as_dict property) and sorts by "score", highest first.
all_neighbors = [t.as_dict for t in TableNeighbor.objects.filter(table_a=self)]
all_neighbors = sorted(all_neighbors, key=lambda item: item["score"], reverse=True)
return all_neighbors

@property
def last_updated_at(self):
Expand Down Expand Up @@ -1086,7 +1070,7 @@ def get_similarity_of_directory(self, other: "Table"):
intersection = self_directories.intersection(other_directories)
return len(intersection) / len(self_directories), intersection

def get_neighbors(self) -> list[dict]:
def gen_neighbors(self) -> list[dict]:
self_columns = (
self.columns
.filter(directory_primary_key__isnull=False)
Expand Down Expand Up @@ -1185,6 +1169,7 @@ class TableNeighbor(BaseModel):
similarity_of_area = models.FloatField(default=0)
similarity_of_datetime = models.FloatField(default=0)
similarity_of_directory = models.FloatField(default=0)
similarity_of_popularity = models.FloatField(default=0)

class Meta:
db_table = "table_neighbor"
Expand All @@ -1195,11 +1180,22 @@ class Meta:
),
]

@property
def score(self):
    """Sum of the directory and popularity similarities.

    Each component is rounded to two decimal places before the addition.
    """
    directory_part = round(self.similarity_of_directory, 2)
    popularity_part = round(self.similarity_of_popularity, 2)
    return directory_part + popularity_part

@property
def as_dict(self):
    """Plain-dict summary of the neighbouring table (``table_b``).

    Holds the table's and its dataset's primary keys (as strings) and
    names, together with this row's ``score``.
    """
    neighbor_table = self.table_b
    neighbor_dataset = neighbor_table.dataset
    summary = {
        "table_id": str(neighbor_table.pk),
        "table_name": neighbor_table.name,
    }
    summary["dataset_id"] = str(neighbor_dataset.pk)
    summary["dataset_name"] = neighbor_dataset.name
    summary["score"] = self.score
    return summary

def clean(self) -> None:
errors = {}
if self.table_a.pk > self.table_b.pk:
errors["table_a"] = "Table primary keys should be ordered"
errors["table_b"] = "Table primary keys should be ordered"
if self.table_a.pk == self.table_b.pk:
errors["table_a"] = "Table neighbors A & B shouldn't be the same"
errors["table_b"] = "Table neighbors A & B shouldn't be the same"
Expand Down
2 changes: 1 addition & 1 deletion bd_api/apps/api/v1/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def get_uncompressed_file_size(table: Table, bq_table: GBQTable) -> int | None:
@production_task
def update_table_neighbors_task():
# For every Table, takes the neighbor records the table produces and
# upserts each one through TableNeighbor.objects.update_or_create, passing
# the record's items as keyword arguments.
for table in Table.objects.all():
# NOTE(review): diff view. The next two `for` lines appear to be the old
# call (get_neighbors) and its renamed replacement (gen_neighbors), not two
# nested loops; confirm against the repository.
for neighbor in table.get_neighbors():
for neighbor in table.gen_neighbors():
TableNeighbor.objects.update_or_create(**neighbor)


Expand Down
2 changes: 2 additions & 0 deletions bd_api/apps/schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from bd_api.apps.account.graphql import AccountMutation
from bd_api.apps.api.v1.graphql import APIQuery
from bd_api.apps.payment.graphql import (
StripeCustomerMutation,
StripePriceQuery,
Expand All @@ -11,6 +12,7 @@
schema = build_schema(
applications=["account", "v1"],
extra_queries=[
APIQuery,
StripePriceQuery,
],
extra_mutations=[
Expand Down

0 comments on commit 0f2240b

Please sign in to comment.