Skip to content

Commit

Permalink
perf: improve table & column import
Browse files (browse the repository at this point in the history)
  • Loading branch information
nextchamp-saqib committed Oct 9, 2023
1 parent 0a813cf commit f4313b9
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 90 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,6 +82,7 @@ def get_database(self):
password = None
with suppress(BaseException):
password = self.get_password()

conn_args = {
"data_source": self.name,
"host": self.host,
Expand Down
70 changes: 23 additions & 47 deletions insights/insights/doctype/insights_data_source/sources/frappe_db.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,71 +23,47 @@ def __init__(self, data_source) -> None:
self.db_conn: Connection
self.data_source = data_source

def sync_tables(self, connection, tables, force=False):
def sync_tables(self, connection, tablenames, force=False):
self.db_conn = connection
for table in self.get_tables(table_names=tables):
# when force is true, it will overwrite the existing columns & links
create_insights_table(table, force=force)

def get_tables(self, table_names=None):
tables = []
for table in self.get_db_tables(table_names):
table.columns = self.get_table_columns(table.table)
self.columns_by_tables = self.get_columns_by_tables(tablenames)
for tablename, columns in self.columns_by_tables.items():
table = self.get_table(tablename)
table.columns = columns
table.table_links = self.get_table_links(table.label)
tables.append(table)
return tables
create_insights_table(table, force=force)

def get_db_tables(self, table_names=None):
def get_columns_by_tables(self, tablenames=None):
t = Table(
"tables",
"columns",
Column("table_name"),
Column("column_name"),
Column("data_type"),
Column("table_schema"),
Column("table_type"),
schema="information_schema",
)

query = (
t.select()
.where(t.c.table_schema == text("DATABASE()"))
.where(t.c.table_type == "BASE TABLE")
)
if table_names:
query = query.where(t.c.table_name.in_(table_names))
query = t.select().where(t.c.table_schema == text("DATABASE()"))
if tablenames:
query = query.where(t.c.table_name.in_(tablenames))

columns = self.db_conn.execute(query).fetchall()

tables = self.db_conn.execute(query).fetchall()
return [self.get_table(table[0]) for table in tables if not table[0].startswith("__")]
schema = {}
for [table_name, column_name, data_type, _] in columns:
if table_name.startswith("__"):
continue
schema.setdefault(table_name, []).append(self.get_column(column_name, data_type))
return schema

def get_table(self, table_name):
return _dict(
{
"table": table_name,
"label": table_name.replace("tab", ""),
"label": table_name.replace("tab", "").title(),
"data_source": self.data_source,
}
)

def get_all_columns(self):
t = Table(
"columns",
Column("table_name"),
Column("column_name"),
Column("data_type"),
Column("table_schema"),
schema="information_schema",
)

query = t.select().where(t.c.table_schema == text("DATABASE()"))
columns = self.db_conn.execute(query).fetchall()
columns_by_table = {}
for col in columns:
columns_by_table.setdefault(col[0], []).append(self.get_column(col[1], col[2]))
return columns_by_table

def get_table_columns(self, table):
if not hasattr(self, "_all_columns") or not self._all_columns:
self._all_columns = self.get_all_columns()
return self._all_columns.get(table, [])

def get_column(self, column_name, column_type):
return _dict(
{
Expand Down Expand Up @@ -277,7 +253,7 @@ def __init__(self, data_source, host, port, username, password, database_name, u
self.table_factory: FrappeTableFactory = FrappeTableFactory(data_source)

def test_connection(self):
return self.execute_query("select name from `tabDocType` limit 1", pluck=True)
return self.execute_query("select name from tabDocType limit 1", pluck=True)

def sync_tables(self, tables=None, force=False):
# "begin" ensures that the connection is committed and closed
Expand Down
54 changes: 14 additions & 40 deletions insights/insights/doctype/insights_data_source/sources/mariadb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,40 +34,15 @@ def __init__(self, data_source) -> None:
self.db_conn: Connection
self.data_source = data_source

def sync_tables(self, connection, tables, force=False):
def sync_tables(self, connection, tablenames, force=False):
self.db_conn = connection
for table in self.get_tables(table_names=tables):
# when force is true, it will overwrite the existing columns & links
self.columns_by_tables = self.get_columns_by_tables(tablenames)
for tablename, columns in self.columns_by_tables.items():
table = self.get_table(tablename)
table.columns = columns
table.table_links = self.get_table_links(table.label)
create_insights_table(table, force=force)

def get_tables(self, table_names=None):
tables = []
for table in self.get_db_tables(table_names):
table.columns = self.get_table_columns(table.table)
# TODO: process foreign keys as links
tables.append(table)
return tables

def get_db_tables(self, table_names=None):
t = Table(
"tables",
Column("table_name"),
Column("table_schema"),
Column("table_type"),
schema="information_schema",
)

query = (
t.select()
.where(t.c.table_schema == text("DATABASE()"))
.where(t.c.table_type == "BASE TABLE")
)
if table_names:
query = query.where(t.c.table_name.in_(table_names))

tables = self.db_conn.execute(query).fetchall()
return [self.get_table(table[0]) for table in tables if not table[0].startswith("__")]

def get_table(self, table_name):
return frappe._dict(
{
Expand All @@ -77,7 +52,7 @@ def get_table(self, table_name):
}
)

def get_all_columns(self):
def get_columns_by_tables(self, tablenames=None):
t = Table(
"columns",
Column("table_name"),
Expand All @@ -88,16 +63,15 @@ def get_all_columns(self):
)

query = t.select().where(t.c.table_schema == text("DATABASE()"))
if tablenames:
query = query.where(t.c.table_name.in_(tablenames))

columns = self.db_conn.execute(query).fetchall()
columns_by_table = {}
for col in columns:
columns_by_table.setdefault(col[0], []).append(self.get_column(col[1], col[2]))
return columns_by_table

def get_table_columns(self, table):
if not hasattr(self, "_all_columns") or not self._all_columns:
self._all_columns = self.get_all_columns()
return self._all_columns.get(table, [])
schema = {}
for [table_name, column_name, data_type, _] in columns:
schema.setdefault(table_name, []).append(self.get_column(column_name, data_type))
return schema

def get_column(self, column_name, column_type):
return frappe._dict(
Expand Down
5 changes: 2 additions & 3 deletions insights/insights/doctype/insights_table/insights_table.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -80,7 +80,7 @@
],
"index_web_pages_for_search": 1,
"links": [],
"modified": "2023-03-05 14:33:25.604960",
"modified": "2023-10-09 20:00:47.546059",
"modified_by": "Administrator",
"module": "Insights",
"name": "Insights Table",
Expand Down Expand Up @@ -116,6 +116,5 @@
"sort_field": "modified",
"sort_order": "DESC",
"states": [],
"title_field": "label",
"track_changes": 1
"title_field": "label"
}

0 comments on commit f4313b9

Please sign in to comment.