Skip to content

Commit

Permalink
Make concept based table IDs 64 bit
Browse files Browse the repository at this point in the history
While we still have some headroom for the datasets we work with at the
moment, keeping the IDs of concept table based tables (inheritance) at
32 bit would have been a limit at some point. Already we see large
segmentation datasets that make this headroom rather small. Therefore
this migration will rewrite all concept based tables as well as tables
that reference them through foreign keys so that 64 bit IDs are used.

This migration also updates the review table to use 64 bit IDs. It also
fixes the history views of the catmaid_sampler table, which haven't been
kept up to date with column changes.

In addition, a lot of missing foreign key constraints have been added.
They shouldn't have a big impact on performance and in fact no real
difference could be measured.

The column order of concept changed slightly to improve the column
alignment and reduce padding. So far we wasted 4 Bytes per row, because
project_id occupied 4 Bytes and required 4 Bytes of padding, because it
was followed by a bigint. This is changed now and no more padding is
needed. This also lowers the impact on storage of the change to 64 bit
IDs.

Also, the indices backing the primary keys of many semantic tables
now include additional data like the class_id for class instances and
the relation_id for some relation instance types. This Postgres 11
feature allows more index-only scans to be used.

Fixes #1848
  • Loading branch information
tomka committed Nov 4, 2019
1 parent 7bb29fc commit 5c0d8ee
Show file tree
Hide file tree
Showing 9 changed files with 2,576 additions and 24 deletions.
6 changes: 3 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
startup, advisory locks are now used. This should make the startup of parallel
CATMAID workers more robust.

- The application of migration 82 might take a while to complete, because it
- The application of migration 88-91 might take a while to complete, because it
rewrites a potentially big table (treenode_edge). Therefore, make also sure
that there is enough space available at the database storage location (25% of
database data directory should be plenty). If no replication is used, setting
the following Postgres options can speed up the process: `wal_level = minimal`,
`archive_mode = off` and `max_wal_senders = 0`.
the following Postgres options can speed up the process: `wal_level =
minimal`, `archive_mode = off` and `max_wal_senders = 0`.

- Both `configuration.py.example` and `create_configuration.py` support now the
option `catmaid_default_enabled_tools`, which defines the list of front-end
Expand Down
6 changes: 3 additions & 3 deletions django/applications/catmaid/control/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ def _update_neuron_annotations(project_id:Union[int,str], neuron_id,
cursor.execute('''
UPDATE class_instance_class_instance
SET class_instance_a = %s, user_id = missing.u_id
FROM UNNEST(%s::integer[], %s::integer[]) AS missing(cici_id, u_id)
FROM UNNEST(%s::bigint[], %s::integer[]) AS missing(cici_id, u_id)
WHERE id = missing.cici_id;
''', (neuron_id, cici_ids, u_ids))

Expand Down Expand Up @@ -679,12 +679,12 @@ def _update_neuron_annotations(project_id:Union[int,str], neuron_id,
cursor.execute("""
UPDATE class_instance_class_instance
SET creation_time = to_update.creation_time
FROM UNNEST(%s::integer[], %s::timestamptz[])
FROM UNNEST(%s::bigint[], %s::timestamptz[])
AS to_update(cici_id, creation_time)
WHERE id = to_update.cici_id;
UPDATE class_instance_class_instance
SET edition_Time = to_update.edition_time
FROM UNNEST(%s::integer[], %s::timestamptz[])
FROM UNNEST(%s::bigint[], %s::timestamptz[])
AS to_update(cici_id, edition_time)
WHERE id = to_update.cici_id;
""", (to_update_ids,
Expand Down
8 changes: 4 additions & 4 deletions django/applications/catmaid/control/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,13 @@ def _many_to_many_synapses(skids1, skids2, relation_name, project_id) -> Tuple:
treenode t1,
treenode t2,
connector c
WHERE tc1.skeleton_id = ANY(%(skeleton_ids_1)s::int[])
WHERE tc1.skeleton_id = ANY(%(skeleton_ids_1)s::bigint[])
AND tc1.connector_id = c.id
AND tc2.skeleton_id = ANY(%(skeleton_ids_2)s::int[])
AND tc2.skeleton_id = ANY(%(skeleton_ids_2)s::bigint[])
AND tc1.connector_id = tc2.connector_id
AND tc1.relation_id = %(relation_id)s
AND (tc1.relation_id != tc2.relation_id
OR tc1.relation_id = ANY(%(undir_rel_ids)s::int[]))
OR tc1.relation_id = ANY(%(undir_rel_ids)s::bigint[]))
AND tc1.id != tc2.id
AND tc1.treenode_id = t1.id
AND tc2.treenode_id = t2.id
Expand Down Expand Up @@ -1066,7 +1066,7 @@ def get_connectors_in_bb_postgis3d(params) -> List:
JOIN (
SELECT DISTiNCT tc2.connector_id
FROM treenode_connector tc2
JOIN UNNEST(%(skeleton_ids)s::int[]) skeleton(id)
JOIN UNNEST(%(skeleton_ids)s::bigint[]) skeleton(id)
ON tc2.skeleton_id = skeleton.id
) allowed_connector(id)
ON allowed_connector.id = c.id
Expand Down
2 changes: 1 addition & 1 deletion django/applications/catmaid/control/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ def label_update(request:HttpRequest, project_id, location_id, ntype:str) -> Jso
class_instance ci,
treenode_class_instance tci,
treenode tn,
unnest(%s::text[], %s::integer[]) AS ll (name, max)
unnest(%s::text[], %s::bigint[]) AS ll (name, max)
WHERE ci.name = ll.name
AND ci.project_id = %s
AND ci.class_id = %s
Expand Down
6 changes: 3 additions & 3 deletions django/applications/catmaid/control/landmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ def get_landmark_group_locations(project_id, landmarkgroup_ids, with_names:bool=
FROM (
SELECT pci.point_id, pci.class_instance_id, array_agg(l.name) as names
FROM point_class_instance pci
JOIN UNNEST(%(landmarkgroup_ids)s::integer[]) landmarkgroup(id)
JOIN UNNEST(%(landmarkgroup_ids)s::bigint[]) landmarkgroup(id)
ON pci.class_instance_id = landmarkgroup.id
LEFT JOIN point_class_instance pci_l
ON pci_l.point_id = pci.point_id
Expand Down Expand Up @@ -937,7 +937,7 @@ def get_landmark_group_locations(project_id, landmarkgroup_ids, with_names:bool=
SELECT pci.point_id, pci.class_instance_id, p.location_x,
p.location_y, p.location_z
FROM point_class_instance pci
JOIN UNNEST(%(landmarkgroup_ids)s::integer[]) landmarkgroup(id)
JOIN UNNEST(%(landmarkgroup_ids)s::bigint[]) landmarkgroup(id)
ON pci.class_instance_id = landmarkgroup.id
JOIN point p
ON p.id = pci.point_id
Expand Down Expand Up @@ -966,7 +966,7 @@ def make_landmark_relation_index(project_id, landmarkgroup_ids) -> Tuple[Default
SELECT cici.id, lg.id, cici.relation_id, r.relation_name,
cici.class_instance_a, cici.class_instance_b
FROM class_instance_class_instance cici
JOIN UNNEST(%(landmarkgroup_ids)s::integer[]) lg(id)
JOIN UNNEST(%(landmarkgroup_ids)s::bigint[]) lg(id)
ON lg.id = cici.class_instance_a
OR lg.id = cici.class_instance_b
JOIN relation r
Expand Down
2 changes: 1 addition & 1 deletion django/applications/catmaid/control/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def compute_random_and_add_delayed(self, project_id, user_id, name,
cursor.execute("""
SELECT css.skeleton_id
FROM catmaid_skeleton_summary css
JOIN UNNEST(%(skeleton_ids)s::int[]) skeleton(id)
JOIN UNNEST(%(skeleton_ids)s::bigint[]) skeleton(id)
ON css.skeleton_id = skeleton.id
WHERE project_id = %(project_id)s
AND cable_length >= %(min_cable)s
Expand Down
16 changes: 8 additions & 8 deletions django/applications/catmaid/control/skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,7 @@ def connectivity_counts(request:HttpRequest, project_id=None) -> JsonResponse:
cursor.execute("""
SELECT tc.skeleton_id, tc.relation_id, COUNT(tc)
FROM treenode_connector tc
JOIN UNNEST(%(skeleton_ids)s::int[]) skeleton(id)
JOIN UNNEST(%(skeleton_ids)s::bigint[]) skeleton(id)
ON skeleton.id = tc.skeleton_id
{extra_select}
WHERE tc.project_id = %(project_id)s
Expand Down Expand Up @@ -1572,7 +1572,7 @@ def newPartner():
t1.treenode_id, t2.treenode_id
FROM treenode_connector t1,
treenode_connector t2
WHERE t1.skeleton_id = ANY(%s::integer[])
WHERE t1.skeleton_id = ANY(%s::bigint[])
AND t1.relation_id = %s
AND t1.connector_id = t2.connector_id
AND t1.id != t2.id
Expand Down Expand Up @@ -1607,7 +1607,7 @@ def newPartner():
cursor.execute('''
SELECT skeleton_id, num_nodes
FROM catmaid_skeleton_summary
WHERE skeleton_id = ANY(%s::integer[])
WHERE skeleton_id = ANY(%s::bigint[])
GROUP BY skeleton_id
''', (partner_skids,))
for row in cursor.fetchall():
Expand All @@ -1617,7 +1617,7 @@ def newPartner():
cursor.execute('''
SELECT DISTINCT reviewer_id
FROM review
WHERE skeleton_id = ANY(%s::integer[])
WHERE skeleton_id = ANY(%s::bigint[])
''', (partner_skids,))
reviewers = [row[0] for row in cursor]

Expand Down Expand Up @@ -1940,8 +1940,8 @@ def get_connectivity_matrix(project_id, row_skeleton_ids, col_skeleton_ids,
FROM treenode_connector t1,
treenode_connector t2
{extra_join}
WHERE t1.skeleton_id = ANY(%(row_skeleton_ids)s::integer[])
AND t2.skeleton_id = ANY(%(col_skeleton_ids)s::integer[])
WHERE t1.skeleton_id = ANY(%(row_skeleton_ids)s::bigint[])
AND t2.skeleton_id = ANY(%(col_skeleton_ids)s::bigint[])
AND t1.connector_id = t2.connector_id
AND t1.relation_id = %(pre_rel_id)s
AND t2.relation_id = %(post_rel_id)s
Expand Down Expand Up @@ -3559,7 +3559,7 @@ def skeletons_by_node_labels(request:HttpRequest, project_id=None) -> JsonRespon
ON t.id = tci.treenode_id
JOIN class_instance ci
ON tci.class_instance_id = ci.id
JOIN UNNEST(%(label_ids)s::int[]) label(id)
JOIN UNNEST(%(label_ids)s::bigint[]) label(id)
ON label.id = ci.id
WHERE ci.project_id = %(project_id)s
AND tci.relation_id = %(labeled_as)s
Expand Down Expand Up @@ -3603,7 +3603,7 @@ def get_skeletons_in_bb(params) -> List:

if skeleton_ids:
extra_joins.append("""
JOIN UNNEST(%(skeleton_ids)s::int[]) query_skeleton(id)
JOIN UNNEST(%(skeleton_ids)s::bigint[]) query_skeleton(id)
ON query_skeleton.id = skeleton.id
""")

Expand Down
Loading

0 comments on commit 5c0d8ee

Please sign in to comment.