From ba5d9d96595333a58376f43fcee773e17038c3c1 Mon Sep 17 00:00:00 2001
From: sophiely
Date: Wed, 15 Nov 2023 20:17:04 +0100
Subject: [PATCH 1/2] refacto dataset version query

Signed-off-by: sophiely
---
Review note: this revision replaces the pre-aggregated facet subquery with a
dataset_info CTE that joins one row per facet and aggregates in the outer
SELECT. ORDER BY / LIMIT / OFFSET are still inside the CTE here, so they act
on per-facet rows; PATCH 2/2 moves them to the outer query.

 .../java/marquez/db/DatasetVersionDao.java    | 59 +++++++++++--------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/api/src/main/java/marquez/db/DatasetVersionDao.java b/api/src/main/java/marquez/db/DatasetVersionDao.java
index 7b3b544a32..c75e98c46e 100644
--- a/api/src/main/java/marquez/db/DatasetVersionDao.java
+++ b/api/src/main/java/marquez/db/DatasetVersionDao.java
@@ -244,29 +244,42 @@ default Optional findByWithRun(UUID version) {
 
   @SqlQuery(
       """
-          SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,
-              dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,
-              t.tags, f.facets
-          FROM dataset_versions dv
-          LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
-          LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
-          LEFT JOIN (
-              SELECT ARRAY_AGG(t.name) AS tags, m.dataset_uuid
-              FROM tags AS t
-                  INNER JOIN datasets_tag_mapping AS m ON m.tag_uuid = t.uuid
-              GROUP BY m.dataset_uuid
-          ) t ON t.dataset_uuid = dv.dataset_uuid
-          LEFT JOIN (
-              SELECT dvf.dataset_version_uuid,
-                  JSONB_AGG(dvf.facet ORDER BY dvf.lineage_event_time ASC) AS facets
-              FROM dataset_facets_view dvf
-              WHERE (type ILIKE 'dataset' OR type ILIKE 'unknown')
-              GROUP BY dataset_version_uuid
-          ) f ON f.dataset_version_uuid = dv.uuid
-          WHERE dv.namespace_name = :namespaceName
-          AND dv.dataset_name = :datasetName
-          ORDER BY dv.created_at DESC
-          LIMIT :limit OFFSET :offset
+      WITH dataset_info AS (
+          SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,
+              dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,
+              t.tags, f.facets, f.lineage_event_time, f.dataset_version_uuid, facet_name
+          FROM dataset_versions dv
+          LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
+          LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
+          LEFT JOIN (
+              SELECT ARRAY_AGG(t.name) AS tags, m.dataset_uuid
+              FROM tags AS t
+                  INNER JOIN datasets_tag_mapping AS m ON m.tag_uuid = t.uuid
+              GROUP BY m.dataset_uuid
+          ) t ON t.dataset_uuid = dv.dataset_uuid
+          LEFT JOIN (
+              SELECT
+                  dataset_version_uuid,
+                  name as facet_name,
+                  facet as facets,lineage_event_time
+              FROM dataset_facets_view
+              WHERE
+                  (type ILIKE 'dataset' OR type ILIKE 'unknown')
+          ) f ON f.dataset_version_uuid = dv.uuid
+          WHERE dv.namespace_name = :namespaceName
+          AND dv.dataset_name = :datasetName
+          ORDER BY dv.created_at DESC
+          LIMIT :limit OFFSET :offset
+      )
+      SELECT
+          type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
+          created_at, version, fields, createdByRunUuid, schema_location,
+          tags, dataset_version_uuid,
+          JSONB_AGG(facets ORDER BY lineage_event_time ASC) AS facets
+      FROM dataset_info
+      GROUP BY type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
+          created_at, version, fields, createdByRunUuid, schema_location,
+          tags, dataset_version_uuid
       """)
   List findAll(String namespaceName, String datasetName, int limit, int offset);
 

From 8230d17cc91afc0987510601a1dfc1ac157cd072 Mon Sep 17 00:00:00 2001
From: sophiely
Date: Thu, 16 Nov 2023 01:03:10 +0100
Subject: [PATCH 2/2] fix order by

Signed-off-by: sophiely
---
Review note: besides moving ORDER BY to the outer query, this revision also
moves LIMIT/OFFSET there. The dataset_info CTE yields one row per
(dataset version, facet), so paginating inside it limited facet rows rather
than dataset versions, and did so without any ORDER BY. Pagination now runs
after GROUP BY, on one row per dataset version, ordered by created_at DESC.
The blob id on the "index" line predates this change and should be
regenerated when the series is re-exported.

 api/src/main/java/marquez/db/DatasetVersionDao.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/src/main/java/marquez/db/DatasetVersionDao.java b/api/src/main/java/marquez/db/DatasetVersionDao.java
index c75e98c46e..ad58ac7691 100644
--- a/api/src/main/java/marquez/db/DatasetVersionDao.java
+++ b/api/src/main/java/marquez/db/DatasetVersionDao.java
@@ -268,8 +268,6 @@ LEFT JOIN (
           ) f ON f.dataset_version_uuid = dv.uuid
           WHERE dv.namespace_name = :namespaceName
           AND dv.dataset_name = :datasetName
-          ORDER BY dv.created_at DESC
-          LIMIT :limit OFFSET :offset
       )
       SELECT
           type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
@@ -280,6 +278,8 @@ LEFT JOIN (
       GROUP BY type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
           created_at, version, fields, createdByRunUuid, schema_location,
           tags, dataset_version_uuid
+      ORDER BY created_at DESC
+      LIMIT :limit OFFSET :offset
       """)
   List findAll(String namespaceName, String datasetName, int limit, int offset);
 