Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refacto dataset version query #2683

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions api/src/main/java/marquez/db/DatasetVersionDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -244,29 +244,42 @@ default Optional<DatasetVersion> findByWithRun(UUID version) {

/**
 * Lists the versions of a single dataset, newest first, one row per dataset version.
 *
 * <p>The page of versions is selected first ({@code paged_versions}): the namespace/dataset
 * filter, the {@code created_at DESC} ordering and {@code LIMIT}/{@code OFFSET} are all applied
 * to {@code dataset_versions} alone. Paging after the join against {@code dataset_facets_view}
 * would count one row per facet instead of one row per version, and paging without an
 * {@code ORDER BY} would make the page contents non-deterministic.
 *
 * <p>Facets are then aggregated only for the versions on the page, ordered by
 * {@code lineage_event_time} ascending and restricted to facet types matching {@code 'dataset'}
 * or {@code 'unknown'} (case-insensitive). Because the aggregation happens inside the joined
 * subquery, a version with no matching facets gets a SQL {@code NULL} in {@code facets} rather
 * than a JSON array containing {@code null}. Tags are aggregated per dataset.
 *
 * @param namespaceName value matched against {@code dataset_versions.namespace_name}
 * @param datasetName value matched against {@code dataset_versions.dataset_name}
 * @param limit maximum number of dataset versions to return
 * @param offset number of dataset versions (in {@code created_at DESC} order) to skip
 * @return the selected dataset versions, ordered by {@code created_at} descending
 */
@SqlQuery(
    """
    WITH paged_versions AS (
      SELECT dv.uuid, dv.dataset_uuid, dv.lifecycle_state, dv.created_at, dv.version,
        dv.fields, dv.run_uuid
      FROM dataset_versions dv
      WHERE dv.namespace_name = :namespaceName
      AND dv.dataset_name = :datasetName
      ORDER BY dv.created_at DESC
      LIMIT :limit OFFSET :offset
    )
    SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,
      dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location,
      t.tags, f.facets
    FROM paged_versions dv
    LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
    LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
    LEFT JOIN (
      SELECT ARRAY_AGG(t.name) AS tags, m.dataset_uuid
      FROM tags AS t
      INNER JOIN datasets_tag_mapping AS m ON m.tag_uuid = t.uuid
      GROUP BY m.dataset_uuid
    ) t ON t.dataset_uuid = dv.dataset_uuid
    LEFT JOIN (
      SELECT dvf.dataset_version_uuid,
        JSONB_AGG(dvf.facet ORDER BY dvf.lineage_event_time ASC) AS facets
      FROM dataset_facets_view dvf
      INNER JOIN paged_versions pv ON pv.uuid = dvf.dataset_version_uuid
      WHERE (dvf.type ILIKE 'dataset' OR dvf.type ILIKE 'unknown')
      GROUP BY dvf.dataset_version_uuid
    ) f ON f.dataset_version_uuid = dv.uuid
    ORDER BY dv.created_at DESC
    """)
List<DatasetVersion> findAll(String namespaceName, String datasetName, int limit, int offset);

Expand Down
Loading