Skip to content

Commit

Permalink
feat(ingestion): tableau - support for tableau version 2021.1 and old…
Browse files Browse the repository at this point in the history
…er (#4437)

fixes #4119
  • Loading branch information
mayurinehate authored Mar 17, 2022
1 parent 0a522e5 commit 2f078c9
Show file tree
Hide file tree
Showing 8 changed files with 2,972 additions and 100,068 deletions.
27 changes: 23 additions & 4 deletions metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,26 @@ def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:

yield self.get_metadata_change_event(dataset_snapshot)

# Older Tableau versions do not support fetching a sheet's upstreamDatasources.
# This achieves the same effect by using each datasource's downstreamSheets.
def get_sheetwise_upstream_datasources(self, workbook: dict) -> dict:
    """Map each sheet id to the set of datasource ids that feed it.

    The mapping is built by inverting the ``downstreamSheets`` list of every
    entry under the workbook's ``embeddedDatasources`` and
    ``upstreamDatasources`` keys.

    Args:
        workbook: Workbook dict; only the ``embeddedDatasources`` and
            ``upstreamDatasources`` keys are read, and within each entry
            only ``id`` and ``downstreamSheets``.

    Returns:
        A dict of ``sheet id -> set of datasource ids``. Sheets that no
        datasource lists as downstream have no entry. Ids are stored as
        returned by ``.get("id")``, so a missing id is recorded as ``None``.
    """
    sheet_upstream_datasources: dict = {}

    # Both datasource kinds are inverted the same way, so use a single loop.
    for datasource_key in ("embeddedDatasources", "upstreamDatasources"):
        # `or []` also covers a key that is present with a null value;
        # `.get(key, [])` would return None there and fail on iteration.
        for datasource in workbook.get(datasource_key) or []:
            datasource_id = datasource.get("id")
            for sheet in datasource.get("downstreamSheets") or []:
                sheet_upstream_datasources.setdefault(
                    sheet.get("id"), set()
                ).add(datasource_id)

    return sheet_upstream_datasources

def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(workbook)
for sheet in workbook.get("sheets", []):
chart_snapshot = ChartSnapshot(
urn=builder.make_chart_urn(self.platform, sheet.get("id")),
Expand Down Expand Up @@ -706,9 +725,9 @@ def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:

# datasource urn
datasource_urn = []
data_sources = sheet.get("upstreamDatasources", [])
for datasource in data_sources:
ds_id = datasource.get("id")
data_sources = sheet_upstream_datasources.get(sheet.get("id"), set())

for ds_id in data_sources:
if ds_id is None or not ds_id:
continue
ds_urn = builder.make_dataset_urn(self.platform, ds_id, self.config.env)
Expand All @@ -722,7 +741,7 @@ def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
title=sheet.get("name", ""),
lastModified=last_modified,
externalUrl=sheet_external_url,
inputs=datasource_urn,
inputs=sorted(datasource_urn),
customProperties=fields,
)
chart_snapshot.aspects.append(chart_info)
Expand Down
48 changes: 27 additions & 21 deletions metadata-ingestion/src/datahub/ingestion/source/tableau_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ class MetadataQueryException(Exception):
name
path
}
upstreamDatasources {
id
name
}
datasourceFields {
__typename
id
Expand Down Expand Up @@ -124,6 +120,10 @@ class MetadataQueryException(Exception):
extractLastRefreshTime
extractLastIncrementalUpdateTime
extractLastUpdateTime
downstreamSheets {
name
id
}
upstreamDatabases {
id
name
Expand Down Expand Up @@ -176,14 +176,19 @@ class MetadataQueryException(Exception):
dataType
}
}
upstreamDatasources {
name
}
workbook {
name
projectName
}
}
upstreamDatasources {
name
id
downstreamSheets {
name
id
}
}
}
"""

Expand Down Expand Up @@ -241,21 +246,23 @@ class MetadataQueryException(Exception):
extractLastRefreshTime
extractLastIncrementalUpdateTime
extractLastUpdateTime
downstreamSheets {
name
id
workbook {
name
projectName
}
}
upstreamDatabases {
id
name
connectionType
isEmbedded
}
upstreamTables {
id
name
schema
fullName
connectionType
description
columns {
name
schema
fullName
connectionType
description
contact {name}
remoteType
}
}
fields {
__typename
Expand Down Expand Up @@ -291,7 +298,6 @@ class MetadataQueryException(Exception):
dataType
}
}
upstreamDatasources {name}
owner {username}
description
uri
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,41 @@
}
}
],
"upstreamDatabases": [
{
"id": "a7825692-7de9-113d-5377-ae113331a9ec",
"name": "dvdrental",
"connectionType": "postgres",
"isEmbedded": false
}
],
"upstreamTables": [
{
"id": "39657832-0769-6372-60c3-687a51e2a772",
"name": "customer",
"schema": "",
"fullName": "customer",
"connectionType": "postgres",
"description": "",
"contact": null
"columns": []
},
{
"id": "3cdd0522-44ef-62eb-ba52-71545c258344",
"name": "payment",
"schema": "",
"fullName": "payment",
"connectionType": "postgres",
"description": "",
"contact": null
"columns": []
},
{
"id": "7df39af9-6767-4c9c-4120-155a024de062",
"name": "staff",
"schema": "",
"fullName": "staff",
"connectionType": "postgres",
"description": "",
"contact": null
"columns": []
}
],
"fields": [
Expand Down Expand Up @@ -235,30 +246,144 @@
}
}
],
"upstreamDatabases": [
{
"id": "1ade1d51-bbc3-ed8d-25d2-c51f44b8b31b",
"name": "Sample - Superstore.xls",
"connectionType": "excel-direct",
"isEmbedded": true
}
],
"upstreamTables": [
{
"id": "15714253-8e46-a209-63cc-700705b66de9",
"name": "People",
"schema": "",
"fullName": "[People$]",
"connectionType": "excel-direct",
"description": "",
"contact": null
"columns": [
{
"name": "Person",
"remoteType": "WSTR"
},
{
"name": "Region",
"remoteType": "WSTR"
}
]
},
{
"id": "19be3c28-8e4d-ebac-b44d-8f0851d9f206",
"name": "Returns",
"schema": "",
"fullName": "[Returns$]",
"connectionType": "excel-direct",
"description": "",
"contact": null
"columns": [
{
"name": "Returned",
"remoteType": "WSTR"
},
{
"name": "Order ID",
"remoteType": "WSTR"
}
]
},
{
"id": "b0e0c3eb-6e53-e0f5-ded1-478d5d9f7281",
"name": "Orders",
"schema": "",
"fullName": "[Orders$]",
"connectionType": "excel-direct",
"description": "",
"contact": null
"columns": [
{
"name": "Product ID",
"remoteType": "WSTR"
},
{
"name": "Category",
"remoteType": "WSTR"
},
{
"name": "Postal Code",
"remoteType": "I8"
},
{
"name": "City",
"remoteType": "WSTR"
},
{
"name": "Quantity",
"remoteType": "I8"
},
{
"name": "State",
"remoteType": "WSTR"
},
{
"name": "Order Date",
"remoteType": "DATE"
},
{
"name": "Customer Name",
"remoteType": "WSTR"
},
{
"name": "Country/Region",
"remoteType": "WSTR"
},
{
"name": "Sales",
"remoteType": "R8"
},
{
"name": "Segment",
"remoteType": "WSTR"
},
{
"name": "Sub-Category",
"remoteType": "WSTR"
},
{
"name": "Profit",
"remoteType": "R8"
},
{
"name": "Product Name",
"remoteType": "WSTR"
},
{
"name": "Customer ID",
"remoteType": "WSTR"
},
{
"name": "Order ID",
"remoteType": "WSTR"
},
{
"name": "Row ID",
"remoteType": "I8"
},
{
"name": "Discount",
"remoteType": "R8"
},
{
"name": "Ship Date",
"remoteType": "DATE"
},
{
"name": "Ship Mode",
"remoteType": "WSTR"
},
{
"name": "Region",
"remoteType": "WSTR"
}
]
}
],
"fields": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"hasNextPage": true,
"endCursor": null
},
"totalCount": 8
"totalCount": 3
}
}
}
Loading

0 comments on commit 2f078c9

Please sign in to comment.