Merge branch 'datahub-project:master' into master
anshbansal authored Jul 19, 2024
2 parents 89b03c5 + 4fe5f28 commit ac2812d
Showing 3 changed files with 25 additions and 11 deletions.
15 changes: 6 additions & 9 deletions metadata-ingestion/docs/sources/looker/lookml_post.md
@@ -1,14 +1,11 @@
#### Configuration Notes

:::note

The integration can use a SQL parser to try to determine the tables that the views depend on.

:::

This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package.
Since this package doesn't officially support all of the SQL dialects that Looker supports, the results might not be correct. You can, however, implement a custom parser and use it by setting the `sql_parser` configuration value. A custom SQL parser must inherit from `datahub.utilities.sql_parser.SQLParser`
and must be made available to DataHub, for example by installing it as a package. The configuration value then needs to be set to the `module_name.ClassName` of the parser.
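For example, a minimal sketch of the relevant recipe fields (the `my_company.parsers.MyCustomSQLParser` module path is hypothetical; omit `sql_parser` to use the default `sqllineage`-based parser):
```yml
source:
  type: lookml
  config:
    parse_table_names_from_sql: True
    # Hypothetical custom parser class; the module must be installed
    # in the same environment so DataHub can import it.
    sql_parser: my_company.parsers.MyCustomSQLParser
```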
1. If a view contains a Liquid template (e.g. `sql_table_name: {{ user_attributes['db'] }}.kafka_streaming.events`, with `db=ANALYTICS_PROD`), then you will need to specify the values of those variables in the `liquid_variable` config as shown below:
```yml
liquid_variable:
  user_attributes:
    db: ANALYTICS_PROD
```
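With this configuration, `{{ user_attributes['db'] }}` resolves to `ANALYTICS_PROD`, so the view's table is resolved as `ANALYTICS_PROD.kafka_streaming.events`.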
### Multi-Project LookML (Advanced)
15 changes: 13 additions & 2 deletions metadata-ingestion/setup.py
@@ -263,6 +263,12 @@
"azure-identity>=1.14.0",
"azure-storage-blob>=12.19.0",
"azure-storage-file-datalake>=12.14.0",
"more-itertools>=8.12.0",
"pyarrow>=6.0.1",
"smart-open[azure]>=5.2.1",
"tableschema>=1.20.2",
"ujson>=5.2.0",
*path_spec_common,
}

data_lake_profiling = {
@@ -352,6 +358,10 @@
"feast>=0.34.0,<1",
"flask-openid>=1.3.0",
"dask[dataframe]<2024.7.0",
# We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject`
# with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions.
# https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility
"numpy<2",
},
"grafana": {"requests"},
"glue": aws_common,
@@ -415,7 +425,7 @@
| {"cachetools"},
"s3": {*s3_base, *data_lake_profiling},
"gcs": {*s3_base, *data_lake_profiling},
"abs": {*abs_base},
"abs": {*abs_base, *data_lake_profiling},
"sagemaker": aws_common,
"salesforce": {"simple-salesforce"},
"snowflake": snowflake_common | usage_common | sqlglot_lib,
@@ -539,6 +549,7 @@
*list(
dependency
for plugin in [
"abs",
"athena",
"bigquery",
"clickhouse",
@@ -627,6 +638,7 @@
entry_points = {
"console_scripts": ["datahub = datahub.entrypoints:main"],
"datahub.ingestion.source.plugins": [
"abs = datahub.ingestion.source.abs.source:ABSSource",
"csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource",
"file = datahub.ingestion.source.file:GenericFileSource",
"datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource",
@@ -695,7 +707,6 @@
"demo-data = datahub.ingestion.source.demo_data.DemoDataSource",
"unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource",
"gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource",
"abs = datahub.ingestion.source.abs.source:ABSSource",
"sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource",
"fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource",
"qlik-sense = datahub.ingestion.source.qlik_sense.qlik_sense:QlikSenseSource",
@@ -134,6 +134,12 @@ class BigQuerySchemaGenerator:
"BIGINT": NumberType,
"TINYINT": NumberType,
"BYTEINT": NumberType,
"BIGNUMERIC": NumberType,
"NUMERIC": NumberType,
"DECIMAL": NumberType,
"BIGDECIMAL": NumberType,
"FLOAT64": NumberType,
"RANGE": NullType,
"STRING": StringType,
"TIME": TimeType,
"TIMESTAMP": TimeType,
