From c30f4c7fb5c18d11e41ce188c402a2bfda526324 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Fri, 19 Jul 2024 18:09:37 +0530 Subject: [PATCH 1/3] doc(ingest/looker): fix doc for sql parsing documentation (#10883) Co-authored-by: Harshal Sheth --- .../docs/sources/looker/lookml_post.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/docs/sources/looker/lookml_post.md b/metadata-ingestion/docs/sources/looker/lookml_post.md index 773a917202f24..8a4bf823ffc27 100644 --- a/metadata-ingestion/docs/sources/looker/lookml_post.md +++ b/metadata-ingestion/docs/sources/looker/lookml_post.md @@ -1,14 +1,11 @@ #### Configuration Notes -:::note - -The integration can use an SQL parser to try to parse the tables the views depends on. - -::: - -This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package. -As this package doesn't officially support all the SQL dialects that Looker supports, the result might not be correct. You can, however, implement a custom parser and take it into use by setting the `sql_parser` configuration value. A custom SQL parser must inherit from `datahub.utilities.sql_parser.SQLParser` -and must be made available to Datahub by ,for example, installing it. The configuration then needs to be set to `module_name.ClassName` of the parser. +1. If a view contains a liquid template (e.g. 
`sql_table_name: {{ user_attributes['db']}}.kafka_streaming.events`, with `db=ANALYTICS_PROD`), then you will need to specify the values of those variables in the `liquid_variable` config as shown below: + ```yml + liquid_variable: + user_attributes: + db: ANALYTICS_PROD + ``` ### Multi-Project LookML (Advanced) From 91e80017399be7996306eb6a45053cdeea3ac7ea Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 19 Jul 2024 15:09:13 +0200 Subject: [PATCH 2/3] fix(ingest/bigquery): Adding missing BigQuery types (#10950) --- .../ingestion/source/bigquery_v2/bigquery_schema_gen.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 3ffcb225db1c2..46ec75edb9734 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -134,6 +134,12 @@ class BigQuerySchemaGenerator: "BIGINT": NumberType, "TINYINT": NumberType, "BYTEINT": NumberType, + "BIGNUMERIC": NumberType, + "NUMERIC": NumberType, + "DECIMAL": NumberType, + "BIGDECIMAL": NumberType, + "FLOAT64": NumberType, + "RANGE": NullType, "STRING": StringType, "TIME": TimeType, "TIMESTAMP": TimeType, From 4fe5f280b3be2d7e67212bacbad69eb3ef55dd81 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 19 Jul 2024 15:30:43 +0200 Subject: [PATCH 3/3] fix(ingest/setup): feast and abs source setup (#10951) --- metadata-ingestion/setup.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index e1a9e6a55909d..f4e9de839d5f3 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -263,6 +263,12 @@ "azure-identity>=1.14.0", "azure-storage-blob>=12.19.0", "azure-storage-file-datalake>=12.14.0", + "more-itertools>=8.12.0", 
+ "pyarrow>=6.0.1", + "smart-open[azure]>=5.2.1", + "tableschema>=1.20.2", + "ujson>=5.2.0", + *path_spec_common, } data_lake_profiling = { @@ -352,6 +358,10 @@ "feast>=0.34.0,<1", "flask-openid>=1.3.0", "dask[dataframe]<2024.7.0", + # We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject` + # with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions. + # https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility + "numpy<2", }, "grafana": {"requests"}, "glue": aws_common, @@ -415,7 +425,7 @@ | {"cachetools"}, "s3": {*s3_base, *data_lake_profiling}, "gcs": {*s3_base, *data_lake_profiling}, - "abs": {*abs_base}, + "abs": {*abs_base, *data_lake_profiling}, "sagemaker": aws_common, "salesforce": {"simple-salesforce"}, "snowflake": snowflake_common | usage_common | sqlglot_lib, @@ -539,6 +549,7 @@ *list( dependency for plugin in [ + "abs", "athena", "bigquery", "clickhouse", @@ -627,6 +638,7 @@ entry_points = { "console_scripts": ["datahub = datahub.entrypoints:main"], "datahub.ingestion.source.plugins": [ + "abs = datahub.ingestion.source.abs.source:ABSSource", "csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource", "file = datahub.ingestion.source.file:GenericFileSource", "datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource", @@ -695,7 +707,6 @@ "demo-data = datahub.ingestion.source.demo_data.DemoDataSource", "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource", "gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource", - "abs = datahub.ingestion.source.abs.source:ABSSource", "sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource", "fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource", "qlik-sense = datahub.ingestion.source.qlik_sense.qlik_sense:QlikSenseSource",