Skip to content

Commit

Permalink
Merge branch 'Canner:main' into feat-pyspark
Browse files Browse the repository at this point in the history
  • Loading branch information
ichuniq authored Nov 28, 2024
2 parents 8b6f230 + 689caf8 commit dfc62bc
Show file tree
Hide file tree
Showing 95 changed files with 2,706 additions and 1,378 deletions.
12 changes: 8 additions & 4 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ documentation:
- changed-files:
- any-glob-to-any-file: ['**/*.md']

trino:
- changed-files:
- any-glob-to-any-file: ['**/*trino*']

bigquery:
- changed-files:
- any-glob-to-any-file: ['**/*bigquery*']

ci:
- changed-files:
- any-glob-to-any-file: ['.github/**']

v1-engine-changed:
- changed-files:
- any-glob-to-any-file: ['wren-base/**', 'wren-main/**', 'wren-server/**', 'wren-tests/**', 'trino-parser/**', 'pom.xml']
11 changes: 7 additions & 4 deletions .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
run: |
if [ "${{ github.event_name }}" = "push" ]; then
echo "type=sha" > tags.txt
echo "type=raw,value=nightly" > tags.txt
echo "type=schedule" >> tags.txt
fi
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
if [ -n "${{ github.event.inputs.docker_image_tag_name }}" ]; then
Expand Down Expand Up @@ -56,7 +56,8 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.ENGINE_IMAGE }}
tags: ${{ needs.prepare-tag.outputs.tags }}
tags: |
${{ needs.prepare-tag.outputs.tags }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
Expand Down Expand Up @@ -152,7 +153,8 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.IBIS_IMAGE }}
tags: ${{ needs.prepare-tag.outputs.tags }}
tags: |
${{ needs.prepare-tag.outputs.tags }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
Expand All @@ -162,9 +164,10 @@ jobs:
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
TAGS=$(echo "${{ steps.meta.outputs.tags }}" | awk '{printf "--tag %s ", $0}')
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.IBIS_IMAGE }}@sha256:%s ' *) \
--tag ${{ steps.meta.outputs.tags }}
$TAGS
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ steps.meta.outputs.tags }}
197 changes: 0 additions & 197 deletions .github/workflows/core-py-mult-platform-ci.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/maven-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:
jobs:
maven-tests:
runs-on: ubuntu-latest
if: ${{ github.event.label.name == 'v1-engine-changed' }}
if: contains(github.event.pull_request.labels.*.name, 'v1-engine-changed')
steps:
- uses: actions/checkout@v4
- name: Set up JDK 21
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/stable-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,7 @@ jobs:
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
TAGS=$(echo "${{ steps.meta.outputs.tags }}" | awk '{printf "--tag %s ", $0}')
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.IBIS_IMAGE }}@sha256:%s ' *) \
--tag ${{ steps.meta.outputs.tags }}
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ steps.meta.outputs.tags }}
$TAGS
4 changes: 2 additions & 2 deletions ibis-server/app/mdl/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def analyze(manifest_str: str, sql: str) -> list[dict]:
)
return r.raise_for_status().json()
except httpx.ConnectError as e:
raise ConnectionError(f"Can not connect to Wren Engine: {e}") from e
raise ConnectionError(f"Can not connect to Java Engine: {e}") from e
except httpx.HTTPStatusError as e:
raise AnalyzeError(e.response.text)

Expand All @@ -32,7 +32,7 @@ def analyze_batch(manifest_str: str, sqls: list[str]) -> list[list[dict]]:
)
return r.raise_for_status().json()
except httpx.ConnectError as e:
raise ConnectionError(f"Can not connect to Wren Engine: {e}") from e
raise ConnectionError(f"Can not connect to Java Engine: {e}") from e
except httpx.HTTPStatusError as e:
raise AnalyzeError(e.response.text)

Expand Down
6 changes: 3 additions & 3 deletions ibis-server/app/mdl/context.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from functools import cache

from wren_core import SessionContext


@cache
def get_session_context(manifest_str: str, function_path: str) -> SessionContext:
def get_session_context(manifest_str: str, function_path: str):
from wren_core import SessionContext

return SessionContext(manifest_str, function_path)
4 changes: 2 additions & 2 deletions ibis-server/app/mdl/rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def rewrite(self, sql: str) -> str:
)
return r.raise_for_status().text.replace("\n", " ")
except httpx.ConnectError as e:
raise WrenEngineError(f"Can not connect to Wren Engine: {e}")
raise WrenEngineError(f"Can not connect to Java Engine: {e}")
except httpx.TimeoutException as e:
raise WrenEngineError(f"Timeout when connecting to Wren Engine: {e}")
raise WrenEngineError(f"Timeout when connecting to Java Engine: {e}")
except httpx.HTTPStatusError as e:
raise RewriteError(e.response.text)

Expand Down
2 changes: 1 addition & 1 deletion ibis-server/app/middleware/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ async def dispatch(self, request: Request, call_next) -> Response:
if body:
json_obj = orjson.loads(body)
if "connectionInfo" in json_obj:
json_obj["connectionInfo"] = "REMOVED_SENSITIVE_DATA"
json_obj["connectionInfo"] = "REDACTED"
body = orjson.dumps(json_obj)
logger.info("Request body: {body}", body=body.decode("utf-8"))
try:
Expand Down
37 changes: 32 additions & 5 deletions ibis-server/app/model/metadata/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Column,
Constraint,
ConstraintType,
RustWrenEngineColumnType,
Table,
TableProperties,
)
Expand All @@ -17,6 +18,8 @@ def __init__(self, connection_info: BigQueryConnectionInfo):

def get_table_list(self) -> list[Table]:
dataset_id = self.connection_info.dataset_id.get_secret_value()

# filter out columns with GEOGRAPHY & RANGE types
sql = f"""
SELECT
c.table_catalog,
Expand Down Expand Up @@ -46,14 +49,11 @@ def get_table_list(self) -> list[Table]:
AND cf.column_name = c.column_name
LEFT JOIN {dataset_id}.INFORMATION_SCHEMA.TABLE_OPTIONS table_options
ON c.table_name = table_options.table_name
WHERE cf.data_type != 'GEOGRAPHY'
AND cf.data_type NOT LIKE 'RANGE%'
"""
response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

def get_data_type(data_type) -> str:
if "STRUCT" in data_type:
return "RECORD"
return data_type

def get_column(row, nestedColumns=None) -> Column:
return Column(
# field_path supports both column & nested column
Expand Down Expand Up @@ -139,3 +139,30 @@ def get_constraints(self) -> list[Constraint]:

def get_version(self) -> str:
return "Follow BigQuery release version"

def _transform_column_type(self, data_type):
# lower case the data_type
data_type = data_type.lower()

# if data_type start with "array" or "struct", by pass it
if data_type.startswith(("array", "struct")):
return data_type

# Map BigQuery types to RustWrenEngineColumnType
switcher = {
# GEOGRAPHY and RANGE columns were filtered out
"bytes": RustWrenEngineColumnType.BYTES,
"date": RustWrenEngineColumnType.DATE,
"datetime": RustWrenEngineColumnType.DATETIME,
"interval": RustWrenEngineColumnType.INTERVAL,
"json": RustWrenEngineColumnType.JSON,
"int64": RustWrenEngineColumnType.INT64,
"numeric": RustWrenEngineColumnType.NUMERIC,
"bignumeric": RustWrenEngineColumnType.BIGNUMERIC,
"float64": RustWrenEngineColumnType.FLOAT64,
"string": RustWrenEngineColumnType.STRING,
"time": RustWrenEngineColumnType.TIME,
"timestamp": RustWrenEngineColumnType.TIMESTAMPTZ,
}

return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN)
Loading

0 comments on commit dfc62bc

Please sign in to comment.