Skip to content

Commit

Permalink
Merge branch 'dbtcloud-add-multiproject-filter' of https://github.com…
Browse files Browse the repository at this point in the history
…/SumanMaharana/OpenMetadata into dbtcloud-add-multiproject-filter
  • Loading branch information
SumanMaharana committed Nov 27, 2024
2 parents c039243 + e443504 commit 99ba9cc
Show file tree
Hide file tree
Showing 198 changed files with 10,176 additions and 2,506 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/auto-cherry-pick-labeled-prs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
name: Cherry-pick labeled PRs to OpenMetadata release branch on merge
# yamllint disable-line rule:comments
run-name: OpenMetadata release cherry-pick PR #${{ github.event.pull_request.number }}

# yamllint disable-line rule:truthy
on:
pull_request:
types: [closed]
branches:
- main
permissions:
contents: write
pull-requests: write
env:
CURRENT_RELEASE_ENDPOINT: ${{ vars.CURRENT_RELEASE_ENDPOINT }} # Endpoint that returns the current release version in json format
jobs:
cherry_pick_to_release_branch:
if: github.event.pull_request.merged == true &&
contains(github.event.pull_request.labels.*.name, 'To release')
runs-on: ubuntu-latest # Running it on ubuntu-latest on purpose (we're not using all the free minutes)
steps:
- name: Checkout main branch
uses: actions/checkout@v4
with:
ref: main
fetch-depth: 0
- name: Get the release version
id: get_release_version
run: |
CURRENT_RELEASE=$(curl -s $CURRENT_RELEASE_ENDPOINT | jq -r .om_branch)
echo "CURRENT_RELEASE=${CURRENT_RELEASE}" >> $GITHUB_ENV
- name: Cherry-pick changes from PR
id: cherry_pick
continue-on-error: true
run: |
git config --global user.email "[email protected]"
git config --global user.name "OpenMetadata Release Bot"
git fetch origin ${CURRENT_RELEASE}
git checkout ${CURRENT_RELEASE}
git cherry-pick -x ${{ github.event.pull_request.merge_commit_sha }}
- name: Push changes to release branch
id: push_changes
continue-on-error: true
if: steps.cherry_pick.outcome == 'success'
run: |
git push origin ${CURRENT_RELEASE}
- name: Post a comment on failure
if: steps.cherry_pick.outcome != 'success' || steps.push_changes.outcome != 'success'
uses: actions/github-script@v7
with:
script: |
const prNumber = context.payload.pull_request.number;
const releaseVersion = process.env.CURRENT_RELEASE;
const workflowRunUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;
github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: `Failed to cherry-pick changes to the ${releaseVersion} branch.
Please cherry-pick the changes manually.
You can find more details [here](${workflowRunUrl}).`
})
- name: Post a comment on success
if: steps.cherry_pick.outcome == 'success' && steps.push_changes.outcome == 'success'
uses: actions/github-script@v7
with:
script: |
const prNumber = context.payload.pull_request.number;
const releaseVersion = process.env.CURRENT_RELEASE;
github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: `Changes have been cherry-picked to the ${releaseVersion} branch.`
})
1 change: 1 addition & 0 deletions ingestion/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,4 @@ ignore = [
reportDeprecated = false
reportMissingTypeStubs = false
reportAny = false
reportExplicitAny = false
1 change: 1 addition & 0 deletions ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
"typing-inspect",
"packaging", # For version parsing
"shapely",
"collate-data-diff",
}

plugins: Dict[str, Set[str]] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.generated.schema.type.basic import Timestamp
from metadata.profiler.processor.runner import QueryRunner
from metadata.utils.importer import import_test_case_class
from metadata.utils import importer

if TYPE_CHECKING:
from pandas import DataFrame
Expand Down Expand Up @@ -59,7 +59,8 @@ def __init__(
"""
self._test_case = test_case
self.runner = runner
self.validator_cls: Type[BaseTestValidator] = import_test_case_class(
# TODO this will be removed on https://github.com/open-metadata/OpenMetadata/pull/18716
self.validator_cls: Type[BaseTestValidator] = importer.import_test_case_class(
entity_type,
self._get_source_type(),
self.test_case.testDefinition.fullyQualifiedName, # type: ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# limitations under the License.
"""Module that defines the TableDiffParamsSetter class."""
from ast import literal_eval
from typing import List, Optional
from typing import List, Optional, Set
from urllib.parse import urlparse

from metadata.data_quality.validations import utils
Expand Down Expand Up @@ -75,7 +75,9 @@ def get_parameters(self, test_case) -> TableDiffRuntimeParameters:
DatabaseService, table2.service.id, nullable=False
)
key_columns = self.get_key_columns(test_case)
extra_columns = self.get_extra_columns(key_columns, test_case)
extra_columns = self.get_extra_columns(
key_columns, test_case, self.table_entity.columns, table2.columns
)
return TableDiffRuntimeParameters(
table_profile_config=self.table_entity.tableProfilerConfig,
table1=TableParameter(
Expand Down Expand Up @@ -111,8 +113,8 @@ def get_parameters(self, test_case) -> TableDiffRuntimeParameters:
case_sensitive=case_sensitive_columns,
),
),
keyColumns=key_columns,
extraColumns=extra_columns,
keyColumns=list(key_columns),
extraColumns=list(extra_columns),
whereClause=self.build_where_clause(test_case),
)

Expand All @@ -134,21 +136,25 @@ def build_where_clause(self, test_case) -> Optional[str]:
return " AND ".join(where_clauses)

def get_extra_columns(
self, key_columns: List[str], test_case
) -> Optional[List[str]]:
self,
key_columns: Set[str],
test_case,
left_columns: List[Column],
right_columns: List[Column],
) -> Optional[Set[str]]:
extra_columns_param = self.get_parameter(test_case, "useColumns", None)
if extra_columns_param is not None:
extra_columns: List[str] = literal_eval(extra_columns_param)
self.validate_columns(extra_columns)
return extra_columns
return set(extra_columns)
if extra_columns_param is None:
extra_columns_param = []
for column in self.table_entity.columns:
for column in left_columns + right_columns:
if column.name.root not in key_columns:
extra_columns_param.insert(0, column.name.root)
return extra_columns_param
return set(extra_columns_param)

def get_key_columns(self, test_case) -> List[str]:
def get_key_columns(self, test_case) -> Set[str]:
key_columns_param = self.get_parameter(test_case, "keyColumns", "[]")
key_columns: List[str] = literal_eval(key_columns_param)
if key_columns:
Expand All @@ -167,13 +173,13 @@ def get_key_columns(self, test_case) -> List[str]:
"Could not find primary key or unique constraint columns.\n",
"Specify 'keyColumns' to explicitly set the columns to use as keys.",
)
return key_columns
return set(key_columns)

@staticmethod
def filter_relevant_columns(
columns: List[Column],
key_columns: List[str],
extra_columns: List[str],
key_columns: Set[str],
extra_columns: Set[str],
case_sensitive: bool,
) -> List[Column]:
validated_columns = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,15 +273,16 @@ def get_incomparable_columns(self) -> List[str]:
).with_schema()
result = []
for column in table1.key_columns + table1.extra_columns:
col1_type = self._get_column_python_type(
table1._schema[column] # pylint: disable=protected-access
)
# Skip columns that are not in the second table. We cover this case in get_changed_added_columns.
if table2._schema.get(column) is None: # pylint: disable=protected-access
col1 = table1._schema.get(column) # pylint: disable=protected-access
if col1 is None:
# Skip columns that are not in the first table. We cover this case in get_changed_added_columns.
continue
col2_type = self._get_column_python_type(
table2._schema[column] # pylint: disable=protected-access
)
col2 = table2._schema.get(column) # pylint: disable=protected-access
if col2 is None:
# Skip columns that are not in the second table. We cover this case in get_changed_added_columns.
continue
col1_type = self._get_column_python_type(col1)
col2_type = self._get_column_python_type(col2)
if is_numeric(col1_type) and is_numeric(col2_type):
continue
if col1_type != col2_type:
Expand Down
14 changes: 9 additions & 5 deletions ingestion/src/metadata/ingestion/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,24 @@
from typing import Generic, Optional, TypeVar

from pydantic import BaseModel, Field
from typing_extensions import Annotated

from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)

# Entities are instances of BaseModel
Entity = BaseModel

T = TypeVar("T")


class Either(BaseModel, Generic[T]):
"""Any execution should return us Either an Entity of an error for us to handle"""

left: Optional[StackTraceError] = Field(
None, description="Error encountered during execution"
)
right: Optional[T] = Field(None, description="Correct instance of an Entity")
left: Annotated[
Optional[StackTraceError],
Field(description="Error encountered during execution", default=None),
]
right: Annotated[
Optional[T], Field(description="Correct instance of an Entity", default=None)
]
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,10 @@ def __init__(self, config: PowerBIConnection):
client_credential=self.config.clientSecret.get_secret_value(),
authority=self.config.authorityURI + self.config.tenantId,
)
self.auth_token = self.get_auth_token()
client_config = ClientConfig(
base_url="https://api.powerbi.com",
api_version="v1.0",
auth_token=lambda: self.auth_token,
auth_token=self.get_auth_token,
auth_header="Authorization",
allow_redirects=True,
retry_codes=[429],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def yield_dashboard_chart(
except Exception as exc:
yield Either(
left=StackTraceError(
name=chart_json.id,
name=str(chart_json.id),
error=f"Error yielding Chart [{chart_json.id} - {chart_json.slice_name}]: {exc}",
stackTrace=traceback.format_exc(),
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def _(config: DbtCloudConfig): # pylint: disable=too-many-locals
logger.debug(
"Requesting [dbt_catalog], [dbt_manifest] and [dbt_run_results] data"
)
params_data = {"order_by": "-finished_at", "limit": "1", "status": "10"}
params_data = {"order_by": "-finished_at", "limit": "1"}
if project_id:
params_data["project_id"] = project_id

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Elasticsearch source to extract metadata
"""
import shutil
import traceback
from pathlib import Path
from typing import Any, Iterable, Optional

Expand All @@ -21,6 +22,7 @@
CreateSearchIndexRequest,
)
from metadata.generated.schema.entity.data.searchIndex import (
IndexType,
SearchIndex,
SearchIndexSampleData,
)
Expand Down Expand Up @@ -103,6 +105,7 @@ def yield_search_index(
fields=parse_es_index_mapping(
search_index_details.get(index_name, {}).get("mappings")
),
indexType=IndexType.Index,
)
yield Either(right=search_index_request)
self.register_record(search_index_request=search_index_request)
Expand Down Expand Up @@ -143,6 +146,56 @@ def yield_search_index_sample_data(
)
)

def get_search_index_template_list(self) -> Iterable[dict]:
"""
Get List of all search index template
"""
yield from self.client.indices.get_index_template().get("index_templates", [])

def get_search_index_template_name(
self, search_index_template_details: dict
) -> Optional[str]:
"""
Get Search Index Template Name
"""
return search_index_template_details and search_index_template_details["name"]

def yield_search_index_template(
self, search_index_template_details: Any
) -> Iterable[Either[CreateSearchIndexRequest]]:
"""
Method to Get Search Index Template Entity
"""
try:
if self.source_config.includeIndexTemplate:
index_name = self.get_search_index_template_name(
search_index_template_details
)
index_template = search_index_template_details["index_template"]
if index_name:
search_index_template_request = CreateSearchIndexRequest(
name=EntityName(index_name),
displayName=index_name,
searchIndexSettings=index_template.get("template", {}).get(
"settings", {}
),
service=FullyQualifiedEntityName(
self.context.get().search_service
),
fields=parse_es_index_mapping(
index_template.get("template", {}).get("mappings")
),
indexType=IndexType.IndexTemplate,
description=index_template.get("_meta", {}).get("description"),
)
yield Either(right=search_index_template_request)
self.register_record(
search_index_request=search_index_template_request
)
except Exception as exc:
logger.debug(traceback.format_exc())
logger.error(f"Could not include index templates due to {exc}")

def close(self):
try:
if Path(self.service_connection.sslConfig.certificates.stagingDir).exists():
Expand Down
Loading

0 comments on commit 99ba9cc

Please sign in to comment.