Merge branch 'master' into ps-fix-entityfield-resolver
pedro93 authored Oct 25, 2023
2 parents 25eb1b3 + 2d1584b commit 5f66692
Showing 24 changed files with 480 additions and 92 deletions.
@@ -67,8 +67,8 @@ export default function PolicyDetailsModal({ policy, visible, onClose, privilege
const isMetadataPolicy = policy?.type === PolicyType.Metadata;

const resources = convertLegacyResourceFilter(policy?.resources);
const resourceTypes = getFieldValues(resources?.filter, 'RESOURCE_TYPE') || [];
const resourceEntities = getFieldValues(resources?.filter, 'RESOURCE_URN') || [];
const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || [];
const resourceEntities = getFieldValues(resources?.filter, 'URN') || [];
const domains = getFieldValues(resources?.filter, 'DOMAIN') || [];

const {
@@ -67,8 +67,8 @@ export default function PolicyPrivilegeForm({
} = useAppConfig();

const resources: ResourceFilter = convertLegacyResourceFilter(maybeResources) || EMPTY_POLICY.resources;
const resourceTypes = getFieldValues(resources.filter, 'RESOURCE_TYPE') || [];
const resourceEntities = getFieldValues(resources.filter, 'RESOURCE_URN') || [];
const resourceTypes = getFieldValues(resources.filter, 'TYPE') || [];
const resourceEntities = getFieldValues(resources.filter, 'URN') || [];

const getDisplayName = (entity) => {
if (!entity) {
@@ -145,10 +145,7 @@ export default function PolicyPrivilegeForm({
};
setResources({
...resources,
filter: setFieldValues(filter, 'RESOURCE_TYPE', [
...resourceTypes,
createCriterionValue(selectedResourceType),
]),
filter: setFieldValues(filter, 'TYPE', [...resourceTypes, createCriterionValue(selectedResourceType)]),
});
};

@@ -160,7 +157,7 @@
...resources,
filter: setFieldValues(
filter,
'RESOURCE_TYPE',
'TYPE',
resourceTypes?.filter((criterionValue) => criterionValue.value !== deselectedResourceType),
),
});
@@ -173,7 +170,7 @@
};
setResources({
...resources,
filter: setFieldValues(filter, 'RESOURCE_URN', [
filter: setFieldValues(filter, 'URN', [
...resourceEntities,
createCriterionValueWithEntity(
resource,
@@ -192,7 +189,7 @@
...resources,
filter: setFieldValues(
filter,
'RESOURCE_URN',
'URN',
resourceEntities?.filter((criterionValue) => criterionValue.value !== resource),
),
});
4 changes: 2 additions & 2 deletions datahub-web-react/src/app/permissions/policy/policyUtils.ts
@@ -99,10 +99,10 @@ export const convertLegacyResourceFilter = (resourceFilter: Maybe<ResourceFilter
}
const criteria = new Array<PolicyMatchCriterion>();
if (resourceFilter.type) {
criteria.push(createCriterion('RESOURCE_TYPE', [createCriterionValue(resourceFilter.type)]));
criteria.push(createCriterion('TYPE', [createCriterionValue(resourceFilter.type)]));
}
if (resourceFilter.resources && resourceFilter.resources.length > 0) {
criteria.push(createCriterion('RESOURCE_URN', resourceFilter.resources.map(createCriterionValue)));
criteria.push(createCriterion('URN', resourceFilter.resources.map(createCriterionValue)));
}
return {
filter: {
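To make the rename concrete, here is a sketch of the conversion this helper performs, with an illustrative urn; the `EQUALS` condition follows the example in `docs/authorization/policies.md`:

```yaml
# Legacy resource filter (input to convertLegacyResourceFilter)
"resources": {
    "type": "dataset",
    "resources": ["urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"]
}

# Converted filter (output), using the renamed field values
"resources": {
    "filter": {
        "criteria": [
            { "field": "TYPE", "condition": "EQUALS", "values": ["dataset"] },
            { "field": "URN", "condition": "EQUALS", "values": ["urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"] }
        ]
    }
}
```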
68 changes: 36 additions & 32 deletions docs-website/docusaurus.config.js
@@ -13,6 +13,13 @@ module.exports = {
projectName: "datahub", // Usually your repo name.
staticDirectories: ["static", "genStatic"],
stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"],
scripts: [
{
src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38",
async: true,
defer: true,
},
],
noIndex: isSaas,
customFields: {
isSaas: isSaas,
@@ -50,44 +57,41 @@ module.exports = {
position: "right",
},
{
to: "https://demo.datahubproject.io/",
label: "Demo",
position: "right",
},
{
href: "https://blog.datahubproject.io/",
label: "Blog",
position: "right",
},
{
href: "https://feature-requests.datahubproject.io/roadmap",
label: "Roadmap",
type: "dropdown",
label: "Resources",
position: "right",
items: [
{
href: "https://demo.datahubproject.io/",
label: "Demo",
},
{
href: "https://blog.datahubproject.io/",
label: "Blog",
},
{
href: "https://feature-requests.datahubproject.io/roadmap",
label: "Roadmap",
},
{
href: "https://slack.datahubproject.io",
label: "Slack",
},
{
href: "https://github.com/datahub-project/datahub",
label: "GitHub",
},
{
href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w",
label: "YouTube",
},
],
},
{
type: "docsVersionDropdown",
position: "right",
position: "left",
dropdownActiveClassDisabled: true,
},
{
href: "https://slack.datahubproject.io",
"aria-label": "Slack",
position: "right",
className: "item__icon item__slack",
},
{
href: "https://github.com/datahub-project/datahub",
"aria-label": "GitHub",
position: "right",
className: "item__icon item__github",
},

{
href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w",
"aria-label": "YouTube",
position: "right",
className: "item__icon item__youtube",
},
],
},
footer: {
27 changes: 18 additions & 9 deletions docs-website/src/styles/global.scss
@@ -144,20 +144,29 @@ div[class^="announcementBar"] {

/** Navbar */

@media only screen and (max-width: 1050px) {
.navbar__toggle {
display: inherit;
}
.navbar__item {
display: none;
}
}

.navbar {
.navbar__logo {
height: 3rem;
}

.navbar__link {
align-items: center;
margin: 0 1rem 0;
padding: 0;
border-bottom: 2px solid transparent;
}

.dropdown > .navbar__link:after {
top: -1px;
border-width: 0.3em 0.3em 0;
margin-left: 0.4em;
}

.navbar__link--active {
border-bottom-color: var(--ifm-navbar-link-hover-color);
}
.navbar__item {
padding: 0.25rem 0;
svg[class*="iconExternalLink"] {
display: none;
}
@@ -6,6 +6,9 @@ import { translate } from "@docusaurus/Translate";
import { useLocation } from "@docusaurus/router";
import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem";
import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem";

import styles from "./styles.module.scss";

const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId);
export default function DocsVersionDropdownNavbarItem({
mobile,
@@ -60,6 +63,7 @@ export default function DocsVersionDropdownNavbarItem({
return (
<DropdownNavbarItem
{...props}
className={styles.versionNavItem}
mobile={mobile}
label={dropdownLabel}
to={false} // This component is Swizzled to disable the link here
8 changes: 8 additions & 0 deletions docs-website/src/theme/NavbarItem/styles.module.scss
@@ -0,0 +1,8 @@
.versionNavItem {
margin-left: 0 !important;
padding: 0.2em 1em !important;
display: block;
border-radius: var(--ifm-button-border-radius) !important;
color: var(--ifm-menu-color-active);
background: var(--ifm-menu-color-background-active);
}
8 changes: 4 additions & 4 deletions docs/authorization/policies.md
@@ -137,7 +137,7 @@ We currently support the following:
#### Resources

The resource filter defines the set of resources that the policy applies to using a list of criteria. Each
criterion defines a field type (like resource_type, resource_urn, domain), a list of field values to compare, and a
criterion defines a field type (like type, urn, domain), a list of field values to compare, and a
condition (like EQUALS). It essentially checks whether the field of a certain resource matches any of the input values.
Note that if there are no criteria or the resource filter is not set, the policy is applied to ALL resources.

@@ -149,7 +149,7 @@ For example, the following resource filter will apply the policy to datasets, ch
"filter": {
"criteria": [
{
"field": "RESOURCE_TYPE",
"field": "TYPE",
"condition": "EQUALS",
"values": [
"dataset",
@@ -175,8 +175,8 @@ Supported fields are as follows

| Field Type | Description | Example |
|---------------|------------------------|-------------------------|
| resource_type | Type of the resource | dataset, chart, dataJob |
| resource_urn | Urn of the resource | urn:li:dataset:... |
| type | Type of the resource | dataset, chart, dataJob |
| urn | Urn of the resource | urn:li:dataset:... |
| domain | Domain of the resource | urn:li:domain:domainX |

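Analogously, a filter that pins a policy to a single resource by `urn` might look like this (the urn is illustrative):

```yaml
"filter": {
    "criteria": [
        {
            "field": "URN",
            "condition": "EQUALS",
            "values": ["urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"]
        }
    ]
}
```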
## Managing Policies
41 changes: 40 additions & 1 deletion docs/how/updating-datahub.md
@@ -11,11 +11,50 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
by Looker and LookML source connectors.
- #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details.
- #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`.
- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled.
- #8943 - The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled.
This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future.
If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup.
Otherwise, we recommend soft deleting all Databricks data via the DataHub CLI:
`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`, as in the recipe sketch below.
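A minimal reingestion recipe sketch, assuming a `unity-catalog` source type; the workspace URL and token are illustrative placeholders:

```yaml
source:
  type: unity-catalog
  config:
    workspace_url: https://my-workspace.cloud.databricks.com # illustrative
    token: "${DATABRICKS_TOKEN}" # illustrative
    include_metastore: false # the new option; disabling it changes all urns (see above)
```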
- #8846 - Changed enum values in resource filters used by policies. `RESOURCE_TYPE` became `TYPE` and `RESOURCE_URN` became `URN`.
Any existing policies using these filters (e.g. those scoped to particular `urns` or `types` such as `dataset`) need to be upgraded
manually, for example by retrieving the respective `dataHubPolicyInfo` aspect and updating the filter section, turning
```yaml
"resources": {
"filter": {
"criteria": [
{
"field": "RESOURCE_TYPE",
"condition": "EQUALS",
"values": [
"dataset"
]
}
]
}
```
into
```yaml
"resources": {
"filter": {
"criteria": [
{
"field": "TYPE",
"condition": "EQUALS",
"values": [
"dataset"
]
}
]
}
```
using, for instance, the `datahub put` command. Policies can also be removed and re-created via the UI.
- #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`.
This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully
qualified dataset name, i.e. `<project_name>.<dataset_name>`. If this is not the case, please
update your pattern (e.g. prepend your old dataset pattern with `.*\.` which matches the project part),
or set `match_fully_qualified_names: false` in your recipe. However, note that
setting this to `false` is deprecated and the flag will be removed entirely in a future release. A before/after sketch of the pattern update follows.
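A sketch of that pattern update (the dataset name is illustrative):

```yaml
source:
  type: bigquery
  config:
    match_fully_qualified_names: true # new default
    dataset_pattern:
      allow:
        # before #9077 this matched on the dataset name alone, e.g. 'sales_.*';
        # now the project part must be covered too:
        - '.*\.sales_.*'
```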

### Potential Downtime

@@ -119,8 +119,8 @@ class BigQueryV2Config(
)

match_fully_qualified_names: bool = Field(
default=False,
description="Whether `dataset_pattern` is matched against fully qualified dataset name `<project_id>.<dataset_name>`.",
default=True,
description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name `<project_id>.<dataset_name>`.",
)

include_external_url: bool = Field(
@@ -327,8 +327,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
):
logger.warning(
"Please update `dataset_pattern` to match against fully qualified schema name `<project_id>.<dataset_name>` and set config `match_fully_qualified_names : True`."
"Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
"The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`."
"The config option `match_fully_qualified_names` is deprecated and will be removed in a future release."
)
return values

@@ -548,7 +548,7 @@ def _get_parsed_audit_log_events(self, project_id: str) -> Iterable[QueryEvent]:
# handle the case where the read happens within our time range but the query
# completion event is delayed and happens after the configured end time.
corrected_start_time = self.start_time - self.config.max_query_duration
corrected_end_time = self.end_time + -self.config.max_query_duration
corrected_end_time = self.end_time + self.config.max_query_duration
self.report.log_entry_start_time = corrected_start_time
self.report.log_entry_end_time = corrected_end_time

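The sign flip above is the entire fix: the old expression added `-max_query_duration`, shrinking the window, whereas the comment's intent is to widen it on both sides so delayed completion events are still captured. Writing `max_query_duration` as $d_{\max}$, the corrected query window is

$$[\, t_{\mathrm{start}} - d_{\max},\ t_{\mathrm{end}} + d_{\max} \,].$$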
@@ -335,8 +335,12 @@ def get_time_window(self) -> Tuple[datetime, datetime]:
def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool:
return (
table_ref is not None
and self.config.dataset_pattern.allowed(table_ref.table_identifier.dataset)
and self.config.table_pattern.allowed(table_ref.table_identifier.table)
and self.config.dataset_pattern.allowed(
f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}"
if self.config.match_fully_qualified_names
else table_ref.table_identifier.dataset
)
and self.config.table_pattern.allowed(str(table_ref.table_identifier))
)

def _should_ingest_usage(self) -> bool:
@@ -844,7 +848,7 @@ def _get_parsed_bigquery_log_events(
# handle the case where the read happens within our time range but the query
# completion event is delayed and happens after the configured end time.
corrected_start_time = self.start_time - self.config.max_query_duration
corrected_end_time = self.end_time + -self.config.max_query_duration
corrected_end_time = self.end_time + self.config.max_query_duration
self.report.audit_start_time = corrected_start_time
self.report.audit_end_time = corrected_end_time

@@ -1,3 +1,4 @@
import os
from typing import Optional

from pydantic import Field, root_validator
@@ -67,9 +68,25 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
),
)

pull_from_datahub_api: bool = Field(
default=False,
description="Use the DataHub API to fetch versioned aspects.",
hidden_from_docs=True,
)

max_workers: int = Field(
default=5 * (os.cpu_count() or 4),
description="Number of worker threads to use for datahub api ingestion.",
hidden_from_docs=True,
)

@root_validator
def check_ingesting_data(cls, values):
if not values.get("database_connection") and not values.get("kafka_connection"):
if (
not values.get("database_connection")
and not values.get("kafka_connection")
and not values.get("pull_from_datahub_api")
):
raise ValueError(
"Your current config will not ingest any data."
" Please specify at least one of `database_connection` or `kafka_connection`, ideally both."
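With the new validator, a config passes if any of `database_connection`, `kafka_connection`, or `pull_from_datahub_api` is set. A recipe sketch relying only on the new API path, assuming the source is registered as `datahub` (flag names per the diff; values illustrative):

```yaml
source:
  type: datahub
  config:
    pull_from_datahub_api: true # fetch versioned aspects via the DataHub API
    max_workers: 16 # optional; defaults to 5 * cpu_count
```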