Skip to content

Commit

Permalink
Merge branch 'master' into ps-add-recipe-deploy
Browse files Browse the repository at this point in the history
  • Loading branch information
iprentic authored Jul 31, 2023
2 parents f538ade + de1f23d commit b2fb924
Show file tree
Hide file tree
Showing 38 changed files with 212 additions and 162 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ jobs:
if: ${{ needs.setup.outputs.publish != 'true' }}
with:
image: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Disable datahub-actions
run: |
yq -i 'del(.services.datahub-actions)' docker/quickstart/docker-compose-without-neo4j.quickstart.yml
- name: run quickstart
env:
DATAHUB_TELEMETRY_ENABLED: false
Expand All @@ -501,6 +504,20 @@ jobs:
# we are doing this because gms takes time to get ready
# and we don't have a better readiness check when bootstrap is done
sleep 60s
- name: Disable ES Disk Threshold
run: |
curl -XPUT "http://localhost:9200/_cluster/settings" \
-H 'Content-Type: application/json' -d'{
"persistent": {
"cluster": {
"routing": {
"allocation.disk.threshold_enabled": false
}
}
}
}'
- name: Remove Source Code
run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete
- name: Smoke test
env:
RUN_QUICKSTART: false
Expand Down
12 changes: 6 additions & 6 deletions datahub-frontend/app/controllers/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ public CompletableFuture<Result> proxy(String path, Http.Request request) throws
.stream()
// Remove X-DataHub-Actor to prevent malicious delegation.
.filter(entry -> !AuthenticationConstants.LEGACY_X_DATAHUB_ACTOR_HEADER.equalsIgnoreCase(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_LENGTH.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_TYPE.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.AUTHORIZATION.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_LENGTH.equalsIgnoreCase(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_TYPE.equalsIgnoreCase(entry.getKey()))
.filter(entry -> !Http.HeaderNames.AUTHORIZATION.equalsIgnoreCase(entry.getKey()))
// Remove Host so that service meshes do not route to the wrong host
.filter(entry -> !Http.HeaderNames.HOST.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.HOST.equalsIgnoreCase(entry.getKey()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
)
.addHeader(Http.HeaderNames.AUTHORIZATION, authorizationHeaderValue)
Expand All @@ -152,8 +152,8 @@ public CompletableFuture<Result> proxy(String path, Http.Request request) throws
final ResponseHeader header = new ResponseHeader(apiResponse.getStatus(), apiResponse.getHeaders()
.entrySet()
.stream()
.filter(entry -> !Http.HeaderNames.CONTENT_LENGTH.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_TYPE.equals(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_LENGTH.equalsIgnoreCase(entry.getKey()))
.filter(entry -> !Http.HeaderNames.CONTENT_TYPE.equalsIgnoreCase(entry.getKey()))
.map(entry -> Pair.of(entry.getKey(), String.join(";", entry.getValue())))
.collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)));
final HttpEntity body = new HttpEntity.Strict(apiResponse.getBodyAsBytes(), Optional.ofNullable(apiResponse.getContentType()));
Expand Down
3 changes: 2 additions & 1 deletion datahub-web-react/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ const errorLink = onError((error) => {
if (serverError.statusCode === 401) {
isLoggedInVar(false);
Cookies.remove(GlobalCfg.CLIENT_AUTH_COOKIE);
window.location.replace(PageRoutes.AUTHENTICATE);
const currentPath = window.location.pathname + window.location.search;
window.location.replace(`${PageRoutes.AUTHENTICATE}?redirect_uri=${encodeURIComponent(currentPath)}`);
}
}
if (graphQLErrors && graphQLErrors.length) {
Expand Down
6 changes: 0 additions & 6 deletions datahub-web-react/src/app/context/UserContextProvider.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { useGetMeLazyQuery } from '../../graphql/me.generated';
import { useGetGlobalViewsSettingsLazyQuery } from '../../graphql/app.generated';
import { CorpUser, PlatformPrivileges } from '../../types.generated';
import { UserContext, LocalState, DEFAULT_STATE, State } from './userContext';
import { useInitialRedirect } from './useInitialRedirect';

// TODO: Migrate all usage of useAuthenticatedUser to using this provider.

Expand Down Expand Up @@ -125,11 +124,6 @@ const UserContextProvider = ({ children }: { children: React.ReactNode }) => {
}
}, [state, localState.selectedViewUrn, setDefaultSelectedView]);

/**
* Route to the most recently visited path once on first load of home page, if present in local storage.
*/
useInitialRedirect(state, localState, setState, updateLocalState);

return (
<UserContext.Provider
value={{
Expand Down
52 changes: 0 additions & 52 deletions datahub-web-react/src/app/context/useInitialRedirect.ts

This file was deleted.

5 changes: 0 additions & 5 deletions datahub-web-react/src/app/context/userContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ export type State = {
loadedPersonalDefaultViewUrn: boolean;
hasSetDefaultView: boolean;
};
/**
* Whether the initial page path has been loaded.
*/
loadedInitialPath: boolean;
};

/**
Expand Down Expand Up @@ -54,7 +50,6 @@ export const DEFAULT_STATE: State = {
loadedPersonalDefaultViewUrn: false,
hasSetDefaultView: false,
},
loadedInitialPath: false,
};

export const DEFAULT_CONTEXT = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ export const AddGroupMembersModal = ({ urn, visible, onCloseModal, onSubmit }: P
setSelectedMembers(newUsers);
};

const onDeselectMember = (memberUrn: string) => {
const onDeselectMember = (memberUrn: { key: string; label: React.ReactNode; value: string }) => {
setInputValue('');
const newUserActors = selectedMembers.filter((user) => user !== memberUrn);
const newUserActors = selectedMembers.filter((user) => user.value !== memberUrn.value);
setSelectedMembers(newUserActors);
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,10 @@ export function filterColumns(
node: { x: number; y: number; data: Omit<NodeData, 'children'> },
setColumnsByUrn: (value: React.SetStateAction<Record<string, SchemaField[]>>) => void,
) {
const filteredFields = node.data.schemaMetadata?.fields.filter((field) => field.fieldPath.includes(filterText));
const formattedFilterText = filterText.toLocaleLowerCase();
const filteredFields = node.data.schemaMetadata?.fields.filter((field) =>
field.fieldPath.toLocaleLowerCase().includes(formattedFilterText),
);
if (filteredFields) {
setColumnsByUrn((colsByUrn) => ({
...colsByUrn,
Expand Down
2 changes: 2 additions & 0 deletions docker/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') {

environment "DATAHUB_TELEMETRY_ENABLED", "false"
environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
environment "ACTIONS_VERSION", 'alpine3.17-slim'
environment "DATAHUB_ACTIONS_IMAGE", 'nginx'

def cmd = [
'source ../metadata-ingestion/venv/bin/activate && ',
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose-with-cassandra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose-without-neo4j.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose-m1.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
1 change: 1 addition & 0 deletions docs-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ module.exports = {
"docs/components",
"docs/architecture/metadata-ingestion",
"docs/architecture/metadata-serving",
"docs/architecture/docker-containers",
],
},
{
Expand Down
27 changes: 27 additions & 0 deletions docs/architecture/docker-containers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
title: "Docker Container Architecture"
---

# Docker Container Architecture

When running DataHub via docker-compose or helm, the following diagram shows the containers involved
in running DataHub and their relationships with each other. The helm chart uses helm hooks to determine
the proper ordering of the components, whereas docker-compose relies on a series of health checks.

```text
datahub-frontend-react datahub-actions
\ /
| datahub-upgrade (NoCodeDataMigration, helm only)
| /
datahub-gms (healthy)
|
datahub-upgrade (SystemUpdate completed)
/--------------------/ | \ \------------------------------------------------\
/ | \-------------------\ \
mysql-setup (completed) elasticsearch-setup (completed) kafka-setup (completed) (if apply) neo4j (healthy)
| | / \
| | / \
mysql (healthy) elasticsearch (healthy) broker (healthy) (if not internal) schema-registry (healthy)
|
zookeeper (healthy)
```
1 change: 1 addition & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ The environment variables listed below take precedence over the DataHub CLI conf
- `DATAHUB_DEBUG` (default `false`) - Set to `true` to enable debug logging for CLI. Can also be achieved through `--debug` option of the CLI.
- `DATAHUB_VERSION` (default `head`) - Set to a specific version to run quickstart with the particular version of docker images.
- `ACTIONS_VERSION` (default `head`) - Set to a specific version to run quickstart with that image tag of `datahub-actions` container.
- `DATAHUB_ACTIONS_IMAGE` (default `acryldata/datahub-actions`) - Set to `acryldata/datahub-actions-slim` to run a slimmer actions container without pyspark/deequ features.

```shell
DATAHUB_SKIP_CONFIG=false
Expand Down
9 changes: 4 additions & 5 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,11 @@ def get_long_description():
}

sqllineage_lib = {
"sqllineage==1.3.6",
"sqllineage==1.3.8",
# We don't have a direct dependency on sqlparse but it is a dependency of sqllineage.
# As per https://github.com/reata/sqllineage/issues/361
# and https://github.com/reata/sqllineage/pull/360
# sqllineage has compat issues with sqlparse 0.4.4.
"sqlparse==0.4.3",
# There have previously been issues from not pinning sqlparse, so it's best to pin it.
# Related: https://github.com/reata/sqllineage/issues/361 and https://github.com/reata/sqllineage/pull/360
"sqlparse==0.4.4",
}

sqlglot_lib = {
Expand Down
5 changes: 0 additions & 5 deletions metadata-ingestion/src/datahub/ingestion/graph/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

from datahub.cli.cli_utils import get_url_and_token
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect, make_data_platform_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
Expand Down Expand Up @@ -65,10 +64,6 @@ class DatahubClientConfig(ConfigModel):
ca_certificate_path: Optional[str] = None
disable_ssl_verification: bool = False

_max_threads_moved_to_sink = pydantic_removed_field(
"max_threads", print_warning=False
)


# Alias for backwards compatibility.
# DEPRECATION: Remove in v0.10.2.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def create(
elif config_dict is None:
raise ConfigurationError("Missing provider configuration.")
else:
provider_config = DatahubIngestionStateProviderConfig.parse_obj(config_dict)
provider_config = (
DatahubIngestionStateProviderConfig.parse_obj_allow_extras(config_dict)
)
if provider_config.datahub_api:
graph = DataHubGraph(provider_config.datahub_api)
return cls(graph, name)
Expand Down
20 changes: 12 additions & 8 deletions metadata-ingestion/tests/unit/stateful_ingestion/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"token": "dummy_test_tok",
"timeout_sec": 10,
"extra_headers": {},
"max_threads": 15,
"max_threads": 10,
},
"simple": {},
"default": {},
Expand All @@ -44,11 +44,15 @@
"datahub_api": datahub_client_configs["full"],
},
DatahubIngestionStateProviderConfig(
datahub_api=DatahubClientConfig(
server="http://localhost:8080",
token="dummy_test_tok",
timeout_sec=10,
extra_headers={},
# This test verifies that the max_threads arg is ignored.
datahub_api=DatahubClientConfig.parse_obj_allow_extras(
dict(
server="http://localhost:8080",
token="dummy_test_tok",
timeout_sec=10,
extra_headers={},
max_threads=10,
)
),
),
False,
Expand Down Expand Up @@ -188,7 +192,7 @@ def test_state_provider_configs(
if raises_exception:
with pytest.raises(ValidationError):
assert expected is None
config_class.parse_obj(config_dict)
config_class.parse_obj_allow_extras(config_dict)
else:
config = config_class.parse_obj(config_dict)
config = config_class.parse_obj_allow_extras(config_dict)
assert config == expected
2 changes: 1 addition & 1 deletion metadata-ingestion/tests/unit/test_usage_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def test_make_usage_workunit():
def test_query_formatting():
test_email = "[email protected]"
test_query = "select * from foo where id in (select id from bar);"
formatted_test_query: str = "SELECT *\n FROM foo\n WHERE id in (\n SELECT id\n FROM bar\n );"
formatted_test_query: str = "SELECT *\n FROM foo\n WHERE id IN (\n SELECT id\n FROM bar\n );"
event_time = datetime(2020, 1, 1)

floored_ts = get_time_bucket(event_time, BucketDuration.DAY)
Expand Down
Loading

0 comments on commit b2fb924

Please sign in to comment.