Merge branch 'datahub-project:master' into master
anshbansal authored Oct 5, 2023
2 parents 9dfeaac + 2fcced6 commit f3c9c3c
Showing 94 changed files with 2,606 additions and 268 deletions.
14 changes: 10 additions & 4 deletions .github/workflows/build-and-test.yml
@@ -27,8 +27,8 @@ jobs:
command:
[
# metadata-ingestion and airflow-plugin each have dedicated build jobs
"./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
"./gradlew :datahub-frontend:build :datahub-web-react:build --parallel",
"except_metadata_ingestion",
"frontend"
]
timezone:
[
@@ -53,9 +53,15 @@ jobs:
with:
python-version: "3.10"
cache: pip
- name: Gradle build (and test)
- name: Gradle build (and test) for metadata ingestion
# we only need the timezone runs for frontend tests
if: ${{ matrix.command == 'except_metadata_ingestion' && matrix.timezone == 'America/New_York' }}
run: |
${{ matrix.command }}
./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel
- name: Gradle build (and test) for frontend
if: ${{ matrix.command == 'frontend' }}
run: |
./gradlew :datahub-frontend:build :datahub-web-react:build --parallel
env:
NODE_OPTIONS: "--max-old-space-size=3072"
- uses: actions/upload-artifact@v3
7 changes: 4 additions & 3 deletions .github/workflows/metadata-ingestion.yml
@@ -34,7 +34,6 @@ jobs:
python-version: ["3.7", "3.10"]
command:
[
"lint",
"testQuick",
"testIntegrationBatch0",
"testIntegrationBatch1",
@@ -54,6 +53,9 @@
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install package
run: ./gradlew :metadata-ingestion:installPackageOnly
- name: Run lint along with testQuick
if: ${{ matrix.command == 'testQuick' }}
run: ./gradlew :metadata-ingestion:lint
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:${{ matrix.command }}
- name: Debug info
@@ -65,15 +67,14 @@
docker image ls
docker system df
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.command != 'lint' }}
with:
name: Test Results (metadata ingestion ${{ matrix.python-version }})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
- name: Upload coverage to Codecov
if: ${{ always() && matrix.python-version == '3.10' && matrix.command != 'lint' }}
if: ${{ always() && matrix.python-version == '3.10' }}
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -8,6 +8,7 @@
import com.datahub.plugins.auth.authorization.Authorizer;
import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl;
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
@@ -21,6 +22,8 @@
public class TestUtils {

public static EntityService getMockEntityService() {
PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader()
.setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false);
EntityRegistry registry = new ConfigEntityRegistry(TestUtils.class.getResourceAsStream("/test-entity-registry.yaml"));
EntityService mockEntityService = Mockito.mock(EntityService.class);
Mockito.when(mockEntityService.getEntityRegistry()).thenReturn(registry);
@@ -181,6 +181,7 @@ entities:
- assertionInfo
- dataPlatformInstance
- assertionRunEvent
- assertionActions
- status
- name: dataHubRetention
category: internal
@@ -292,4 +293,11 @@
aspects:
- ownershipTypeInfo
- status
- name: dataContract
category: core
keyAspect: dataContractKey
aspects:
- dataContractProperties
- dataContractStatus
- status
events:
46 changes: 28 additions & 18 deletions datahub-web-react/src/app/search/SearchBar.tsx
@@ -6,7 +6,7 @@ import { useHistory } from 'react-router';
import { AutoCompleteResultForEntity, EntityType, FacetFilterInput, ScenarioType } from '../../types.generated';
import EntityRegistry from '../entity/EntityRegistry';
import filterSearchQuery from './utils/filterSearchQuery';
import { ANTD_GRAY, ANTD_GRAY_V2 } from '../entity/shared/constants';
import { ANTD_GRAY, ANTD_GRAY_V2, REDESIGN_COLORS } from '../entity/shared/constants';
import { getEntityPath } from '../entity/shared/containers/profile/utils';
import { EXACT_SEARCH_PREFIX } from './utils/constants';
import { useListRecommendationsQuery } from '../../graphql/recommendations.generated';
@@ -20,7 +20,6 @@ import RecommendedOption from './autoComplete/RecommendedOption';
import SectionHeader, { EntityTypeLabel } from './autoComplete/SectionHeader';
import { useUserContext } from '../context/useUserContext';
import { navigateToSearchUrl } from './utils/navigateToSearchUrl';
import { getQuickFilterDetails } from './autoComplete/quickFilters/utils';
import ViewAllSearchItem from './ViewAllSearchItem';
import { ViewSelect } from '../entity/view/select/ViewSelect';
import { combineSiblingsInAutoComplete } from './utils/combineSiblingsInAutoComplete';
@@ -39,13 +38,14 @@ const StyledSearchBar = styled(Input)`
&&& {
border-radius: 70px;
height: 40px;
font-size: 20px;
color: ${ANTD_GRAY[7]};
background-color: ${ANTD_GRAY_V2[2]};
}
> .ant-input {
font-size: 14px;
color: ${ANTD_GRAY[7]};
background-color: ${ANTD_GRAY_V2[2]};
border: 2px solid transparent;
&:focus-within {
border: 1.5px solid ${REDESIGN_COLORS.BLUE};
}
}
> .ant-input::placeholder {
color: ${ANTD_GRAY_V2[10]};
@@ -203,23 +203,16 @@ export const SearchBar = ({
const { quickFilters, selectedQuickFilter, setSelectedQuickFilter } = useQuickFiltersContext();

const autoCompleteQueryOptions = useMemo(() => {
const query = suggestions.length ? effectiveQuery : '';
const selectedQuickFilterLabel =
showQuickFilters && selectedQuickFilter
? getQuickFilterDetails(selectedQuickFilter, entityRegistry).label
: '';
const text = query || selectedQuickFilterLabel;

if (!text) return [];
if (effectiveQuery === '') return [];

return [
{
value: `${EXACT_SEARCH_PREFIX}${text}`,
label: <ViewAllSearchItem searchTarget={text} />,
value: `${EXACT_SEARCH_PREFIX}${effectiveQuery}`,
label: <ViewAllSearchItem searchTarget={effectiveQuery} />,
type: EXACT_AUTOCOMPLETE_OPTION_TYPE,
},
];
}, [showQuickFilters, suggestions.length, effectiveQuery, selectedQuickFilter, entityRegistry]);
}, [effectiveQuery]);

const autoCompleteEntityOptions = useMemo(() => {
return suggestions.map((suggestion: AutoCompleteResultForEntity) => {
@@ -296,6 +289,22 @@ export const SearchBar = ({
}
}

const searchInputRef = useRef(null);

useEffect(() => {
const handleKeyDown = (event) => {
// Support command-k to select the search bar.
// 75 is the keyCode for 'k'
if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) {
(searchInputRef?.current as any)?.focus();
}
};
document.addEventListener('keydown', handleKeyDown);
return () => {
document.removeEventListener('keydown', handleKeyDown);
};
}, []);

return (
<AutoCompleteContainer style={style} ref={searchBarWrapperRef}>
<StyledAutoComplete
@@ -399,6 +408,7 @@ export const SearchBar = ({
/>
</>
}
ref={searchInputRef}
/>
</StyledAutoComplete>
</AutoCompleteContainer>
28 changes: 14 additions & 14 deletions datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
@@ -93,20 +93,6 @@ export function HeaderLinks(props: Props) {
</Link>
</LinkWrapper>
)}
{showIngestion && (
<LinkWrapper>
<Link to="/ingestion">
<Button id={HOME_PAGE_INGESTION_ID} type="text">
<Tooltip title="Connect DataHub to your organization's data sources">
<NavTitleContainer>
<ApiOutlined />
<NavTitleText>Ingestion</NavTitleText>
</NavTitleContainer>
</Tooltip>
</Button>
</Link>
</LinkWrapper>
)}
<Dropdown
trigger={['click']}
overlay={
@@ -145,6 +131,20 @@
</Button>
</LinkWrapper>
</Dropdown>
{showIngestion && (
<LinkWrapper>
<Link to="/ingestion">
<Button id={HOME_PAGE_INGESTION_ID} type="text">
<Tooltip title="Connect DataHub to your organization's data sources">
<NavTitleContainer>
<ApiOutlined />
<NavTitleText>Ingestion</NavTitleText>
</NavTitleContainer>
</Tooltip>
</Button>
</Link>
</LinkWrapper>
)}
{showSettings && (
<LinkWrapper style={{ marginRight: 12 }}>
<Link to="/settings">
4 changes: 2 additions & 2 deletions datahub-web-react/src/conf/theme/theme_dark.config.json
@@ -30,7 +30,7 @@
"homepageMessage": "Find data you can count(*) on"
},
"search": {
"searchbarMessage": "Search Datasets, People, & more..."
"searchbarMessage": "Search Tables, Dashboards, People, & more..."
},
"menu": {
"items": [
@@ -52,4 +52,4 @@
]
}
}
}
}
4 changes: 2 additions & 2 deletions datahub-web-react/src/conf/theme/theme_light.config.json
@@ -33,7 +33,7 @@
"homepageMessage": "Find data you can count on"
},
"search": {
"searchbarMessage": "Search Datasets, People, & more..."
"searchbarMessage": "Search Tables, Dashboards, People, & more..."
},
"menu": {
"items": [
@@ -60,4 +60,4 @@
]
}
}
}
}
2 changes: 1 addition & 1 deletion docs/deploy/aws.md
@@ -100,7 +100,7 @@ eksctl create iamserviceaccount \
Install the TargetGroupBinding custom resource definition by running the following.

```
kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller//crds?ref=master"
kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller/crds?ref=master"
```

Add the helm chart repository containing the latest version of the ALB controller.
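
As a hedged sketch of that step (confirm the repository URL and chart name against the current AWS Load Balancer Controller documentation):

```shell
# Sketch: register the eks-charts repository that hosts the aws-load-balancer-controller chart.
helm repo add eks https://aws.github.io/eks-charts
helm repo update
```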
5 changes: 5 additions & 0 deletions docs/deploy/confluent-cloud.md
@@ -16,6 +16,11 @@ First, you'll need to create following new topics in the [Confluent Control Cent
6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages
7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages
8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event
9. **MetadataGraphEvent_v4**:
10. **MetadataGraphEvent_v4**:
11. **PlatformEvent_v1**
9. **MetadataGraphEvent_v4**:
10. **MetadataGraphEvent_v4**:
11. **PlatformEvent_v1**
12. **DataHubUpgradeHistory_v1**: Notifies the end of the DataHub Upgrade job so dependents can act accordingly (_e.g._, start up).
Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic (see the sketch just below).
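
As an illustration only (not part of the official setup steps), creating that topic with a single partition and infinite retention via the Confluent CLI might look like the sketch below; the subcommand and flag names are assumptions to verify against your CLI version:

```shell
# Sketch: create DataHubUpgradeHistory_v1 with 1 partition and infinite retention.
# retention.ms=-1 disables time-based retention; verify flag names for your Confluent CLI version.
confluent kafka topic create DataHubUpgradeHistory_v1 \
  --partitions 1 \
  --config retention.ms=-1
```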

The first five are the most important, and are explained in more depth in [MCP/MCL](../advanced/mcp-mcl.md). The final topics are
those which are deprecated but still used under certain circumstances. It is likely that in the future they will be completely
17 changes: 11 additions & 6 deletions docs/how/kafka-config.md
@@ -52,16 +52,21 @@ Also see [Kafka Connect Security](https://docs.confluent.io/current/connect/secu

By default, DataHub relies on a set of Kafka topics to operate. They have the following names:

- **MetadataChangeProposal_v1**
- **FailedMetadataChangeProposal_v1**
- **MetadataChangeLog_Versioned_v1**
- **MetadataChangeLog_Timeseries_v1**
- **DataHubUsageEvent_v1**: User behavior tracking event for UI
1. **MetadataChangeProposal_v1**
2. **FailedMetadataChangeProposal_v1**
3. **MetadataChangeLog_Versioned_v1**
4. **MetadataChangeLog_Timeseries_v1**
5. **DataHubUsageEvent_v1**: User behavior tracking event for UI
6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages
7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages
8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event
9. **MetadataGraphEvent_v4**:
10. **MetadataGraphEvent_v4**:
11. **PlatformEvent_v1**:
12. **DataHubUpgradeHistory_v1**: Notifies the end of the DataHub Upgrade job so dependents can act accordingly (_e.g._, start up).
Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic.

These topics are discussed at more length in [Metadata Events](../what/mxe.md).
How Metadata Events relate to these topics is discussed at more length in [Metadata Events](../what/mxe.md).

We've included environment variables to customize the name of each of these topics, for cases where an organization has naming rules for its topics.
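
For illustration only, an override on the DataHub containers could look like the sketch below; the variable names here are assumptions and should be confirmed against the topic-configuration reference in the DataHub documentation:

```shell
# Illustrative sketch: the variable names below are assumptions, not confirmed values.
export METADATA_CHANGE_PROPOSAL_TOPIC_NAME="acme.MetadataChangeProposal_v1"
export METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME="acme.MetadataChangeLog_Versioned_v1"
```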

72 changes: 72 additions & 0 deletions metadata-ingestion/docs/sources/athena/athena_pre.md
@@ -0,0 +1,72 @@
### Prerequisites

In order to execute this source, you will need to create a policy with the permissions below and attach it to the AWS role or credentials used in the ingestion recipe.

```json
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"athena:GetTableMetadata",
"athena:StartQueryExecution",
"athena:GetQueryResults",
"athena:GetDatabase",
"athena:ListDataCatalogs",
"athena:GetDataCatalog",
"athena:ListQueryExecutions",
"athena:GetWorkGroup",
"athena:StopQueryExecution",
"athena:GetQueryResultsStream",
"athena:ListDatabases",
"athena:GetQueryExecution",
"athena:ListTableMetadata",
"athena:BatchGetQueryExecution",
"glue:GetTables",
"glue:GetDatabases",
"glue:GetTable",
"glue:GetDatabase",
"glue:SearchTables",
"glue:GetTableVersions",
"glue:GetTableVersion",
"glue:GetPartition",
"glue:GetPartitions",
"s3:GetObject",
"s3:ListBucket",
"s3:GetBucketLocation",
],
"Resource": [
"arn:aws:athena:${region-id}:${account-id}:datacatalog/*",
"arn:aws:athena:${region-id}:${account-id}:workgroup/*",
"arn:aws:glue:${region-id}:${account-id}:tableVersion/*/*/*",
"arn:aws:glue:${region-id}:${account-id}:table/*/*",
"arn:aws:glue:${region-id}:${account-id}:catalog",
"arn:aws:glue:${region-id}:${account-id}:database/*",
"arn:aws:s3:::${datasets-bucket}",
"arn:aws:s3:::${datasets-bucket}/*"
]
},
{
"Sid": "VisualEditor1",
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:ListBucketMultipartUploads",
"s3:AbortMultipartUpload",
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:ListMultipartUploadParts"
],
"Resource": [
"arn:aws:s3:::${athena-query-result-bucket}/*",
"arn:aws:s3:::${athena-query-result-bucket}"
]
}
]
}
```

Replace `${var}` with the appropriate values for your Athena setup.
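
As a hedged illustration of creating and attaching such a policy with the AWS CLI (the policy name, role name, and file name below are placeholders, not values from the DataHub docs):

```shell
# Placeholder names throughout; substitute your own policy, role, and account id.
aws iam create-policy \
  --policy-name DataHubAthenaIngestion \
  --policy-document file://datahub-athena-policy.json

aws iam attach-role-policy \
  --role-name <ingestion-role> \
  --policy-arn "arn:aws:iam::<account-id>:policy/DataHubAthenaIngestion"
```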