-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rewrite BigQuery Loader from scratch
- Loading branch information
0 parents
commit 6566ddd
Showing
45 changed files
with
3,351 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
name: CI | ||
|
||
on: | ||
push: | ||
tags: | ||
- '*' | ||
pull_request: | ||
|
||
jobs: | ||
test: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- uses: coursier/cache-action@v6 | ||
- name: Set up JDK 11 | ||
uses: actions/setup-java@v1 | ||
with: | ||
java-version: 11 | ||
- name: Check Scala formatting | ||
run: sbt scalafmtCheckAll scalafmtSbtCheck | ||
- name: Run tests | ||
run: sbt test | ||
|
||
publish_docker: | ||
needs: test | ||
if: github.ref_type == 'tag' | ||
runs-on: ubuntu-latest | ||
strategy: | ||
matrix: | ||
app: | ||
- kafka | ||
- pubsub | ||
- kinesis | ||
steps: | ||
- name: Checkout Github | ||
uses: actions/checkout@v2 | ||
- uses: coursier/cache-action@v6 | ||
- name: Set up JDK 11 | ||
uses: actions/setup-java@v1 | ||
with: | ||
java-version: 11 | ||
- name: Docker login | ||
uses: docker/login-action@v1 | ||
with: | ||
username: ${{ secrets.DOCKER_USERNAME }} | ||
password: ${{ secrets.DOCKER_PASSWORD }} | ||
- name: Stage the Docker build | ||
run: sbt "project ${{ matrix.app}}" docker:stage | ||
- name: Stage the Docker Distroless build | ||
run: sbt "project ${{ matrix.app}}Distroless" docker:stage | ||
- name: Docker metadata | ||
id: meta | ||
uses: docker/metadata-action@v3 | ||
with: | ||
images: "snowplow/bigquery-loader-${{ matrix.app }}" | ||
tags: | | ||
type=raw,value=latest,enable=${{ !contains(github.ref_name, 'rc') }} | ||
type=raw,value=latest-focal,enable=${{ !contains(github.ref_name, 'rc') }} | ||
type=raw,value=${{ github.ref_name }} | ||
type=raw,value=${{ github.ref_name }}-focal | ||
flavor: | | ||
latest=false | ||
- name: Docker metadata distroless | ||
id: metaDistroless | ||
uses: docker/metadata-action@v3 | ||
with: | ||
images: "snowplow/bigquery-loader-${{ matrix.app }}" | ||
tags: | | ||
type=raw,value=latest-distroless,enable=${{ !contains(github.ref_name, 'rc') }} | ||
type=raw,value=${{ github.ref_name }}-distroless | ||
flavor: | | ||
latest=false | ||
- name: Set up QEMU | ||
uses: docker/setup-qemu-action@v1 | ||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v1 | ||
- name: Push image | ||
uses: docker/build-push-action@v2 | ||
with: | ||
context: modules/${{ matrix.app }}/target/docker/stage | ||
file: modules/${{ matrix.app }}/target/docker/stage/Dockerfile | ||
platforms: linux/amd64,linux/arm64/v8 | ||
tags: ${{ steps.meta.outputs.tags }} | ||
push: true | ||
- name: Push distroless image | ||
uses: docker/build-push-action@v2 | ||
with: | ||
context: modules/distroless/${{ matrix.app }}/target/docker/stage | ||
file: modules/distroless/${{ matrix.app }}/target/docker/stage/Dockerfile | ||
platforms: linux/amd64,linux/arm64/v8 | ||
tags: ${{ steps.metaDistroless.outputs.tags }} | ||
push: true | ||
- name: Build local image, which is needed to run Snyk | ||
# if: ${{ !contains(github.ref_name, 'rc') }} # TODO: uncomment before final release | ||
run: sbt "project ${{ matrix.app }}Distroless" docker:publishLocal | ||
- name: Run Snyk to check for vulnerabilities | ||
uses: snyk/actions/docker@master | ||
# if: ${{ !contains(github.ref_name, 'rc') }} # TODO: uncomment before final release | ||
with: | ||
image: "snowplow/bigquery-loader-${{ matrix.app }}:${{ github.ref_name }}-distroless" | ||
args: "--app-vulns --org=99605b41-ca0f-42c9-a9ff-45c201a10a26" | ||
command: monitor | ||
env: | ||
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
target/ | ||
lib/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
version = "3.6.0" | ||
runner.dialect = scala213 | ||
align.preset = none | ||
align.openParenCallSite = false | ||
align.arrowEnumeratorGenerator = true | ||
align.tokens = [ | ||
{ | ||
code = "=>" | ||
owners = [{ | ||
regex = "Case" | ||
}] | ||
}, | ||
{ | ||
code = "=" | ||
owners = [] | ||
}, | ||
{ | ||
code = "%" | ||
owners = [{ | ||
regex = "Term.ApplyInfix" | ||
}] | ||
}, | ||
{ | ||
code = "%%" | ||
owners = [{ | ||
regex = "Term.ApplyInfix" | ||
}] | ||
} | ||
] | ||
maxColumn = 140 | ||
docstrings.style = Asterisk | ||
docstrings.wrap = yes | ||
docstrings.wrapMaxColumn = 100 | ||
optIn.breakChainOnFirstMethodDot = true | ||
spaces.afterKeywordBeforeParen = true | ||
indent.callSite = 2 | ||
indent.defnSite = 2 | ||
verticalMultiline.atDefnSite = true | ||
verticalMultiline.arityThreshold = 3 | ||
verticalMultiline.newlineAfterOpenParen = true | ||
danglingParentheses.defnSite = true | ||
danglingParentheses.exclude = [] | ||
importSelectors = noBinPack | ||
rewrite.rules = [ | ||
Imports, | ||
RedundantBraces, | ||
RedundantParens, | ||
PreferCurlyFors | ||
] | ||
rewrite.imports.sort = ascii |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Snowplow Limited Use License Agreement | ||
|
||
_Version 1.0, January 2024_ | ||
|
||
This Snowplow Limited Use License Agreement, Version 1.0 (the “Agreement”) sets forth the terms on which Snowplow Analytics, Ltd. (“Snowplow”) makes available certain software (the “Software”). BY INSTALLING, DOWNLOADING, ACCESSING, OR USING ANY OF THE SOFTWARE, YOU AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE TO SUCH TERMS AND CONDITIONS, YOU MUST NOT USE THE SOFTWARE. IF YOU ARE RECEIVING THE SOFTWARE ON BEHALF OF A LEGAL ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT ON BEHALF OF SUCH ENTITY. “Licensee” means you, an individual, or the entity on whose behalf you are receiving the Software. | ||
|
||
## LICENSE GRANT AND CONDITIONS | ||
|
||
**1.1 License.** Subject to the terms and conditions of this Agreement, Snowplow hereby grants to Licensee a non-exclusive, royalty-free, worldwide, non-transferable, non-sublicensable license during the term of this Agreement to: (a) use the Software; (b) prepare modifications and derivative works of the Software; and (c) reproduce copies of the Software (the “License”). No right to distribute or make available the Software is granted under this License. Licensee is not granted the right to, and Licensee shall not, exercise the License for any Excluded Purpose. | ||
|
||
**1.2** For purposes of this Agreement, an “Excluded Purpose” is any use that is either a Competing Use or a Highly-Available Production Use, or both of them. | ||
|
||
* **1.2.1** A “Competing Use” is making available any on-premises or distributed software product, or any software-as-a-service, platform-as-a-service, infrastructure-as-a-service, or other similar online service, that competes with any products or services that Snowplow or any of its affiliates provides using the Software. | ||
|
||
* **1.2.2** Highly-Available Production Use is any highly-available use, including without limitation any use where multiple instances of any Software component run concurrently to avoid a single point of failure, in a production environment, where production means use on live data. | ||
|
||
**1.3 Conditions.** In consideration of the License, Licensee’s use of the Software is subject to the following conditions: | ||
|
||
* **a.** Licensee must cause any Software modified by Licensee to carry prominent notices stating that Licensee modified the Software. | ||
|
||
* **b.** On each Software copy, Licensee shall reproduce and not remove or alter all Snowplow or third party copyright or other proprietary notices contained in the Software, and Licensee must include the notice below on each copy. | ||
|
||
``` | ||
This software is made available by Snowplow Analytics, Ltd., | ||
under the terms of the Snowplow Limited Use License Agreement, Version 1.0 | ||
located at https://docs.snowplow.io/limited-use-license-1.0 | ||
BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION | ||
OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
``` | ||
|
||
**1.4 Licensee Modifications.** Licensee may add its own copyright notices to modifications made by Licensee. | ||
|
||
**1.5 No Sublicensing.** The License does not include the right to sublicense the Software, however, each recipient to which Licensee provides the Software may exercise the Licenses so long as such recipient agrees to the terms and conditions of this Agreement. | ||
|
||
## TERM AND TERMINATION | ||
|
||
This Agreement will continue unless and until earlier terminated as set forth herein. If Licensee breaches any of its conditions or obligations under this Agreement, this Agreement will terminate automatically and the License will terminate automatically and permanently. | ||
|
||
## INTELLECTUAL PROPERTY | ||
|
||
As between the parties, Snowplow will retain all right, title, and interest in the Software, and all intellectual property rights therein. Snowplow hereby reserves all rights not expressly granted to Licensee in this Agreement. Snowplow hereby reserves all rights in its trademarks and service marks, and no licenses therein are granted in this Agreement. | ||
|
||
## DISCLAIMER | ||
|
||
SNOWPLOW HEREBY DISCLAIMS ANY AND ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, AND SPECIFICALLY DISCLAIMS ANY WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WITH RESPECT TO THE SOFTWARE. | ||
|
||
## LIMITATION OF LIABILITY | ||
|
||
SNOWPLOW WILL NOT BE LIABLE FOR ANY DAMAGES OF ANY KIND, INCLUDING BUT NOT LIMITED TO LOST PROFITS OR ANY CONSEQUENTIAL, SPECIAL, INCIDENTAL, INDIRECT, OR DIRECT DAMAGES, HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, ARISING OUT OF THIS AGREEMENT. THE FOREGOING SHALL APPLY TO THE EXTENT PERMITTED BY APPLICABLE LAW. | ||
|
||
## GENERAL | ||
|
||
**6.1 Governing Law.** This Agreement will be governed by and interpreted in accordance with the laws of the state of Delaware, without reference to its conflict of laws principles. If Licensee is located within the United States, all disputes arising out of this Agreement are subject to the exclusive jurisdiction of courts located in Delaware, USA. If Licensee is located outside of the United States, any dispute, controversy or claim arising out of or relating to this Agreement will be referred to and finally determined by arbitration in accordance with the JAMS International Arbitration Rules. The tribunal will consist of one arbitrator. The place of arbitration will be in the State of Delaware, USA. The language to be used in the arbitral proceedings will be English. Judgment upon the award rendered by the arbitrator may be entered in any court having jurisdiction thereof. | ||
|
||
**6.2. Assignment.** Licensee is not authorized to assign its rights under this Agreement to any third party. Snowplow may freely assign its rights under this Agreement to any third party. | ||
|
||
**6.3. Other.** This Agreement is the entire agreement between the parties regarding the subject matter hereof. No amendment or modification of this Agreement will be valid or binding upon the parties unless made in writing and signed by the duly authorized representatives of both parties. In the event that any provision, including without limitation any condition, of this Agreement is held to be unenforceable, this Agreement and all licenses and rights granted hereunder will immediately terminate. Waiver by Snowplow of a breach of any provision of this Agreement or the failure by Snowplow to exercise any right hereunder will not be construed as a waiver of any subsequent breach of that right or as a waiver of any other right. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Snowplow BigQuery Loader | ||
|
||
[![Build Status][build-image]][build] | ||
[![Release][release-image]][releases] | ||
[![License][license-image]][license] | ||
|
||
## Introduction | ||
|
||
This project contains applications required to load Snowplow data into BigQuery with low latency. | ||
|
||
Check out [the example config files](./config) for how to configure your loader. | ||
|
||
#### Azure | ||
|
||
The Azure BigQuery loader reads the stream of enriched events from Event Hubs. | ||
|
||
Basic usage: | ||
|
||
```bash | ||
docker run \ | ||
-v /path/to/config.hocon:/var/config.hocon \ | ||
-v /path/to/iglu.json:/var/iglu.json \ | ||
snowplow/bigquery-loader-kafka:2.0.0 \ | ||
--config /var/config.hocon \ | ||
--iglu-config /var/iglu.json | ||
``` | ||
|
||
#### GCP | ||
|
||
The GCP BigQuery loader reads the stream of enriched events from Pub/Sub. | ||
|
||
```bash | ||
docker run \ | ||
-v /path/to/config.hocon:/var/config.hocon \ | ||
-v /path/to/iglu.json:/var/iglu.json \ | ||
snowplow/bigquery-loader-pubsub:2.0.0 \ | ||
--config /var/config.hocon \ | ||
--iglu-config /var/iglu.json | ||
``` | ||
|
||
#### AWS | ||
|
||
The AWS BigQuery loader reads the stream of enriched events from Kinesis. | ||
|
||
```bash | ||
docker run \ | ||
-v /path/to/config.hocon:/var/config.hocon \ | ||
-v /path/to/iglu.json:/var/iglu.json \ | ||
snowplow/bigquery-loader-kinesis:2.0.0 \ | ||
--config /var/config.hocon \ | ||
--iglu-config /var/iglu.json | ||
``` | ||
|
||
## Find out more | ||
|
||
| Technical Docs | Setup Guide | Roadmap & Contributing | | ||
|----------------------------|----------------------|------------------------| | ||
| ![i1][techdocs-image] | ![i2][setup-image] | ![i3][roadmap-image] | | ||
| [Technical Docs][techdocs] | [Setup Guide][setup] | [Roadmap][roadmap] | | ||
|
||
|
||
|
||
## Copyright and License | ||
|
||
Copyright (c) 2012-present Snowplow Analytics Ltd. All rights reserved. | ||
|
||
Licensed under the [Snowplow Limited Use License Agreement][license]. _(If you are uncertain how it applies to your use case, check our answers to [frequently asked questions][faq].)_ | ||
|
||
[techdocs-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png | ||
[setup-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png | ||
[roadmap-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/roadmap.png | ||
[setup]: https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/ | ||
[techdocs]: https://docs.snowplow.io/docs/pipeline-components-and-applications/loaders-storage-targets/bigquery-loader/ | ||
[roadmap]: https://github.com/snowplow/snowplow/projects/7 | ||
|
||
[build-image]: https://github.com/snowplow-incubator/snowplow-bigquery-loader/workflows/CI/badge.svg | ||
[build]: https://github.com/snowplow-incubator/snowplow-bigquery-loader/actions/workflows/ci.yml | ||
|
||
[release-image]: https://img.shields.io/badge/release-2.0.0-blue.svg?style=flat | ||
[releases]: https://github.com/snowplow-incubator/snowplow-bigquery-loader/releases | ||
|
||
[license]: https://docs.snowplow.io/limited-use-license-1.0 | ||
[license-image]: https://img.shields.io/badge/license-Snowplow--Limited-Use-blue.svg?style=flat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/** | ||
* Copyright (c) 2013-present Snowplow Analytics Ltd. All rights reserved. | ||
* | ||
* This software is made available by Snowplow Analytics, Ltd., under the terms of the Snowplow | ||
* Limited Use License Agreement, Version 1.0 located at | ||
* https://docs.snowplow.io/limited-use-license-1.0 BY INSTALLING, DOWNLOADING, ACCESSING, USING OR | ||
* DISTRIBUTING ANY PORTION OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
*/ | ||
|
||
// Root project located at the build's base directory. It only aggregates the other | ||
// projects: core, plus the regular and distroless variants of kafka, pubsub and kinesis. | ||
lazy val root = project | ||
.in(file(".")) | ||
.aggregate( | ||
core, | ||
kafka, | ||
kafkaDistroless, | ||
pubsub, | ||
pubsubDistroless, | ||
kinesis, | ||
kinesisDistroless | ||
) | ||
|
||
// Shared module under modules/core, configured from BuildSettings.coreSettings and | ||
// Dependencies.coreDependencies, with IgluSchemaPlugin enabled. Every app project below depends on it. | ||
lazy val core: Project = project | ||
.in(file("modules/core")) | ||
.settings(BuildSettings.coreSettings) | ||
.settings(libraryDependencies ++= Dependencies.coreDependencies) | ||
.enablePlugins(IgluSchemaPlugin) | ||
|
||
// Kafka application under modules/kafka. Depends on core and is packaged with | ||
// BuildInfoPlugin, JavaAppPackaging and SnowplowDockerPlugin. | ||
lazy val kafka: Project = project | ||
.in(file("modules/kafka")) | ||
.settings(BuildSettings.kafkaSettings) | ||
.settings(libraryDependencies ++= Dependencies.kafkaDependencies) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDockerPlugin) | ||
|
||
// Distroless variant of the kafka project, rooted at modules/distroless/kafka. Uses the same | ||
// settings and dependencies as kafka and points sourceDirectory at kafka's sources, but | ||
// enables SnowplowDistrolessDockerPlugin instead of SnowplowDockerPlugin. | ||
lazy val kafkaDistroless: Project = project | ||
.in(file("modules/distroless/kafka")) | ||
.settings(BuildSettings.kafkaSettings) | ||
.settings(libraryDependencies ++= Dependencies.kafkaDependencies) | ||
.settings(sourceDirectory := (kafka / sourceDirectory).value) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDistrolessDockerPlugin) | ||
|
||
// Pubsub application under modules/pubsub. Depends on core and is packaged with | ||
// BuildInfoPlugin, JavaAppPackaging and SnowplowDockerPlugin. | ||
lazy val pubsub: Project = project | ||
.in(file("modules/pubsub")) | ||
.settings(BuildSettings.pubsubSettings) | ||
.settings(libraryDependencies ++= Dependencies.pubsubDependencies) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDockerPlugin) | ||
|
||
// Distroless variant of the pubsub project, rooted at modules/distroless/pubsub. Uses the same | ||
// settings and dependencies as pubsub and points sourceDirectory at pubsub's sources, but | ||
// enables SnowplowDistrolessDockerPlugin instead of SnowplowDockerPlugin. | ||
lazy val pubsubDistroless: Project = project | ||
.in(file("modules/distroless/pubsub")) | ||
.settings(BuildSettings.pubsubSettings) | ||
.settings(libraryDependencies ++= Dependencies.pubsubDependencies) | ||
.settings(sourceDirectory := (pubsub / sourceDirectory).value) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDistrolessDockerPlugin) | ||
|
||
// Kinesis application under modules/kinesis. Depends on core and is packaged with | ||
// BuildInfoPlugin, JavaAppPackaging and SnowplowDockerPlugin. | ||
lazy val kinesis: Project = project | ||
.in(file("modules/kinesis")) | ||
.settings(BuildSettings.kinesisSettings) | ||
.settings(libraryDependencies ++= Dependencies.kinesisDependencies) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDockerPlugin) | ||
|
||
// Distroless variant of the kinesis project, rooted at modules/distroless/kinesis. Uses the same | ||
// settings and dependencies as kinesis and points sourceDirectory at kinesis's sources, but | ||
// enables SnowplowDistrolessDockerPlugin instead of SnowplowDockerPlugin. | ||
lazy val kinesisDistroless: Project = project | ||
.in(file("modules/distroless/kinesis")) | ||
.settings(BuildSettings.kinesisSettings) | ||
.settings(libraryDependencies ++= Dependencies.kinesisDependencies) | ||
.settings(sourceDirectory := (kinesis / sourceDirectory).value) | ||
.dependsOn(core) | ||
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDistrolessDockerPlugin) | ||
|
||
// Turns on sbt's `fork` setting for every project in the build, so forked tasks | ||
// (such as run and test) execute in a separate JVM rather than inside sbt's own. | ||
ThisBuild / fork := true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"license" { | ||
"accept": true | ||
} | ||
|
||
"input": { | ||
"topicName": "sp-dev-enriched" | ||
"bootstrapServers": "localhost:9092" | ||
} | ||
|
||
"output": { | ||
|
||
"good": { | ||
"project": "my-project" | ||
"dataset": "snowplow" | ||
} | ||
|
||
"bad": { | ||
"topicName": "sp-dev-bad" | ||
"bootstrapServers": "localhost:9092" | ||
} | ||
} | ||
} |
Oops, something went wrong.