diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cc48669a..b449ad675 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,10 +91,7 @@ jobs: run: echo "::set-output name=tag::${GITHUB_REF#refs/tags/}" - name: Assemble fat jars run: | - sbt "project streamKinesis; set assembly / test := {}; assembly" \ - "project streamKafka; set assembly / test := {}; assembly" \ - "project streamNsq; set assembly / test := {}; assembly" \ - "project pubsub; set assembly / test := {}; assembly" \ + sbt "project pubsub; set assembly / test := {}; assembly" \ "project kinesis; set assembly / test := {}; assembly" \ "project rabbitmq; set assembly / test := {}; assembly" \ "project kafka; set assembly / test := {}; assembly" \ @@ -107,9 +104,6 @@ jobs: name: ${{ steps.ver.outputs.tag }} tag_name: ${{ steps.ver.outputs.tag }} files: | - modules/stream/kinesis/target/scala-2.12/snowplow-stream-enrich-kinesis-${{ steps.ver.outputs.tag }}.jar - modules/stream/kafka/target/scala-2.12/snowplow-stream-enrich-kafka-${{ steps.ver.outputs.tag }}.jar - modules/stream/nsq/target/scala-2.12/snowplow-stream-enrich-nsq-${{ steps.ver.outputs.tag }}.jar modules/pubsub/target/scala-2.12/snowplow-enrich-pubsub-${{ steps.ver.outputs.tag }}.jar modules/kinesis/target/scala-2.12/snowplow-enrich-kinesis-${{ steps.ver.outputs.tag }}.jar modules/rabbitmq/target/scala-2.12/snowplow-enrich-rabbitmq-${{ steps.ver.outputs.tag }}.jar @@ -125,9 +119,6 @@ jobs: strategy: matrix: app: - - streamKinesis - - streamKafka - - streamNsq - pubsub - kinesis - kafka diff --git a/.github/workflows/lacework.yml b/.github/workflows/lacework.yml index 6c5e2d670..615dd4f31 100644 --- a/.github/workflows/lacework.yml +++ b/.github/workflows/lacework.yml @@ -60,48 +60,6 @@ jobs: LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} run: ./lw-scanner image evaluate snowplow/snowplow-enrich-kinesis ${{ steps.ver.outputs.tag }}-distroless --build-id ${{ github.run_id }} --no-pull - - name: Scan Stream Enrich Kinesis - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-kinesis ${{ steps.ver.outputs.tag }} --build-id ${{ github.run_id }} --no-pull - - - name: Scan Stream Enrich Kinesis distroless - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-kinesis ${{ steps.ver.outputs.tag }}-distroless --build-id ${{ github.run_id }} --no-pull - - - name: Scan Stream Enrich Kafka - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-kafka ${{ steps.ver.outputs.tag }} --build-id ${{ github.run_id }} --no-pull - - - name: Scan Stream Enrich Kafka distroless - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-kafka ${{ steps.ver.outputs.tag }}-distroless --build-id ${{ github.run_id }} --no-pull - - - name: Scan Stream Enrich NSQ - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-nsq ${{ steps.ver.outputs.tag }} --build-id ${{ github.run_id }} --no-pull - - - name: Scan Stream Enrich NSQ distroless - env: - LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} - LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }} - LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }} - run: ./lw-scanner image evaluate snowplow/stream-enrich-nsq ${{ steps.ver.outputs.tag }}-distroless --build-id ${{ github.run_id }} --no-pull - - name: Scan enrich-kafka env: LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }} diff --git a/build.sbt b/build.sbt index 7a9db72e2..5a0f7d5b0 100644 --- a/build.sbt +++ b/build.sbt @@ -23,7 +23,7 @@ lazy val root = project.in(file(".")) .settings(projectSettings) .settings(compilerSettings) .settings(resolverSettings) - .aggregate(common, commonFs2, pubsub, pubsubDistroless, kinesis, kinesisDistroless, streamCommon, streamKinesis, streamKinesisDistroless, streamKafka, streamKafkaDistroless, streamNsq, streamNsqDistroless, streamStdin, kafka, kafkaDistroless, rabbitmq, rabbitmqDistroless, nsq, nsqDistroless) + .aggregate(common, commonFs2, pubsub, pubsubDistroless, kinesis, kinesisDistroless, kafka, kafkaDistroless, rabbitmq, rabbitmqDistroless, nsq, nsqDistroless) lazy val common = project .in(file("modules/common")) @@ -31,72 +31,6 @@ lazy val common = project .settings(libraryDependencies ++= commonDependencies) .settings(excludeDependencies ++= exclusions) -lazy val streamCommon = project - .in(file("modules/stream/common")) - .enablePlugins(BuildInfoPlugin) - .settings(streamCommonBuildSettings) - .settings(libraryDependencies ++= streamCommonDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(common) - -lazy val streamKinesis = project - .in(file("modules/stream/kinesis")) - .enablePlugins(JavaAppPackaging, SnowplowDockerPlugin) - .settings(streamKinesisBuildSettings) - .settings(libraryDependencies ++= streamKinesisDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamKinesisDistroless = project - .in(file("modules/distroless/stream/kinesis")) - .enablePlugins(JavaAppPackaging, SnowplowDistrolessDockerPlugin) - .settings(sourceDirectory := (streamKinesis / sourceDirectory).value) - .settings(streamKinesisDistrolessBuildSettings) - .settings(libraryDependencies ++= streamKinesisDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamKafka = project - .in(file("modules/stream/kafka")) - .enablePlugins(JavaAppPackaging, SnowplowDockerPlugin) - .settings(streamKafkaBuildSettings) - .settings(libraryDependencies ++= streamKafkaDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamKafkaDistroless = project - .in(file("modules/distroless/stream/kafka")) - .enablePlugins(JavaAppPackaging, SnowplowDistrolessDockerPlugin) - .settings(sourceDirectory := (streamKafka / sourceDirectory).value) - .settings(streamKafkaDistrolessBuildSettings) - .settings(libraryDependencies ++= streamKafkaDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamNsq = project - .in(file("modules/stream/nsq")) - .enablePlugins(JavaAppPackaging, SnowplowDockerPlugin) - .settings(streamNsqBuildSettings) - .settings(libraryDependencies ++= streamNsqDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamNsqDistroless = project - .in(file("modules/distroless/stream/nsq")) - .enablePlugins(JavaAppPackaging, SnowplowDistrolessDockerPlugin) - .settings(sourceDirectory := (streamNsq / sourceDirectory).value) - .settings(streamNsqDistrolessBuildSettings) - .settings(libraryDependencies ++= streamNsqDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - -lazy val streamStdin = project - .in(file("modules/stream/stdin")) - .settings(streamStdinBuildSettings) - .settings(libraryDependencies ++= streamCommonDependencies) - .settings(excludeDependencies ++= exclusions) - .dependsOn(streamCommon) - lazy val commonFs2 = project .in(file("modules/common-fs2")) .enablePlugins(BuildInfoPlugin) diff --git a/config/stream-enrich-kafka.hocon b/config/stream-enrich-kafka.hocon deleted file mode 100644 index e8ab3395f..000000000 --- a/config/stream-enrich-kafka.hocon +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. -# -# This program is licensed to you under the Apache License Version 2.0, and -# you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the Apache License Version 2.0 is distributed on an "AS -# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. See the Apache License Version 2.0 for the specific language -# governing permissions and limitations there under. - -enrich { - - streams { - - in { - # Stream/topic where the raw events to be enriched are located - raw = {{streamsInRaw}} - raw = ${?ENRICH_STREAMS_IN_RAW} - } - - out { - # Stream/topic where the events that were successfully enriched will end up - enriched = {{outEnriched}} - enriched = ${?ENRICH_STREAMS_OUT_ENRICHED} - # Stream/topic where the event that failed enrichment will be stored - bad = {{outBad}} - bad = ${?ENRICH_STREAMS_OUT_BAD} - # Stream/topic where the pii tranformation events will end up - pii = {{outPii}} - pii = ${?ENRICH_STREAMS_OUT_PII} - - # How the output stream/topic will be partitioned. - # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, - # user_ipaddress, domain_sessionid, user_fingerprint. - # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the - # possible parittion keys correspond to. - # Otherwise, the partition key will be a random UUID. - partitionKey = {{partitionKeyName}} - partitionKey = ${?ENRICH_STREAMS_OUT_PARTITION_KEY} - } - - sourceSink { - enabled = kafka - - # Optional. Region where the streams are located - # region = {{region}} - # region = ${?ENRICH_STREAMS_SOURCE_SINK_REGION} - - # AWS credentials - # If both are set to 'default', use the default AWS credentials provider chain. - # If both are set to 'iam', use AWS IAM Roles to provision credentials. - # If both are set to 'env', use env variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY - # aws { - # accessKey = iam - # accessKey = ${?ENRICH_STREAMS_SOURCE_SINK_AWS_ACCESS_KEY} - # secretKey = iam - # secretKey = ${?ENRICH_STREAMS_SOURCE_SINK_AWS_SECRET_KEY} - # } - - # GCP credentials - # Either provide path to service account file or set environment variable GOOGLE_APPLICATION_CREDENTIALS - # gcp { - # creds = {{googleApplicationCredentials}} - # creds = ${?GOOGLE_APPLICATION_CREDENTIALS} - # } - - brokers = "{{kafkaBrokers}}" - - # Number of retries to perform before giving up on sending a record - retries = 0 - - # The kafka producer has a variety of possible configuration options defined at - # https://kafka.apache.org/documentation/#producerconfigs - # Some values are set to other values from this config by default: - # "bootstrap.servers" -> brokers - # retries -> retries - # "buffer.memory" -> buffer.byteLimit - # "linger.ms" -> buffer.timeLimit - #producerConf { - # acks = all - # "key.serializer" = "org.apache.kafka.common.serialization.StringSerializer" - # "value.serializer" = "org.apache.kafka.common.serialization.StringSerializer" - #} - - # The kafka consumer has a variety of possible configuration options defined at - # https://kafka.apache.org/documentation/#consumerconfigs - # Some values are set to other values from this config by default: - # "bootstrap.servers" -> brokers - # "group.id" -> appName - #consumerConf { - # "enable.auto.commit" = true - # "auto.commit.interval.ms" = 1000 - # "auto.offset.reset" = earliest - # "session.timeout.ms" = 30000 - # "key.deserializer" = "org.apache.kafka.common.serialization.StringDeserializer" - # "value.deserializer" = "org.apache.kafka.common.serialization.ByteArrayDeserializer" - #} - } - - # After enrichment, events are accumulated in a buffer before being sent to Kafka. - # The buffer is emptied whenever: - # - the number of stored records reaches recordLimit or - # - the combined size of the stored records reaches byteLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit when - # a new event enters the buffer - buffer { - byteLimit = {{bufferByteThreshold}} - byteLimit = ${?ENRICH_STREAMS_BUFFER_BYTE_LIMIT} - recordLimit = {{bufferRecordThreshold}} # Not supported by Kafka; will be ignored - recordLimit = ${?ENRICH_STREAMS_BUFFER_RECORD_LIMIT} - timeLimit = {{bufferTimeThreshold}} - timeLimit = ${?ENRICH_STREAMS_BUFFER_TIME_LIMIT} - } - - # Used as the Kafka consumer group ID. - appName = "{{appName}}" - appName = ${?ENRICH_STREAMS_APP_NAME} - } - - # The setting below requires an adapter being ready, i.e.: https://github.com/snowplow-incubator/remote-adapter-example - # remoteAdapters = [ - # { - # vendor: "com.globeandmail" - # version: "v1" - # url: "http://remote-adapter-example:8995/sampleRemoteAdapter" - # connectionTimeout: 1000 - # readTimeout: 5000 - # } - # ] - - # Optional section for tracking endpoints - monitoring { - snowplow { - collectorUri = "{{collectorUri}}" - collectorUri = ${?ENRICH_MONITORING_COLLECTOR_URI} - collectorPort = 80 - collectorPort = ${?ENRICH_MONITORING_COLLECTOR_PORT} - appId = {{enrichAppName}} - appId = ${?ENRICH_MONITORING_APP_ID} - method = GET - method = ${?ENRICH_MONITORING_METHOD} - } - } - - # Optional section for Sentry - sentry { - dsn = ${?SENTRY_DSN} - } -} diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala index e61f594fe..edbb44efb 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala @@ -16,7 +16,6 @@ import org.openjdk.jmh.annotations._ import java.util.concurrent.TimeUnit -import cats.Id import cats.data.Validated import cats.effect.{IO, Clock} diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Assets.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Assets.scala index fd293ba88..82b86a74d 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Assets.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Assets.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2020-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -33,7 +33,7 @@ import fs2.io.file.{exists, move, readAll, tempFileResource, writeAll} import org.typelevel.log4cats.Logger import org.typelevel.log4cats.slf4j.Slf4jLogger -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, HttpClient, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, ShiftExecution} import com.snowplowanalytics.snowplow.enrich.common.fs2.io.Clients @@ -243,7 +243,7 @@ object Assets { _ <- Logger[F].info("Reinitializing enrichments") old <- enrichments.get - fresh <- old.reinitialize(BlockerF.ofBlocker(blocker), shifter) + fresh <- old.reinitialize(blocker, shifter) _ <- enrichments.set(fresh) } yield () diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala index c32efd30f..741de2a03 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2020-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -39,7 +39,7 @@ import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.RemoteAdap import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, HttpClient, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, ShiftExecution} import com.snowplowanalytics.snowplow.enrich.common.fs2.config.{ConfigFile, ParsedConfigs} import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{ @@ -134,17 +134,20 @@ object Environment { Slf4jLogger.getLogger[F] /** Registry with all allocated clients (MaxMind, IAB etc) and their original configs */ - final case class Enrichments[F[_]: Clock](registry: EnrichmentRegistry[F], configs: List[EnrichmentConf]) { + final case class Enrichments[F[_]: Async: Clock: ContextShift: HttpClient]( + registry: EnrichmentRegistry[F], + configs: List[EnrichmentConf] + ) { /** Initialize same enrichments, specified by configs (in case DB files updated) */ - def reinitialize(blocker: BlockerF[F], shifter: ShiftExecution[F])(implicit A: Async[F], C: HttpClient[F]): F[Enrichments[F]] = + def reinitialize(blocker: Blocker, shifter: ShiftExecution[F]): F[Enrichments[F]] = Enrichments.buildRegistry(configs, blocker, shifter).map(registry => Enrichments(registry, configs)) } object Enrichments { - def make[F[_]: Async: Clock: HttpClient]( + def make[F[_]: Async: Clock: ContextShift: HttpClient]( configs: List[EnrichmentConf], - blocker: BlockerF[F], + blocker: Blocker, shifter: ShiftExecution[F] ): Resource[F, Ref[F, Enrichments[F]]] = Resource.eval { @@ -154,9 +157,9 @@ object Environment { } yield ref } - def buildRegistry[F[_]: Async: HttpClient: Clock]( + def buildRegistry[F[_]: Async: Clock: ContextShift: HttpClient]( configs: List[EnrichmentConf], - blocker: BlockerF[F], + blocker: Blocker, shifter: ShiftExecution[F] ) = EnrichmentRegistry.build[F](configs, blocker, shifter).value.flatMap { @@ -204,7 +207,7 @@ object Environment { shifter <- ShiftExecution.ofSingleThread[F] enrichments <- { implicit val C: Http4sClient[F] = http - Enrichments.make[F](parsedConfigs.enrichmentConfigs, BlockerF.ofBlocker(blocker), shifter) + Enrichments.make[F](parsedConfigs.enrichmentConfigs, blocker, shifter) } } yield Environment[F, A]( igluClient, @@ -297,7 +300,7 @@ object Environment { ): Resource[F, (Option[Http4sClient[F]], Map[(String, String), RemoteAdapter])] = { val preparedRemoteAdapters = remoteAdapters.configs.map { config => - (config.vendor, config.version) -> RemoteAdapter(config.url, None, None) + (config.vendor, config.version) -> RemoteAdapter(config.url) }.toMap if (preparedRemoteAdapters.nonEmpty) Clients diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/AssetsSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/AssetsSpec.scala index 931bb3735..3643af7d7 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/AssetsSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/AssetsSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2020-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -27,7 +27,7 @@ import cats.effect.{Blocker, IO, Resource} import cats.effect.concurrent.Semaphore import cats.effect.testing.specs2.CatsIO -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.ShiftExecution import com.snowplowanalytics.snowplow.enrich.common.fs2.test._ import org.http4s.client.{Client => Http4sClient} @@ -122,16 +122,17 @@ class AssetsSpec extends Specification with CatsIO with ScalaCheck { val resources = for { blocker <- Blocker[IO] + shiftExecution <- ShiftExecution.ofSingleThread sem <- Resource.eval(Semaphore[IO](1L)) - enrichments <- Environment.Enrichments.make[IO](List(), BlockerF.noop, ShiftExecution.noop) + enrichments <- Environment.Enrichments.make[IO](List(), blocker, shiftExecution) _ <- SpecHelpers.filesResource(blocker, TestFiles) - } yield (blocker, sem, enrichments) + } yield (blocker, shiftExecution, sem, enrichments) val update = Stream .resource(resources) .flatMap { - case (blocker, sem, enrichments) => - Assets.updateStream[IO](blocker, ShiftExecution.noop, sem, state, enrichments, 1.second, List(uri -> filename)) + case (blocker, shift, sem, enrichments) => + Assets.updateStream[IO](blocker, shift, sem, state, enrichments, 1.second, List(uri -> filename)) } .haltAfter(2.second) diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala index 4bda2ff67..8c173019f 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2022-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -17,8 +17,7 @@ import scala.concurrent.ExecutionContext import org.specs2.mutable.Specification -import cats.effect.Blocker -import cats.effect.IO +import cats.effect.{Blocker, IO, Resource} import cats.effect.testing.specs2.CatsIO @@ -41,7 +40,7 @@ import com.snowplowanalytics.iglu.client.resolver.registries.Registry import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.ShiftExecution import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry @@ -98,21 +97,24 @@ object BlackBoxTesting extends Specification with CatsIO { ) = createIgluClient(List(Registry.EmbeddedRegistry)) .flatMap { igluClient => - Enrich - .enrichWith(getEnrichmentRegistry(enrichmentConfig, igluClient), - TestEnvironment.adapterRegistry, - igluClient, - None, - EnrichSpec.processor, - featureFlags, - IO.unit - )( - input - ) - .map { - case (List(Validated.Valid(enriched)), _) => checkEnriched(enriched, expected) - case other => ko(s"there should be one enriched event but got $other") - } + getEnrichmentRegistry(enrichmentConfig, igluClient).use { registry => + Enrich + .enrichWith( + IO.pure(registry), + TestEnvironment.adapterRegistry, + igluClient, + None, + EnrichSpec.processor, + featureFlags, + IO.unit + )( + input + ) + .map { + case (List(Validated.Valid(enriched)), _) => checkEnriched(enriched, expected) + case other => ko(s"there should be one enriched event but got $other") + } + } } private def checkEnriched(enriched: EnrichedEvent, expectedFields: Map[String, String]) = { @@ -135,27 +137,31 @@ object BlackBoxTesting extends Specification with CatsIO { private def getMap(enriched: EnrichedEvent): Map[String, String] = enrichedFields.map(f => (f.getName(), Option(f.get(enriched)).map(_.toString).getOrElse(""))).toMap - private def getEnrichmentRegistry(enrichmentConfig: Option[Json], igluClient: IgluCirceClient[IO]): IO[EnrichmentRegistry[IO]] = - enrichmentConfig match { - case None => - IO.pure(EnrichmentRegistry[IO]()) - case Some(json) => - val enrichmentsSchemaKey = - SchemaKey("com.snowplowanalytics.snowplow", "enrichments", "jsonschema", SchemaVer.Full(1, 0, 0)) - val enrichmentsJson = SelfDescribingData(enrichmentsSchemaKey, Json.arr(json)).asJson - for { - parsed <- EnrichmentRegistry.parse[IO](enrichmentsJson, igluClient, true) - confs <- parsed match { - case Invalid(e) => IO.raiseError(new IllegalArgumentException(s"can't parse enrichmentsJson: $e")) - case Valid(list) => IO.pure(list) - } - built <- EnrichmentRegistry.build[IO](confs, BlockerF.noop, ShiftExecution.noop).value - registry <- built match { - case Left(e) => IO.raiseError(new IllegalArgumentException(s"can't build EnrichmentRegistry: $e")) - case Right(r) => IO.pure(r) - } - } yield registry - } + private def getEnrichmentRegistry(enrichmentConfig: Option[Json], igluClient: IgluCirceClient[IO]): Resource[IO, EnrichmentRegistry[IO]] = + for { + shift <- ShiftExecution.ofSingleThread[IO] + registry = enrichmentConfig match { + case None => + IO.pure(EnrichmentRegistry[IO]()) + case Some(json) => + val enrichmentsSchemaKey = + SchemaKey("com.snowplowanalytics.snowplow", "enrichments", "jsonschema", SchemaVer.Full(1, 0, 0)) + val enrichmentsJson = SelfDescribingData(enrichmentsSchemaKey, Json.arr(json)).asJson + for { + parsed <- EnrichmentRegistry.parse[IO](enrichmentsJson, igluClient, true) + confs <- parsed match { + case Invalid(e) => IO.raiseError(new IllegalArgumentException(s"can't parse enrichmentsJson: $e")) + case Valid(list) => IO.pure(list) + } + built <- EnrichmentRegistry.build[IO](confs, blocker, shift).value + registry <- built match { + case Left(e) => IO.raiseError(new IllegalArgumentException(s"can't build EnrichmentRegistry: $e")) + case Right(r) => IO.pure(r) + } + } yield registry + } + resource <- Resource.eval(registry) + } yield resource private val featureFlags = FeatureFlags(acceptInvalid = false, legacyEnrichmentOrder = false, tryBase64Decoding = false) } diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala index cdad0618b..1ebba125c 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2020-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -29,6 +29,9 @@ import cats.effect.testing.specs2.CatsIO import fs2.Stream +import org.http4s.client.{Client => Http4sClient} +import org.http4s.dsl.Http4sDsl + import io.circe.parser import com.snowplowanalytics.iglu.client.resolver.registries.{Http4sRegistryLookup, Registry} @@ -40,15 +43,13 @@ import com.snowplowanalytics.snowplow.badrows.BadRow import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.ShiftExecution import com.snowplowanalytics.snowplow.enrich.common.fs2.{Assets, AttributedData, Enrich, EnrichSpec, Environment} import com.snowplowanalytics.snowplow.enrich.common.fs2.Environment.{Enrichments, StreamsSettings} import com.snowplowanalytics.snowplow.enrich.common.fs2.SpecHelpers.{createIgluClient, filesResource, ioClock} import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, Telemetry} import com.snowplowanalytics.snowplow.enrich.common.fs2.io.Clients -import org.http4s.client.{Client => Http4sClient} -import org.http4s.dsl.Http4sDsl case class TestEnvironment[A]( env: Environment[IO, A], @@ -105,10 +106,18 @@ object TestEnvironment extends CatsIO { val logger: Logger[IO] = Slf4jLogger.getLogger[IO] + val http4sClient: Http4sClient[IO] = Http4sClient[IO] { _ => + val dsl = new Http4sDsl[IO] {} + import dsl._ + Resource.eval(Ok("")) + } + val enrichmentReg: EnrichmentRegistry[IO] = EnrichmentRegistry[IO]() - val enrichments: Environment.Enrichments[IO] = + val enrichments: Environment.Enrichments[IO] = { + implicit val httpClient = http4sClient Environment.Enrichments(enrichmentReg, Nil) + } val ioBlocker: Resource[IO, Blocker] = Blocker[IO] @@ -116,11 +125,6 @@ object TestEnvironment extends CatsIO { val adapterRegistry = new AdapterRegistry() - val http4sClient: Http4sClient[IO] = Http4sClient[IO] { _ => - val dsl = new Http4sDsl[IO] {}; import dsl._ - Resource.eval(Ok("")) - } - /** * A dummy test environment without enrichment and with noop sinks and sources * One can replace stream and sinks via `.copy` @@ -139,8 +143,8 @@ object TestEnvironment extends CatsIO { assetsState <- Resource.eval(Assets.State.make(blocker, sem, clients, enrichments.flatMap(_.filesToCache))) shifter <- ShiftExecution.ofSingleThread enrichmentsRef <- { - implicit val client: Http4sClient[IO] = http - Enrichments.make[IO](enrichments, BlockerF.ofBlocker(blocker), shifter) + implicit val httpClient = http + Enrichments.make[IO](enrichments, blocker, shifter) } goodRef <- Resource.eval(Ref.of[IO, Vector[AttributedData[Array[Byte]]]](Vector.empty)) piiRef <- Resource.eval(Ref.of[IO, Vector[AttributedData[Array[Byte]]]](Vector.empty)) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala index 608d50112..ea9c8b622 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -34,13 +34,9 @@ import Adapter.Adapted /** * An adapter for an enrichment that is handled by a remote webservice. * @param remoteUrl the url of the remote webservice, e.g. http://localhost/myEnrichment - * @param connectionTimeout max duration of each connection attempt - * @param readTimeout max duration of read wait time */ final case class RemoteAdapter( - remoteUrl: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] + remoteUrl: String ) extends Adapter { /** @@ -63,7 +59,7 @@ final case class RemoteAdapter( "body" := payload.body ) HttpClient[F] - .getResponse(remoteUrl, None, None, Some(json.noSpaces), "POST", connectionTimeout, readTimeout) + .getResponse(remoteUrl, None, None, Some(json.noSpaces), "POST") .map(processResponse(payload, _).toValidatedNel) case _ => val msg = s"empty body: not a valid remote adapter $remoteUrl payload" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala index 34291d0cd..0e88919d7 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -15,7 +15,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} -import cats.effect.Clock +import cats.effect.{Async, Blocker, Clock, ContextShift} import cats.implicits._ import io.circe._ @@ -27,14 +27,9 @@ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.iglu.client.IgluCirceClient import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.forex.CreateForex -import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups -import com.snowplowanalytics.refererparser.CreateParser -import com.snowplowanalytics.weather.providers.openweather.CreateOWM - import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf._ -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, CirceUtils, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, HttpClient, ShiftExecution} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.ApiRequestEnrichment import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.PiiPseudonymizerEnrichment @@ -105,11 +100,9 @@ object EnrichmentRegistry { } yield configs).toValidated // todo: ValidatedNel? - def build[ - F[_]: Monad: CreateForex: CreateIabClient: CreateIpLookups: CreateOWM: CreateParser: CreateUaParserEnrichment: sqlquery.CreateSqlQueryEnrichment: apirequest.CreateApiRequestEnrichment - ]( + def build[F[_]: Async: Clock: ContextShift: HttpClient]( confs: List[EnrichmentConf], - blocker: BlockerF[F], + blocker: Blocker, shifter: ShiftExecution[F] ): EitherT[F, String, EnrichmentRegistry[F]] = confs.foldLeft(EitherT.pure[F, String](EnrichmentRegistry[F]())) { (er, e) => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala index ea53de495..fbac729a7 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -17,6 +17,8 @@ import java.net.URI import cats.{Functor, Monad} import cats.data.EitherT +import cats.effect.{Async, Blocker, Clock, ContextShift} + import org.joda.money.CurrencyUnit import com.snowplowanalytics.iglu.core.SchemaKey @@ -27,13 +29,9 @@ import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups import com.snowplowanalytics.refererparser.CreateParser import com.snowplowanalytics.weather.providers.openweather.CreateOWM -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ - ApiRequestEnrichment, - CreateApiRequestEnrichment, - HttpApi -} -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.{CreateSqlQueryEnrichment, Rdbms, SqlQueryEnrichment} -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, ShiftExecution} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ApiRequestEnrichment, HttpApi} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.{Rdbms, SqlQueryEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, ShiftExecution} sealed trait EnrichmentConf { @@ -57,8 +55,8 @@ object EnrichmentConf { cache: apirequest.Cache, ignoreOnError: Boolean ) extends EnrichmentConf { - def enrichment[F[_]: CreateApiRequestEnrichment]: F[ApiRequestEnrichment[F]] = - ApiRequestEnrichment[F](this) + def enrichment[F[_]: Async: Clock: HttpClient]: F[ApiRequestEnrichment[F]] = + ApiRequestEnrichment.create[F](this) } final case class PiiPseudonymizerConf( @@ -80,8 +78,8 @@ object EnrichmentConf { cache: SqlQueryEnrichment.Cache, ignoreOnError: Boolean ) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateSqlQueryEnrichment](blocker: BlockerF[F], shifter: ShiftExecution[F]): F[SqlQueryEnrichment[F]] = - SqlQueryEnrichment[F](this, blocker, shifter) + def enrichment[F[_]: Async: Clock: ContextShift](blocker: Blocker, shifter: ShiftExecution[F]): F[SqlQueryEnrichment[F]] = + SqlQueryEnrichment.create[F](this, blocker, shifter) } final case class AnonIpConf( @@ -165,7 +163,7 @@ object EnrichmentConf { ) extends EnrichmentConf { override val filesToCache: List[(URI, String)] = List(geoFile, ispFile, domainFile, connectionTypeFile).flatten - def enrichment[F[_]: Functor: CreateIpLookups](blocker: BlockerF[F]): F[IpLookupsEnrichment[F]] = + def enrichment[F[_]: ContextShift: CreateIpLookups: Functor](blocker: Blocker): F[IpLookupsEnrichment[F]] = IpLookupsEnrichment[F](this, blocker) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala index 52a29d7aa..a2a38402e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala @@ -15,7 +15,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.io.File import java.net.{InetAddress, URI} -import cats.{Id, Monad} +import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} import cats.effect.Sync @@ -188,16 +188,6 @@ object CreateIabClient { new IabClient(new File(ipFile), new File(excludeUaFile), new File(includeUaFile)) } } - - implicit def idCreateIabClient: CreateIabClient[Id] = - new CreateIabClient[Id] { - def create( - ipFile: String, - excludeUaFile: String, - includeUaFile: String - ): Id[IabClient] = - new IabClient(new File(ipFile), new File(excludeUaFile), new File(includeUaFile)) - } } /** Case class copy of `com.snowplowanalytics.iab.spidersandrobotsclient.IabResponse` */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala index 341c88eaf..1bd6b9b10 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -18,6 +18,8 @@ import cats.Functor import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ +import cats.effect.{Blocker, ContextShift} + import io.circe._ import inet.ipaddr.HostName @@ -28,7 +30,7 @@ import com.snowplowanalytics.maxmind.iplookups._ import com.snowplowanalytics.maxmind.iplookups.model._ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.IpLookupsConf -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, CirceUtils} +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create an IpLookupsEnrichment instance from a Json. */ object IpLookupsEnrichment extends ParseableEnrichment { @@ -99,7 +101,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { * @param conf Configuration for the ip lookups enrichment * @return an ip lookups enrichment */ - def apply[F[_]: Functor: CreateIpLookups](conf: IpLookupsConf, blocker: BlockerF[F]): F[IpLookupsEnrichment[F]] = + def apply[F[_]: ContextShift: CreateIpLookups: Functor](conf: IpLookupsConf, blocker: Blocker): F[IpLookupsEnrichment[F]] = CreateIpLookups[F] .createFromFilenames( conf.geoFile.map(_._2), @@ -118,7 +120,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { * @param ipLookups IP lookups client * @param blocker Runs db lookups on a separate thread pool */ -final case class IpLookupsEnrichment[F[_]](ipLookups: IpLookups[F], blocker: BlockerF[F]) extends Enrichment { +final case class IpLookupsEnrichment[F[_]: ContextShift](ipLookups: IpLookups[F], blocker: Blocker) extends Enrichment { /** * Extract the geo-location using the client IP address. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index 5041da503..1a2ea3246 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -14,11 +14,10 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apireq import cats.Monad import cats.data.{EitherT, NonEmptyList, Validated, ValidatedNel} -import cats.effect.Clock +import cats.effect.{Async, Clock} import cats.implicits._ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} -import com.snowplowanalytics.lrumap._ import com.snowplowanalytics.snowplow.badrows.FailureDetails import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.ApiRequestEnrichment.ApiRequestEvaluator @@ -94,8 +93,26 @@ object ApiRequestEnrichment extends ParseableEnrichment { UUID.nameUUIDFromBytes(contentKey.getBytes).toString } - def apply[F[_]: CreateApiRequestEnrichment](conf: ApiRequestConf): F[ApiRequestEnrichment[F]] = - CreateApiRequestEnrichment[F].create(conf) + def create[F[_]: Async: Clock: HttpClient](conf: ApiRequestConf): F[ApiRequestEnrichment[F]] = { + val cacheConfig = CachingEvaluator.Config( + size = conf.cache.size, + successTtl = conf.cache.ttl, + errorTtl = conf.cache.ttl / 10 + ) + + CachingEvaluator + .create[F, String, Json](cacheConfig) + .map { evaluator => + ApiRequestEnrichment( + conf.schemaKey, + conf.inputs, + conf.api, + conf.outputs, + evaluator, + conf.ignoreOnError + ) + } + } } final case class ApiRequestEnrichment[F[_]: Monad: HttpClient: Clock]( @@ -205,37 +222,3 @@ final case class ApiRequestEnrichment[F[_]: Monad: HttpClient: Clock]( FailureDetails.EnrichmentFailure(enrichmentInfo, message) } } - -sealed trait CreateApiRequestEnrichment[F[_]] { - def create(conf: ApiRequestConf): F[ApiRequestEnrichment[F]] -} - -object CreateApiRequestEnrichment { - def apply[F[_]](implicit ev: CreateApiRequestEnrichment[F]): CreateApiRequestEnrichment[F] = ev - - implicit def instance[F[_]: Monad: HttpClient: Clock]( - implicit CLM: CreateLruMap[F, String, CachingEvaluator.CachedItem[Json]] - ): CreateApiRequestEnrichment[F] = - new CreateApiRequestEnrichment[F] { - def create(conf: ApiRequestConf): F[ApiRequestEnrichment[F]] = { - val cacheConfig = CachingEvaluator.Config( - size = conf.cache.size, - successTtl = conf.cache.ttl, - errorTtl = conf.cache.ttl / 10 - ) - - CachingEvaluator - .create[F, String, Json](cacheConfig) - .map { evaluator => - ApiRequestEnrichment( - conf.schemaKey, - conf.inputs, - conf.api, - conf.outputs, - evaluator, - conf.ignoreOnError - ) - } - } - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/HttpApi.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/HttpApi.scala index 2367f37ba..2f2965ffc 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/HttpApi.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/HttpApi.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -62,9 +62,7 @@ final case class HttpApi( authUser = authUser, authPassword = authPassword, body, - method, - None, - None + method ) /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala deleted file mode 100644 index 985964fe9..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery - -import cats.Monad -import cats.effect.Clock -import cats.implicits._ -import com.snowplowanalytics.iglu.core.SelfDescribingData -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CachingEvaluator -import com.zaxxer.hikari.HikariDataSource -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, ResourceF, ShiftExecution} -import io.circe.Json - -import scala.collection.immutable.IntMap - -/** Initialize resources, necessary for SQL Query enrichment: cache and connection */ -sealed trait CreateSqlQueryEnrichment[F[_]] { - def create( - conf: SqlQueryConf, - blocker: BlockerF[F], - shifter: ShiftExecution[F] - ): F[SqlQueryEnrichment[F]] -} - -object CreateSqlQueryEnrichment { - - def apply[F[_]](implicit ev: CreateSqlQueryEnrichment[F]): CreateSqlQueryEnrichment[F] = ev - - implicit def createSqlQueryEnrichment[F[_]: DbExecutor: Monad: ResourceF: Clock]( - implicit CLM: SqlCacheInit[F] - ): CreateSqlQueryEnrichment[F] = - new CreateSqlQueryEnrichment[F] { - def create( - conf: SqlQueryConf, - blocker: BlockerF[F], - shifter: ShiftExecution[F] - ): F[SqlQueryEnrichment[F]] = { - val cacheConfig = CachingEvaluator.Config( - size = conf.cache.size, - successTtl = conf.cache.ttl, - errorTtl = conf.cache.ttl / 10 - ) - - CachingEvaluator - .create[F, IntMap[Input.ExtractedValue], List[SelfDescribingData[Json]]](cacheConfig) - .map { evaluator => - SqlQueryEnrichment( - conf.schemaKey, - conf.inputs, - conf.db, - conf.query, - conf.output, - evaluator, - blocker, - shifter, - getDataSource(conf.db), - conf.ignoreOnError - ) - } - } - } - - private def getDataSource(rdbms: Rdbms): HikariDataSource = { - val source = new HikariDataSource() - source.setJdbcUrl(rdbms.connectionString) - source.setMaximumPoolSize(1) // see https://github.com/snowplow/enrich/issues/549 - source - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala index 7ad325aa1..0ad69c4aa 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,30 +12,27 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery +import scala.collection.immutable.IntMap + import java.sql.{Connection, PreparedStatement, ResultSet, ResultSetMetaData} import javax.sql.DataSource -import scala.collection.mutable.ListBuffer -import scala.util.control.NonFatal - import io.circe.Json -import cats.{Id, Monad} +import cats.Monad import cats.data.EitherT -import cats.effect.{Bracket, Sync} import cats.implicits._ -import com.snowplowanalytics.snowplow.enrich.common.utils.BlockerF -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.Input.ExtractedValue +import cats.effect.{Async, Blocker, Bracket, ContextShift, Resource, Sync} -import scala.collection.immutable.IntMap +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.Input.ExtractedValue // DbExecutor must have much smaller interface, ideally without any JDBC types /** Side-effecting ability to connect to database */ trait DbExecutor[F[_]] { /** Get a connection from the Hikari data source */ - def getConnection(dataSource: DataSource, blocker: BlockerF[F]): F[Either[Throwable, Connection]] + def getConnection(dataSource: DataSource, blocker: Blocker): Resource[F, Connection] /** Execute a SQL query */ def execute(query: PreparedStatement): EitherT[F, Throwable, ResultSet] @@ -77,14 +74,14 @@ trait DbExecutor[F[_]] { object DbExecutor { - // TYPE CLASS - def apply[F[_]](implicit ev: DbExecutor[F]): DbExecutor[F] = ev - implicit def syncDbExecutor[F[_]: Sync]: DbExecutor[F] = + def async[F[_]: Async: ContextShift]: DbExecutor[F] = sync[F] + + def sync[F[_]: ContextShift: Sync]: DbExecutor[F] = new DbExecutor[F] { - def getConnection(dataSource: DataSource, blocker: BlockerF[F]): F[Either[Throwable, Connection]] = - blocker.blockOn(Sync[F].delay(Either.catchNonFatal(dataSource.getConnection()))) + def getConnection(dataSource: DataSource, blocker: Blocker): Resource[F, Connection] = + Resource.fromAutoCloseable(blocker.blockOn(Sync[F].delay(dataSource.getConnection()))) def execute(query: PreparedStatement): EitherT[F, Throwable, ResultSet] = Sync[F].delay(query.executeQuery()).attemptT @@ -126,50 +123,6 @@ object DbExecutor { } - implicit def idDbExecutor: DbExecutor[Id] = - new DbExecutor[Id] { - def getConnection(dataSource: DataSource, blocker: BlockerF[Id]): Either[Throwable, Connection] = - Either.catchNonFatal(dataSource.getConnection()) - - def execute(query: PreparedStatement): EitherT[Id, Throwable, ResultSet] = - EitherT[Id, Throwable, ResultSet](Either.catchNonFatal(query.executeQuery())) - - def convert(resultSet: ResultSet, names: JsonOutput.PropertyNameMode): EitherT[Id, Throwable, List[Json]] = - EitherT( - try { - val buffer = ListBuffer.empty[EitherT[Id, Throwable, Json]] - while (resultSet.next()) - buffer += transform[Id](resultSet, names)(this, Monad[Id]) - val parsedJsons = buffer.result().sequence - resultSet.close() - parsedJsons.value - } catch { - case NonFatal(error) => error.asLeft - } - ) - - def getMetaData(rs: ResultSet): EitherT[Id, Throwable, ResultSetMetaData] = - Either.catchNonFatal(rs.getMetaData).toEitherT[Id] - - def getColumnCount(rsMeta: ResultSetMetaData): EitherT[Id, Throwable, Int] = - Either.catchNonFatal(rsMeta.getColumnCount).toEitherT[Id] - - def getColumnLabel(column: Int, rsMeta: ResultSetMetaData): EitherT[Id, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnLabel(column)).toEitherT[Id] - - def getColumnType(column: Int, rsMeta: ResultSetMetaData): EitherT[Id, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnClassName(column)).toEitherT[Id] - - def getColumnValue( - datatype: String, - columnIdx: Int, - rs: ResultSet - ): EitherT[Id, Throwable, Json] = - EitherT[Id, Throwable, Json](for { - value <- Either.catchNonFatal(rs.getObject(columnIdx)).map(Option.apply) - } yield value.map(JsonOutput.getValue(_, datatype)).getOrElse(Json.Null)) - } - /** * Transform fetched from DB row (as ResultSet) into JSON object * All column names are mapped to object keys using propertyNames @@ -242,6 +195,6 @@ object DbExecutor { if (intMap.keys.size == placeholderCount) true else false } - def getConnection[F[_]: Monad: DbExecutor](dataSource: DataSource, blocker: BlockerF[F]): F[Either[Throwable, Connection]] = + def getConnection[F[_]: Monad: DbExecutor](dataSource: DataSource, blocker: Blocker): Resource[F, Connection] = DbExecutor[F].getConnection(dataSource, blocker) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala index 4e10a7576..93f70fe0e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,22 +12,29 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery -import cats.Monad +import java.sql.Connection +import javax.sql.DataSource + +import scala.collection.immutable.IntMap + +import com.zaxxer.hikari.HikariDataSource + +import io.circe._ +import io.circe.generic.semiauto._ + import cats.data.{EitherT, NonEmptyList, Validated, ValidatedNel} -import cats.effect.Clock import cats.implicits._ + +import cats.effect.{Async, Blocker, Clock, ContextShift} + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} + import com.snowplowanalytics.snowplow.badrows.FailureDetails + import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{CachingEvaluator, Enrichment, ParseableEnrichment} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, CirceUtils, ResourceF, ShiftExecution} -import io.circe._ -import io.circe.generic.semiauto._ -import org.slf4j.LoggerFactory - -import java.sql.Connection -import javax.sql.DataSource +import com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, ShiftExecution} /** Lets us create an SqlQueryConf from a Json */ object SqlQueryEnrichment extends ParseableEnrichment { @@ -82,13 +89,6 @@ object SqlQueryEnrichment extends ParseableEnrichment { ).mapN(SqlQueryConf(schemaKey, _, _, _, _, _, _)).toEither }.toValidated - def apply[F[_]: CreateSqlQueryEnrichment]( - conf: SqlQueryConf, - blocker: BlockerF[F], - ec: ShiftExecution[F] - ): F[SqlQueryEnrichment[F]] = - CreateSqlQueryEnrichment[F].create(conf, blocker, ec) - /** Just a string with SQL, not escaped */ final case class Query(sql: String) extends AnyVal @@ -100,6 +100,45 @@ object SqlQueryEnrichment extends ParseableEnrichment { implicit val cacheCirceDecoder: Decoder[Cache] = deriveDecoder[Cache] + + def create[F[_]: Async: Clock: ContextShift]( + conf: SqlQueryConf, + blocker: Blocker, + shifter: ShiftExecution[F] + ): F[SqlQueryEnrichment[F]] = { + val cacheConfig = CachingEvaluator.Config( + size = conf.cache.size, + successTtl = conf.cache.ttl, + errorTtl = conf.cache.ttl / 10 + ) + + val executor: DbExecutor[F] = DbExecutor.async[F] + + CachingEvaluator + .create[F, IntMap[Input.ExtractedValue], List[SelfDescribingData[Json]]](cacheConfig) + .map { evaluator => + SqlQueryEnrichment( + conf.schemaKey, + conf.inputs, + conf.db, + conf.query, + conf.output, + evaluator, + executor, + blocker, + shifter, + getDataSource(conf.db), + conf.ignoreOnError + ) + } + } + + private def getDataSource(rdbms: Rdbms): HikariDataSource = { + val source = new HikariDataSource() + source.setJdbcUrl(rdbms.connectionString) + source.setMaximumPoolSize(1) // see https://github.com/snowplow/enrich/issues/549 + source + } } /** @@ -112,14 +151,15 @@ object SqlQueryEnrichment extends ParseableEnrichment { * @param cache actual mutable LRU cache * @param blocker Allows running blocking enrichments on a dedicated thread pool */ -final case class SqlQueryEnrichment[F[_]: Monad: DbExecutor: ResourceF: Clock]( +final case class SqlQueryEnrichment[F[_]: Async: Clock]( schemaKey: SchemaKey, inputs: List[Input], db: Rdbms, query: SqlQueryEnrichment.Query, output: Output, sqlQueryEvaluator: SqlQueryEvaluator[F], - blocker: BlockerF[F], + dbExecutor: DbExecutor[F], + blocker: Blocker, shifter: ShiftExecution[F], dataSource: DataSource, ignoreOnError: Boolean @@ -127,8 +167,6 @@ final case class SqlQueryEnrichment[F[_]: Monad: DbExecutor: ResourceF: Clock]( private val enrichmentInfo = FailureDetails.EnrichmentInformation(schemaKey, "sql-query").some - private val logger = LoggerFactory.getLogger(getClass) - /** * Primary function of the enrichment. Failure means connection failure, failed unexpected * JSON-value, etc. Successful Nil skipped lookup (unfilled placeholder for eg, empty response) @@ -171,13 +209,10 @@ final case class SqlQueryEnrichment[F[_]: Monad: DbExecutor: ResourceF: Clock]( EitherT.rightT(Nil) } - private def runLookup(intMap: Input.ExtractedValueMap): F[Either[Throwable, List[SelfDescribingData[Json]]]] = { - val eitherT = for { - connection <- EitherT(DbExecutor.getConnection[F](dataSource, blocker)) - result <- EitherT(ResourceF[F].use(connection)(closeConnection)(maybeRunWithConnection(_, intMap))) - } yield result - eitherT.value - } + private def runLookup(intMap: Input.ExtractedValueMap): F[Either[Throwable, List[SelfDescribingData[Json]]]] = + dbExecutor.getConnection(dataSource, blocker).use { connection => + maybeRunWithConnection(connection, intMap) + } // We now have a connection. But time has passed since we last checked the cache, and another // fiber might have run a query while we were waiting for the connection. So we check the cache @@ -202,8 +237,8 @@ final case class SqlQueryEnrichment[F[_]: Monad: DbExecutor: ResourceF: Clock]( case true => for { sqlQuery <- DbExecutor.createStatement(connection, query.sql, intMap).toEitherT[F] - resultSet <- DbExecutor[F].execute(sqlQuery) - context <- DbExecutor[F].convert(resultSet, output.json.propertyNames) + resultSet <- dbExecutor.execute(sqlQuery) + context <- dbExecutor.convert(resultSet, output.json.propertyNames) result <- output.envelope(context).toEitherT[F] } yield result } @@ -230,12 +265,4 @@ final case class SqlQueryEnrichment[F[_]: Monad: DbExecutor: ResourceF: Clock]( val message = FailureDetails.EnrichmentFailureMessage.Simple(error) FailureDetails.EnrichmentFailure(enrichmentInfo, message) } - - private def closeConnection(connection: Connection): Unit = - Either.catchNonFatal(connection.close()) match { - case Left(err) => - logger.error("Can't close the connection", err) - case _ => - () - } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CljTomcatLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CljTomcatLoader.scala deleted file mode 100644 index cc1780e32..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CljTomcatLoader.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import java.nio.charset.StandardCharsets.UTF_8 -import java.time.Instant - -import cats.data.ValidatedNel -import cats.implicits._ -import com.snowplowanalytics.snowplow.badrows._ - -import utils.ConversionUtils - -/** - * The dedicated loader for events collected by the Clojure Collector running on Tomcat. The - * format started as an approximation of the CloudFront format, but has now diverged as - * we add support for POST payloads. - */ -object CljTomcatLoader extends Loader[String] { - // The encoding used on these logs - private val CollectorEncoding = UTF_8 - - // The name of this collector - private val CollectorName = "clj-tomcat" - - // Define the regular expression for extracting the fields - // Adapted and evolved from the Clojure Collector's regular expression - private val CljTomcatRegex = { - val w = "[\\s]+" // Whitespace regex - val ow = "(?:" + w // Non-capturing optional whitespace begins - - // Our regex follows. Try debuggex.com if it doesn't make sense - ("^([\\S]+)" + // Date / date - w + "([\\S]+)" + // Time / time - w + "(-)" + // - / x-edge-location added for consistency with CloudFront - w + "([\\S]+)" + // BytesSent / sc-bytes - w + "([\\S]+)" + // IPAddress / c-ip - w + "([\\S]+)" + // Operation / cs-method - w + "([\\S]+)" + // Domain / cs(Host) - w + "([\\S]+)" + // Object / cs-uri-stem - w + "([\\S]+)" + // HttpStatus / sc-status - w + "([\\S]+)" + // Referer / cs(Referer) - w + "([\\S]+)" + // UserAgent / cs(User Agent) - w + "([\\S]+)" + // Querystring / cs-uri-query - ow + "-" + // - / cs(Cookie) added for consistency with CloudFront - w + "-" + // - / x-edge-result-type added for consistency with CloudFront - w + "-)?" + // - / x-edge-request-id added for consistency with CloudFront - ow + "([\\S]+)?" + // ContentType / POST support - w + "([\\S]+)?)?$").r // PostBody / POST support - } - - /** - * Converts the source string into a ValidatedMaybeCollectorPayload. - * @param line A line of data to convert - * @return either a set of validation errors or an Option-boxed CanonicalInput object, wrapped - * in a ValidatedNel. - */ - override def toCollectorPayload(line: String, processor: Processor): ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]] = { - def build( - qs: String, - date: String, - time: String, - ip: String, - ua: String, - refr: String, - path: String, - ct: Option[String], - bdy: Option[String] - ): ValidatedNel[FailureDetails.CPFormatViolationMessage, Option[CollectorPayload]] = { - val querystring = parseQuerystring(toOption(qs), CollectorEncoding) - val timestamp = toTimestamp(date, time) - val contentType = ct - .traverse(enc => ConversionUtils.decodeString(CollectorEncoding, enc)) - .leftMap { m => - FailureDetails.CPFormatViolationMessage.InputData("contentType", ct, m) - } - val body = bdy - .traverse(ConversionUtils.decodeBase64Url) - .leftMap { m => - FailureDetails.CPFormatViolationMessage.InputData("body", bdy, m) - } - val collectorApi = CollectorPayload.parseApi(path) - - ( - timestamp.toValidatedNel, - querystring.toValidatedNel, - collectorApi.toValidatedNel, - contentType.toValidatedNel, - body.toValidatedNel - ).mapN { (t, q, a, c, b) => - val source = CollectorPayload.Source(CollectorName, CollectorEncoding.toString, None) - val context = - CollectorPayload.Context(Some(t), toOption(ip), toOption(ua), toOption(refr), Nil, None) - CollectorPayload(a, q, c, b, source, context).some - } - } - - val collectorPayload = line match { - // A. For a request, to CljTomcat collector <= v0.6.0 - case CljTomcatRegex(date, time, _, _, ip, _, _, objct, _, refr, ua, qs, null, null) => - // API, content type and request body all unavailable - build(qs, date, time, ip, ua, refr, objct, None, None) - // B: For a request without body and potentially a content type, to CljTomcat collector >= v0.7.0 - - // B.1 No body or content type - // TODO: really we ought to be matching on "-", not-"-" and not-"-", "-" as well - case CljTomcatRegex(date, time, _, _, ip, _, _, objct, _, refr, ua, qs, "-", "-") => - // API, content type and request body all unavailable - build(qs, date, time, ip, ua, refr, objct, None, None) - - // B.2 No body but has content type - case CljTomcatRegex(date, time, _, _, ip, _, _, objct, _, refr, ua, qs, ct, "-") => - // API and request body unavailable - build(qs, date, time, ip, ua, refr, objct, ct.some, None) - - // C: For a request with content type and/or body, to CljTomcat collector >= v0.7.0 - // C.1 Not a POST request - case CljTomcatRegex(_, _, _, _, _, op, _, _, _, _, _, _, _, _) if op.toUpperCase != "POST" => - val msg = "operation must be POST if content type and/or body are provided" - FailureDetails.CPFormatViolationMessage - .InputData("verb", op.toUpperCase().some, msg) - .invalidNel - - // C.2 A POST, let's check we can discern API format - // TODO: we should check for nulls/"-"s for ct and body below - case CljTomcatRegex(date, time, _, _, ip, _, _, objct, _, refr, ua, qs, ct, bdy) => - build(qs, date, time, ip, ua, refr, objct, ct.some, bdy.some) - - // D. Row not recognised - case _ => - FailureDetails.CPFormatViolationMessage - .Fallback("does not match the raw event format") - .invalidNel - } - - collectorPayload.leftMap( - _.map(f => - BadRow.CPFormatViolation( - processor, - Failure.CPFormatViolation(Instant.now(), CollectorName, f), - Payload.RawPayload(line) - ) - ) - ) - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CloudfrontLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CloudfrontLoader.scala deleted file mode 100644 index 1df69a318..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CloudfrontLoader.scala +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import java.nio.charset.StandardCharsets.UTF_8 -import java.time.Instant - -import scala.util.matching.Regex - -import cats.data.ValidatedNel -import cats.implicits._ -import com.snowplowanalytics.snowplow.badrows._ - -import utils.ConversionUtils.singleEncodePcts - -/** - * The dedicated loader for events collected by CloudFront. - * We support the following CloudFront access log formats: - * 1. Pre-12 Sep 2012 - * 2. 12 Sep 2012 - 21 Oct 2013 - * 3. 21 Oct 2013 - 29 Apr 2014 - * 4. Potential future updates, provided they are solely additive in nature - * For more details on this format, please see: - * http://docs.amazonwebservices.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#LogFileFormat - */ -object CloudfrontLoader extends Loader[String] { - // The encoding used on CloudFront logs - private val CollectorEncoding = UTF_8 - - // The name of this collector - private val CollectorName = "cloudfront" - - private val originalFields = List( - "([\\S]+)", // Date / date - "([\\S]+)", // Time / time - "([\\S]+)", // EdgeLocation / x-edge-location - "([\\S]+)", // BytesSent / sc-bytes - "([\\S]+)", // IPAddress / c-ip - "([\\S]+)", // Operation / cs-method - "([\\S]+)", // Domain / cs(Host) - "([\\S]+)", // Object / cs-uri-stem - "([\\S]+)", // HttpStatus / sc-status - "([\\S]+)", // Referer / cs(Referer) - "([\\S]+)", // UserAgent / cs(User Agent) - "([\\S]+)" // Querystring / cs-uri-query - ) - private val fields12Sep2012 = originalFields ++ List( - "[\\S]*", // CookieHeader / cs(Cookie) added 12 Sep 2012 // TODO: why the *? - "[\\S]+", // ResultType / x-edge-result-type added 12 Sep 2012 - "[\\S]+" // X-Amz-Cf-Id / x-edge-request-id added 12 Sep 2012 - ) - private val fields21Oct2013 = fields12Sep2012 ++ List( - "[\\S]+", // XHostHeader / x-host-header added 21 Oct 2013 - "[\\S]+", // CsProtocol / cs-protocol added 21 Oct 2013 - "[\\S]+" // CsBytes / cs-bytes added 21 Oct 2013 - ) - private val fields29Apr2014 = fields21Oct2013 ++ List( - "[\\S]+" // TimeTaken / time-taken added 29 Apr 2014 - ) - private val fields01Jul2014 = fields29Apr2014 ++ List( - "([\\S]+)", // ForwardedFor / x-forwarded-for added 01 Jul 2014 - "[\\S]+", // SslProtocol / ssl-protocol added 01 Jul 2014 - "[\\S]+", // SslCipher / ssl-cipher added 01 Jul 2014 - "[\\S]+" // EdgeResResult / x-edge-response-result-type added 01 Jul 2014 - ) - - private val CfOriginalPlusAdditionalRegex = toRegex(originalFields, additionalFields = true) - private val CfOriginalRegex = toRegex(originalFields) - private val Cf12Sep2012Regex = toRegex(fields12Sep2012) - private val Cf21Oct2013Regex = toRegex(fields21Oct2013) - private val Cf29Apr2014Regex = toRegex(fields29Apr2014) - private val Cf01Jul2014Regex = toRegex(fields01Jul2014, additionalFields = true) - - /** - * Converts the source string into a ValidatedMaybeCollectorPayload. - * @param line A line of data to convert - * @return either a set of validation errors or an Option-boxed CanonicalInput object, wrapped - * in a ValidatedNel. - */ - override def toCollectorPayload(line: String, processor: Processor): ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]] = - (line match { - // 1. Header row - case h if h.startsWith("#Version:") || h.startsWith("#Fields:") => None.valid - // 2. Not a GET request - case CfOriginalPlusAdditionalRegex(_, _, _, _, _, op, _, _, _, _, _, _) if op.toUpperCase != "GET" => - val msg = "operation must be GET" - FailureDetails.CPFormatViolationMessage - .InputData("verb", op.toUpperCase().some, msg) - .invalidNel - // 3. Row matches original CloudFront format - case CfOriginalRegex(date, time, _, _, ip, _, _, objct, _, rfr, ua, qs) => - CloudfrontLogLine(date, time, ip, objct, rfr, ua, qs).toValidatedMaybeCollectorPayload - case Cf12Sep2012Regex(date, time, _, _, ip, _, _, objct, _, rfr, ua, qs) => - CloudfrontLogLine(date, time, ip, objct, rfr, ua, qs).toValidatedMaybeCollectorPayload - case Cf21Oct2013Regex(date, time, _, _, ip, _, _, objct, _, rfr, ua, qs) => - CloudfrontLogLine(date, time, ip, objct, rfr, ua, qs).toValidatedMaybeCollectorPayload - case Cf29Apr2014Regex(date, time, _, _, ip, _, _, objct, _, rfr, ua, qs) => - CloudfrontLogLine(date, time, ip, objct, rfr, ua, qs).toValidatedMaybeCollectorPayload - case Cf01Jul2014Regex(date, time, _, _, ip, _, _, objct, _, rfr, ua, qs, forwardedFor) => - CloudfrontLogLine(date, time, ip, objct, rfr, ua, qs, forwardedFor).toValidatedMaybeCollectorPayload - // 4. Row not recognised - case _ => - FailureDetails.CPFormatViolationMessage - .Fallback("does not match header or data row formats") - .invalidNel - }).leftMap( - _.map(f => - BadRow.CPFormatViolation( - processor, - Failure.CPFormatViolation(Instant.now(), CollectorName, f), - Payload.RawPayload(line) - ) - ) - ) - - /** - * 'Cleans' a string to make it parsable by URLDecoder.decode. - * The '%' character seems to be appended to the end of some URLs in the CloudFront logs, causing - * Exceptions when using URLDecoder.decode. Perhaps a CloudFront bug? - * @param uri The String to clean - * @return the cleaned string - */ - private[loaders] def toCleanUri(uri: String): String = - uri - .foldLeft((new StringBuilder, 1)) { - case ((acc, cnt), c) => - if (cnt == uri.length() && c == '%') (acc, cnt) - else (acc.append(c), cnt + 1) - } - ._1 - .toString() - - private def toRegex(fields: List[String], additionalFields: Boolean = false): Regex = { - val whitespaceRegex = "[\\s]+" - if (additionalFields) - fields.mkString("", whitespaceRegex, ".*").r - else - fields.mkString(whitespaceRegex).r - } - - private case class CloudfrontLogLine( - date: String, - time: String, - lastIp: String, - path: String, - rfr: String, - ua: String, - qs: String, - forwardedFor: String = "-" - ) { - def toValidatedMaybeCollectorPayload: ValidatedNel[FailureDetails.CPFormatViolationMessage, Option[CollectorPayload]] = { - val timestamp = toTimestamp(date, time) - val querystring = - parseQuerystring(toOption(singleEncodePcts(qs)), CollectorEncoding) - - // No validation (yet) on the below - val ip = IpAddressExtractor.extractIpAddress(forwardedFor, lastIp) - val userAgent = singleEncodePcts(ua) - val refr = singleEncodePcts(rfr) - val referer = toOption(refr) map toCleanUri - - val collectorApi = CollectorPayload.parseApi(path) - - (timestamp.toValidatedNel, querystring.toValidatedNel, collectorApi.toValidatedNel).mapN { (t, q, a) => - val source = CollectorPayload.Source(CollectorName, CollectorEncoding.toString, None) - val context = - CollectorPayload.Context(Some(t), toOption(ip), toOption(userAgent), referer, Nil, None) - CollectorPayload(a, q, None, None, source, context).some - } - } - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/Loader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/Loader.scala index ad72ae90b..3dff88533 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/Loader.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/Loader.scala @@ -90,39 +90,4 @@ abstract class Loader[T] { msg ) } - - /** - * Checks whether a String field is a hyphen "-", which is used by CloudFront to signal a null. - * @param field The field to check - * @return True if the String was a hyphen "-" - */ - private[loaders] def toOption(field: String): Option[String] = - Option(field) match { - case Some("-") => None - case Some("") => None - case s => s // Leaves any other Some(x) or None as-is - } -} - -/** Companion object to the CollectorLoader. Contains factory methods. */ -object Loader { - private val TsvRegex = "^tsv/(.*)$".r - private val NdjsonRegex = "^ndjson/(.*)$".r - - /** - * Factory to return a CollectorLoader based on the supplied collector identifier (e.g. - * "cloudfront" or "clj-tomcat"). - * @param collectorOrProtocol Identifier for the event collector - * @return either a CollectorLoader object or an an error message - */ - def getLoader(collectorOrProtocol: String): Either[String, Loader[_]] = - collectorOrProtocol match { - case "cloudfront" => CloudfrontLoader.asRight - case "clj-tomcat" => CljTomcatLoader.asRight - // a data protocol rather than a piece of software - case "thrift" => ThriftLoader.asRight - case TsvRegex(f) => TsvLoader(f).asRight - case NdjsonRegex(f) => NdjsonLoader(f).asRight - case c => s"[$c] is not a recognised Snowplow event collector".asLeft - } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/NdjsonLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/NdjsonLoader.scala deleted file mode 100644 index bb0822066..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/NdjsonLoader.scala +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import java.time.Instant - -import cats.data.ValidatedNel -import cats.syntax.either._ -import cats.syntax.option._ - -import com.snowplowanalytics.snowplow.badrows._ - -import utils.JsonUtils - -final case class NdjsonLoader(adapter: String) extends Loader[String] { - - private val CollectorName = "ndjson" - private val CollectorEncoding = "UTF-8" - - /** - * Converts the source string into a CanonicalInput. - * @param line A line of data to convert - * @return a CanonicalInput object, Option-boxed, or None if no input was extractable. - */ - override def toCollectorPayload(line: String, processor: Processor): ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]] = { - val collectorPayload = - if (line.replaceAll("\r?\n", "").isEmpty) - None.asRight - else if (line.split("\r?\n").length > 1) - FailureDetails.CPFormatViolationMessage - .Fallback(s"expected a single line, found ${line.split("\r?\n").length}") - .asLeft - else - for { - _ <- JsonUtils - .extractJson(line) - .leftMap(FailureDetails.CPFormatViolationMessage.Fallback.apply) - api <- CollectorPayload.parseApi(adapter) - source = CollectorPayload.Source(CollectorName, CollectorEncoding, None) - context = CollectorPayload.Context(None, None, None, None, Nil, None) - payload = CollectorPayload(api, Nil, None, Some(line), source, context) - } yield payload.some - - collectorPayload - .leftMap(message => - BadRow.CPFormatViolation( - processor, - Failure.CPFormatViolation(Instant.now(), CollectorName, message), - Payload.RawPayload(line) - ) - ) - .toValidatedNel - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/TsvLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/TsvLoader.scala deleted file mode 100644 index 3e14fbf59..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/TsvLoader.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import java.time.Instant - -import cats.data.ValidatedNel -import cats.syntax.either._ -import cats.syntax.option._ -import cats.syntax.validated._ -import com.snowplowanalytics.snowplow.badrows._ - -/** Loader for TSVs */ -final case class TsvLoader(adapter: String) extends Loader[String] { - private val CollectorName = "tsv" - private val CollectorEncoding = "UTF-8" - - /** - * Converts the source TSV into a ValidatedMaybeCollectorPayload. - * @param line A TSV - * @return either a set of validation errors or an Option-boxed CanonicalInput object, wrapped in - * a ValidatedNel. - */ - override def toCollectorPayload(line: String, processor: Processor): ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]] = - // Throw away the first two lines of Cloudfront web distribution access logs - if (line.startsWith("#Version:") || line.startsWith("#Fields:")) - None.valid - else - CollectorPayload - .parseApi(adapter) - .map { api => - val source = CollectorPayload.Source(CollectorName, CollectorEncoding, None) - val context = CollectorPayload.Context(None, None, None, None, Nil, None) - CollectorPayload(api, Nil, None, Some(line), source, context).some - } - .leftMap(f => - BadRow.CPFormatViolation( - processor, - Failure.CPFormatViolation(Instant.now(), CollectorName, f), - Payload.RawPayload(line) - ) - ) - .toValidatedNel -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/BlockerF.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/BlockerF.scala deleted file mode 100644 index b4bb60ee8..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/BlockerF.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common.utils - -import cats.effect.{Blocker, ContextShift} - -/** - * An execution context that is safe to use for blocking operations - * - * BlockerF is similar to a cats.effect.Blocker, except that `blockOn` does not require an implicit - * ContextShift; instead, the ContextShift is bound to the instance of the trait when it is - * constructed. - * - * This is a bit of a hack... but it allows us to define a BlockerF[Id], which is a requirement for - * non-fs2 apps. - */ -trait BlockerF[F[_]] { - - def blockOn[A](f: F[A]): F[A] - -} - -object BlockerF { - - def ofBlocker[F[_]: ContextShift](blocker: Blocker): BlockerF[F] = - new BlockerF[F] { - override def blockOn[A](f: F[A]): F[A] = - blocker.blockOn(f) - } - - def noop[F[_]]: BlockerF[F] = - new BlockerF[F] { - override def blockOn[A](f: F[A]): F[A] = - f - } - -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala index f169aecbd..b28ac7d96 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,15 +12,13 @@ */ package com.snowplowanalytics.snowplow.enrich.common.utils -import scala.util.control.NonFatal -import cats.{Applicative, Id} +import cats.Applicative import cats.effect.Sync import cats.implicits._ import fs2.Stream import org.http4s.client.{Client => Http4sClient} import org.http4s.headers.Authorization import org.http4s.{BasicCredentials, EmptyBody, EntityBody, Header, Headers, Method, Request, Status, Uri} -import scalaj.http._ trait HttpClient[F[_]] { def getResponse( @@ -28,9 +26,7 @@ trait HttpClient[F[_]] { authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] + method: String ): F[Either[Throwable, String]] } @@ -47,20 +43,12 @@ object HttpClient { implicit def syncHttpClient[F[_]: Sync](implicit http4sClient: Http4sClient[F]): HttpClient[F] = new HttpClient[F] { - /** - * Only uri, method and body are used for syncHttpClient - * Other parameters exist for compatibility with Id instance - * and they aren't used here - * Corresponding configurations come from http4s client configuration - */ override def getResponse( uri: String, authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] + method: String ): F[Either[Throwable, String]] = Uri.fromString(uri) match { case Left(parseFailure) => @@ -84,91 +72,4 @@ object HttpClient { .handleError(_.asLeft[String]) } } - - implicit val idHttpClient: HttpClient[Id] = - new HttpClient[Id] { - override def getResponse( - uri: String, - authUser: Option[String], - authPassword: Option[String], - body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): Id[Either[Throwable, String]] = - getBody( - buildRequest( - uri, - authUser, - authPassword, - body, - method, - connectionTimeout, - readTimeout - ) - ) - } - - // The defaults are from scalaj library - val DEFAULT_CONNECTION_TIMEOUT_MS = 1000 - val DEFAULT_READ_TIMEOUT_MS = 5000 - - /** - * Blocking method to get body of HTTP response - * @param request assembled request object - * @return validated body of HTTP request - */ - private def getBody(request: HttpRequest): Either[Throwable, String] = - try { - val res = request.asString - if (res.isSuccess) res.body.asRight - else new Exception(s"Request failed with status ${res.code} and body ${res.body}").asLeft - } catch { - case NonFatal(e) => e.asLeft - } - - /** - * Build HTTP request object - * @param uri full URI to request - * @param authUser optional username for basic auth - * @param authPassword optional password for basic auth - * @param body optional request body - * @param method HTTP method - * @param connectionTimeout connection timeout, if not set default is 1000ms - * @param readTimeout read timeout, if not set default is 5000ms - * @return HTTP request - */ - def buildRequest( - uri: String, - authUser: Option[String], - authPassword: Option[String], - body: Option[String], - method: String = "GET", - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): HttpRequest = { - val req: HttpRequest = Http(uri).method(method).maybeTimeout(connectionTimeout, readTimeout) - req.maybeAuth(authUser, authPassword).maybePostData(body) - } - - implicit class RichHttpRequest(request: HttpRequest) { - - def maybeAuth(user: Option[String], password: Option[String]): HttpRequest = - if (user.isDefined || password.isDefined) - request.auth(user.getOrElse(""), password.getOrElse("")) - else request - - def maybeTimeout(connectionTimeout: Option[Long], readTimeout: Option[Long]): HttpRequest = - (connectionTimeout, readTimeout) match { - case (Some(ct), Some(rt)) => request.timeout(ct.toInt, rt.toInt) - case (Some(ct), None) => request.timeout(ct.toInt, DEFAULT_READ_TIMEOUT_MS) - case (None, Some(rt)) => request.timeout(DEFAULT_CONNECTION_TIMEOUT_MS, rt.toInt) - case _ => request.timeout(DEFAULT_CONNECTION_TIMEOUT_MS, DEFAULT_READ_TIMEOUT_MS) - } - - def maybePostData(body: Option[String]): HttpRequest = - body - .map(data => request.postData(data).header("content-type", "application/json").header("accept", "*/*")) - .getOrElse(request) - } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ResourceF.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ResourceF.scala deleted file mode 100644 index 98c80374a..000000000 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ResourceF.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common.utils - -import cats.Id - -import cats.effect.{Resource, Sync} - -trait ResourceF[F[_]] { - def use[A, B](resource: A)(release: A => Unit)(use: A => F[B]): F[B] -} - -object ResourceF { - - def apply[F[_]](implicit ev: ResourceF[F]): ResourceF[F] = ev - - implicit def syncResource[F[_]: Sync]: ResourceF[F] = - new ResourceF[F] { - def use[A, B](resource: A)(release: A => Unit)(use: A => F[B]): F[B] = - Resource.make(Sync[F].pure(resource))(a => Sync[F].delay(release(a))).use(a => use(a)) - } - - implicit def idResource: ResourceF[Id] = - new ResourceF[Id] { - def use[A, B](resource: A)(release: A => Unit)(use: A => B): B = - try use(resource) - finally release(resource) - } -} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ShiftExecution.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ShiftExecution.scala index 9d3423abc..b63c7abb2 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ShiftExecution.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ShiftExecution.scala @@ -49,13 +49,4 @@ object ShiftExecution { def shift[A](f: F[A]): F[A] = ContextShift[F].evalOn(ec)(f) } - - // Shifting is not needed in stream-enrich, because all enrichment operations run sequentially on - // the same thread. - def noop[F[_]]: ShiftExecution[F] = - new ShiftExecution[F] { - override def shift[A](f: F[A]): F[A] = - f - } - } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala index 4bf131cd8..1ad8762f9 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2022-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -16,6 +16,5 @@ package com.snowplowanalytics.snowplow.enrich.common * See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 */ object AcceptInvalid { - val countInvalid = () val featureFlags = EtlPipeline.FeatureFlags(acceptInvalid = false, legacyEnrichmentOrder = false) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 90968fe51..a5f818280 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -12,10 +12,12 @@ */ package com.snowplowanalytics.snowplow.enrich.common -import cats.Id import cats.data.Validated import cats.syntax.validated._ +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.iglu.client.IgluCirceClient import com.snowplowanalytics.iglu.client.resolver.Resolver import com.snowplowanalytics.iglu.client.resolver.registries.Registry @@ -36,9 +38,10 @@ import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient._ -class EtlPipelineSpec extends Specification with ValidatedMatchers { +class EtlPipelineSpec extends Specification with ValidatedMatchers with CatsIO { def is = s2""" EtlPipeline should always produce either bad or good row for each event of the payload $e1 Processing of events with malformed query string should be supported $e2 @@ -47,81 +50,94 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { """ val adapterRegistry = new AdapterRegistry() - val enrichmentReg = EnrichmentRegistry[Id]() + val enrichmentReg = EnrichmentRegistry[IO]() val igluCentral = Registry.IgluCentral - val client = IgluCirceClient.fromResolver[Id](Resolver(List(igluCentral), None), cacheSize = 0) + val client = IgluCirceClient.fromResolver[IO](Resolver(List(igluCentral), None), cacheSize = 0).unsafeRunSync() val processor = Processor("sce-test-suite", "1.0.0") val dateTime = DateTime.now() def e1 = { val collectorPayloadBatched = EtlPipelineSpec.buildBatchedPayload() - val output = EtlPipeline.processEvents[Id]( - adapterRegistry, - enrichmentReg, - client, - processor, - dateTime, - Some(collectorPayloadBatched).validNel, - AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid - ) - output must be like { - case a :: b :: c :: d :: Nil => - (a must beValid).and(b must beInvalid).and(c must beInvalid).and(d must beInvalid) - } + EtlPipeline + .processEvents[IO]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + Some(collectorPayloadBatched).validNel, + AcceptInvalid.featureFlags, + IO.unit + ) + .map { output => + output must be like { + case a :: b :: c :: d :: Nil => + (a must beValid).and(b must beInvalid).and(c must beInvalid).and(d must beInvalid) + } + } } def e2 = { val thriftBytesMalformedQS = EtlPipelineSpec.buildThriftBytesMalformedQS() - ThriftLoader + val collectorPayload = ThriftLoader .toCollectorPayload(thriftBytesMalformedQS, processor) .map(_.get) - .map(collectorPayload => - EtlPipeline.processEvents[Id]( - adapterRegistry, - enrichmentReg, - client, - processor, - dateTime, - Some(collectorPayload).validNel, - AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid - ) - ) must beValid.like { - case Validated.Valid(_: EnrichedEvent) :: Nil => ok - case res => ko(s"[$res] doesn't contain one enriched event") - } + .toOption + .get + EtlPipeline + .processEvents[IO]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + Some(collectorPayload).validNel, + AcceptInvalid.featureFlags, + IO.unit + ) + .map { output => + output must beLike { + case Validated.Valid(_: EnrichedEvent) :: Nil => ok + case res => ko(s"[$res] doesn't contain one enriched event") + } + } } def e3 = { val invalidCollectorPayload = ThriftLoader.toCollectorPayload(Array(1.toByte), processor) - EtlPipeline.processEvents[Id]( - adapterRegistry, - enrichmentReg, - client, - processor, - dateTime, - invalidCollectorPayload, - AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid - ) must be like { - case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok - case other => ko(s"One invalid CPFormatViolation expected, got ${other}") - } + EtlPipeline + .processEvents[IO]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + invalidCollectorPayload, + AcceptInvalid.featureFlags, + IO.unit + ) + .map { output => + output must be like { + case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok + case other => ko(s"One invalid CPFormatViolation expected, got ${other}") + } + } } def e4 = { val collectorPayload: Option[CollectorPayload] = None - EtlPipeline.processEvents[Id]( - adapterRegistry, - enrichmentReg, - client, - processor, - dateTime, - collectorPayload.validNel[BadRow], - AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid - ) must beEqualTo(Nil) + EtlPipeline + .processEvents[IO]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + collectorPayload.validNel[BadRow], + AcceptInvalid.featureFlags, + IO.unit + ) + .map(output => output must beEqualTo(Nil)) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala index 5f2de3e9b..f0bef085d 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,8 +12,13 @@ */ package com.snowplowanalytics.snowplow.enrich.common -import cats.Id +import scala.util.control.NonFatal +import scala.concurrent.ExecutionContext + import cats.implicits._ +import cats.effect.{Blocker, IO} + +import scalaj.http._ import com.snowplowanalytics.iglu.client.IgluCirceClient import com.snowplowanalytics.iglu.core.SelfDescribingData @@ -27,7 +32,7 @@ import io.circe.literal._ import org.apache.http.NameValuePair import org.apache.http.message.BasicNameValuePair -import com.snowplowanalytics.snowplow.enrich.common.utils.JsonUtils +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils} object SpecHelpers { @@ -62,11 +67,16 @@ object SpecHelpers { }""" /** Builds an Iglu client from the above Iglu configuration. */ - val client: IgluCirceClient[Id] = IgluCirceClient - .parseDefault[Id](igluConfig) + val client: IgluCirceClient[IO] = IgluCirceClient + .parseDefault[IO](igluConfig) .value + .unsafeRunSync() .getOrElse(throw new RuntimeException("invalid resolver configuration")) + val blockingEC = Blocker.liftExecutionContext(ExecutionContext.global) + +// implicit def httpClient(implicit C: ContextShift[IO]): Http4sClient[IO] = JavaNetClientBuilder[IO](blockingEC).create + private type NvPair = (String, String) /** @@ -107,4 +117,74 @@ object SpecHelpers { implicit class MapOps[A, B](underlying: Map[A, B]) { def toOpt: Map[A, Option[B]] = underlying.map { case (a, b) => (a, Option(b)) } } + + implicit val ioHttpClient: HttpClient[IO] = + new HttpClient[IO] { + override def getResponse( + uri: String, + authUser: Option[String], + authPassword: Option[String], + body: Option[String], + method: String + ): IO[Either[Throwable, String]] = + getBody( + buildRequest( + uri, + authUser, + authPassword, + body, + method + ) + ) + } + + /** + * Blocking method to get body of HTTP response + * + * @param request assembled request object + * @return validated body of HTTP request + */ + private def getBody(request: HttpRequest): IO[Either[Throwable, String]] = + IO.delay(request.asString) + .map { res => + if (res.isSuccess) res.body.asRight + else new Exception(s"Request failed with status ${res.code} and body ${res.body}").asLeft + } + .recover { + case NonFatal(e) => new Exception(e).asLeft + } + + /** + * Build HTTP request object + * + * @param uri full URI to request + * @param authUser optional username for basic auth + * @param authPassword optional password for basic auth + * @param body optional request body + * @param method HTTP method + * @return HTTP request + */ + def buildRequest( + uri: String, + authUser: Option[String], + authPassword: Option[String], + body: Option[String], + method: String = "GET" + ): HttpRequest = { + val req: HttpRequest = Http(uri).method(method) + req.maybeAuth(authUser, authPassword).maybePostData(body) + } + + implicit class RichHttpRequest(request: HttpRequest) { + + def maybeAuth(user: Option[String], password: Option[String]): HttpRequest = + if (user.isDefined || password.isDefined) + request.auth(user.getOrElse(""), password.getOrElse("")) + else request + + def maybePostData(body: Option[String]): HttpRequest = + body + .map(data => request.postData(data).header("content-type", "application/json").header("accept", "*/*")) + .getOrElse(request) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala index 2b817a167..4046109f1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala @@ -14,10 +14,11 @@ package com.snowplowanalytics.snowplow.enrich.common package adapters package registry -import cats.Id import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup._ import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime @@ -27,8 +28,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import SpecHelpers._ import utils.Clock._ +import utils.HttpClient._ + +import SpecHelpers._ -class CallrailAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class CallrailAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents should return a NEL containing one RawEvent if the querystring is correctly populated $e1 toRawEvents should return a Validation Failure if there are no parameters on the querystring $e2 @@ -97,7 +101,6 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa "nuid" -> "-" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.source, Shared.context) - val actual = CallrailAdapter.toRawEvents[Id](payload, SpecHelpers.client) val expectedJson = """|{ @@ -144,29 +147,36 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa |} |}""".stripMargin.replaceAll("[\n\r]", "") - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-").toOpt, - None, - Shared.source, - Shared.context + CallrailAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-").toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e2 = { val params = toNameValuePairs() val payload = CollectorPayload(Shared.api, params, None, None, Shared.source, Shared.context) - val actual = CallrailAdapter.toRawEvents[Id](payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("querystring", None, "empty querystring") + CallrailAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("querystring", None, "empty querystring") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala index b00411e2d..61a8bf41d 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala @@ -14,19 +14,25 @@ package com.snowplowanalytics.snowplow.enrich.common package adapters package registry -import cats.data.{NonEmptyList, Validated} +import java.time.Instant +import cats.data.{NonEmptyList, Validated, ValidatedNel} import cats.syntax.option._ +import cats.syntax.either._ +import cats.syntax.validated._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.snowplow.badrows.{Payload => BadrowPayload} import org.joda.time.DateTime import org.specs2.Specification import org.specs2.matcher.{DataTables, ValidatedMatchers} -import loaders._ +import loaders.CollectorPayload import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { val processor = Processor("CloudfrontAccessLogAdapterSpec", "v1") def is = s2""" @@ -41,7 +47,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with toRawEvents should return a Validation Failure if the line contains an unparseable field $e9 """ - val loader = new TsvLoader("com.amazon.aws.cloudfront/wd_access_log") + val loader = CloudfrontAccessLogAdapterSpec.TsvLoader("com.amazon.aws.cloudfront/wd_access_log") val doubleEncodedUa = "Mozilla/5.0%2520(Macintosh;%2520Intel%2520Mac%2520OS%2520X%252010_9_2)%2520AppleWebKit/537.36%2520(KHTML,%2520like%2520Gecko)%2520Chrome/34.0.1847.131%2520Safari/537.36" @@ -87,7 +93,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -135,7 +141,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -186,7 +192,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -240,7 +246,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -295,7 +301,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -354,7 +360,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -414,7 +420,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with val payload = loader.toCollectorPayload(input, processor) val actual = payload.map( - _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client)) + _.map(CloudfrontAccessLogAdapter.toRawEvents(_, SpecHelpers.client).unsafeRunSync()) ) val expectedJson = @@ -480,18 +486,21 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with Shared.source, Shared.context ) - val actual = CloudfrontAccessLogAdapter.toRawEvents(payload, SpecHelpers.client) - - actual must beInvalid( - NonEmptyList - .one( - FailureDetails.AdapterFailure.InputData( - "body", - "2013-10-07 23:35:30 c ".some, - "access log contained 5 fields, expected 12, 15, 18, 19, 23, 24 or 26" - ) + + CloudfrontAccessLogAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList + .one( + FailureDetails.AdapterFailure.InputData( + "body", + "2013-10-07 23:35:30 c ".some, + "access log contained 5 fields, expected 12, 15, 18, 19, 23, 24 or 26" + ) + ) ) - ) + ) } def e9 = { @@ -505,18 +514,57 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with Shared.source, Shared.context ) - val actual = CloudfrontAccessLogAdapter.toRawEvents(payload, SpecHelpers.client) - - actual must beInvalid( - NonEmptyList.of( - FailureDetails.AdapterFailure.InputData( - "dateTime", - "a b".some, - """could not convert access log timestamp: Invalid format: "aTb+00:00"""" - ), - FailureDetails.AdapterFailure - .InputData("scBytes", "d".some, "cannot be converted to Int") + + CloudfrontAccessLogAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.of( + FailureDetails.AdapterFailure.InputData( + "dateTime", + "a b".some, + """could not convert access log timestamp: Invalid format: "aTb+00:00"""" + ), + FailureDetails.AdapterFailure + .InputData("scBytes", "d".some, "cannot be converted to Int") + ) + ) ) - ) + } +} + +object CloudfrontAccessLogAdapterSpec { + + final case class TsvLoader(adapter: String) { + private val CollectorName = "tsv" + private val CollectorEncoding = "UTF-8" + + /** + * Converts the source TSV into a ValidatedMaybeCollectorPayload. + * + * @param line A TSV + * @return either a set of validation errors or an Option-boxed CanonicalInput object, wrapped in + * a ValidatedNel. + */ + def toCollectorPayload(line: String, processor: Processor): ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]] = + // Throw away the first two lines of Cloudfront web distribution access logs + if (line.startsWith("#Version:") || line.startsWith("#Fields:")) + None.valid + else + CollectorPayload + .parseApi(adapter) + .map { api => + val source = CollectorPayload.Source(CollectorName, CollectorEncoding, None) + val context = CollectorPayload.Context(None, None, None, None, Nil, None) + CollectorPayload(api, Nil, None, Some(line), source, context).some + } + .leftMap(f => + BadRow.CPFormatViolation( + processor, + Failure.CPFormatViolation(Instant.now(), CollectorName, f), + BadrowPayload.RawPayload(line) + ) + ) + .toValidatedNel } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala index ca2f74b61..9eef23fa1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala @@ -18,6 +18,8 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import org.joda.time.DateTime import org.specs2.Specification @@ -28,10 +30,11 @@ import com.snowplowanalytics.snowplow.badrows._ import loaders._ import GoogleAnalyticsAdapter._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class GoogleAnalyticsAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class GoogleAnalyticsAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents returns a failNel if the query string is empty $e1 @@ -78,50 +81,55 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali def e1 = { val payload = CollectorPayload(api, Nil, None, None, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData("body", None, "empty body") + toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData("body", None, "empty body") + ) + ) ) - ) } def e2 = { val body = "dl=docloc" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "body", - "dl=docloc".some, - "no t parameter provided: cannot determine hit type" + toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "body", + "dl=docloc".some, + "no t parameter provided: cannot determine hit type" + ) + ) ) ) - ) } def e3 = { val body = "t=unknown&dl=docloc" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.of( - FailureDetails.AdapterFailure - .InputData("t", "unknown".some, "no matching hit type"), - FailureDetails.AdapterFailure.SchemaMapping( - "unknown".some, - unstructEventData.mapValues(_.schemaKey), - "no schema associated with the provided type parameter" + toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.of( + FailureDetails.AdapterFailure + .InputData("t", "unknown".some, "no matching hit type"), + FailureDetails.AdapterFailure.SchemaMapping( + "unknown".some, + unstructEventData.mapValues(_.schemaKey), + "no schema associated with the provided type parameter" + ) + ) ) ) - ) } def e4 = { val body = "t=pageview&dh=host&dp=path" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedJson = """|{ @@ -140,13 +148,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":[${hitContext("pageview")}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e5 = { val body = "t=pageview&dh=host&cid=id&v=version" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -170,13 +177,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e6 = { val body = "t=pageview&dp=path&uip=ip" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -196,13 +202,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO, "ip" -> "ip").toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e7 = { val body = "t=item&in=name&ip=12.228&iq=12&aip=0" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -232,13 +237,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "ti_qu" -> "12", "ti_nm" -> "name" ).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e8 = { val body = "t=exception&exd=desc&exf=1&dh=host" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -257,13 +261,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e9 = { val body = "t=transaction&ti=tr&cu=EUR&pr12id=ident&pr12cd42=val" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -290,13 +293,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "tr_cu" -> "EUR", "tr_id" -> "tr" ).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e10 = { val body = "t=pageview&dp=path&il12pi42id=s&il12pi42cd36=dim" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -318,13 +320,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e11 = { val body = "t=screenview&cd=name&cd12=dim" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -343,13 +344,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e12 = { val body = "t=pageview&dp=path&pr1id=s1&pr2id=s2&pr1cd1=v1&pr1cd2=v2" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -377,13 +377,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e13 = { val body = "t=pageview&dp=path&promoa=action&promo12id=id" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedUE = """|{ @@ -405,13 +404,12 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}] |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e14 = { val body = "t=pageview&dh=host&dp=path\nt=pageview&dh=host&dp=path" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedJson = """|{ @@ -431,14 +429,13 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |}""".stripMargin.replaceAll("[\n\r]", "") val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt val event = RawEvent(api, expectedParams, None, source, context) - actual must beValid(NonEmptyList.of(event, event)) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.of(event, event))) } def e15 = { val body = "t=pageview&dh=host&dp=path&cu=EUR&il1pi1pr=1&il1pi1nm=name1&il1pi1ps=1&il1pi1ca=cat1&il1pi1id=id1&il1pi1br=brand1&il1pi2pr=2&il1pi2nm=name2&il1pi2ps=2&il1pi2ca=cat2&il1pi2id=id2&il1pi2br=brand2&il2pi1pr=21&il2pi1nm=name21&il2pi1ps=21&il2pi1ca=cat21&il2pi1id=id21&il2pi1br=brand21" val payload = CollectorPayload(api, Nil, None, body.some, source, context) - val actual = toRawEvents(payload, SpecHelpers.client) val expectedJson = """|{ @@ -470,7 +467,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "co" -> expectedCO, "ti_cu" -> "EUR" ).toOpt - actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) + toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context)))) } def e20 = { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala index d0414a9e7..53238626a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala @@ -17,6 +17,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import io.circe.literal._ import org.joda.time.DateTime @@ -25,10 +26,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" payloadBodyToEvents must return a Success list of event JSON's from a valid payload body $e1 payloadBodyToEvents must return a Failure Nel for an invalid payload body being passed $e2 @@ -99,7 +101,7 @@ class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - HubSpotAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + HubSpotAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e4 = { @@ -118,34 +120,46 @@ class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMat HubSpotAdapter.EventSchemaMap, "no schema associated with the provided type parameter at index 0" ) - HubSpotAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one(expected) - ) + HubSpotAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one(expected) + ) + ) } def e5 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - HubSpotAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: not events to process") + HubSpotAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: not events to process") + ) + ) ) - ) } def e6 = { val payload = CollectorPayload(Shared.api, Nil, None, "stub".some, Shared.cljSource, Shared.context) - HubSpotAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/json" + HubSpotAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/json" + ) + ) ) ) - ) } def e7 = { @@ -158,11 +172,15 @@ class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMat Shared.cljSource, Shared.context ) - HubSpotAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("contentType", ct, "expected application/json") + HubSpotAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("contentType", ct, "expected application/json") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala index e54ec2c97..0ac656515 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -23,10 +24,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class IgluAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class IgluAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents should return a NEL containing one RawEvent if the CloudFront querystring is minimally populated $e1 toRawEvents should return a NEL containing one RawEvent if the CloudFront querystring is maximally populated $e2 @@ -82,7 +84,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "ad_unit" -> "" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expectedJson = """|{ @@ -100,17 +101,21 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche |} |}""".stripMargin.replaceAll("[\n\r]", "") - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, - None, - Shared.cfSource, - Shared.context + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, + None, + Shared.cfSource, + Shared.context + ) + ) ) ) - ) } def e2 = { @@ -124,7 +129,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "aid" -> "webhooks" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expectedMap = { val json = @@ -147,11 +151,15 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche ).toOpt } - actual must beValid( - NonEmptyList.one( - RawEvent(Shared.api, Expected.static ++ expectedMap, None, Shared.cfSource, Shared.context) + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent(Shared.api, Expected.static ++ expectedMap, None, Shared.cfSource, Shared.context) + ) + ) ) - ) } def e3 = { @@ -167,7 +175,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "nuid" -> "" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cljSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expectedMap = { val json = @@ -192,11 +199,15 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche ).toOpt } - actual must beValid( - NonEmptyList.one( - RawEvent(Shared.api, Expected.static ++ expectedMap, None, Shared.cljSource, Shared.context) + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent(Shared.api, Expected.static ++ expectedMap, None, Shared.cljSource, Shared.context) + ) + ) ) - ) } def e4 = { @@ -207,7 +218,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "p" -> "mob" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expectedJson = """|{ @@ -221,31 +231,38 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche |} |}""".stripMargin.replaceAll("[\n\r]", "") - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson).toOpt, - None, - Shared.cfSource, - Shared.context + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson).toOpt, + None, + Shared.cfSource, + Shared.context + ) + ) ) ) - ) } def e5 = { val params = SpecHelpers.toNameValuePairs() val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.of( - FailureDetails.AdapterFailure - .InputData("schema", None, "empty `schema` field"), - FailureDetails.AdapterFailure.InputData("body", None, "empty body") + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.of( + FailureDetails.AdapterFailure + .InputData("schema", None, "empty `schema` field"), + FailureDetails.AdapterFailure.InputData("body", None, "empty body") + ) + ) ) - ) } def e6 = { @@ -254,15 +271,18 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "p" -> "mob" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.of( - FailureDetails.AdapterFailure - .InputData("schema", None, "empty `schema` field"), - FailureDetails.AdapterFailure.InputData("body", None, "empty body") + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.of( + FailureDetails.AdapterFailure + .InputData("schema", None, "empty `schema` field"), + FailureDetails.AdapterFailure.InputData("body", None, "empty body") + ) + ) ) - ) } def e7 = { @@ -270,15 +290,18 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "schema" -> "iglooooooo://blah" ) val payload = CollectorPayload(Shared.api, params, None, None, Shared.cfSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList - .one( - FailureDetails.AdapterFailure - .InputData("schema", "iglooooooo://blah".some, "INVALID_IGLUURI") + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList + .one( + FailureDetails.AdapterFailure + .InputData("schema", "iglooooooo://blah".some, "INVALID_IGLUURI") + ) ) - ) + ) } def e8 = { @@ -297,7 +320,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expected = RawEvent( Shared.api, @@ -312,7 +334,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) - actual must beValid(NonEmptyList.one(expected)) + IgluAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(expected))) } def e9 = { @@ -331,14 +353,13 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expected = FailureDetails.AdapterFailure.InputData( "contentType", "application/badtype".some, "expected one of application/json, application/json; charset=utf-8, application/x-www-form-urlencoded" ) - actual must beInvalid(NonEmptyList.one(expected)) + IgluAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(NonEmptyList.one(expected))) } def e10 = { @@ -357,14 +378,17 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", "{}".some, "has no key-value pairs") + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", "{}".some, "has no key-value pairs") + ) + ) ) - ) } def e11 = { @@ -376,17 +400,20 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche val jsonStr = """{"key":"value"}""" val payload = CollectorPayload(Shared.api, params, None, jsonStr.some, Shared.cljSource, Shared.context) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "expected one of application/json, application/json; charset=utf-8, application/x-www-form-urlencoded" + + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "expected one of application/json, application/json; charset=utf-8, application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e12 = { @@ -402,7 +429,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expected = RawEvent( Shared.api, @@ -417,7 +443,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) - actual must beValid(NonEmptyList.one(expected)) + IgluAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(expected))) } def e13 = { @@ -433,17 +459,20 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - "application/xxx-url-form-encoded".some, - "expected one of application/json, application/json; charset=utf-8" + + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + "application/xxx-url-form-encoded".some, + "expected one of application/json, application/json; charset=utf-8" + ) + ) ) ) - ) } def e14 = { @@ -458,17 +487,20 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - "application/xxx-url-form-encoded".some, - "expected one of application/json, application/json; charset=utf-8, application/x-www-form-urlencoded" + + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + "application/xxx-url-form-encoded".some, + "expected one of application/json, application/json; charset=utf-8, application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e15 = { @@ -483,7 +515,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expected = RawEvent( Shared.api, @@ -498,7 +529,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) - actual must beValid(NonEmptyList.one(expected)) + IgluAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.one(expected))) } def e16 = { @@ -518,7 +549,6 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) val expected = RawEvent( Shared.api, @@ -533,7 +563,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) - actual must beValid(NonEmptyList.of(expected, expected)) + IgluAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(NonEmptyList.of(expected, expected))) } def e17 = { @@ -552,13 +582,16 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.cljSource, Shared.context ) - val actual = IgluAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", "[]".some, "empty array of events") + IgluAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", "[]".some, "empty array of events") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala index 2b197a840..aa5b8517d 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala @@ -15,6 +15,8 @@ package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.snowplow.badrows._ import io.circe._ @@ -28,11 +30,12 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient._ import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ -class MailchimpAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class MailchimpAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toKeys should return a valid List of Keys from a string containing braces (or not) $e1 toNestedJson should return a valid JField nested to contain all keys and then the supplied value $e2 @@ -148,18 +151,21 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM |} |}""".stripMargin.replaceAll("[\n\r]", "") - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } def e7 = { @@ -189,18 +195,21 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM |} |}""".stripMargin.replaceAll("[\n\r]", "") - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } def e8 = @@ -222,18 +231,21 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM ) val expectedJson = "{\"schema\":\"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0\",\"data\":{\"schema\":\"" + expected + "\",\"data\":{\"type\":\"" + schema + "\"}}}" - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } def e9 = @@ -257,8 +269,9 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM Shared.cljSource, Shared.context ) - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid(NonEmptyList.one(expected)) + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid(NonEmptyList.one(expected))) } def e10 = { @@ -300,51 +313,60 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM |} |}""".stripMargin.replaceAll("[\n\r]", "") - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map( - "tv" -> "com.mailchimp-v1", - "e" -> "ue", - "p" -> "srv", - "ue_pr" -> expectedJson, - "nuid" -> "123" - ).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map( + "tv" -> "com.mailchimp-v1", + "e" -> "ue", + "p" -> "srv", + "ue_pr" -> expectedJson, + "nuid" -> "123" + ).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } def e11 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e12 = { val payload = CollectorPayload(Shared.api, Nil, None, "stub".some, Shared.cljSource, Shared.context) - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/x-www-form-urlencoded" + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e13 = { @@ -356,16 +378,19 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM Shared.cljSource, Shared.context ) - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - "application/json".some, - "expected application/x-www-form-urlencoded" + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + "application/json".some, + "expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e14 = { @@ -378,15 +403,18 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM Shared.cljSource, Shared.context ) - val actual = MailchimpAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "body", - "fired_at=2014-10-22+13%3A10%3A40".some, - "no `type` parameter provided: cannot determine event type" + MailchimpAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "body", + "fired_at=2014-10-22+13%3A10%3A40".some, + "no `type` parameter provided: cannot determine event type" + ) + ) ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala index 39f64cc6a..4d42dfda3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -24,10 +25,11 @@ import io.circe.parser._ import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class MailgunAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class MailgunAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a Success Nel if every event 'delivered' in the payload is successful $e1 toRawEvents must return a Success Nel if every event 'opened' in the payload is successful $e2 @@ -144,7 +146,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e2 = { @@ -211,7 +213,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e3 = { @@ -243,7 +245,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e4 = { @@ -275,7 +277,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e5 = { @@ -360,33 +362,41 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e6 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + MailgunAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e7 = { val body = "body" val payload = CollectorPayload(Shared.api, Nil, None, body.some, Shared.cljSource, Shared.context) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/json" + MailgunAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/json" + ) + ) ) ) - ) } def e8 = { @@ -394,15 +404,19 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val ct = "multipart/form-data" val payload = CollectorPayload(Shared.api, Nil, ct.some, body.some, Shared.cljSource, Shared.context) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - ct.some, - "expected application/json" + MailgunAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + ct.some, + "expected application/json" + ) + ) ) ) - ) } def e9 = { @@ -420,7 +434,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat FailureDetails.AdapterFailure .InputData("body", None, "empty body: no events to process") ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e10 = { @@ -440,7 +454,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat "no `event` parameter provided: cannot determine event type" ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e11 = { @@ -461,7 +475,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat "no schema associated with the provided type parameter" ) ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e12 = { @@ -480,7 +494,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat FailureDetails.AdapterFailure .InputData("timestamp", None, "missing 'timestamp'") ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e13 = { @@ -497,7 +511,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( FailureDetails.AdapterFailure.InputData("token", None, "missing 'token'") ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e14 = { @@ -516,6 +530,6 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat FailureDetails.AdapterFailure .InputData("signature", None, "missing 'signature'") ) - MailgunAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MailgunAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala index a42f24e89..55ddef015 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import io.circe.literal._ import org.joda.time.DateTime @@ -24,10 +25,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class MandrillAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class MandrillAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" payloadBodyToEvents must return a Success List[JValue] for a valid events string $e1 payloadBodyToEvents must return a Failure String if the mapped events string is not in a valid format $e2 @@ -214,7 +216,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa Shared.context ) ) - MandrillAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MandrillAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e5 = { // Spec for nine seperate events where two have incorrect event names and one does not have event as a parameter @@ -245,33 +247,41 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "cannot determine event type: type parameter not provided at index 2" ) ) - MandrillAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + MandrillAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e6 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - MandrillAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + MandrillAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e7 = { val body = "mandrill_events=%5B%7B%22event%22%3A%20%22subscribe%22%7D%5D" val payload = CollectorPayload(Shared.api, Nil, None, body.some, Shared.cljSource, Shared.context) - MandrillAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/x-www-form-urlencoded" + MandrillAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e8 = { @@ -279,11 +289,15 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa val ct = "application/x-www-form-urlencoded; charset=utf-8".some val payload = CollectorPayload(Shared.api, Nil, ct, body.some, Shared.cljSource, Shared.context) - MandrillAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("contentType", ct, "expected application/x-www-form-urlencoded") + MandrillAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("contentType", ct, "expected application/x-www-form-urlencoded") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala index 5895a6efc..35e413eb9 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -23,10 +24,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class MarketoAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class MarketoAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a success for a valid "event" type payload body being passed $e1 toRawEvents must return a Failure Nel if the payload body is empty $e2 @@ -72,17 +74,21 @@ class MarketoAdapterSpec extends Specification with DataTables with ValidatedMat Shared.context ) ) - MarketoAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + MarketoAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e2 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - MarketoAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + MarketoAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala index 960e3a9ca..92d65a5c4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala @@ -17,6 +17,8 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime @@ -26,10 +28,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a Success Nel if the transcript event in the payload is successful $e1 toRawEvents must return a Success Nel if the offline message event in the payload is successful $e2 @@ -142,7 +145,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch Shared.context ) ) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + OlarkAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e2 = { @@ -206,18 +209,22 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch Shared.context ) ) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + OlarkAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e3 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + OlarkAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e4 = { @@ -225,15 +232,19 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch "data=%7B%22kind%22%3A%20%22Conversation%22%2C%20%22tags%22%3A%20%5B%22olark%22%2C%20%22customer%22%5D%2C%20%22items%22%3A%20%5B%7B%22body%22%3A%20%22Hi%20there.%20Need%20any%20help%3F%22%2C%20%22timestamp%22%3A%20%221307116657.1%22%2C%20%22kind%22%3A%20%22MessageToVisitor%22%2C%20%22nickname%22%3A%20%22John%22%2C%20%22operatorId%22%3A%20%221234%22%7D%2C%20%7B%22body%22%3A%20%22Yes%2C%20please%20help%20me%20with%20billing.%22%2C%20%22timestamp%22%3A%20%221307116661.25%22%2C%20%22kind%22%3A%20%22MessageToOperator%22%2C%20%22nickname%22%3A%20%22Bob%22%7D%5D%2C%20%22operators%22%3A%20%7B%221234%22%3A%20%7B%22username%22%3A%20%22jdoe%22%2C%20%22emailAddress%22%3A%20%22john%40example.com%22%2C%20%22kind%22%3A%20%22Operator%22%2C%20%22nickname%22%3A%20%22John%22%2C%20%22id%22%3A%20%221234%22%7D%7D%2C%20%22groups%22%3A%20%5B%7B%22kind%22%3A%20%22Group%22%2C%20%22name%22%3A%20%22My%20Sales%20Group%22%2C%20%22id%22%3A%20%220123456789abcdef%22%7D%5D%2C%20%22visitor%22%3A%20%7B%22ip%22%3A%20%22123.4.56.78%22%2C%20%22city%22%3A%20%22Palo%20Alto%22%2C%20%22kind%22%3A%20%22Visitor%22%2C%20%22conversationBeginPage%22%3A%20%22http%3A%2F%2Fwww.example.com%2Fpath%22%2C%20%22countryCode%22%3A%20%22US%22%2C%20%22country%22%3A%20%22United%20State%22%2C%20%22region%22%3A%20%22CA%22%2C%20%22chat_feedback%22%3A%20%7B%22overall_chat%22%3A%205%2C%20%22responsiveness%22%3A%205%2C%20%22friendliness%22%3A%205%2C%20%22knowledge%22%3A%205%2C%20%22comments%22%3A%20%22Very%20helpful%2C%20thanks%22%7D%2C%20%22operatingSystem%22%3A%20%22Windows%22%2C%20%22emailAddress%22%3A%20%22bob%40example.com%22%2C%20%22organization%22%3A%20%22Widgets%20Inc.%22%2C%20%22phoneNumber%22%3A%20%22%28555%29%20555-5555%22%2C%20%22fullName%22%3A%20%22Bob%20Doe%22%2C%20%22customFields%22%3A%20%7B%22favoriteColor%22%3A%20%22blue%22%2C%20%22myInternalCustomerId%22%3A%20%2212341234%22%7D%2C%20%22id%22%3A%20%229QRF9YWM5XW3ZSU7P9CGWRU89944341%22%2C%20%22browser%22%3A%20%22Chrome%2012.1%22%7D%2C%20%22id%22%3A%20%22EV695BI2930A6XMO32886MPT899443414%22%7D" val payload = CollectorPayload(Shared.api, Nil, None, body.some, Shared.cljSource, Shared.context) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/x-www-form-urlencoded" + OlarkAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e5 = { @@ -242,15 +253,19 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch val ct = "application/json" val payload = CollectorPayload(Shared.api, Nil, ct.some, body.some, Shared.cljSource, Shared.context) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "expected application/x-www-form-urlencoded" + OlarkAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e6 = { @@ -268,7 +283,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch FailureDetails.AdapterFailure .InputData("body", None, "empty body: no events to process") ) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + OlarkAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e7 = { @@ -285,7 +300,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch val expected = NonEmptyList.one( FailureDetails.AdapterFailure.InputData("data", None, "missing 'data' field") ) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + OlarkAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e8 = { @@ -299,13 +314,15 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch Shared.cljSource, Shared.context ) - OlarkAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid.like { - case nel => - nel.size must_== 1 - nel.head must haveClass[FailureDetails.AdapterFailure.NotJson] - val f = nel.head.asInstanceOf[FailureDetails.AdapterFailure.NotJson] - f.field must_== "data" - f.error must_== """invalid json: expected json value got 'kind":...' (line 1, column 1)""" - } + OlarkAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid.like { + case nel => + nel.size must_== 1 + nel.head must haveClass[FailureDetails.AdapterFailure.NotJson] + val f = nel.head.asInstanceOf[FailureDetails.AdapterFailure.NotJson] + f.field must_== "data" + f.error must_== """invalid json: expected json value got 'kind":...' (line 1, column 1)""" + }) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala index 6581f530c..cbfde4465 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import io.circe.literal._ import org.joda.time.DateTime @@ -24,10 +25,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" reformatParameters must return an updated JSON whereby all null Strings have been replaced by null $e1 reformatParameters must return an updated JSON where 'incident.xxx' is replaced by xxx $e2 @@ -135,7 +137,7 @@ class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedM Shared.context ) ) - PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e7 = { @@ -154,34 +156,46 @@ class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedM PagerdutyAdapter.EventSchemaMap, "no schema associated with the provided type parameter at index 0" ) - PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one(expected) - ) + PagerdutyAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one(expected) + ) + ) } def e8 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + PagerdutyAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e9 = { val payload = CollectorPayload(Shared.api, Nil, None, "stub".some, Shared.cljSource, Shared.context) - PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/json" + PagerdutyAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/json" + ) + ) ) ) - ) } def e10 = { @@ -194,11 +208,15 @@ class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedM Shared.cljSource, Shared.context ) - PagerdutyAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("contentType", ct, "expected application/json") + PagerdutyAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("contentType", ct, "expected application/json") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala index 285e0a4bb..70e029ca0 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala @@ -17,6 +17,8 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.snowplow.badrows.FailureDetails import io.circe.literal._ @@ -28,10 +30,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class PingdomAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class PingdomAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" reformatParameters should return either an updated JSON without the 'action' field or the same JSON $e1 reformatMapParams must return a Failure Nel for any Python Unicode wrapped values $e2 @@ -91,9 +94,13 @@ class PingdomAdapterSpec extends Specification with DataTables with ValidatedMat Shared.cljSource, Shared.context ) - PingdomAdapter.toRawEvents(payload, SpecHelpers.client) must beValid( - NonEmptyList.one(expected) - ) + PingdomAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one(expected) + ) + ) } def e4 = { @@ -104,9 +111,13 @@ class PingdomAdapterSpec extends Specification with DataTables with ValidatedMat None, "empty querystring: no events to process" ) - PingdomAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one(expected) - ) + PingdomAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one(expected) + ) + ) } def e5 = { @@ -119,8 +130,12 @@ class PingdomAdapterSpec extends Specification with DataTables with ValidatedMat "p=apps".some, "no `message` parameter provided" ) - PingdomAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one(expected) - ) + PingdomAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one(expected) + ) + ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala index 4cd70b352..15c71b41c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala @@ -20,9 +20,10 @@ import java.util.concurrent.TimeUnit import scala.concurrent.duration.Duration +import cats.implicits._ import cats.data.NonEmptyList -import cats.syntax.either._ -import cats.syntax.option._ + +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ @@ -44,7 +45,7 @@ import utils.Clock._ import SpecHelpers._ -class RemoteAdapterSpec extends Specification with ValidatedMatchers { +class RemoteAdapterSpec extends Specification with ValidatedMatchers with CatsIO { def is = sequential ^ s2""" @@ -85,7 +86,6 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { s"/$basePath", new HttpHandler { def handle(exchange: HttpExchange): Unit = { - val response = MockRemoteAdapter.handle(getBodyAsString(exchange.getRequestBody)) if (response != "\"server error\"") exchange.sendResponseHeaders(200, 0) @@ -163,11 +163,7 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { httpServer = localHttpServer(mockServerPort, mockServerPath) httpServer.start() - testAdapter = new RemoteAdapter( - s"http://localhost:$mockServerPort/$mockServerPath", - Some(1000L), - Some(5000L) - ) + testAdapter = RemoteAdapter(s"http://localhost:$mockServerPort/$mockServerPath") } def after = @@ -198,8 +194,9 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { ) } - val they = testAdapter.toRawEvents(payload, SpecHelpers.client) - they must beValid(expected) + testAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e2 = { @@ -212,7 +209,7 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { "empty body: not a valid remote adapter http://localhost:8091/myEnrichment payload" ) ) - testAdapter.toRawEvents(emptyListPayload, SpecHelpers.client) must beInvalid(expected) + testAdapter.toRawEvents(emptyListPayload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e3 = { @@ -225,7 +222,7 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { "empty body: not a valid remote adapter http://localhost:8091/myEnrichment payload" ) ) - testAdapter.toRawEvents(bodylessPayload, SpecHelpers.client) must beInvalid(expected) + testAdapter.toRawEvents(bodylessPayload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e4 = { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala index b5f62bd71..8490d42ae 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.matcher.ValidatedMatchers @@ -23,10 +24,11 @@ import org.specs2.mutable.Specification import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class SendgridAdapterSpec extends Specification with ValidatedMatchers { +class SendgridAdapterSpec extends Specification with ValidatedMatchers with CatsIO { object Shared { val api = CollectorPayload.Api("com.sendgrid", "v3") val cljSource = CollectorPayload.Source("clj-tomcat", "UTF-8", None) @@ -223,7 +225,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val actual = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) + val actual = SendgridAdapter.toRawEvents(payload, SpecHelpers.client).unsafeRunSync() "return the correct number of events" in { actual must beValid @@ -267,9 +269,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { "reject empty bodies" in { val invalidpayload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - val toBeRejected = SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client) - - toBeRejected must beInvalid + SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client).map(_ must beInvalid) } "reject empty content type" in { @@ -282,8 +282,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val toBeRejected = SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client) - toBeRejected must beInvalid + SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client).map(_ must beInvalid) } "reject unexpected content type" in { @@ -296,7 +295,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client) must beInvalid + SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client).map(_ must beInvalid) } "accept content types with explicit charsets" in { @@ -309,8 +308,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val res = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) - res must beValid + SendgridAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid) } "reject unsupported event types" in { @@ -339,7 +337,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.context ) - SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client) must beInvalid + SendgridAdapter.toRawEvents(invalidpayload, SpecHelpers.client).map(_ must beInvalid) } "reject invalid/unparsable json" in { @@ -355,7 +353,8 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.context ), SpecHelpers.client - ) must beInvalid + ) + .map(_ must beInvalid) } "reject valid json in incorrect format" in { @@ -371,7 +370,8 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.context ), SpecHelpers.client - ) must beInvalid + ) + .map(_ must beInvalid) } "reject a payload with a some valid, some invalid events" in { @@ -406,16 +406,19 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val actual = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.SchemaMapping( - None, - SendgridAdapter.EventSchemaMap, - "cannot determine event type: type parameter not provided at index 1" + SendgridAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.SchemaMapping( + None, + SendgridAdapter.EventSchemaMap, + "cannot determine event type: type parameter not provided at index 1" + ) + ) ) ) - ) } "return correct json for sample event, including stripping out event keypair and fixing timestamp" in { @@ -451,23 +454,26 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { val expectedJson = """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.sendgrid/processed/jsonschema/2-0-0","data":{"timestamp":"2015-11-03T11:20:15.000Z","email":"example@test.com","marketing_campaign_name":"campaign name","sg_event_id":"sZROwMGMagFgnOEmSdvhig==","smtp-id":"\u003c14c5d75ce93.dfd.64b469@ismtpd-555\u003e","marketing_campaign_version":"B","marketing_campaign_id":12345,"marketing_campaign_split_id":13471,"category":"cat facts","sg_message_id":"14c5d75ce93.dfd.64b469.filter0001.16648.5515E0B88.0"}}}""" - val actual = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map( - "tv" -> "com.sendgrid-v3", - "e" -> "ue", - "p" -> "srv", - "ue_pr" -> expectedJson // NB this includes removing the "event" keypair as redundant - ).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + SendgridAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map( + "tv" -> "com.sendgrid-v3", + "e" -> "ue", + "p" -> "srv", + "ue_pr" -> expectedJson // NB this includes removing the "event" keypair as redundant + ).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } "filter events if they are exact duplicates" in { @@ -513,11 +519,12 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers { Shared.context ) - val res = SendgridAdapter.toRawEvents(payload, SpecHelpers.client) - res must beValid.like { - case nel: NonEmptyList[RawEvent] => - nel.toList must have size 1 - } + SendgridAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid.like { + case nel: NonEmptyList[RawEvent] => + nel.toList must have size 1 + }) } } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala index da03f419b..e7c778119 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -23,10 +24,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class StatusGatorAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class StatusGatorAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a Success Nel if every event in the payload is successful $e1 toRawEvents must return a Nel Failure if the request body is missing $e2 @@ -88,18 +90,22 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate Shared.context ) ) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e2 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + StatusGatorAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e3 = { @@ -107,15 +113,19 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate "service_name=CloudFlare&favicon_url=https%3A%2F%2Fdwxjd9cd6rwno.cloudfront.net%2Ffavicons%2Fcloudflare.ico&status_page_url=https%3A%2F%2Fwww.cloudflarestatus.com%2F&home_page_url=http%3A%2F%2Fwww.cloudflare.com¤t_status=up&last_status=warn&occurred_at=2016-05-19T09%3A26%3A31%2B00%3A00" val payload = CollectorPayload(Shared.api, Nil, None, body.some, Shared.cljSource, Shared.context) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/x-www-form-urlencoded" + StatusGatorAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e4 = { @@ -124,15 +134,19 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate val ct = "application/json" val payload = CollectorPayload(Shared.api, Nil, ct.some, body.some, Shared.cljSource, Shared.context) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - "application/json".some, - "expected application/x-www-form-urlencoded" + StatusGatorAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + "application/json".some, + "expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e5 = { @@ -150,7 +164,7 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate FailureDetails.AdapterFailure .InputData("body", None, "empty body: no events to process") ) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e6 = { @@ -171,6 +185,6 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate "could not parse body: Illegal character in query at index 18: http://localhost/?{service_name=CloudFlare&favicon_url=https%3A%2F%2Fdwxjd9cd6rwno.cloudfront.net%2Ffavicons%2Fcloudflare.ico&status_page_url=https%3A%2F%2Fwww.cloudflarestatus.com%2F&home_page_url=http%3A%2F%2Fwww.cloudflare.com¤t_status=up&last_status=warn&occurred_at=2016-05-19T09%3A26%3A31%2B00%3A00" ) ) - StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + StatusGatorAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala index 29c2041cc..e218c9cd2 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -23,10 +24,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a Success Nel if the query string is valid $e1 toRawEvents must return a Nel Failure if the request body is missing $e2 @@ -117,19 +119,23 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa Shared.context ) ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beValid(expected)) } def e2 = { val params = SpecHelpers.toNameValuePairs("schema" -> "iglu:com.unbounce/test/jsonschema/1-0-0") val payload = CollectorPayload(Shared.api, params, ContentType.some, None, Shared.cljSource, Shared.context) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + UnbounceAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } def e3 = { @@ -137,15 +143,19 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa "page_id=f7afd389-65a3-45fa-8bad-b7a42236044c&page_name=Test-Webhook&variant=a&page_url=http%3A%2F%2Funbouncepages.com%2Ftest-webhook-1&data.json=%7B%22email%22%3A%5B%22test%40snowplowanalytics.com%22%5D%2C%22ip_address%22%3A%5B%22200.121.220.179%22%5D%2C%22time_submitted%22%3A%5B%2204%3A17%20PM%20UTC%22%5D%7D" val payload = CollectorPayload(Shared.api, Nil, None, body.some, Shared.cljSource, Shared.context) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - None, - "no content type: expected application/x-www-form-urlencoded" + UnbounceAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + None, + "no content type: expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e4 = { @@ -154,15 +164,19 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa val ct = "application/json" val payload = CollectorPayload(Shared.api, Nil, ct.some, body.some, Shared.cljSource, Shared.context) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "contentType", - "application/json".some, - "expected application/x-www-form-urlencoded" + UnbounceAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "contentType", + "application/json".some, + "expected application/x-www-form-urlencoded" + ) + ) ) ) - ) } def e5 = { @@ -181,7 +195,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("body", None, "empty body: no events to process") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e6 = { @@ -200,7 +214,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("data.json", None, "missing 'data.json' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e7 = { @@ -219,7 +233,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("data.json", None, "empty 'data.json' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e8 = { @@ -241,7 +255,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa """invalid json: expected " got '{"emai...' (line 1, column 2)""" ) ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e9 = { @@ -261,7 +275,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("page_id", None, "missing 'page_id' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e10 = { @@ -280,7 +294,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("page_name", None, "missing 'page_name' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e11 = { @@ -300,7 +314,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("variant", None, "missing 'variant' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } def e12 = { @@ -319,6 +333,6 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa FailureDetails.AdapterFailure .InputData("page_url", None, "missing 'page_url' field in body") ) - UnbounceAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid(expected) + UnbounceAdapter.toRawEvents(payload, SpecHelpers.client).map(_ must beInvalid(expected)) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala index b4c954481..7a5683df4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala @@ -16,6 +16,7 @@ package adapters.registry import cats.data.{NonEmptyList, Validated} import cats.syntax.either._ import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import io.circe.literal._ import io.circe.parser._ @@ -25,8 +26,11 @@ import org.specs2.mutable.Specification import loaders._ import utils.Clock._ +import utils.HttpClient._ -class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { +import SpecHelpers._ + +class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers with CatsIO { object Shared { val api = CollectorPayload.Api("com.urbanairship.connect", "v1") @@ -79,7 +83,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val actual = UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client) + val actual = UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client).unsafeRunSync() val expectedUnstructEventJson = json"""{ "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", @@ -128,13 +132,17 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid + UrbanAirshipAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid) } "reject unparsable json" in { val payload = CollectorPayload(Shared.api, Nil, None, """{ """.some, Shared.cljSource, Shared.context) - UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid + UrbanAirshipAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid) } "reject badly formatted json" in { @@ -147,7 +155,9 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid + UrbanAirshipAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid) } "reject content types" in { @@ -159,14 +169,16 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { Shared.cljSource, Shared.context ) - val res = UrbanAirshipAdapter.toRawEvents(payload, SpecHelpers.client) - - res must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("contentType", "a/type".some, "expected no content type") + UrbanAirshipAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("contentType", "a/type".some, "expected no content type") + ) + ) ) - ) } "populate content-type as None (it's not applicable)" in { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala index b1f6a9865..ec2f37d7e 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala @@ -16,6 +16,7 @@ package registry import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.snowplow.badrows._ import org.joda.time.DateTime import org.specs2.Specification @@ -23,10 +24,11 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import utils.HttpClient._ import SpecHelpers._ -class VeroAdapterSpec extends Specification with DataTables with ValidatedMatchers { +class VeroAdapterSpec extends Specification with DataTables with ValidatedMatchers with CatsIO { def is = s2""" toRawEvents must return a success for a valid "sent" type payload body being passed $e1 toRawEvents must return a success for a valid "delivered" type payload body being passed $e2 @@ -80,7 +82,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e2 = { @@ -108,7 +112,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e3 = { @@ -136,7 +142,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e4 = { @@ -164,7 +172,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e5 = { @@ -192,7 +202,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e6 = { @@ -220,7 +232,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e7 = { @@ -248,7 +262,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e8 = { @@ -276,7 +292,9 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche Shared.context ) ) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beValid(expected) + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beValid(expected)) } def e9 = @@ -300,28 +318,35 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche ) val expectedJson = "{\"schema\":\"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0\",\"data\":{\"schema\":\"" + expected + "\",\"data\":{}}}" - val actual = VeroAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Shared.api, - Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, - ContentType.some, - Shared.cljSource, - Shared.context + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Shared.api, + Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, + ContentType.some, + Shared.cljSource, + Shared.context + ) + ) ) ) - ) } def e10 = { val payload = CollectorPayload(Shared.api, Nil, ContentType.some, None, Shared.cljSource, Shared.context) - VeroAdapter.toRawEvents(payload, SpecHelpers.client) must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure - .InputData("body", None, "empty body: no events to process") + VeroAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure + .InputData("body", None, "empty body: no events to process") + ) + ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala index ab7a18c2f..1045b5c87 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -18,6 +18,8 @@ package snowplow import cats.data.NonEmptyList import cats.syntax.option._ +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.iglu.client._ import com.snowplowanalytics.iglu.client.validator._ import com.snowplowanalytics.iglu.core._ @@ -33,11 +35,12 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.{ConversionUtils => CU} +import utils.HttpClient._ import utils.Clock._ import SpecHelpers._ -class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { +class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck with CatsIO { def is = s2""" Tp1.toRawEvents should return a NEL containing one RawEvent if the querystring is populated $e1 Tp1.toRawEvents should return a Validation Failure if the querystring is empty $e2 @@ -93,25 +96,38 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp1Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList - .one(RawEvent(Snowplow.Tp1, Map("aid" -> "test").toOpt, None, Shared.source, Shared.context)) - ) + Tp1Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp1, + Map("aid" -> "test").toOpt, + None, + Shared.source, + Shared.context + ) + ) + ) + ) } def e2 = { val payload = CollectorPayload(Snowplow.Tp1, Nil, None, None, Shared.source, Shared.context) - val actual = Tp1Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.AdapterFailure.InputData( - "querystring", - None, - "empty querystring: not a valid URI redirect" + Tp1Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.AdapterFailure.InputData( + "querystring", + None, + "empty querystring: not a valid URI redirect" + ) + ) ) ) - ) } def e3 = { @@ -123,18 +139,21 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map("aid" -> "tp2", "e" -> "se").toOpt, - None, - Shared.source, - Shared.context + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map("aid" -> "tp2", "e" -> "se").toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e4 = { @@ -149,18 +168,21 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, - ApplicationJsonWithCharset.some, - Shared.source, - Shared.context + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, + ApplicationJsonWithCharset.some, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e5 = { @@ -176,8 +198,6 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - val rawEvent: RawEventParameters => RawEvent = params => RawEvent( Snowplow.Tp2, @@ -186,13 +206,18 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - actual must beValid( - NonEmptyList.of( - rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> "123").toOpt), - rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123").toOpt), - rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123").toOpt) + + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.of( + rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123").toOpt) + ) + ) ) - ) } def e6 = { @@ -206,18 +231,21 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, - ApplicationJsonWithCapitalCharset.some, - Shared.source, - Shared.context + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, + ApplicationJsonWithCapitalCharset.some, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e7 = @@ -274,8 +302,9 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid(expected) + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid(expected)) } def e8 = { @@ -287,13 +316,16 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation - .NotIglu(json"""{"not":"self-desc"}""", ParseError.InvalidData) + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation + .NotIglu(json"""{"not":"self-desc"}""", ParseError.InvalidData) + ) + ) ) - ) } def e9 = { @@ -306,32 +338,35 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation.IgluError( - SchemaKey( - "com.snowplowanalytics.snowplow", - "geolocation_context", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - ClientError.ValidationError( - ValidatorError.InvalidData( - NonEmptyList.one( - ValidatorReport( - "$.latitude: is missing but it is required", - "$".some, - List("latitude"), - "required".some - ) + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.IgluError( + SchemaKey( + "com.snowplowanalytics.snowplow", + "geolocation_context", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + ClientError.ValidationError( + ValidatorError.InvalidData( + NonEmptyList.one( + ValidatorReport( + "$.latitude: is missing but it is required", + "$".some, + List("latitude"), + "required".some + ) + ) + ), + None ) - ), - None + ) ) ) ) - ) } def e10 = @@ -441,8 +476,9 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.context ) - val actual = Tp2Adapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid(expected) + Tp2Adapter + .toRawEvents(payload, SpecHelpers.client) + .map(_ must beInvalid(expected)) } def e11 = { @@ -457,24 +493,27 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map( - "e" -> "ue", - "tv" -> "r-tp2", - "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}}""", - "p" -> "web", - "cx" -> "dGVzdHRlc3R0ZXN0" - ).toOpt, - None, - Shared.source, - Shared.context + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map( + "e" -> "ue", + "tv" -> "r-tp2", + "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}}""", + "p" -> "web", + "cx" -> "dGVzdHRlc3R0ZXN0" + ).toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e12 = { @@ -490,24 +529,27 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map( - "e" -> "se", - "aid" -> "ads", - "tv" -> "r-tp2", - "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", - "p" -> "web" - ).toOpt, - None, - Shared.source, - Shared.context + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map( + "e" -> "se", + "aid" -> "ads", + "tv" -> "r-tp2", + "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", + "p" -> "web" + ).toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e13 = { @@ -524,24 +566,27 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map( - "e" -> "se", - "aid" -> "ads", - "tv" -> "r-tp2", - "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", - "p" -> "web" - ).toOpt, - None, - Shared.source, - Shared.context + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map( + "e" -> "se", + "aid" -> "ads", + "tv" -> "r-tp2", + "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", + "p" -> "web" + ).toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e14 = { @@ -557,23 +602,26 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map( - "e" -> "se", - "tv" -> "r-tp2", - "co" -> """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""", - "p" -> "web" - ).toOpt, - None, - Shared.source, - Shared.context + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map( + "e" -> "se", + "tv" -> "r-tp2", + "co" -> """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""", + "p" -> "web" + ).toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e15 = { @@ -592,39 +640,45 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beValid( - NonEmptyList.one( - RawEvent( - Snowplow.Tp2, - Map( - "e" -> "se", - "tv" -> "r-tp2", - "cx" -> CU.encodeBase64Url( - """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""" - ), - "p" -> "web" - ).toOpt, - None, - Shared.source, - Shared.context + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beValid( + NonEmptyList.one( + RawEvent( + Snowplow.Tp2, + Map( + "e" -> "se", + "tv" -> "r-tp2", + "cx" -> CU.encodeBase64Url( + """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""" + ), + "p" -> "web" + ).toOpt, + None, + Shared.source, + Shared.context + ) + ) ) ) - ) } def e16 = { val payload = CollectorPayload(Snowplow.Tp2, Nil, None, None, Shared.source, Shared.context) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation.InputData( - "querystring", - None, - "empty querystring: not a valid URI redirect" + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + None, + "empty querystring: not a valid URI redirect" + ) + ) ) ) - ) } def e17 = { @@ -637,16 +691,19 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation.InputData( - "querystring", - "aid=test".some, - "missing `u` parameter: not a valid URI redirect" + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + "aid=test".some, + "missing `u` parameter: not a valid URI redirect" + ) + ) ) ) - ) } def e18 = { @@ -662,16 +719,19 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation.NotJson( - "co|cx", - "{[-".some, - """invalid json: expected " got '[-' (line 1, column 2)""" + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.NotJson( + "co|cx", + "{[-".some, + """invalid json: expected " got '[-' (line 1, column 2)""" + ) + ) ) ) - ) } def e19 = { @@ -687,13 +747,16 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation - .NotJson("co|cx", "".some, "invalid json: exhausted input") + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation + .NotJson("co|cx", "".some, "invalid json: exhausted input") + ) + ) ) - ) } def e20 = { @@ -708,15 +771,18 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Shared.source, Shared.context ) - val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) - actual must beInvalid( - NonEmptyList.one( - FailureDetails.TrackerProtocolViolation.InputData( - "querystring", - "u=null&cx=dGVzdHRlc3R0ZXN0".some, - "missing `u` parameter: not a valid URI redirect" + RedirectAdapter + .toRawEvents(payload, SpecHelpers.client) + .map( + _ must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + "u=null&cx=dGVzdHRlc3R0ZXN0".some, + "missing `u` parameter: not a valid URI redirect" + ) + ) ) ) - ) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/CachingEvaluatorSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/CachingEvaluatorSpec.scala index 099c6fde5..ceefb94f3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/CachingEvaluatorSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/CachingEvaluatorSpec.scala @@ -12,18 +12,24 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments -import cats.Id +import scala.concurrent.duration.TimeUnit + import cats.effect.Clock -import com.snowplowanalytics.snowplow.enrich.common.enrichments.CachingEvaluatorSpec.{TestClock, TestContext} -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CachingEvaluator -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CachingEvaluator._ + +import cats.effect.IO + +import cats.effect.testing.specs2.CatsIO + import io.circe.Json import io.circe.literal.JsonStringContext + import org.specs2.mutable.Specification -import scala.concurrent.duration.TimeUnit +import com.snowplowanalytics.snowplow.enrich.common.enrichments.CachingEvaluatorSpec.{TestClock, TestContext} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CachingEvaluator +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CachingEvaluator._ -class CachingEvaluatorSpec extends Specification { +class CachingEvaluatorSpec extends Specification with CatsIO { private val successTtl = 5 private val errorTtl = 2 @@ -31,120 +37,111 @@ class CachingEvaluatorSpec extends Specification { "Cached evaluation should work when" >> { "TTL is not exceeded, second call not evaluated" >> { "for success" in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) - - context.addSeconds(4) // for success => 4 < 5 - - val v2 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) - - v1 must beRight(json""" { "field": "value1" } """) - v2 must beRight(json""" { "field": "value1" } """) + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) + _ <- IO(context.addSeconds(4)) // for success => 4 < 5 + v2 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) + } yield { + v1 must beRight(json""" { "field": "value1" } """) + v2 must beRight(json""" { "field": "value1" } """) + } } "for errors" in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) - - context.addSeconds(1) // for error => 1 < 2 - - val v2 = getValue(context, ifEvaluated = error(new RuntimeException("This second error should not be evaluated!"))) - - v1.left.map(_.getMessage) must beLeft("Some error1!") - v2.left.map(_.getMessage) must beLeft("Some error1!") + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) + _ <- IO(context.addSeconds(1)) // for error => 1 < 2 + v2 <- getValue(context, ifEvaluated = error(new RuntimeException("This second error should not be evaluated!"))) + } yield { + v1.left.map(_.getMessage) must beLeft("Some error1!") + v2.left.map(_.getMessage) must beLeft("Some error1!") + } } } "TTL is exceeded, second call is evaluated" >> { "1 call - success, 2 call - success => use new json" in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) - - context.addSeconds(6) // for success => 6 > 5 - - val v2 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) - - v1 must beRight(json""" { "field": "value1" } """) - v2 must beRight(json""" { "field": "value2" } """) + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) + _ <- IO(context.addSeconds(6)) // for success => 6 > 5 + v2 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) + } yield { + v1 must beRight(json""" { "field": "value1" } """) + v2 must beRight(json""" { "field": "value2" } """) + } } "1 call - success, 2 call - error => fallback to previous success, still TTL for errors in force " in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) - - context.addSeconds(6) // for success => 6 > 5 - - val v2 = getValue(context, ifEvaluated = error(new RuntimeException("This second error should be evaluated but not returned!"))) - - context.addSeconds(3) // for error => 3 > 2 - - val v3 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) - - v1 must beRight(json""" { "field": "value1" } """) - v2 must beRight(json""" { "field": "value1" } """) - v3 must beRight(json""" { "field": "value2" } """) + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value1" } """)) + _ <- IO(context.addSeconds(6)) // for success => 6 > 5 + v2 <- getValue(context, ifEvaluated = error(new RuntimeException("This second error should be evaluated but not returned!"))) + _ <- IO(context.addSeconds(3)) // for error => 3 > 2 + v3 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) + } yield { + v1 must beRight(json""" { "field": "value1" } """) + v2 must beRight(json""" { "field": "value1" } """) + v3 must beRight(json""" { "field": "value2" } """) + } } "1 call - error, 2 call - error => use new error" in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) - - context.addSeconds(3) // for error => 3 > 2 - - val v2 = getValue(context, ifEvaluated = error(new RuntimeException("This second error should be evaluated!"))) - - v1.left.map(_.getMessage) must beLeft("Some error1!") - v2.left.map(_.getMessage) must beLeft("This second error should be evaluated!") + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) + _ <- IO(context.addSeconds(3)) // for error => 3 > 2 + v2 <- getValue(context, ifEvaluated = error(new RuntimeException("This second error should be evaluated!"))) + } yield { + v1.left.map(_.getMessage) must beLeft("Some error1!") + v2.left.map(_.getMessage) must beLeft("This second error should be evaluated!") + } } "1 call - error, 2 call - success => use new json" in { - val context = setupContext() - - val v1 = getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) - - context.addSeconds(3) // for error => 3 > 2 - - val v2 = getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) - - v1.left.map(_.getMessage) must beLeft("Some error1!") - v2 must beRight(json""" { "field": "value2" } """) + for { + context <- setupContext() + v1 <- getValue(context, ifEvaluated = error(new RuntimeException("Some error1!"))) + _ <- IO(context.addSeconds(3)) // for error => 3 > 2 + v2 <- getValue(context, ifEvaluated = successful(result = json""" { "field": "value2" } """)) + } yield { + v1.left.map(_.getMessage) must beLeft("Some error1!") + v2 must beRight(json""" { "field": "value2" } """) + } } } - } - private def getValue(context: TestContext, ifEvaluated: GetResult[Id, Json]): Either[Throwable, Json] = { + private def getValue(context: TestContext, ifEvaluated: GetResult[IO, Json]): IO[Either[Throwable, Json]] = { implicit val clock: TestClock = context.clock context.evaluation.evaluateForKey("key", ifEvaluated) } - private def setupContext(): TestContext = - TestContext( - new TestClock, - CachingEvaluator.create[Id, String, Json](Config(size = 1, successTtl, errorTtl)) - ) + private def setupContext(): IO[TestContext] = + for { + evaluator <- CachingEvaluator.create[IO, String, Json](Config(size = 1, successTtl, errorTtl)) + context = TestContext(new TestClock, evaluator) + } yield context - private def successful(result: Json): GetResult[Id, Json] = () => Right(result) - private def error(ex: Throwable): GetResult[Id, Json] = () => Left(ex) + private def successful(result: Json): GetResult[IO, Json] = () => IO.pure(Right(result)) + private def error(ex: Throwable): GetResult[IO, Json] = () => IO.pure(Left(ex)) } object CachingEvaluatorSpec { - final case class TestContext(clock: TestClock, evaluation: CachingEvaluator[Id, String, Json]) { + final case class TestContext(clock: TestClock, evaluation: CachingEvaluator[IO, String, Json]) { def addSeconds(value: Int): Unit = clock.secondsCounter += value } - final class TestClock extends Clock[Id] { + final class TestClock extends Clock[IO] { var secondsCounter: Long = 0 - override def realTime(unit: TimeUnit): Id[Long] = secondsCounter - override def monotonic(unit: TimeUnit): Id[Long] = secondsCounter + override def realTime(unit: TimeUnit): IO[Long] = IO.pure(secondsCounter) + override def monotonic(unit: TimeUnit): IO[Long] = IO.pure(secondsCounter) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index 5b3a3bbc3..f70dd68c3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -14,7 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common package enrichments -import cats.Id +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO import cats.implicits._ import cats.data.NonEmptyList import io.circe.literal._ @@ -31,8 +32,8 @@ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.{ PiiStrategyPseudonymize } import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import utils.Clock._ import utils.ConversionUtils +import utils.Clock._ import enrichments.registry.{HttpHeaderExtractorEnrichment, IabEnrichment, JavascriptScriptEnrichment, YauaaEnrichment} import org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification @@ -40,7 +41,7 @@ import org.specs2.matcher.EitherMatchers import SpecHelpers._ import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailureMessage -class EnrichmentManagerSpec extends Specification with EitherMatchers { +class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsIO { import EnrichmentManagerSpec._ "enrichEvent" should { @@ -49,7 +50,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -65,20 +67,20 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { """ ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - - enriched.value must beLeft.like { - case _: BadRow.SchemaViolations => ok - case br => ko(s"bad row [$br] is not SchemaViolations") - } + enriched.value + .map(_ must beLeft.like { + case _: BadRow.SchemaViolations => ok + case br => ko(s"bad row [$br] is not SchemaViolations") + }) } "return a SchemaViolations bad row if the input unstructured event is invalid" >> { @@ -86,7 +88,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -100,29 +103,32 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft.like { - case _: BadRow.SchemaViolations => ok - case br => ko(s"bad row [$br] is not SchemaViolations") - } + enriched.value + .map(_ must beLeft.like { + case _: BadRow.SchemaViolations => ok + case br => ko(s"bad row [$br] is not SchemaViolations") + }) } "return an EnrichmentFailures bad row if one of the enrichment (JS enrichment here) fails" >> { - val script = """ + val script = + """ function process(event) { throw "Javascript exception"; return [ { a: "b" } ]; }""" - val config = json"""{ + val config = + json"""{ "parameters": { "script": ${ConversionUtils.encodeBase64Url(script)} } @@ -136,7 +142,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { val jsEnrichConf = JavascriptScriptEnrichment.parse(config, schemaKey).toOption.get val jsEnrich = JavascriptScriptEnrichment(jsEnrichConf.schemaKey, jsEnrichConf.rawFunction) - val enrichmentReg = EnrichmentRegistry[Id](javascriptScript = Some(jsEnrich)) + val enrichmentReg = EnrichmentRegistry[IO](javascriptScript = Some(jsEnrich)) val parameters = Map( "e" -> "pp", @@ -144,40 +150,42 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "p" -> "web" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft.like { - case BadRow.EnrichmentFailures( - _, - Failure.EnrichmentFailures( + enriched.value + .map(_ must beLeft.like { + case BadRow.EnrichmentFailures( _, - NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - _: FailureDetails.EnrichmentFailureMessage.Simple - ), - Nil - ) - ), - _ - ) => - ok - case br => - ko( - s"bad row [$br] is not an EnrichmentFailures containing one EnrichmentFailureMessage.Simple" - ) - } + Failure.EnrichmentFailures( + _, + NonEmptyList( + FailureDetails.EnrichmentFailure( + _, + _: FailureDetails.EnrichmentFailureMessage.Simple + ), + Nil + ) + ), + _ + ) => + ok + case br => + ko( + s"bad row [$br] is not an EnrichmentFailures containing one EnrichmentFailureMessage.Simple" + ) + }) } "return an EnrichmentFailures bad row containing one IgluError if one of the contexts added by the enrichments is invalid" >> { - val script = """ + val script = + """ function process(event) { return [ { schema: "iglu:com.acme/email_sent/jsonschema/1-0-0", data: { @@ -187,7 +195,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } ]; }""" - val config = json"""{ + val config = + json"""{ "parameters": { "script": ${ConversionUtils.encodeBase64Url(script)} } @@ -201,7 +210,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { val jsEnrichConf = JavascriptScriptEnrichment.parse(config, schemaKey).toOption.get val jsEnrich = JavascriptScriptEnrichment(jsEnrichConf.schemaKey, jsEnrichConf.rawFunction) - val enrichmentReg = EnrichmentRegistry[Id](javascriptScript = Some(jsEnrich)) + val enrichmentReg = EnrichmentRegistry[IO](javascriptScript = Some(jsEnrich)) val parameters = Map( "e" -> "pp", @@ -209,33 +218,34 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "p" -> "web" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft.like { - case BadRow.EnrichmentFailures( - _, - Failure.EnrichmentFailures( + enriched.value + .map(_ must beLeft.like { + case BadRow.EnrichmentFailures( _, - NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - _: FailureDetails.EnrichmentFailureMessage.IgluError - ), - Nil - ) - ), - payload - ) if payload.enriched.derived_contexts.isDefined => - ok - case br => ko(s"bad row [$br] is not an EnrichmentFailures containing one IgluError and with derived_contexts defined") - } + Failure.EnrichmentFailures( + _, + NonEmptyList( + FailureDetails.EnrichmentFailure( + _, + _: FailureDetails.EnrichmentFailureMessage.IgluError + ), + Nil + ) + ), + payload + ) if payload.enriched.derived_contexts.isDefined => + ok + case br => ko(s"bad row [$br] is not an EnrichmentFailures containing one IgluError and with derived_contexts defined") + }) } "emit an EnrichedEvent if everything goes well" >> { @@ -243,7 +253,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -257,7 +268,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -270,16 +282,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight + enriched.value.map(_ must beRight) } "emit an EnrichedEvent if a PII value that needs to be hashed is an empty string" >> { @@ -287,7 +299,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -301,7 +314,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -315,7 +329,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiJson( @@ -332,16 +346,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight + enriched.value.map(_ must beRight) } "emit an EnrichedEvent if a PII value that needs to be hashed is null" >> { @@ -349,7 +363,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -363,7 +378,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -377,7 +393,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiJson( @@ -394,16 +410,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight + enriched.value.map(_ must beRight) } "fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> { @@ -411,7 +427,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -425,7 +442,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -439,7 +457,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiJson( @@ -456,16 +474,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft + enriched.value.map(_ must beLeft) } "fail to emit an EnrichedEvent if a context PII value that needs to be hashed is an empty object" >> { @@ -473,7 +491,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -488,7 +507,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -501,7 +521,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiJson( @@ -518,16 +538,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft + enriched.value.map(_ must beLeft) } "fail to emit an EnrichedEvent if a PII value needs to be hashed in both co and ue and is invalid in one of them" >> { @@ -535,7 +555,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "ue", "tv" -> "js-0.13.1", "p" -> "web", - "co" -> """ + "co" -> + """ { "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", "data": [ @@ -550,7 +571,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """, - "ue_pr" -> """ + "ue_pr" -> + """ { "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data":{ @@ -564,7 +586,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiJson( @@ -586,16 +608,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beLeft + enriched.value.map(_ must beLeft) } "emit an EnrichedEvent for valid integer fields" >> { @@ -612,16 +634,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { field -> integer ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight + enriched.value.unsafeRunSync() must beRight } } .reduce(_ and _) @@ -641,16 +663,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { field -> decimal ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight + enriched.value.unsafeRunSync() must beRight } } .reduce(_ and _) @@ -679,16 +701,16 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "ev_va" -> input ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight { ee: EnrichedEvent => + enriched.value.unsafeRunSync() must beRight { ee: EnrichedEvent => ee.se_value.toString must_== expected } } @@ -705,17 +727,20 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value.map(_.useragent) must beRight(qs_ua) - enriched.value.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"Firefox\"")) + enriched.value.map { e => + val res1 = e.map(_.useragent) must beRight(qs_ua) + val res2 = e.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"Firefox\"")) + res1 and res2 + } } "use user agent of HTTP header if 'ua' query string parameter is not set" >> { @@ -726,16 +751,18 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value.map(_.useragent) must beRight("header-useragent") + enriched.value.map { e => + e.map(_.useragent) must beRight("header-useragent") + } } "accept user agent of HTTP header when it is not URL decodable" >> { @@ -747,16 +774,18 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { val ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 %1$s/%2$s Firefox/75.0" val contextWithUa = context.copy(useragent = Some(ua)) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value.map(_.useragent) must beRight(ua) + enriched.value.map { e => + e.map(_.useragent) must beRight(ua) + } } "accept 'ua' in query string when it is not URL decodable" >> { @@ -769,17 +798,20 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value.map(_.useragent) must beRight(qs_ua) - enriched.value.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"%1$S\"")) + enriched.value.map { e => + val res1 = e.map(_.useragent) must beRight(qs_ua) + val res2 = e.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"%1$S\"")) + res1 and res2 + } } "pass derived contexts generated by previous enrichments to the JavaScript enrichment" >> { @@ -798,7 +830,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "jsonschema", SchemaVer.Full(1, 0, 0) ) - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( javascriptScript = Some(JavascriptScriptEnrichment(schemaKey, script)), httpHeaderExtractor = Some(HttpHeaderExtractorEnrichment(".*")) ) @@ -810,16 +842,18 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val headerContext = context.copy(headers = List("X-Tract-Me: moo")) val rawEvent = RawEvent(api, parameters, None, source, headerContext) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg, client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value.map(_.app_id) must beRight("moo") + enriched.value.map { e => + e.map(_.app_id) must beRight("moo") + } } "emit an EnrichedEvent with superseded schemas" >> { @@ -959,17 +993,17 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent[Id]( + val enriched = EnrichmentManager.enrichEvent[IO]( enrichmentReg.copy(yauaa = None), client, processor, timestamp, rawEvent, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) - enriched.value must beRight.like { + enriched.value.map(_ must beRight.like { case e: EnrichedEvent => val p = EnrichedEvent.toPartiallyEnrichedEvent(e) val contextsJson = jparse(p.contexts.get).toOption.get @@ -979,7 +1013,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { (derivedContextsJson must beEqualTo(expectedDerivedContexts)) and (ueJson must beEqualTo(expectedUnstructEvent)) case _ => ko - } + }) } } @@ -989,10 +1023,13 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUser_ipaddress("127.0.0.1") input.setDerived_tstamp("2010-06-30 01:20:01.000") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must beEmpty) + }) } "return no context if user_ipaddress is null" >> { @@ -1000,10 +1037,13 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUseragent("Firefox") input.setDerived_tstamp("2010-06-30 01:20:01.000") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must beEmpty) + }) } "return no context if derived_tstamp is null" >> { @@ -1011,10 +1051,13 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUser_ipaddress("127.0.0.1") input.setUseragent("Firefox") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must beEmpty) + }) } "return no context if user_ipaddress is invalid" >> { @@ -1023,10 +1066,13 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUseragent("Firefox") input.setDerived_tstamp("2010-06-30 01:20:01.000") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must beEmpty) + }) } "return no context if user_ipaddress is hostname (don't try to resovle it)" >> { @@ -1035,10 +1081,13 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUseragent("Firefox") input.setDerived_tstamp("2010-06-30 01:20:01.000") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must beEmpty) + }) } "return Some if all arguments are valid" >> { @@ -1047,52 +1096,70 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { input.setUseragent("Firefox") input.setDerived_tstamp("2010-06-30 01:20:01.000") val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getIabContext[Id](iabEnrichment).runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - (acc.errors must beEmpty) and (acc.contexts must not beEmpty) - } + EnrichmentManager + .getIabContext[IO](iabEnrichment) + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + (acc.errors must beEmpty) and (acc.contexts must not beEmpty) + }) } } "getCollectorVersionSet" should { - "return an enrichment failure if v_collector is null or empty" >> { + "return an enrichment failure if v_collector is null" >> { val input = new EnrichedEvent() val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getCollectorVersionSet[Id].runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - acc.errors must not beEmpty - } + EnrichmentManager + .getCollectorVersionSet[IO] + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + acc.errors must not beEmpty + }) + } + + "return an enrichment failure if v_collector is empty" >> { + val input = new EnrichedEvent() input.v_collector = "" - EnrichmentManager.getCollectorVersionSet[Id].runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - acc.errors must not beEmpty - } + val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) + EnrichmentManager + .getCollectorVersionSet[IO] + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + acc.errors must not beEmpty + }) } "return Unit if v_collector is set" >> { val input = new EnrichedEvent() input.v_collector = "v42" val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) - EnrichmentManager.getCollectorVersionSet[Id].runS(inputState) must beLike { - case acc: EnrichmentManager.Accumulation => - acc.errors must beEmpty - } + EnrichmentManager + .getCollectorVersionSet[IO] + .runS(inputState) + .map(_ must beLike { + case acc: EnrichmentManager.Accumulation => + acc.errors must beEmpty + }) } } "validateEnriched" should { "create a bad row if a field is oversized" >> { val result = EnrichmentManager - .enrichEvent[Id]( + .enrichEvent[IO]( enrichmentReg, client, processor, timestamp, RawEvent(api, fatBody, None, source, context), featureFlags = AcceptInvalid.featureFlags.copy(acceptInvalid = false), - AcceptInvalid.countInvalid + IO.unit ) .value + .unsafeRunSync() result must beLeft.like { case badRow: BadRow.EnrichmentFailures => @@ -1110,16 +1177,17 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "not create a bad row if a field is oversized and acceptInvalid is set to true" >> { val result = EnrichmentManager - .enrichEvent[Id]( + .enrichEvent[IO]( enrichmentReg, client, processor, timestamp, RawEvent(api, fatBody, None, source, context), featureFlags = AcceptInvalid.featureFlags.copy(acceptInvalid = true), - AcceptInvalid.countInvalid + IO.unit ) .value + .unsafeRunSync() result must beRight[EnrichedEvent] } @@ -1128,7 +1196,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { object EnrichmentManagerSpec { - val enrichmentReg = EnrichmentRegistry[Id](yauaa = Some(YauaaEnrichment(None))) + val enrichmentReg = EnrichmentRegistry[IO](yauaa = Some(YauaaEnrichment(None))) val client = SpecHelpers.client val processor = Processor("ssc-tests", "0.0.0") val timestamp = DateTime.now() @@ -1187,7 +1255,8 @@ object EnrichmentManagerSpec { ) .toOption .getOrElse(throw new RuntimeException("IAB enrichment couldn't be initialised")) // to make sure it's not none - .enrichment[Id] + .enrichment[IO] + .unsafeRunSync() .some } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala index 09c5d38eb..2d3891361 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,26 +12,30 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import cats.Id +import java.math.BigDecimal + +import org.joda.money.CurrencyUnit +import org.joda.time.DateTime + +import org.specs2.Specification + import cats.data.{NonEmptyList, Validated, ValidatedNel} import cats.implicits._ +import cats.effect.IO + +import cats.effect.testing.specs2.CatsIO + import com.snowplowanalytics.forex.CreateForex._ import com.snowplowanalytics.forex.model._ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} import com.snowplowanalytics.snowplow.badrows.FailureDetails -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf - -import org.joda.money.CurrencyUnit -import org.joda.time.DateTime -import java.math.BigDecimal - -import org.specs2.Specification +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf -class CurrencyConversionEnrichmentSpec extends Specification { +class CurrencyConversionEnrichmentSpec extends Specification with CatsIO { import CurrencyConversionEnrichmentSpec._ def is = @@ -79,8 +83,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { ) ) ) - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e2 = { @@ -101,8 +104,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { "Unknown currency 'HUL'" ) ).invalidNel - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e3 = { @@ -122,8 +124,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { "Open Exchange Rates error, type: [OtherErrors], message: [invalid_app_id]" ) ).invalidNel - val actual = runEnrichment(input, wrongKey) - actual must beEqualTo(expected) + runEnrichment(input, wrongKey).map(_ must beEqualTo(expected)) } def e4 = { @@ -145,8 +146,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { "missing" ) ).invalidNel - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e5 = { @@ -165,8 +165,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { FailureDetails.EnrichmentFailureMessage .InputData("collector_tstamp", None, "missing") ).invalidNel - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e6 = { @@ -181,8 +180,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (Some(new BigDecimal("12.75")), None, None, None).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e7 = { @@ -197,8 +195,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (None, Some(new BigDecimal("3.09")), Some(new BigDecimal("0.00")), None).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e8 = { @@ -213,8 +210,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (None, None, None, Some(new BigDecimal("15.05"))).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e9 = { @@ -229,8 +225,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (Some(new BigDecimal("12.75")), Some(new BigDecimal("3.09")), Some(new BigDecimal("0.00")), None).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e10 = { @@ -246,8 +241,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { ) val expected: Result = (None, None, None, Some(new BigDecimal("12.74"))).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e11 = { @@ -262,8 +256,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (None, None, None, None).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e12 = { @@ -284,8 +277,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(new BigDecimal("0.00")), Some(new BigDecimal("12.99")) ).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } def e13 = { @@ -300,8 +292,7 @@ class CurrencyConversionEnrichmentSpec extends Specification { Some(coTstamp) ) val expected: Result = (Some(new BigDecimal("18.54")), Some(new BigDecimal("3.09")), Some(new BigDecimal("0.00")), None).valid - val actual = runEnrichment(input) - actual must beEqualTo(expected) + runEnrichment(input).map(_ must beEqualTo(expected)) } } @@ -342,7 +333,7 @@ object CurrencyConversionEnrichmentSpec { ) = for { e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR) - .enrichment[Id] + .enrichment[IO] res <- e.convertCurrencies( input.trCurrency, input.trTotal, diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala index 5ca3de718..c16b10d0a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala @@ -14,8 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.InetAddress -import cats.Id -import cats.syntax.functor._ +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO import io.circe.literal._ @@ -26,7 +26,7 @@ import inet.ipaddr.HostName import org.specs2.Specification import org.specs2.matcher.DataTables -class IabEnrichmentSpec extends Specification with DataTables { +class IabEnrichmentSpec extends Specification with DataTables with CatsIO { def is = s2""" performCheck should correctly perform IAB checks on valid input $e1 @@ -80,7 +80,7 @@ class IabEnrichmentSpec extends Specification with DataTables { expectedReason, expectedPrimaryImpact ) => - validConfig.enrichment[Id].map { e => + validConfig.enrichment[IO].map { e => e.performCheck(userAgent, ipAddress, DateTime.now()) must beRight.like { case check => check.spiderOrRobot must_== expectedSpiderOrRobot and @@ -98,9 +98,10 @@ class IabEnrichmentSpec extends Specification with DataTables { json"""{"spiderOrRobot": false, "category": "BROWSER", "reason": "PASSED_ALL", "primaryImpact": "NONE"}""" ) validConfig - .enrichment[Id] - .map(_.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now())) must - beRight(responseJson) + .enrichment[IO] + .map { e => + e.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now()) must beRight(responseJson) + } } private implicit class IpOps(s: String) { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IpLookupsEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IpLookupsEnrichmentSpec.scala index 85107666e..f2040968f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IpLookupsEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IpLookupsEnrichmentSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,19 +12,26 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import cats.Id +import scala.concurrent.ExecutionContext + import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} -import com.snowplowanalytics.maxmind.iplookups.model.IpLocation -import com.snowplowanalytics.snowplow.enrich.common.utils.BlockerF +import cats.effect.{Blocker, IO} + +import cats.effect.testing.specs2.CatsIO import io.circe.literal._ import org.specs2.Specification import org.specs2.matcher.DataTables -class IpLookupsEnrichmentSpec extends Specification with DataTables { +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.maxmind.iplookups.model.IpLocation + +class IpLookupsEnrichmentSpec extends Specification with DataTables with CatsIO { + val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global) + def is = s2""" extractIpInformation should correctly extract location data from IP addresses where possible $e1 extractIpInformation should correctly extract ISP data from IP addresses where possible $e2 @@ -97,15 +104,17 @@ class IpLookupsEnrichmentSpec extends Specification with DataTables { continent = "Asia", accuracyRadius = 100 ).asRight.some |> { (_, ipAddress, expected) => - (for { - e <- config.enrichment[Id](BlockerF.noop) - res <- e.extractIpInformation(ipAddress) - } yield res.ipLocation).map(_.leftMap(_.getClass.getSimpleName)) must_== expected + for { + ipLookup <- config.enrichment[IO](blocker) + result <- ipLookup.extractIpInformation(ipAddress) + ipLocation = result.ipLocation.map(_.leftMap(_.getClass.getSimpleName)) + } yield ipLocation must beEqualTo(expected) } def e2 = - config - .enrichment[Id](BlockerF.noop) - .extractIpInformation("70.46.123.145") - .isp must_== "FDN Communications".asRight.some + for { + ipLookup <- config.enrichment[IO](blocker) + result <- ipLookup.extractIpInformation("70.46.123.145") + isp = result.isp + } yield isp must beEqualTo(Some(Right("FDN Communications"))) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala index e63f42bb8..7b3458e1c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -14,23 +14,27 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Id import cats.data.EitherT import cats.syntax.either._ -import io.circe.literal._ +import cats.effect.IO -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} -import com.snowplowanalytics.refererparser._ +import cats.effect.testing.specs2.CatsIO + +import io.circe.literal._ import org.specs2.Specification import org.specs2.matcher.DataTables +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.refererparser._ + /** * A small selection of tests partially borrowed from referer-parser. * This is a very imcomplete set - more a tripwire than an exhaustive test. */ -class RefererParserEnrichmentSpec extends Specification with DataTables { +class RefererParserEnrichmentSpec extends Specification with DataTables with CatsIO { def is = s2""" parsing referer URIs should work $e1 tabs and newlines in search terms should be replaced $e2 @@ -60,7 +64,7 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { Medium.Unknown ) |> { (_, refererUri, referer) => (for { - c <- EitherT.fromEither[Id]( + c <- EitherT.fromEither[IO]( RefererParserEnrichment .parse( json"""{ @@ -84,16 +88,16 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Id] + e <- c.enrichment[IO] res = e.extractRefererDetails(new URI(refererUri), PageHost) - } yield res).value must beRight.like { + } yield res).value.map(_ must beRight.like { case o => o must beSome(referer) - } + }) } def e2 = (for { - c <- EitherT.fromEither[Id]( + c <- EitherT.fromEither[IO]( RefererParserEnrichment .parse( json"""{ @@ -117,14 +121,14 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Id] + e <- c.enrichment[IO] res = e.extractRefererDetails( new URI( "http://www.google.com/search?q=%0Agateway%09oracle%09cards%09denise%09linn&hl=en&client=safari" ), PageHost ) - } yield res).value must beRight.like { + } yield res).value.map(_ must beRight.like { case o => o must beSome( SearchReferer( @@ -133,5 +137,5 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { Some("gateway oracle cards denise linn") ) ) - } + }) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala index 1de3ad4aa..18f9ae8a1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala @@ -1,5 +1,5 @@ /** - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -14,8 +14,15 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Id +import org.specs2.matcher.DataTables +import org.specs2.mutable.Specification + import cats.data.EitherT +import cats.implicits._ + +import cats.effect.IO + +import cats.effect.testing.specs2.CatsIO import io.circe.literal._ @@ -23,10 +30,7 @@ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UaParserConf -import org.specs2.matcher.DataTables -import org.specs2.mutable.Specification - -class UaParserEnrichmentSpec extends Specification with DataTables { +class UaParserEnrichmentSpec extends Specification with DataTables with CatsIO { val mobileSafariUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3" @@ -77,12 +81,12 @@ class UaParserEnrichmentSpec extends Specification with DataTables { "Custom Rules" | "Input UserAgent" | "Parsed UserAgent" | Some(badRulefile) !! mobileSafariUserAgent !! "Failed to initialize ua parser" |> { (rules, input, errorPrefix) => (for { - c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Id] + c <- EitherT.rightT[IO, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[IO] res = e.extractUserAgent(input) - } yield res).value must beLeft.like { + } yield res).value.map(_ must beLeft.like { case a => a must startWith(errorPrefix) - } + }) } } @@ -91,12 +95,11 @@ class UaParserEnrichmentSpec extends Specification with DataTables { None !! mobileSafariUserAgent !! mobileSafariJson | None !! safariUserAgent !! safariJson | Some(customRules) !! mobileSafariUserAgent !! testAgentJson |> { (rules, input, expected) => - val json = for { - c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Id].leftMap(_.toString) - res <- EitherT.fromEither[Id](e.extractUserAgent(input)).leftMap(_.toString) - } yield res - json.value must beRight(expected) + (for { + c <- EitherT.rightT[IO, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[IO] + res <- EitherT(e.extractUserAgent(input).map(_.leftMap(_.toString()))) + } yield res).value.map(_ must beRight(expected)) } } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala index 0ee2372d2..f616b6f13 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala @@ -1,5 +1,5 @@ /** - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -14,25 +14,28 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.lang.{Float => JFloat} -import cats.Id +import org.joda.time.DateTime + +import org.specs2.Specification + import cats.data.EitherT +import cats.effect.IO + +import cats.effect.testing.specs2.CatsIO + import io.circe.generic.auto._ import io.circe.literal._ -import org.joda.time.DateTime - import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.WeatherConf -import org.specs2.Specification - object WeatherEnrichmentSpec { val OwmApiKey = "OWM_KEY" } -class WeatherEnrichmentSpec extends Specification { +class WeatherEnrichmentSpec extends Specification with CatsIO { import WeatherEnrichmentSpec._ def is = skipAllIf(sys.env.get(OwmApiKey).isEmpty) ^ // Actually only e4 and e6 need to be skipped @@ -67,10 +70,10 @@ class WeatherEnrichmentSpec extends Specification { var time: DateTime = new DateTime("2020-04-28T12:00:00.000+00:00") } - def e1 = { - val res = for { + def e1 = + (for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Id] + .enrichment[IO] stamp <- EitherT( enr.getWeatherContext( Option(invalidEvent.lat), @@ -78,17 +81,15 @@ class WeatherEnrichmentSpec extends Specification { Option(invalidEvent.time) ) ).leftMap(_.head.toString) - } yield stamp - res.value must beLeft.like { + } yield stamp).value.map(_ must beLeft.like { case e => e must contain("InputData(derived_tstamp,None,missing)") - } - } + }) - def e2 = { - val res = for { + def e2 = + (for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 10, 5200, 1) - .enrichment[Id] + .enrichment[IO] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -96,14 +97,12 @@ class WeatherEnrichmentSpec extends Specification { Option(validEvent.time) ) ).leftMap(_.head.toString) - } yield stamp - res.value must beRight - } + } yield stamp).value.map(_ must beRight) - def e3 = { - val res = for { + def e3 = + (for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Id] + .enrichment[IO] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -111,14 +110,12 @@ class WeatherEnrichmentSpec extends Specification { Option(validEvent.time) ) ).leftMap(_.head.toString) - } yield stamp - res.value must beLeft.like { case e => e must contain("Check your API key") } - } + } yield stamp).value.map(_ must beLeft.like { case e => e must contain("Check your API key") }) - def e4 = { - val res = for { + def e4 = + (for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 5200, 1) - .enrichment[Id] + .enrichment[IO] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -126,13 +123,11 @@ class WeatherEnrichmentSpec extends Specification { Option(validEvent.time) ) ).leftMap(_.head.toString) - } yield stamp - res.value must beRight.like { + } yield stamp).value.map(_ must beRight.like { case weather => val temp = weather.data.hcursor.downField("main").get[Double]("humidity") temp must beRight(69.0d) - } - } + }) def e5 = { val configJson = json"""{ @@ -166,10 +161,10 @@ class WeatherEnrichmentSpec extends Specification { ) } - def e6 = { - val res = for { + def e6 = + (for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 2, 1) - .enrichment[Id] + .enrichment[IO] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -177,14 +172,11 @@ class WeatherEnrichmentSpec extends Specification { Option(validEvent.time) ) ).leftMap(_.head.toString) - } yield stamp - res.value must beRight.like { // successful request + } yield stamp).value.map(_ must beRight.like { // successful request case weather => weather.data.hcursor.as[TransformedWeather] must beRight.like { case w => w.dt must equalTo("2020-04-28T12:00:00.000Z") } - } - } - + }) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentIntegrationTest.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentIntegrationTest.scala index 94b548f97..7a7cb4fe4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentIntegrationTest.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentIntegrationTest.scala @@ -13,7 +13,8 @@ package com.snowplowanalytics.snowplow.enrich.common package enrichments.registry.apirequest -import cats.Id +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO import io.circe._ import io.circe.literal._ @@ -23,7 +24,9 @@ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData import org.specs2.Specification import org.specs2.matcher.Matcher -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock.idClock +import org.specs2.matcher.ValidatedMatchers +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ import outputs.EnrichedEvent @@ -36,7 +39,7 @@ object ApiRequestEnrichmentIntegrationTest { } import ApiRequestEnrichmentIntegrationTest._ -class ApiRequestEnrichmentIntegrationTest extends Specification { +class ApiRequestEnrichmentIntegrationTest extends Specification with ValidatedMatchers with CatsIO { def is = skipAllUnless(continuousIntegration) ^ s2""" @@ -302,57 +305,52 @@ class ApiRequestEnrichmentIntegrationTest extends Specification { } def e1 = { - val enrichment = ApiRequestEnrichment - .parse(IntegrationTests.configuration, SCHEMA_KEY) - .map(_.enrichment[Id]) - .toEither - val event = new EnrichedEvent - event.setApp_id("lookup-test") - event.setUser_id("snowplower") - val context = enrichment.flatMap(_.lookup(event, Nil, Nil, None).toEither) - context must beRight.like { + val config = ApiRequestEnrichment.parse(IntegrationTests.configuration, SCHEMA_KEY).toOption.get + for { + enrichment <- config.enrichment[IO] + event = { + val e = new EnrichedEvent + e.setApp_id("lookup-test") + e.setUser_id("snowplower") + e + } + context <- enrichment.lookup(event, Nil, Nil, None) + } yield context must beValid.like { case context => context must contain(IntegrationTests.correctResultContext) and (context must have size 1) } } def e2 = { - val enrichment = ApiRequestEnrichment - .parse(IntegrationTests.configuration2, SCHEMA_KEY) - .map(_.enrichment[Id]) - .toEither - val event = new EnrichedEvent - event.setApp_id("lookup test") - event.setUser_id("snowplower") - - // Fill cache - enrichment.flatMap( - _.lookup( - event, - List(IntegrationTests.weatherContext), - List(IntegrationTests.customContexts), - Some(IntegrationTests.unstructEvent) - ).toEither - ) - enrichment.flatMap( - _.lookup( - event, - List(IntegrationTests.weatherContext), - List(IntegrationTests.customContexts), - Some(IntegrationTests.unstructEvent) - ).toEither - ) - - val context = enrichment.flatMap( - _.lookup( - event, - List(IntegrationTests.weatherContext), - List(IntegrationTests.customContexts), - Some(IntegrationTests.unstructEvent) - ).toEither - ) - - context must beRight.like { + val config = ApiRequestEnrichment.parse(IntegrationTests.configuration2, SCHEMA_KEY).toOption.get + for { + enrichment <- config.enrichment[IO] + event = { + val e = new EnrichedEvent + e.setApp_id("lookup test") + e.setUser_id("snowplower") + e + } + // Fill cache + _ <- enrichment.lookup( + event, + List(IntegrationTests.weatherContext), + List(IntegrationTests.customContexts), + Some(IntegrationTests.unstructEvent) + ) + _ <- enrichment.lookup( + event, + List(IntegrationTests.weatherContext), + List(IntegrationTests.customContexts), + Some(IntegrationTests.unstructEvent) + ) + context <- enrichment.lookup( + event, + List(IntegrationTests.weatherContext), + List(IntegrationTests.customContexts), + Some(IntegrationTests.unstructEvent) + ) + } yield context must beValid.like { case contexts => contexts must contain( beJson(IntegrationTests.correctResultContext3), @@ -362,14 +360,16 @@ class ApiRequestEnrichmentIntegrationTest extends Specification { } def e3 = { - val enrichment = ApiRequestEnrichment - .parse(IntegrationTests.configuration3, SCHEMA_KEY) - .map(_.enrichment[Id]) - .toEither - val event = new EnrichedEvent - event.setUser_ipaddress("127.0.0.1") - val context = enrichment.flatMap(_.lookup(event, Nil, Nil, None).toEither) - context must beRight.like { + val config = ApiRequestEnrichment.parse(IntegrationTests.configuration3, SCHEMA_KEY).toOption.get + for { + enrichment <- config.enrichment[IO] + event = { + val e = new EnrichedEvent + e.setUser_ipaddress("127.0.0.1") + e + } + context <- enrichment.lookup(event, Nil, Nil, None) + } yield context must beValid.like { case contexts => (contexts must have size 1) and (contexts must contain(IntegrationTests.correctResultContext4)) and (CirceValidator.validate(contexts.head.data, IntegrationTests.schema) must beRight) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala index f00f51d9f..384757bdd 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,23 +12,30 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest -import cats.Id import cats.data.ValidatedNel import cats.syntax.either._ + +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + import io.circe.Json import io.circe.literal._ import io.circe.parser._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + import com.snowplowanalytics.snowplow.badrows.FailureDetails + import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock.idClock +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers import org.specs2.mock.Mockito -class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with Mockito { +class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with Mockito with CatsIO { def is = s2""" extract correct configuration for GET request and perform the request $e1 skip incorrect input (none of json or pojo) in configuration $e2 @@ -65,17 +72,15 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with 1000, Authentication(Some(HttpBasic(Some("xxx"), None))) ) - implicit val idHttpClient: HttpClient[Id] = new HttpClient[Id] { + implicit val ioHttpClient: HttpClient[IO] = new HttpClient[IO] { override def getResponse( uri: String, authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): Id[Either[Throwable, String]] = - """{"record": {"name": "Fancy User", "company": "Acme"}}""".asRight + method: String + ): IO[Either[Throwable, String]] = + IO.pure("""{"record": {"name": "Fancy User", "company": "Acme"}}""".asRight) } val output = Output("iglu:com.acme/user/jsonschema/1-0-0", Some(JsonOutput("$.record"))) val cache = Cache(3000, 60) @@ -164,18 +169,18 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with json"""{"name": "Fancy User", "company": "Acme" }""" ) - val enrichedContextResult = config - .enrichment[Id] - .lookup( - event = fakeEnrichedEvent, - derivedContexts = List.empty, - customContexts = List(clientSession), - unstructEvent = None - ) - - val validResult = enrichedContextResult must beValid(List(user)) - - validConfig and validResult + for { + enrichment <- config.enrichment[IO] + enrichedContextResult <- enrichment.lookup( + event = fakeEnrichedEvent, + derivedContexts = List.empty, + customContexts = List(clientSession), + unstructEvent = None + ) + } yield { + val validResult = enrichedContextResult must beValid(List(user)) + validConfig and validResult + } } def e2 = { @@ -301,17 +306,15 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with authentication = Authentication(Some(HttpBasic(Some("xxx"), None))) ) - implicit val idHttpClient: HttpClient[Id] = new HttpClient[Id] { + implicit val ioHttpClient: HttpClient[IO] = new HttpClient[IO] { override def getResponse( uri: String, authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): Id[Either[Throwable, String]] = - """{"record": {"name": "Fancy User", "company": "Acme"}}""".asRight + method: String + ): IO[Either[Throwable, String]] = + IO.pure("""{"record": {"name": "Fancy User", "company": "Acme"}}""".asRight) } val output = Output(schema = "iglu:com.acme/user/jsonschema/1-0-0", json = Some(JsonOutput("$.record"))) @@ -404,18 +407,18 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with json"""{"name": "Fancy User", "company": "Acme" }""" ) - val enrichedContextResult = config - .enrichment[Id] - .lookup( - event = fakeEnrichedEvent, - derivedContexts = List.empty, - customContexts = List(clientSession), - unstructEvent = None - ) - - val validResult = enrichedContextResult must beValid(List(user)) - - validConfig and validResult + for { + enrichment <- config.enrichment[IO] + enrichedContextResult <- enrichment.lookup( + event = fakeEnrichedEvent, + derivedContexts = List.empty, + customContexts = List(clientSession), + unstructEvent = None + ) + } yield { + val validResult = enrichedContextResult must beValid(List(user)) + validConfig and validResult + } } def e5 = { @@ -427,17 +430,15 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with 1000, Authentication(None) ) - implicit val idHttpClient: HttpClient[Id] = new HttpClient[Id] { + implicit val ioHttpClient: HttpClient[IO] = new HttpClient[IO] { override def getResponse( uri: String, authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): Id[Either[Throwable, String]] = - """{"latitude":32.234,"longitude":33.564}""".asRight + method: String + ): IO[Either[Throwable, String]] = + IO.pure("""{"latitude":32.234,"longitude":33.564}""".asRight) } val output = Output("iglu:com.acme/geo/jsonschema/1-0-0", Some(JsonOutput("$"))) val cache = Cache(3000, 60) @@ -449,36 +450,38 @@ class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with json"""{"latitude": 32.234, "longitude": 33.564}""" ) - val enrichedContextResult = config.enrichment[Id].lookup(new EnrichedEvent, Nil, Nil, None) - - enrichedContextResult must beValid(List(expectedDerivation)) + for { + enrichment <- config.enrichment[IO] + enrichedContextResult <- enrichment.lookup(new EnrichedEvent, Nil, Nil, None) + } yield enrichedContextResult must beValid(List(expectedDerivation)) } def e6 = - failingLookup(ignoreOnError = false) must beInvalid + failingLookup(ignoreOnError = false).map(_ must beInvalid) def e7 = - failingLookup(ignoreOnError = true) must beValid(List.empty) + failingLookup(ignoreOnError = true).map(_ must beValid(List.empty)) - private def failingLookup(ignoreOnError: Boolean): ValidatedNel[FailureDetails.EnrichmentFailure, List[SelfDescribingData[Json]]] = { + private def failingLookup(ignoreOnError: Boolean): IO[ValidatedNel[FailureDetails.EnrichmentFailure, List[SelfDescribingData[Json]]]] = { val inputs = List() val api = HttpApi("GET", "unused", 1000, Authentication(None)) - implicit val idHttpClient: HttpClient[Id] = new HttpClient[Id] { + implicit val ioHttpClient: HttpClient[IO] = new HttpClient[IO] { override def getResponse( uri: String, authUser: Option[String], authPassword: Option[String], body: Option[String], - method: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long] - ): Id[Either[Throwable, String]] = - Left(new RuntimeException("API failed!!!")) + method: String + ): IO[Either[Throwable, String]] = + IO.pure(Left(new RuntimeException("API failed!!!"))) } val output = Output("unused", None) val cache = Cache(3000, 60) val config = ApiRequestConf(SCHEMA_KEY, inputs, api, List(output), cache, ignoreOnError) - config.enrichment[Id].lookup(new EnrichedEvent, Nil, Nil, None) + for { + enrichment <- config.enrichment[IO] + result <- enrichment.lookup(new EnrichedEvent, Nil, Nil, None) + } yield result } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala index 7535c1bd0..58a437385 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala @@ -12,19 +12,21 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest -import cats.Id +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock.idClock +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers import org.specs2.mock.Mockito -class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { +class HttpApiSpec extends Specification with ValidatedMatchers with Mockito with CatsIO { def is = s2""" fail to build request string without all keys $e1 build request string from template context $e2 @@ -55,17 +57,17 @@ class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { def e3 = { val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) - val enrichment = ApiRequestConf( - schemaKey, - Nil, - HttpApi("GET", "http://thishostdoesntexist31337:8123/endpoint", 1000, Authentication(None)), - List(Output("", Some(JsonOutput("")))), - Cache(1, 1), - ignoreOnError = false - ).enrichment[Id] - - val event = new EnrichedEvent - val request = enrichment.lookup(event, Nil, Nil, None) - request must beInvalid + for { + enrichment <- ApiRequestConf( + schemaKey, + Nil, + HttpApi("GET", "http://thishostdoesntexist31337:8123/endpoint", 1000, Authentication(None)), + List(Output("", Some(JsonOutput("")))), + Cache(1, 1), + ignoreOnError = false + ).enrichment[IO] + event = new EnrichedEvent + request <- enrichment.lookup(event, Nil, Nil, None) + } yield request must beInvalid } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala index b19d6b58d..fe994bfed 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala @@ -12,22 +12,25 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest -import cats.Id import cats.data.ValidatedNel import cats.syntax.option._ +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock.idClock +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers -class InputSpec extends Specification with ValidatedMatchers { +class InputSpec extends Specification with ValidatedMatchers with CatsIO { def is = s2""" create template context from POJO inputs $e1 create template context from JSON inputs $e2 @@ -247,19 +250,20 @@ class InputSpec extends Specification with ValidatedMatchers { val input2 = Input.Pojo("time", "true_tstamp") val uriTemplate = "http://thishostdoesntexist31337:8123/{{ user }}/foo/{{ time}}/{{user}}" val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) - val enrichment = ApiRequestConf( - schemaKey, - List(input1, input2), - HttpApi("GET", uriTemplate, 1000, Authentication(None)), - List(Output("iglu:someschema", JsonOutput("$").some)), - Cache(10, 5), - ignoreOnError = false - ).enrichment[Id] - val event = new EnrichedEvent - event.setUser_id("chuwy") - // time in true_tstamp won't be found - val request = enrichment.lookup(event, Nil, Nil, None) - request must beValid.like { + for { + enrichment <- ApiRequestConf( + schemaKey, + List(input1, input2), + HttpApi("GET", uriTemplate, 1000, Authentication(None)), + List(Output("iglu:someschema", JsonOutput("$").some)), + Cache(10, 5), + ignoreOnError = false + ).enrichment[IO] + event = new EnrichedEvent + _ = event.setUser_id("chuwy") + // time in true_tstamp won't be found + request <- enrichment.lookup(event, Nil, Nil, None) + } yield request must beValid.like { case response => response must be(Nil) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 918838cca..fb96c4906 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -12,11 +12,16 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii -import cats.Id +import scala.concurrent.ExecutionContext + import cats.data.Validated import cats.syntax.option._ import cats.syntax.validated._ +import cats.effect.{Blocker, IO} + +import cats.effect.testing.specs2.CatsIO + import io.circe.literal._ import io.circe.parser._ @@ -42,11 +47,13 @@ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.Campaig import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.BlockerF import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ import com.snowplowanalytics.snowplow.enrich.common.AcceptInvalid +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ + +class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatchers with CatsIO { + val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global) -class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatchers { def is = s2""" Hashing configured scalar fields in POJO should work $e1 Hashing configured JSON fields in POJO should work in the simplest case and not affect anything else $e2 @@ -59,7 +66,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher removeAddedFields should remove fields added by PII enrichment $e9 """ - def commonSetup(enrichmentReg: EnrichmentRegistry[Id]): List[Validated[BadRow, EnrichedEvent]] = { + def commonSetup(enrichmentReg: EnrichmentRegistry[IO]): IO[List[Validated[BadRow, EnrichedEvent]]] = { val context = CollectorPayload.Context( Some(DateTime.parse("2017-07-14T03:39:39.000+00:00")), @@ -165,9 +172,9 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher List("com.snowplowanalytics.snowplow", "com.acme", "com.mailgun") ) val reg = Registry.Embedded(regConf, path = "/iglu-schemas") - val client = IgluCirceClient.fromResolver[Id](Resolver(List(reg), None), cacheSize = 0) + val client = IgluCirceClient.fromResolver[IO](Resolver(List(reg), None), cacheSize = 0).unsafeRunSync() EtlPipeline - .processEvents[Id]( + .processEvents[IO]( new AdapterRegistry(), enrichmentReg, client, @@ -175,7 +182,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher new DateTime(1500000000L), input, AcceptInvalid.featureFlags, - AcceptInvalid.countInvalid + IO.unit ) } @@ -199,7 +206,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher "jsonschema", SchemaVer.Full(2, 0, 0) ) - IpLookupsEnrichment.parse(js, schemaKey, true).toOption.get.enrichment[Id](BlockerF.noop) + IpLookupsEnrichment.parse(js, schemaKey, true).toOption.get.enrichment[IO](blocker).unsafeRunSync() } private val campaignAttributionEnrichment = { @@ -226,7 +233,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher } def e1 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, campaignAttribution = campaignAttributionEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -304,37 +311,38 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.se_property = "eb19004c52cd4557aacfa0b30035160c417c3a6a5fad44b96f03c9e2bebaf0b3" expected.refr_domain_userid = "f3e68fd96eaef0cafc1257ec7132b4b3dbae20b1073155531f909999e5da9b2c" expected.domain_sessionid = "7378a72b0183f456df98453b2ff9ed5685206a67f312edb099dc74aed76e1b34" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - (enrichedEvent.app_id must_== expected.app_id) and - (enrichedEvent.geo_city must_== expected.geo_city) and - (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and - (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and - (enrichedEvent.user_id must_== expected.user_id) and - (enrichedEvent.user_ipaddress must_== expected.user_ipaddress) and - (enrichedEvent.user_fingerprint must_== expected.user_fingerprint) and - (enrichedEvent.domain_userid must_== expected.domain_userid) and - (enrichedEvent.network_userid must_== expected.network_userid) and - (enrichedEvent.ip_organization must_== expected.ip_organization) and - (enrichedEvent.ip_domain must_== expected.ip_domain) and - (enrichedEvent.tr_orderid must_== expected.tr_orderid) and - (enrichedEvent.ti_orderid must_== expected.ti_orderid) and - (enrichedEvent.mkt_term must_== expected.mkt_term) and - (enrichedEvent.mkt_clickid must_== expected.mkt_clickid) and - (enrichedEvent.mkt_content must_== expected.mkt_content) and - (enrichedEvent.se_category must_== expected.se_category) and - (enrichedEvent.se_action must_== expected.se_action) and - (enrichedEvent.se_label must_== expected.se_label) and - (enrichedEvent.se_property must_== expected.se_property) and - (enrichedEvent.refr_domain_userid must_== expected.refr_domain_userid) and - (enrichedEvent.domain_sessionid must_== expected.domain_sessionid) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + (enrichedEvent.app_id must_== expected.app_id) and + (enrichedEvent.geo_city must_== expected.geo_city) and + (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and + (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and + (enrichedEvent.user_id must_== expected.user_id) and + (enrichedEvent.user_ipaddress must_== expected.user_ipaddress) and + (enrichedEvent.user_fingerprint must_== expected.user_fingerprint) and + (enrichedEvent.domain_userid must_== expected.domain_userid) and + (enrichedEvent.network_userid must_== expected.network_userid) and + (enrichedEvent.ip_organization must_== expected.ip_organization) and + (enrichedEvent.ip_domain must_== expected.ip_domain) and + (enrichedEvent.tr_orderid must_== expected.tr_orderid) and + (enrichedEvent.ti_orderid must_== expected.ti_orderid) and + (enrichedEvent.mkt_term must_== expected.mkt_term) and + (enrichedEvent.mkt_clickid must_== expected.mkt_clickid) and + (enrichedEvent.mkt_content must_== expected.mkt_content) and + (enrichedEvent.se_category must_== expected.se_category) and + (enrichedEvent.se_action must_== expected.se_action) and + (enrichedEvent.se_label must_== expected.se_label) and + (enrichedEvent.se_property must_== expected.se_property) and + (enrichedEvent.refr_domain_userid must_== expected.refr_domain_userid) and + (enrichedEvent.domain_sessionid must_== expected.domain_sessionid) + }) + } yield size and validOut } def e2 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -393,82 +401,82 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor - val contextJFirstElement = contextJ.downField("data").downArray - val contextJSecondElement = contextJFirstElement.right - val contextJThirdElement = contextJSecondElement.right - val unstructEventJ = parse(enrichedEvent.unstruct_event).toOption.get.hcursor - .downField("data") - .downField("data") - val first = (contextJFirstElement - .downField("data") - .get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (contextJFirstElement.downField("data").get[String]("emailAddress2") must beRight( - "bob@acme.com" - )) and - (contextJSecondElement.downField("data").get[String]("emailAddress") must beRight( - "tim@acme.com" - )) and - (contextJSecondElement.downField("data").get[String]("emailAddress2") must beRight( - "tom@acme.com" - )) - - // The following three tests are for the case that the context schema allows the fields - // data and schema and in addition the schema field matches the configured schema. There - // should be no replacement there (unless that is specified in jsonpath) - val second = (contextJSecondElement - .downField("data") - .downField("data") - .get[String]("emailAddress") must beRight("jim@acme.com")) and - (contextJSecondElement - .downField("data") - .downField("data") - .get[String]("emailAddress2") must beRight( - "1c6660411341411d5431669699149283d10e070224be4339d52bbc4b007e78c5" - )) and - (contextJSecondElement.downField("data").get[String]("schema") must beRight( - "iglu:com.acme/email_sent/jsonschema/1-0-0" - )) and - (unstructEventJ.get[String]("ip") must beRight( - "269c433d0cc00395e3bc5fe7f06c5ad822096a38bec2d8a005367b52c0dfb428" - )) and - (unstructEventJ.get[String]("myVar2") must beRight("awesome")) - - val third = (contextJThirdElement - .downField("data") - .get[List[String]]("field") must - beRight( - List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", - "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" - ) - )) and - (contextJThirdElement - .downField("data") - .downField("field2") - .focus must beSome.like { case json => json.isNull }) and - (contextJThirdElement - .downField("data") - .downField("field3") - .focus must beSome.like { case json => json.isNull }) - - // Test that empty string in Pii field gets hashed - val fourth = contextJThirdElement - .downField("data") - .get[String]("field4") must beRight("7a3477dad66e666bd203b834c54b6dfe8b546bdbc5283462ad14052abfb06600") - - first and second and third and fourth - } - - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor + val contextJFirstElement = contextJ.downField("data").downArray + val contextJSecondElement = contextJFirstElement.right + val contextJThirdElement = contextJSecondElement.right + val unstructEventJ = parse(enrichedEvent.unstruct_event).toOption.get.hcursor + .downField("data") + .downField("data") + val first = (contextJFirstElement + .downField("data") + .get[String]("emailAddress") must beRight( + "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" + )) and + (contextJFirstElement.downField("data").get[String]("emailAddress2") must beRight( + "bob@acme.com" + )) and + (contextJSecondElement.downField("data").get[String]("emailAddress") must beRight( + "tim@acme.com" + )) and + (contextJSecondElement.downField("data").get[String]("emailAddress2") must beRight( + "tom@acme.com" + )) + + // The following three tests are for the case that the context schema allows the fields + // data and schema and in addition the schema field matches the configured schema. There + // should be no replacement there (unless that is specified in jsonpath) + val second = (contextJSecondElement + .downField("data") + .downField("data") + .get[String]("emailAddress") must beRight("jim@acme.com")) and + (contextJSecondElement + .downField("data") + .downField("data") + .get[String]("emailAddress2") must beRight( + "1c6660411341411d5431669699149283d10e070224be4339d52bbc4b007e78c5" + )) and + (contextJSecondElement.downField("data").get[String]("schema") must beRight( + "iglu:com.acme/email_sent/jsonschema/1-0-0" + )) and + (unstructEventJ.get[String]("ip") must beRight( + "269c433d0cc00395e3bc5fe7f06c5ad822096a38bec2d8a005367b52c0dfb428" + )) and + (unstructEventJ.get[String]("myVar2") must beRight("awesome")) + + val third = (contextJThirdElement + .downField("data") + .get[List[String]]("field") must + beRight( + List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", + "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + ) + )) and + (contextJThirdElement + .downField("data") + .downField("field2") + .focus must beSome.like { case json => json.isNull }) and + (contextJThirdElement + .downField("data") + .downField("field3") + .focus must beSome.like { case json => json.isNull }) + + // Test that empty string in Pii field gets hashed + val fourth = contextJThirdElement + .downField("data") + .get[String]("field4") must beRight("7a3477dad66e666bd203b834c54b6dfe8b546bdbc5283462ad14052abfb06600") + + first and second and third and fourth + }) + } yield size and validOut } def e3 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -497,22 +505,23 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") - (firstElem.get[String]("emailAddress") must beRight("jim@acme.com")) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") + val firstElem = contextJ.downArray.downField("data") + val secondElem = contextJ.downArray.right.downField("data") + (firstElem.get[String]("emailAddress") must beRight("jim@acme.com")) and + (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and + (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and + (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + }) + } yield size and validOut } def e4 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -542,26 +551,27 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") - (firstElem.get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (firstElem.get[String]("emailAddress2") must beRight( - "1c6660411341411d5431669699149283d10e070224be4339d52bbc4b007e78c5" - )) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") + val firstElem = contextJ.downArray.downField("data") + val secondElem = contextJ.downArray.right.downField("data") + (firstElem.get[String]("emailAddress") must beRight( + "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" + )) and + (firstElem.get[String]("emailAddress2") must beRight( + "1c6660411341411d5431669699149283d10e070224be4339d52bbc4b007e78c5" + )) and + (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and + (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + }) + } yield size and validOut } def e5 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -589,26 +599,27 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") - (firstElem.get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight( - "09e4160b10703767dcb28d834c1905a182af0f828d6d3512dd07d466c283c840" - )) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") + val firstElem = contextJ.downArray.downField("data") + val secondElem = contextJ.downArray.right.downField("data") + (firstElem.get[String]("emailAddress") must beRight( + "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" + )) and + (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and + (secondElem.get[String]("emailAddress") must beRight( + "09e4160b10703767dcb28d834c1905a182af0f828d6d3512dd07d466c283c840" + )) and + (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + }) + } yield size and validOut } def e6 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -636,23 +647,24 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") - (firstElem.get[String]("emailAddress") must beRight("jim@acme.com")) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) and - (secondElem.get[Int]("someInt") must beRight(1)) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") + val firstElem = contextJ.downArray.downField("data") + val secondElem = contextJ.downArray.right.downField("data") + (firstElem.get[String]("emailAddress") must beRight("jim@acme.com")) and + (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and + (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and + (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) and + (secondElem.get[Int]("someInt") must beRight(1)) + }) + } yield size and validOut } def e7 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -694,37 +706,39 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher expected.pii = """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_fingerprint","originalValue":"its_you_again!","modifiedValue":"27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4"},{"fieldName":"user_ipaddress","originalValue":"70.46.123.145","modifiedValue":"dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5"},{"fieldName":"user_id","originalValue":"john@acme.com","modifiedValue":"7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9"}],"json":[{"fieldName":"unstruct_event","originalValue":"50.56.129.169","modifiedValue":"269c433d0cc00395e3bc5fe7f06c5ad822096a38bec2d8a005367b52c0dfb428","jsonPath":"$.ip","schema":"iglu:com.mailgun/message_clicked/jsonschema/1-0-0"},{"fieldName":"contexts","originalValue":"bob@acme.com","modifiedValue":"1c6660411341411d5431669699149283d10e070224be4339d52bbc4b007e78c5","jsonPath":"$.data.emailAddress2","schema":"iglu:com.acme/email_sent/jsonschema/1-1-0"},{"fieldName":"contexts","originalValue":"jim@acme.com","modifiedValue":"72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6","jsonPath":"$.emailAddress","schema":"iglu:com.acme/email_sent/jsonschema/1-0-0"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}}""" - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") - val unstructEventJ = - parse(enrichedEvent.unstruct_event).toOption.get.hcursor.downField("data") - - (enrichedEvent.pii must_== expected.pii) and // This is the important test, the rest just verify that nothing has changed. - (enrichedEvent.app_id must_== expected.app_id) and - (enrichedEvent.ip_domain must_== expected.ip_domain) and - (enrichedEvent.geo_city must_== expected.geo_city) and - (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and - (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) and - (secondElem - .downField("data") - .get[String]("emailAddress") must beRight("jim@acme.com")) and - (secondElem.get[String]("schema") must beRight( - "iglu:com.acme/email_sent/jsonschema/1-0-0" - )) and - (unstructEventJ.downField("data").get[String]("myVar2") must beRight("awesome")) - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- + output.map(_.head must beValid.like { + case enrichedEvent => + val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") + val firstElem = contextJ.downArray.downField("data") + val secondElem = contextJ.downArray.right.downField("data") + val unstructEventJ = + parse(enrichedEvent.unstruct_event).toOption.get.hcursor.downField("data") + + (enrichedEvent.pii must_== expected.pii) and // This is the important test, the rest just verify that nothing has changed. + (enrichedEvent.app_id must_== expected.app_id) and + (enrichedEvent.ip_domain must_== expected.ip_domain) and + (enrichedEvent.geo_city must_== expected.geo_city) and + (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and + (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and + (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and + (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and + (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) and + (secondElem + .downField("data") + .get[String]("emailAddress") must beRight("jim@acme.com")) and + (secondElem.get[String]("schema") must beRight( + "iglu:com.acme/email_sent/jsonschema/1-0-0" + )) and + (unstructEventJ.downField("data").get[String]("myVar2") must beRight("awesome")) + }) + } yield size and validOut } def e8 = { - val enrichmentReg = EnrichmentRegistry[Id]( + val enrichmentReg = EnrichmentRegistry[IO]( ipLookups = ipEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( @@ -743,19 +757,21 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ).some ) val output = commonSetup(enrichmentReg) - val size = output.size must_== 1 - val validOut = output.head must beValid.like { - case enrichedEvent => - val context = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data").downArray - val data = context.downField("data") - - val one = data.get[String]("emailAddress") must beRight("72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") - val two = data.get[String]("emailAddress2") must beRight("bob@acme.com") - val three = data.downField("nonExistentEmailAddress").focus must beNone - - one and two and three - } - size and validOut + for { + size <- output.map(_.size must_== 1) + validOut <- output.map(_.head must beValid.like { + case enrichedEvent => + val context = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data").downArray + val data = context.downField("data") + + val one = + data.get[String]("emailAddress") must beRight("72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") + val two = data.get[String]("emailAddress2") must beRight("bob@acme.com") + val three = data.downField("nonExistentEmailAddress").focus must beNone + + one and two and three + }) + } yield size and validOut } def e9 = { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentIntegrationTest.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentIntegrationTest.scala index ad140a8bd..d38f5a780 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentIntegrationTest.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentIntegrationTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2012-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -13,20 +13,25 @@ package com.snowplowanalytics.snowplow.enrich.common package enrichments.registry.sqlquery +import scala.concurrent.ExecutionContext + import io.circe._ import io.circe.literal._ import io.circe.parser._ -import cats.Id +import cats.data.NonEmptyList + +import cats.effect.{Blocker, IO} +import cats.effect.testing.specs2.CatsIO import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} -import com.snowplowanalytics.snowplow.enrich.common.utils.Clock.idClock import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers -import outputs.EnrichedEvent -import utils.{BlockerF, ShiftExecution} -import cats.data.NonEmptyList +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.utils.ShiftExecution +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent object SqlQueryEnrichmentIntegrationTest { def continuousIntegration: Boolean = @@ -37,7 +42,9 @@ object SqlQueryEnrichmentIntegrationTest { } import SqlQueryEnrichmentIntegrationTest._ -class SqlQueryEnrichmentIntegrationTest extends Specification { +class SqlQueryEnrichmentIntegrationTest extends Specification with ValidatedMatchers with CatsIO { + val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global) + def is = skipAllUnless(continuousIntegration) ^ s2""" Basic case $e1 @@ -92,19 +99,19 @@ class SqlQueryEnrichmentIntegrationTest extends Specification { } """ - val event = new EnrichedEvent - - val config = SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[Id](BlockerF.noop, ShiftExecution.noop)) - val context = config.toEither.flatMap(_.lookup(event, Nil, Nil, None).toEither) - - val correctContext = + val expected = SelfDescribingData( SchemaKey("com.acme", "singleColumn", "jsonschema", SchemaVer.Full(1, 0, 0)), json"""{"singleColumn": 42}""" ) - context must beRight.like { - case List(json) => json must beEqualTo(correctContext) + ShiftExecution.ofSingleThread[IO].use { shift => + for { + enrichment <- SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[IO](blocker, shift)).toOption.get + contexts <- enrichment.lookup(new EnrichedEvent, Nil, Nil, None) + } yield contexts must beValid.like { + case List(json) => json must beEqualTo(expected) + } } } @@ -318,47 +325,43 @@ class SqlQueryEnrichmentIntegrationTest extends Specification { json""" {"applicationName": "ue_test_london"} """ ) - val config = SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).toEither.map(_.enrichment[Id](BlockerF.noop, ShiftExecution.noop)) - - val context1 = - config.flatMap(_.lookup(event1, List(weatherContext1), List(geoContext1), Some(ue1)).toEither) - val result_context1 = + val expected1 = SelfDescribingData( SchemaKey("com.acme", "demographic", "jsonschema", SchemaVer.Full(1, 0, 0)), json"""{"city": "Krasnoyarsk", "country": "Russia", "pk": 1}""" ) - val context2 = - config.flatMap(_.lookup(event2, List(weatherContext2), List(geoContext2), Some(ue2)).toEither) - val result_context2 = + val expected2 = SelfDescribingData( SchemaKey("com.acme", "demographic", "jsonschema", SchemaVer.Full(1, 0, 0)), json"""{"city": "London", "country": "England", "pk": 2 }""" ) - val context3 = - config.flatMap(_.lookup(event3, List(weatherContext3), List(geoContext3), Some(ue3)).toEither) - val result_context3 = + val expected3 = SelfDescribingData( SchemaKey("com.acme", "demographic", "jsonschema", SchemaVer.Full(1, 0, 0)), json"""{"city": "New York", "country": "USA", "pk": 3} """ ) - val context4 = config.flatMap( - _.lookup(event4, List(weatherContext4), List(geoContext4, clientSession4), Some(ue4)).toEither - ) - val result_context4 = + val expected4 = SelfDescribingData( SchemaKey("com.acme", "demographic", "jsonschema", SchemaVer.Full(1, 0, 0)), json"""{"city": "London", "country": "England", "pk": 2 } """ ) - val res1 = context1 must beRight(List(result_context1)) - val res2 = context2 must beRight(List(result_context2)) - val res3 = context3 must beRight(List(result_context3)) - val res4 = context4 must beRight(List(result_context4)) - - res1 and res2 and res3 and res4 + ShiftExecution.ofSingleThread[IO].use { shift => + for { + enrichment <- SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[IO](blocker, shift)).toOption.get + actual1 <- enrichment.lookup(event1, List(weatherContext1), List(geoContext1), Some(ue1)) + res1 = actual1 must beValid(List(expected1)) + actual2 <- enrichment.lookup(event2, List(weatherContext2), List(geoContext2), Some(ue2)) + res2 = actual2 must beValid(List(expected2)) + actual3 <- enrichment.lookup(event3, List(weatherContext3), List(geoContext3), Some(ue3)) + res3 = actual3 must beValid(List(expected3)) + actual4 <- enrichment.lookup(event4, List(weatherContext4), List(geoContext4, clientSession4), Some(ue4)) + res4 = actual4 must beValid(List(expected4)) + } yield res1 and res2 and res3 and res4 + } } def e3 = { @@ -409,25 +412,27 @@ class SqlQueryEnrichmentIntegrationTest extends Specification { val event = new EnrichedEvent event.user_id = null - val config = SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[Id](BlockerF.noop, ShiftExecution.noop)) - val context = config.toEither.flatMap(_.lookup(event, Nil, Nil, None).toEither) - - context must beRight(Nil) + ShiftExecution.ofSingleThread[IO].use { shift => + for { + enrichment <- SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[IO](blocker, shift)).toOption.get + contexts <- enrichment.lookup(event, Nil, Nil, None) + } yield contexts must beValid(Nil) + } } def e4 = { val result = invalidCreds(ignoreOnError = false) - result must beLeft.like { + result.map(_ must beLeft.like { case NonEmptyList(one, two :: Nil) if one.toString.contains("Error while executing the sql lookup") && two.toString.contains("FATAL: password authentication failed for user") => ok case left => ko(s"error(s) don't contain the expected error messages: $left") - } + }) } def e5 = - invalidCreds(ignoreOnError = true) must beRight(List.empty) + invalidCreds(ignoreOnError = true).map(_ must beRight(List.empty)) private def invalidCreds(ignoreOnError: Boolean) = { val configuration = @@ -469,7 +474,12 @@ class SqlQueryEnrichmentIntegrationTest extends Specification { """ val event = new EnrichedEvent - val config = SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[Id](BlockerF.noop, ShiftExecution.noop)) - config.toEither.flatMap(_.lookup(event, Nil, Nil, None).toEither) + + ShiftExecution.ofSingleThread[IO].use { shift => + for { + enrichment <- SqlQueryEnrichment.parse(configuration, SCHEMA_KEY).map(_.enrichment[IO](blocker, shift)).toOption.get + contexts <- enrichment.lookup(event, Nil, Nil, None) + } yield contexts.toEither + } } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CljTomcatLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CljTomcatLoaderSpec.scala deleted file mode 100644 index df6ce59a2..000000000 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CljTomcatLoaderSpec.scala +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import cats.data.NonEmptyList -import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows._ -import org.joda.time.DateTime -import org.specs2.{ScalaCheck, Specification} -import org.specs2.matcher.{DataTables, ValidatedMatchers} - -import SpecHelpers._ - -class CljTomcatLoaderSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { - val Process = Processor("CljTomcatLoaderSpec", "v1") - - def is = s2""" - toCollectorPayload should return a CanonicalInput for a valid raw event $e1 - toCollectorPayload should return a Validation Failure for a log record with body but with operation other than POST $e2 - toCollectorPayload should return a Validation Failure for a POST log record with corrupted API vendor/version $e3 - toCollectorPayload should return a Validation Failure for an unparseable Clj-Tomcat log record $e4 - """ - - object Expected { - val collector = "clj-tomcat" - val encoding = "UTF-8" - val vendor = "com.snowplowanalytics.snowplow" - val ipAddress = "37.157.33.123".some - } - - def e1 = - "SPEC NAME" || "RAW" | "EXP. VERSION" | "EXP. PAYLOAD" | "EXP. CONTENT TYPE" | "EXP. BODY" | "EXP. TIMESTAMP" | "EXP. USER AGENT" | "EXP. REFERER URI" | - "Snowplow Tp1 GET w/ v0.6.0 collector" !! "2013-08-29 00:18:48 - 830 37.157.33.123 GET d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/analytics/index.html Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0 e=pv&page=Introduction%20-%20Snowplow%20Analytics%25&dtm=1377735557970&tid=567074&vp=1024x635&ds=1024x635&vid=1&duid=7969620089de36eb&p=web&tv=js-0.12.0&fp=308909339&aid=snowplowweb&lang=en-US&cs=UTF-8&tz=America%2FLos_Angeles&refr=http%3A%2F%2Fwww.metacrawler.com%2Fsearch%2Fweb%3Ffcoid%3D417%26fcop%3Dtopnav%26fpid%3D27%26q%3Dsnowplow%2Banalytics%26ql%3D&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1024x768&cd=24&cookie=1&url=http%3A%2F%2Fsnowplowanalytics.com%2Fanalytics%2Findex.html - - -" ! - "tp1" ! toNameValuePairs( - "e" -> "pv", - "page" -> "Introduction - Snowplow Analytics%", - "dtm" -> "1377735557970", - "tid" -> "567074", - "vp" -> "1024x635", - "ds" -> "1024x635", - "vid" -> "1", - "duid" -> "7969620089de36eb", - "p" -> "web", - "tv" -> "js-0.12.0", - "fp" -> "308909339", - "aid" -> "snowplowweb", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "America/Los_Angeles", - "refr" -> "http://www.metacrawler.com/search/web?fcoid=417&fcop=topnav&fpid=27&q=snowplow+analytics&ql=", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1024x768", - "cd" -> "24", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/analytics/index.html" - ) ! None ! None ! DateTime.parse("2013-08-29T00:18:48.000+00:00") ! - "Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0".some ! "http://snowplowanalytics.com/analytics/index.html".some | - "Snowplow Tp1 GET w/ v0.7.0 collector" !! "2013-08-29 00:18:48 - 830 37.157.33.123 GET d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/analytics/index.html Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0 e=pv&page=Introduction%20-%20Snowplow%20Analytics%25&dtm=1377735557970&tid=567074&vp=1024x635&ds=1024x635&vid=1&duid=7969620089de36eb&p=web&tv=js-0.12.0&fp=308909339&aid=snowplowweb&lang=en-US&cs=UTF-8&tz=America%2FLos_Angeles&refr=http%3A%2F%2Fwww.metacrawler.com%2Fsearch%2Fweb%3Ffcoid%3D417%26fcop%3Dtopnav%26fpid%3D27%26q%3Dsnowplow%2Banalytics%26ql%3D&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1024x768&cd=24&cookie=1&url=http%3A%2F%2Fsnowplowanalytics.com%2Fanalytics%2Findex.html - - - - -" ! - "tp1" ! toNameValuePairs( - "e" -> "pv", - "page" -> "Introduction - Snowplow Analytics%", - "dtm" -> "1377735557970", - "tid" -> "567074", - "vp" -> "1024x635", - "ds" -> "1024x635", - "vid" -> "1", - "duid" -> "7969620089de36eb", - "p" -> "web", - "tv" -> "js-0.12.0", - "fp" -> "308909339", - "aid" -> "snowplowweb", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "America/Los_Angeles", - "refr" -> "http://www.metacrawler.com/search/web?fcoid=417&fcop=topnav&fpid=27&q=snowplow+analytics&ql=", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1024x768", - "cd" -> "24", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/analytics/index.html" - ) ! None ! None ! DateTime.parse("2013-08-29T00:18:48.000+00:00") ! - "Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0".some ! "http://snowplowanalytics.com/analytics/index.html".some | - "Snowplow Tp2 POST w/ v0.6.0 collector" !! "2014-09-08 13:59:07 - - 37.157.33.123 POST - /com.snowplowanalytics.snowplow/tp2 200 - python-requests%2F2.2.1+CPython%2F3.3.5+Linux%2F3.2.0-61-generic &cv=clj-0.7.0-tom-0.1.0&nuid=5c6c40e4-eff8-409b-9327-471f303e30b6 - - - application%2Fjson%3B+charset%3Dutf-8 eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvcGF5bG9hZF9kYXRhL2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IFt7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAicHYiLCAiZWlkIjogIjJjYWU0MTkxLTMxY2QtNDc4My04MmE4LWRmNTMxOGY0NGFmZiIsICJ1cmwiOiAiaHR0cDovL3d3dy5leGFtcGxlLmNvbSIsICJ0diI6ICJweS0wLjUuMCIsICJjeCI6ICJleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WTI5dWRHVjRkSE12YW5OdmJuTmphR1Z0WVM4eExUQXRNQ0lzSUNKa1lYUmhJam9nVzNzaWMyTm9aVzFoSWpvZ0ltbG5iSFU2WTI5dExuTnViM2R3Ykc5M1lXNWhiSGwwYVdOekxuTnViM2R3Ykc5M0wyMXZZbWxzWlY5amIyNTBaWGgwTDJwemIyNXpZMmhsYldFdk1TMHdMVEFpTENBaVpHRjBZU0k2SUhzaVpHVjJhV05sVFdGdWRXWmhZM1IxY21WeUlqb2dJa0Z0YzNSeVlXUWlMQ0FpWVc1a2NtOXBaRWxrWm1FaU9pQWljMjl0WlY5aGJtUnliMmxrU1dSbVlTSXNJQ0prWlhacFkyVk5iMlJsYkNJNklDSnNZWEpuWlNJc0lDSnZjR1Z1U1dSbVlTSTZJQ0p6YjIxbFgwbGtabUVpTENBaVkyRnljbWxsY2lJNklDSnpiMjFsWDJOaGNuSnBaWElpTENBaVlYQndiR1ZKWkdaaElqb2dJbk52YldWZllYQndiR1ZKWkdaaElpd2dJbTl6Vm1WeWMybHZiaUk2SUNJekxqQXVNQ0lzSUNKaGNIQnNaVWxrWm5ZaU9pQWljMjl0WlY5aGNIQnNaVWxrWm5ZaUxDQWliM05VZVhCbElqb2dJazlUV0NKOWZTd2dleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WjJWdmJHOWpZWFJwYjI1ZlkyOXVkR1Y0ZEM5cWMyOXVjMk5vWlcxaEx6RXRNQzB3SWl3Z0ltUmhkR0VpT2lCN0lteHZibWRwZEhWa1pTSTZJREV3TENBaVlXeDBhWFIxWkdWQlkyTjFjbUZqZVNJNklEQXVNeXdnSW14aGRHbDBkV1JsSWpvZ055d2dJbXhoZEdsMGRXUmxURzl1WjJsMGRXUmxRV05qZFhKaFkza2lPaUF3TGpVc0lDSmlaV0Z5YVc1bklqb2dOVEFzSUNKaGJIUnBkSFZrWlNJNklESXdMQ0FpYzNCbFpXUWlPaUF4Tm4xOVhYMD0iLCAicCI6ICJwYyJ9LCB7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAic2UiLCAiZWlkIjogIjVhNzExODg1LTY5ZGMtNGY0Mi04Nzg1LWZjNjVmMTc1OGVjMCIsICJzZV9hYyI6ICJteV9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogIm15X2NhdGVnb3J5IiwgInAiOiAicGMifSwgeyJkdG0iOiAiMTQxMDE4NDc0Njg5NSIsICJlIjogInNlIiwgImVpZCI6ICI4M2VhYzIyNy03MTI5LTQyYTctYWY0NS00MGY2M2VkNGI5ZGQiLCAic2VfYWMiOiAiYW5vdGhlcl9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogImFub3RoZXJfY2F0ZWdvcnkiLCAicCI6ICJwYyJ9XX0" ! - "tp2" ! toNameValuePairs( - "cv" -> "clj-0.7.0-tom-0.1.0", - "nuid" -> "5c6c40e4-eff8-409b-9327-471f303e30b6" - ) ! "application/json; charset=utf-8".some ! """{"schema": "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0", "data": [{"dtm": "1410184746894", "e": "pv", "eid": "2cae4191-31cd-4783-82a8-df5318f44aff", "url": "http://www.example.com", "tv": "py-0.5.0", "cx": "eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvY29udGV4dHMvanNvbnNjaGVtYS8xLTAtMCIsICJkYXRhIjogW3sic2NoZW1hIjogImlnbHU6Y29tLnNub3dwbG93YW5hbHl0aWNzLnNub3dwbG93L21vYmlsZV9jb250ZXh0L2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IHsiZGV2aWNlTWFudWZhY3R1cmVyIjogIkFtc3RyYWQiLCAiYW5kcm9pZElkZmEiOiAic29tZV9hbmRyb2lkSWRmYSIsICJkZXZpY2VNb2RlbCI6ICJsYXJnZSIsICJvcGVuSWRmYSI6ICJzb21lX0lkZmEiLCAiY2FycmllciI6ICJzb21lX2NhcnJpZXIiLCAiYXBwbGVJZGZhIjogInNvbWVfYXBwbGVJZGZhIiwgIm9zVmVyc2lvbiI6ICIzLjAuMCIsICJhcHBsZUlkZnYiOiAic29tZV9hcHBsZUlkZnYiLCAib3NUeXBlIjogIk9TWCJ9fSwgeyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvZ2VvbG9jYXRpb25fY29udGV4dC9qc29uc2NoZW1hLzEtMC0wIiwgImRhdGEiOiB7ImxvbmdpdHVkZSI6IDEwLCAiYWx0aXR1ZGVBY2N1cmFjeSI6IDAuMywgImxhdGl0dWRlIjogNywgImxhdGl0dWRlTG9uZ2l0dWRlQWNjdXJhY3kiOiAwLjUsICJiZWFyaW5nIjogNTAsICJhbHRpdHVkZSI6IDIwLCAic3BlZWQiOiAxNn19XX0=", "p": "pc"}, {"dtm": "1410184746894", "e": "se", "eid": "5a711885-69dc-4f42-8785-fc65f1758ec0", "se_ac": "my_action", "tv": "py-0.5.0", "cx": "eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvY29udGV4dHMvanNvbnNjaGVtYS8xLTAtMCIsICJkYXRhIjogW3sic2NoZW1hIjogImlnbHU6Y29tLnNub3dwbG93YW5hbHl0aWNzLnNub3dwbG93L21vYmlsZV9jb250ZXh0L2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IHsiZGV2aWNlTWFudWZhY3R1cmVyIjogIkFtc3RyYWQiLCAiYW5kcm9pZElkZmEiOiAic29tZV9hbmRyb2lkSWRmYSIsICJkZXZpY2VNb2RlbCI6ICJsYXJnZSIsICJvcGVuSWRmYSI6ICJzb21lX0lkZmEiLCAiY2FycmllciI6ICJzb21lX2NhcnJpZXIiLCAiYXBwbGVJZGZhIjogInNvbWVfYXBwbGVJZGZhIiwgIm9zVmVyc2lvbiI6ICIzLjAuMCIsICJhcHBsZUlkZnYiOiAic29tZV9hcHBsZUlkZnYiLCAib3NUeXBlIjogIk9TWCJ9fSwgeyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvZ2VvbG9jYXRpb25fY29udGV4dC9qc29uc2NoZW1hLzEtMC0wIiwgImRhdGEiOiB7ImxvbmdpdHVkZSI6IDEwLCAiYWx0aXR1ZGVBY2N1cmFjeSI6IDAuMywgImxhdGl0dWRlIjogNywgImxhdGl0dWRlTG9uZ2l0dWRlQWNjdXJhY3kiOiAwLjUsICJiZWFyaW5nIjogNTAsICJhbHRpdHVkZSI6IDIwLCAic3BlZWQiOiAxNn19XX0=", "se_ca": "my_category", "p": "pc"}, {"dtm": "1410184746895", "e": "se", "eid": "83eac227-7129-42a7-af45-40f63ed4b9dd", "se_ac": "another_action", "tv": "py-0.5.0", "cx": "eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvY29udGV4dHMvanNvbnNjaGVtYS8xLTAtMCIsICJkYXRhIjogW3sic2NoZW1hIjogImlnbHU6Y29tLnNub3dwbG93YW5hbHl0aWNzLnNub3dwbG93L21vYmlsZV9jb250ZXh0L2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IHsiZGV2aWNlTWFudWZhY3R1cmVyIjogIkFtc3RyYWQiLCAiYW5kcm9pZElkZmEiOiAic29tZV9hbmRyb2lkSWRmYSIsICJkZXZpY2VNb2RlbCI6ICJsYXJnZSIsICJvcGVuSWRmYSI6ICJzb21lX0lkZmEiLCAiY2FycmllciI6ICJzb21lX2NhcnJpZXIiLCAiYXBwbGVJZGZhIjogInNvbWVfYXBwbGVJZGZhIiwgIm9zVmVyc2lvbiI6ICIzLjAuMCIsICJhcHBsZUlkZnYiOiAic29tZV9hcHBsZUlkZnYiLCAib3NUeXBlIjogIk9TWCJ9fSwgeyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvZ2VvbG9jYXRpb25fY29udGV4dC9qc29uc2NoZW1hLzEtMC0wIiwgImRhdGEiOiB7ImxvbmdpdHVkZSI6IDEwLCAiYWx0aXR1ZGVBY2N1cmFjeSI6IDAuMywgImxhdGl0dWRlIjogNywgImxhdGl0dWRlTG9uZ2l0dWRlQWNjdXJhY3kiOiAwLjUsICJiZWFyaW5nIjogNTAsICJhbHRpdHVkZSI6IDIwLCAic3BlZWQiOiAxNn19XX0=", "se_ca": "another_category", "p": "pc"}]}""".some ! DateTime - .parse("2014-09-08T13:59:07.000+00:00") ! - "python-requests%2F2.2.1+CPython%2F3.3.5+Linux%2F3.2.0-61-generic".some ! None | - "CallRail-style POST w/o body, content-type" !! "2013-08-29 00:18:48 - 830 37.157.33.123 POST d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/analytics/index.html Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0 e=pv&page=Introduction%20-%20Snowplow%20Analytics%25&dtm=1377735557970&tid=567074&vp=1024x635&ds=1024x635&vid=1&duid=7969620089de36eb&p=web&tv=js-0.12.0&fp=308909339&aid=snowplowweb&lang=en-US&cs=UTF-8&tz=America%2FLos_Angeles&refr=http%3A%2F%2Fwww.metacrawler.com%2Fsearch%2Fweb%3Ffcoid%3D417%26fcop%3Dtopnav%26fpid%3D27%26q%3Dsnowplow%2Banalytics%26ql%3D&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1024x768&cd=24&cookie=1&url=http%3A%2F%2Fsnowplowanalytics.com%2Fanalytics%2Findex.html - - -" ! - "tp1" ! toNameValuePairs( - "e" -> "pv", - "page" -> "Introduction - Snowplow Analytics%", - "dtm" -> "1377735557970", - "tid" -> "567074", - "vp" -> "1024x635", - "ds" -> "1024x635", - "vid" -> "1", - "duid" -> "7969620089de36eb", - "p" -> "web", - "tv" -> "js-0.12.0", - "fp" -> "308909339", - "aid" -> "snowplowweb", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "America/Los_Angeles", - "refr" -> "http://www.metacrawler.com/search/web?fcoid=417&fcop=topnav&fpid=27&q=snowplow+analytics&ql=", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1024x768", - "cd" -> "24", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/analytics/index.html" - ) ! None ! None ! DateTime.parse("2013-08-29T00:18:48.000+00:00") ! - "Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0".some ! "http://snowplowanalytics.com/analytics/index.html".some | - // This may not be a valid GET but we need to accept it because Lumia emits it (#2743, #489) - "Snowplow Tp1 GET w/ content-type no body " !! "2013-08-29 00:18:48 - 830 37.157.33.123 GET d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/analytics/index.html Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0 e=pv&page=Introduction%20-%20Snowplow%20Analytics%25&dtm=1377735557970&tid=567074&vp=1024x635&ds=1024x635&vid=1&duid=7969620089de36eb&p=web&tv=js-0.12.0&fp=308909339&aid=snowplowweb&lang=en-US&cs=UTF-8&tz=America%2FLos_Angeles&refr=http%3A%2F%2Fwww.metacrawler.com%2Fsearch%2Fweb%3Ffcoid%3D417%26fcop%3Dtopnav%26fpid%3D27%26q%3Dsnowplow%2Banalytics%26ql%3D&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1024x768&cd=24&cookie=1&url=http%3A%2F%2Fsnowplowanalytics.com%2Fanalytics%2Findex.html - - - application%2Fx-www-form-urlencoded%3B+charset%3Dutf-8 -" ! - "tp1" ! toNameValuePairs( - "e" -> "pv", - "page" -> "Introduction - Snowplow Analytics%", - "dtm" -> "1377735557970", - "tid" -> "567074", - "vp" -> "1024x635", - "ds" -> "1024x635", - "vid" -> "1", - "duid" -> "7969620089de36eb", - "p" -> "web", - "tv" -> "js-0.12.0", - "fp" -> "308909339", - "aid" -> "snowplowweb", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "America/Los_Angeles", - "refr" -> "http://www.metacrawler.com/search/web?fcoid=417&fcop=topnav&fpid=27&q=snowplow+analytics&ql=", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1024x768", - "cd" -> "24", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/analytics/index.html" - ) ! "application/x-www-form-urlencoded; charset=utf-8".some ! None ! DateTime.parse( - "2013-08-29T00:18:48.000+00:00" - ) ! - "Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0".some ! "http://snowplowanalytics.com/analytics/index.html".some |> { - - (_, raw, version, payload, contentType, body, timestamp, userAgent, refererUri) => - val canonicalEvent = CljTomcatLoader.toCollectorPayload(raw, Process) - - val expected = CollectorPayload( - api = CollectorPayload.Api(Expected.vendor, version), - querystring = payload, - contentType = contentType, - body = body, - source = CollectorPayload.Source(Expected.collector, Expected.encoding, None), - context = CollectorPayload - .Context(timestamp.some, Expected.ipAddress, userAgent, refererUri, Nil, None) - ) - - canonicalEvent must beValid(expected.some) - } - - def e2 = { - val raw = - "2014-09-08 13:59:07 - - 37.157.33.123 GET - /com.snowplowanalytics.snowplow/tp2 200 - python-requests%2F2.2.1+CPython%2F3.3.5+Linux%2F3.2.0-61-generic &cv=clj-0.7.0-tom-0.1.0&nuid=5c6c40e4-eff8-409b-9327-471f303e30b6 - - - application%2Fjson%3B+charset%3Dutf-8 eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvcGF5bG9hZF9kYXRhL2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IFt7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAicHYiLCAiZWlkIjogIjJjYWU0MTkxLTMxY2QtNDc4My04MmE4LWRmNTMxOGY0NGFmZiIsICJ1cmwiOiAiaHR0cDovL3d3dy5leGFtcGxlLmNvbSIsICJ0diI6ICJweS0wLjUuMCIsICJjeCI6ICJleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WTI5dWRHVjRkSE12YW5OdmJuTmphR1Z0WVM4eExUQXRNQ0lzSUNKa1lYUmhJam9nVzNzaWMyTm9aVzFoSWpvZ0ltbG5iSFU2WTI5dExuTnViM2R3Ykc5M1lXNWhiSGwwYVdOekxuTnViM2R3Ykc5M0wyMXZZbWxzWlY5amIyNTBaWGgwTDJwemIyNXpZMmhsYldFdk1TMHdMVEFpTENBaVpHRjBZU0k2SUhzaVpHVjJhV05sVFdGdWRXWmhZM1IxY21WeUlqb2dJa0Z0YzNSeVlXUWlMQ0FpWVc1a2NtOXBaRWxrWm1FaU9pQWljMjl0WlY5aGJtUnliMmxrU1dSbVlTSXNJQ0prWlhacFkyVk5iMlJsYkNJNklDSnNZWEpuWlNJc0lDSnZjR1Z1U1dSbVlTSTZJQ0p6YjIxbFgwbGtabUVpTENBaVkyRnljbWxsY2lJNklDSnpiMjFsWDJOaGNuSnBaWElpTENBaVlYQndiR1ZKWkdaaElqb2dJbk52YldWZllYQndiR1ZKWkdaaElpd2dJbTl6Vm1WeWMybHZiaUk2SUNJekxqQXVNQ0lzSUNKaGNIQnNaVWxrWm5ZaU9pQWljMjl0WlY5aGNIQnNaVWxrWm5ZaUxDQWliM05VZVhCbElqb2dJazlUV0NKOWZTd2dleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WjJWdmJHOWpZWFJwYjI1ZlkyOXVkR1Y0ZEM5cWMyOXVjMk5vWlcxaEx6RXRNQzB3SWl3Z0ltUmhkR0VpT2lCN0lteHZibWRwZEhWa1pTSTZJREV3TENBaVlXeDBhWFIxWkdWQlkyTjFjbUZqZVNJNklEQXVNeXdnSW14aGRHbDBkV1JsSWpvZ055d2dJbXhoZEdsMGRXUmxURzl1WjJsMGRXUmxRV05qZFhKaFkza2lPaUF3TGpVc0lDSmlaV0Z5YVc1bklqb2dOVEFzSUNKaGJIUnBkSFZrWlNJNklESXdMQ0FpYzNCbFpXUWlPaUF4Tm4xOVhYMD0iLCAicCI6ICJwYyJ9LCB7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAic2UiLCAiZWlkIjogIjVhNzExODg1LTY5ZGMtNGY0Mi04Nzg1LWZjNjVmMTc1OGVjMCIsICJzZV9hYyI6ICJteV9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogIm15X2NhdGVnb3J5IiwgInAiOiAicGMifSwgeyJkdG0iOiAiMTQxMDE4NDc0Njg5NSIsICJlIjogInNlIiwgImVpZCI6ICI4M2VhYzIyNy03MTI5LTQyYTctYWY0NS00MGY2M2VkNGI5ZGQiLCAic2VfYWMiOiAiYW5vdGhlcl9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogImFub3RoZXJfY2F0ZWdvcnkiLCAicCI6ICJwYyJ9XX0" - val actual = CljTomcatLoader.toCollectorPayload(raw, Process) - actual must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "clj-tomcat", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.InputData( - "verb", - "GET".some, - "operation must be POST if content type and/or body are provided" - ) - l must_== raw - } - } - - def e3 = { - val raw = - "2014-09-08 13:59:07 - - 37.157.33.123 POST - /com.sendgrid-api-v3 200 - python-requests%2F2.2.1+CPython%2F3.3.5+Linux%2F3.2.0-61-generic &cv=clj-0.7.0-tom-0.1.0&nuid=5c6c40e4-eff8-409b-9327-471f303e30b6 - - - application%2Fjson%3B+charset%3Dutf-8 eyJzY2hlbWEiOiAiaWdsdTpjb20uc25vd3Bsb3dhbmFseXRpY3Muc25vd3Bsb3cvcGF5bG9hZF9kYXRhL2pzb25zY2hlbWEvMS0wLTAiLCAiZGF0YSI6IFt7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAicHYiLCAiZWlkIjogIjJjYWU0MTkxLTMxY2QtNDc4My04MmE4LWRmNTMxOGY0NGFmZiIsICJ1cmwiOiAiaHR0cDovL3d3dy5leGFtcGxlLmNvbSIsICJ0diI6ICJweS0wLjUuMCIsICJjeCI6ICJleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WTI5dWRHVjRkSE12YW5OdmJuTmphR1Z0WVM4eExUQXRNQ0lzSUNKa1lYUmhJam9nVzNzaWMyTm9aVzFoSWpvZ0ltbG5iSFU2WTI5dExuTnViM2R3Ykc5M1lXNWhiSGwwYVdOekxuTnViM2R3Ykc5M0wyMXZZbWxzWlY5amIyNTBaWGgwTDJwemIyNXpZMmhsYldFdk1TMHdMVEFpTENBaVpHRjBZU0k2SUhzaVpHVjJhV05sVFdGdWRXWmhZM1IxY21WeUlqb2dJa0Z0YzNSeVlXUWlMQ0FpWVc1a2NtOXBaRWxrWm1FaU9pQWljMjl0WlY5aGJtUnliMmxrU1dSbVlTSXNJQ0prWlhacFkyVk5iMlJsYkNJNklDSnNZWEpuWlNJc0lDSnZjR1Z1U1dSbVlTSTZJQ0p6YjIxbFgwbGtabUVpTENBaVkyRnljbWxsY2lJNklDSnpiMjFsWDJOaGNuSnBaWElpTENBaVlYQndiR1ZKWkdaaElqb2dJbk52YldWZllYQndiR1ZKWkdaaElpd2dJbTl6Vm1WeWMybHZiaUk2SUNJekxqQXVNQ0lzSUNKaGNIQnNaVWxrWm5ZaU9pQWljMjl0WlY5aGNIQnNaVWxrWm5ZaUxDQWliM05VZVhCbElqb2dJazlUV0NKOWZTd2dleUp6WTJobGJXRWlPaUFpYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2WjJWdmJHOWpZWFJwYjI1ZlkyOXVkR1Y0ZEM5cWMyOXVjMk5vWlcxaEx6RXRNQzB3SWl3Z0ltUmhkR0VpT2lCN0lteHZibWRwZEhWa1pTSTZJREV3TENBaVlXeDBhWFIxWkdWQlkyTjFjbUZqZVNJNklEQXVNeXdnSW14aGRHbDBkV1JsSWpvZ055d2dJbXhoZEdsMGRXUmxURzl1WjJsMGRXUmxRV05qZFhKaFkza2lPaUF3TGpVc0lDSmlaV0Z5YVc1bklqb2dOVEFzSUNKaGJIUnBkSFZrWlNJNklESXdMQ0FpYzNCbFpXUWlPaUF4Tm4xOVhYMD0iLCAicCI6ICJwYyJ9LCB7ImR0bSI6ICIxNDEwMTg0NzQ2ODk0IiwgImUiOiAic2UiLCAiZWlkIjogIjVhNzExODg1LTY5ZGMtNGY0Mi04Nzg1LWZjNjVmMTc1OGVjMCIsICJzZV9hYyI6ICJteV9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogIm15X2NhdGVnb3J5IiwgInAiOiAicGMifSwgeyJkdG0iOiAiMTQxMDE4NDc0Njg5NSIsICJlIjogInNlIiwgImVpZCI6ICI4M2VhYzIyNy03MTI5LTQyYTctYWY0NS00MGY2M2VkNGI5ZGQiLCAic2VfYWMiOiAiYW5vdGhlcl9hY3Rpb24iLCAidHYiOiAicHktMC41LjAiLCAiY3giOiAiZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdlkyOXVkR1Y0ZEhNdmFuTnZibk5qYUdWdFlTOHhMVEF0TUNJc0lDSmtZWFJoSWpvZ1czc2ljMk5vWlcxaElqb2dJbWxuYkhVNlkyOXRMbk51YjNkd2JHOTNZVzVoYkhsMGFXTnpMbk51YjNkd2JHOTNMMjF2WW1sc1pWOWpiMjUwWlhoMEwycHpiMjV6WTJobGJXRXZNUzB3TFRBaUxDQWlaR0YwWVNJNklIc2laR1YyYVdObFRXRnVkV1poWTNSMWNtVnlJam9nSWtGdGMzUnlZV1FpTENBaVlXNWtjbTlwWkVsa1ptRWlPaUFpYzI5dFpWOWhibVJ5YjJsa1NXUm1ZU0lzSUNKa1pYWnBZMlZOYjJSbGJDSTZJQ0pzWVhKblpTSXNJQ0p2Y0dWdVNXUm1ZU0k2SUNKemIyMWxYMGxrWm1FaUxDQWlZMkZ5Y21sbGNpSTZJQ0p6YjIxbFgyTmhjbkpwWlhJaUxDQWlZWEJ3YkdWSlpHWmhJam9nSW5OdmJXVmZZWEJ3YkdWSlpHWmhJaXdnSW05elZtVnljMmx2YmlJNklDSXpMakF1TUNJc0lDSmhjSEJzWlVsa1puWWlPaUFpYzI5dFpWOWhjSEJzWlVsa1puWWlMQ0FpYjNOVWVYQmxJam9nSWs5VFdDSjlmU3dnZXlKelkyaGxiV0VpT2lBaWFXZHNkVHBqYjIwdWMyNXZkM0JzYjNkaGJtRnNlWFJwWTNNdWMyNXZkM0JzYjNjdloyVnZiRzlqWVhScGIyNWZZMjl1ZEdWNGRDOXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2dJbVJoZEdFaU9pQjdJbXh2Ym1kcGRIVmtaU0k2SURFd0xDQWlZV3gwYVhSMVpHVkJZMk4xY21GamVTSTZJREF1TXl3Z0lteGhkR2wwZFdSbElqb2dOeXdnSW14aGRHbDBkV1JsVEc5dVoybDBkV1JsUVdOamRYSmhZM2tpT2lBd0xqVXNJQ0ppWldGeWFXNW5Jam9nTlRBc0lDSmhiSFJwZEhWa1pTSTZJREl3TENBaWMzQmxaV1FpT2lBeE5uMTlYWDA9IiwgInNlX2NhIjogImFub3RoZXJfY2F0ZWdvcnkiLCAicCI6ICJwYyJ9XX0" - val actual = CljTomcatLoader.toCollectorPayload(raw, Process) - actual must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "clj-tomcat", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.InputData( - "path", - "/com.sendgrid-api-v3".some, - "path does not match (/)vendor/version(/) nor is a legacy /i(ce.png) request" - ) - l must_== raw - } - } - - // A bit of fun: the chances of generating a valid Clojure Collector log record at random are - // so low that we can just use ScalaCheck here - def e4 = - prop { (raw: String) => - CljTomcatLoader.toCollectorPayload(raw, Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "clj-tomcat", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.Fallback( - "does not match the raw event format" - ) - l must_== raw - } - } -} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CloudfrontLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CloudfrontLoaderSpec.scala deleted file mode 100644 index 97880a7fa..000000000 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CloudfrontLoaderSpec.scala +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import cats.data.NonEmptyList -import cats.syntax.option._ - -import com.snowplowanalytics.snowplow.badrows._ - -import org.joda.time.DateTime - -import org.scalacheck.Arbitrary._ - -import org.specs2.{ScalaCheck, Specification} -import org.specs2.matcher.{DataTables, ValidatedMatchers} - -import SpecHelpers._ -import utils.ConversionUtils - -class CloudfrontLoaderSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { - val Process = Processor("CloudfrontLoaderSpec", "v1") - - def is = s2""" - toTimestamp should create a DateTime from valid date and time Strings $e1 - toOption should return a None if the querystring is empty $e2 - toCleanUri should remove a trailing % from a URI correctly $e3 - singleEncodePcts should correctly single-encoding double-encoded % signs $e4 - toCollectorPayload should return a CanonicalInput for a valid CloudFront log record $e5 - toCollectorPayload should return a Validation Failure for a non-GET request to /i $e6 - toCollectorPayload should return a Validation Failure for an invalid or corrupted CloudFront log record $e7 - """ - - object Expected { - val collector = "cloudfront" - val encoding = "UTF-8" - val api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") - } - - def e1 = - "SPEC NAME" || "DATE" | "TIME" | "EXP. DATETIME" | - "Valid with ms #1" !! "2003-12-04" ! "00:18:48.234" ! DateTime.parse( - "2003-12-04T00:18:48.234+00:00" - ) | - "Valid with ms #2" !! "2011-08-29" ! "23:56:01.003" ! DateTime.parse( - "2011-08-29T23:56:01.003+00:00" - ) | - "Valid without ms #1" !! "2013-05-12" ! "17:34:10" ! DateTime.parse( - "2013-05-12T17:34:10+00:00" - ) | - "Valid without ms #2" !! "1980-04-01" ! "21:20:04" ! DateTime.parse( - "1980-04-01T21:20:04+00:00" - ) |> { (_, date, time, expected) => - val actual = CloudfrontLoader.toTimestamp(date, time) - actual must beRight(expected) - } - - def e2 = - foreach(Seq(null, "", "-")) { empty: String => - CloudfrontLoader.toOption(empty) must beNone - } - - def e3 = - "SPEC NAME" || "URI" | "EXP. URI" | - "URI with trailing % #1" !! "https://github.com/snowplow/snowplow/issues/494%" ! "https://github.com/snowplow/snowplow/issues/494" | - "URI with trailing % #2" !! "http://bbc.co.uk%" ! "http://bbc.co.uk" | - "URI without trailing % #1" !! "https://github.com/snowplow/snowplow/issues/494" ! "https://github.com/snowplow/snowplow/issues/494" | - "URI without trailing % #2" !! "" ! "" | - "URI without trailing % #3" !! "http://bbc.co.uk" ! "http://bbc.co.uk" |> { (_, uri, expected) => - val actual = CloudfrontLoader.toCleanUri(uri) - actual must_== expected - } - - def e4 = - "SPEC NAME" || "QUERYSTRING" | "EXP. QUERYSTRING" | - "Double-encoded %s, modify" !! "e=pv&page=Celestial%2520Tarot%2520-%2520Psychic%2520Bazaar&dtm=1376487150616&tid=483686&vp=1097x482&ds=1097x1973&vid=1&duid=1f2719e9217b5e1b&p=web&tv=js-0.12.0&fp=3748874661&aid=pbzsite&lang=en-IE&cs=utf-8&tz=Europe%252FLondon&refr=http%253A%252F%252Fwww.psychicbazaar.com%252Fsearch%253Fsearch_query%253Dcelestial%252Btarot%252Bdeck&f_java=1&res=1097x617&cd=24&cookie=1&url=http%253A%252F%252Fwww.psychicbazaar.com%252Ftarot-cards%252F48-celestial-tarot.html" ! "e=pv&page=Celestial%20Tarot%20-%20Psychic%20Bazaar&dtm=1376487150616&tid=483686&vp=1097x482&ds=1097x1973&vid=1&duid=1f2719e9217b5e1b&p=web&tv=js-0.12.0&fp=3748874661&aid=pbzsite&lang=en-IE&cs=utf-8&tz=Europe%2FLondon&refr=http%3A%2F%2Fwww.psychicbazaar.com%2Fsearch%3Fsearch_query%3Dcelestial%2Btarot%2Bdeck&f_java=1&res=1097x617&cd=24&cookie=1&url=http%3A%2F%2Fwww.psychicbazaar.com%2Ftarot-cards%2F48-celestial-tarot.html" | - "Ambiguous - assume double-encoded, modify" !! "%2588 is 1x-encoded 25 percent OR 2x-encoded ^" ! "%88 is 1x-encoded 25 percent OR 2x-encoded ^" | - "Single-encoded %s, leave" !! "e=pp&page=Dreaming%20Way%20Tarot%20-%20Psychic%20Bazaar&pp_mix=0&pp_max=0&pp_miy=0&pp_may=0&dtm=1376984181667&tid=056188&vp=1440x838&ds=1440x1401&vid=1&duid=8ac2d67163d6d36a&p=web&tv=js-0.12.0&fp=1569742263&aid=pbzsite&lang=en-us&cs=UTF-8&tz=Australia%2FSydney&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1440x900&cd=24&cookie=1&url=http%3A%2F%2Fwww.psychicbazaar.com%2Ftarot-cards%2F312-dreaming-way-tarot.html" ! "e=pp&page=Dreaming%20Way%20Tarot%20-%20Psychic%20Bazaar&pp_mix=0&pp_max=0&pp_miy=0&pp_may=0&dtm=1376984181667&tid=056188&vp=1440x838&ds=1440x1401&vid=1&duid=8ac2d67163d6d36a&p=web&tv=js-0.12.0&fp=1569742263&aid=pbzsite&lang=en-us&cs=UTF-8&tz=Australia%2FSydney&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1440x900&cd=24&cookie=1&url=http%3A%2F%2Fwww.psychicbazaar.com%2Ftarot-cards%2F312-dreaming-way-tarot.html" | - "Single-encoded % sign itself, leave" !! "Loading - 70%25 Complete" ! "Loading - 70%25 Complete" |> { (_, qs, expected) => - val actual = ConversionUtils.singleEncodePcts(qs) - actual must_== expected - } - - def e5 = - "SPEC NAME" || "RAW" | "EXP. TIMESTAMP" | "EXP. PAYLOAD" | "EXP. IP ADDRESS" | "EXP. USER AGENT" | "EXP. REFERER URI" | - "CloudFront with 2 spaces" !! "2013-08-29 00:18:48 LAX3 830 255.255.255.255 GET d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/analytics/index.html Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0 e=pv&page=Introduction%20-%20Snowplow%20Analytics%25&dtm=1377735557970&tid=567074&vp=1024x635&ds=1024x635&vid=1&duid=7969620089de36eb&p=web&tv=js-0.12.0&fp=308909339&aid=snowplowweb&lang=en-US&cs=UTF-8&tz=America%2FLos_Angeles&refr=http%3A%2F%2Fwww.metacrawler.com%2Fsearch%2Fweb%3Ffcoid%3D417%26fcop%3Dtopnav%26fpid%3D27%26q%3Dsnowplow%2Banalytics%26ql%3D&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=0&res=1024x768&cd=24&cookie=1&url=http%3A%2F%2Fsnowplowanalytics.com%2Fanalytics%2Findex.html - Hit wQ1OBZtQlGgfM_tPEJ-lIQLsdra0U-lXgmfJfwja2KAV_SfTdT3lZg==" ! - DateTime.parse("2013-08-29T00:18:48.000+00:00") ! toNameValuePairs( - "e" -> "pv", - "page" -> "Introduction - Snowplow Analytics%", - "dtm" -> "1377735557970", - "tid" -> "567074", - "vp" -> "1024x635", - "ds" -> "1024x635", - "vid" -> "1", - "duid" -> "7969620089de36eb", - "p" -> "web", - "tv" -> "js-0.12.0", - "fp" -> "308909339", - "aid" -> "snowplowweb", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "America/Los_Angeles", - "refr" -> "http://www.metacrawler.com/search/web?fcoid=417&fcop=topnav&fpid=27&q=snowplow+analytics&ql=", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1024x768", - "cd" -> "24", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/analytics/index.html" - ) ! - "255.255.255.255".some ! "Mozilla/5.0%20(Windows%20NT%205.1;%20rv:23.0)%20Gecko/20100101%20Firefox/23.0".some ! "http://snowplowanalytics.com/analytics/index.html".some | - "CloudFront with 4 spaces" !! "2014-01-28 02:52:24 HKG50 829 202.134.75.113 GET d3v6ndkyapxc2w.cloudfront.net /i 200 http://snowplowanalytics.com/product/index.html Mozilla/5.0%2520(Windows%2520NT%25205.1)%2520AppleWebKit/537.36%2520(KHTML,%2520like%2520Gecko)%2520Chrome/31.0.1650.57%2520Safari/537.36 e=pv&page=Snowplow%2520-%2520the%2520most%2520powerful%252C%2520scalable%252C%2520flexible%2520web%2520analytics%2520platform%2520in%2520the%2520world.%2520-%2520Snowplow%2520Analytics&tid=322602&vp=1600x739&ds=1600x739&vid=1&duid=5c34698b211e8949&p=web&tv=js-0.13.0&aid=snowplowweb&lang=zh-CN&cs=UTF-8&tz=Asia%252FShanghai&refr=http%253A%252F%252Fsnowplowanalytics.com%252Fabout%252Findex.html&f_pdf=1&f_qt=1&f_realp=0&f_wma=1&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=1&res=1600x900&cookie=1&url=http%253A%252F%252Fsnowplowanalytics.com%252Fproduct%252Findex.html - Hit VtgzUTq1UoySDN3m_B-5DqmpTjgAS5YaAcvk_uz_D0-0TrDrZJJu2Q== d3v6ndkyapxc2w.cloudfront.net http 881" ! - DateTime.parse("2014-01-28T02:52:24.000+00:00") ! toNameValuePairs( - "e" -> "pv", - "page" -> "Snowplow - the most powerful, scalable, flexible web analytics platform in the world. - Snowplow Analytics", - "tid" -> "322602", - "vp" -> "1600x739", - "ds" -> "1600x739", - "vid" -> "1", - "duid" -> "5c34698b211e8949", - "p" -> "web", - "tv" -> "js-0.13.0", - "aid" -> "snowplowweb", - "lang" -> "zh-CN", - "cs" -> "UTF-8", - "tz" -> "Asia/Shanghai", - "refr" -> "http://snowplowanalytics.com/about/index.html", - "f_pdf" -> "1", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "1", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "1", - "res" -> "1600x900", - "cookie" -> "1", - "url" -> "http://snowplowanalytics.com/product/index.html" - ) ! - "202.134.75.113".some ! "Mozilla/5.0%20(Windows%20NT%205.1)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/31.0.1650.57%20Safari/537.36".some ! "http://snowplowanalytics.com/product/index.html".some | - "CloudFront with tabs" !! "2014-01-28 03:41:59 IAD12 828 67.71.16.237 GET d10wr4jwvp55f9.cloudfront.net /i 200 http://www.psychicbazaar.com/oracles/107-magdalene-oracle.html Mozilla/5.0%2520(Windows%2520NT%25206.1;%2520Trident/7.0;%2520rv:11.0)%2520like%2520Gecko e=pp&page=Magdalene%2520Oracle%2520-%2520Psychic%2520Bazaar&tid=151507&vp=975x460&ds=1063x1760&vid=1&duid=44a32544aac965f4&p=web&tv=js-0.13.0&aid=pbzsite&lang=en-CA&cs=utf-8&tz=America%252FHavana&refr=http%253A%252F%252Fwww.google.ca%252Furl%253Fsa%253Dt%2526rct%253Dj%2526q%253D%2526esrc%253Ds%2526source%253Dweb%2526cd%253D16%2526ved%253D0CIIBEBYwDw%2526url%253Dhttp%25253A%25252F%25252Fwww.psychicbazaar.com%25252Foracles%25252F107-magdalene-oracle.html%2526ei%253DIibnUsfBDMiM2gXGoICoDg%2526usg%253DAFQjCNE6fEqO8lnxDHeke0LOuAZIa1iSFQ%2526sig2%253DV7KJR0VmGw5yaHoMKKJHhg%2526bvm%253Dbv.59930103%252Cd.b2I&f_pdf=0&f_qt=0&f_realp=0&f_wma=0&f_dir=0&f_fla=0&f_java=1&f_gears=0&f_ag=1&res=975x571&cookie=1&url=http%253A%252F%252Fwww.psychicbazaar.com%252Foracles%252F107-magdalene-oracle.html - Hit 7T7tuHtEcdoDvUuGnQ3F0RI_UEWOUeb0b-YIhcoxjziuEBMDcKv_OA== d10wr4jwvp55f9.cloudfront.net http 1047" ! - DateTime.parse("2014-01-28T03:41:59.000+00:00") ! toNameValuePairs( - "e" -> "pp", - "page" -> "Magdalene Oracle - Psychic Bazaar", - "tid" -> "151507", - "vp" -> "975x460", - "ds" -> "1063x1760", - "vid" -> "1", - "duid" -> "44a32544aac965f4", - "p" -> "web", - "tv" -> "js-0.13.0", - "aid" -> "pbzsite", - "lang" -> "en-CA", - "cs" -> "utf-8", - "tz" -> "America/Havana", - "refr" -> "http://www.google.ca/url?sa=t&rct=j&q=&esrc=s&source=web&cd=16&ved=0CIIBEBYwDw&url=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F107-magdalene-oracle.html&ei=IibnUsfBDMiM2gXGoICoDg&usg=AFQjCNE6fEqO8lnxDHeke0LOuAZIa1iSFQ&sig2=V7KJR0VmGw5yaHoMKKJHhg&bvm=bv.59930103,d.b2I", - "f_pdf" -> "0", - "f_qt" -> "0", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "0", - "f_java" -> "1", - "f_gears" -> "0", - "f_ag" -> "1", - "res" -> "975x571", - "cookie" -> "1", - "url" -> "http://www.psychicbazaar.com/oracles/107-magdalene-oracle.html" - ) ! - "67.71.16.237".some ! "Mozilla/5.0%20(Windows%20NT%206.1;%20Trident/7.0;%20rv:11.0)%20like%20Gecko".some ! "http://www.psychicbazaar.com/oracles/107-magdalene-oracle.html".some | - "CloudFront with x-forwarded-for" !! "2016-07-01 13:17:26 AMS50 480 255.255.255.255 GET d1f6ajd7ltcrsx.cloudfront.net /i 200 http://www.simplybusiness.co.uk/knowledge/articles/2016/06/guide-to-facebook-professional-services-for-small-business/ Mozilla/5.0%20(Windows%20NT%206.1;%20Trident/7.0;%20rv:11.0)%20like%20Gecko e=pv&url=http%253A%252F%252Fwww.simplybusiness.co.uk%252Fknowledge%252Farticles%252F2016%252F06%252Fguide-to-facebook-professional-services-for-small-business%252F&page=Guide%2520to%2520Facebook%2520Professional%2520Services%2520for%2520small%2520business&tv=js-2.4.0&tna=sb-cf-pv&p=web&tz=Europe%252FLondon&lang=en-US&cs=UTF-8&f_pdf=1&f_qt=0&f_realp=0&f_wma=0&f_dir=0&f_fla=1&f_java=0&f_gears=0&f_ag=0&res=1600x900&cd=24&cookie=1&eid=e3793bd1-fcf5-4fbb-bf4c-f0315a5821c3&dtm=1467379046723&vp=1600x799&ds=1583x4043&vid=1&duid=685e511b67c86d5c&fp=2811351631 - Hit LLzvdlIbJ0d6siOm-EY3-2nBYTiM6b5RZLWRyPbyTCE-RIE9bC7_eQ== d1f6ajd7ltcrsx.cloudfront.net http 1627 0.003 67.71.16.237,%20202.134.75.113 - - Hit" ! - DateTime.parse("2016-07-01T13:17:26.000+00:00") ! toNameValuePairs( - "e" -> "pv", - "url" -> "http://www.simplybusiness.co.uk/knowledge/articles/2016/06/guide-to-facebook-professional-services-for-small-business/", - "page" -> "Guide to Facebook Professional Services for small business", - "tv" -> "js-2.4.0", - "tna" -> "sb-cf-pv", - "p" -> "web", - "tz" -> "Europe/London", - "lang" -> "en-US", - "cs" -> "UTF-8", - "f_pdf" -> "1", - "f_qt" -> "0", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "1600x900", - "cd" -> "24", - "cookie" -> "1", - "eid" -> "e3793bd1-fcf5-4fbb-bf4c-f0315a5821c3", - "dtm" -> "1467379046723", - "vp" -> "1600x799", - "ds" -> "1583x4043", - "vid" -> "1", - "duid" -> "685e511b67c86d5c", - "fp" -> "2811351631" - ) ! - "67.71.16.237".some ! "Mozilla/5.0%20(Windows%20NT%206.1;%20Trident/7.0;%20rv:11.0)%20like%20Gecko".some ! "http://www.simplybusiness.co.uk/knowledge/articles/2016/06/guide-to-facebook-professional-services-for-small-business/".some |> { - - (_, raw, timestamp, payload, ipAddress, userAgent, refererUri) => - val canonicalEvent = CloudfrontLoader.toCollectorPayload(raw, Process) - - val expected = CollectorPayload( - api = Expected.api, - querystring = payload, - body = None, - contentType = None, - source = CollectorPayload.Source(Expected.collector, Expected.encoding, None), - context = CollectorPayload - .Context(timestamp.some, ipAddress, userAgent, refererUri, Nil, None) - ) - - canonicalEvent must beValid(expected.some) - } - - def e6 = { - val raw = - "2012-05-24 11:35:53 DFW3 3343 99.116.172.58 POST d3gs014xn8p70.cloudfront.net /i 200 http://www.psychicbazaar.com/2-tarot-cards/genre/all/type/all?p=5 Mozilla/5.0%20(Windows%20NT%206.1;%20WOW64;%20rv:12.0)%20Gecko/20100101%20Firefox/12.0 e=pv&page=Tarot%2520cards%2520-%2520Psychic%2520Bazaar&tid=344260&uid=288112e0a5003be2&vid=1&lang=en-US&refr=http%253A%252F%252Fwww.psychicbazaar.com%252F2-tarot-cards%252Fgenre%252Fall%252Ftype%252Fall%253Fp%253D4&f_pdf=1&f_qt=0&f_realp=0&f_wma=0&f_dir=0&f_fla=1&f_java=1&f_gears=0&f_ag=1&res=1366x768&cookie=1" - CloudfrontLoader.toCollectorPayload(raw, Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "cloudfront", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.InputData( - "verb", - "POST".some, - "operation must be GET" - ) - l must_== raw - } - } - - // A bit of fun: the chances of generating a valid CloudFront row at random are - // so low that we can just use ScalaCheck here - def e7 = - prop { (raw: String) => - CloudfrontLoader.toCollectorPayload(raw, Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "cloudfront", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.Fallback( - "does not match header or data row formats" - ) - l must_== raw - } - } -} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/LoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/LoaderSpec.scala index e848ae5a0..76a22667c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/LoaderSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/LoaderSpec.scala @@ -48,24 +48,6 @@ object LoaderSpec { class LoaderSpec extends Specification with DataTables with ValidatedMatchers { import LoaderSpec._ - "getLoader" should { - "return the CloudfrontLoader" in { - Loader.getLoader("cloudfront") must beRight(CloudfrontLoader) - } - - "return the CljTomcatLoader" in { - Loader.getLoader("clj-tomcat") must beRight(CljTomcatLoader) - } - - "return the ThriftLoader" in { - Loader.getLoader("thrift") must beRight(ThriftLoader) - } - - "return the NDJSON loader" in { - Loader.getLoader("ndjson/example.test/v1") must beRight(NdjsonLoader("example.test/v1")) - } - } - "extractGetPayload" should { val Encoding = UTF_8 // TODO: add more tests diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/NdjsonLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/NdjsonLoaderSpec.scala deleted file mode 100644 index 693312986..000000000 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/NdjsonLoaderSpec.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2015-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders - -import cats.data.NonEmptyList -import com.snowplowanalytics.snowplow.badrows._ -import org.specs2.matcher.ValidatedMatchers -import org.specs2.mutable.Specification - -class NdjsonLoaderSpec extends Specification with ValidatedMatchers { - val Process = Processor("NdjsonLoaderSpec", "v1") - - "toCollectorPayload" should { - "return failure on unparsable json" in { - val invalid = NdjsonLoader("com.abc/v1").toCollectorPayload("""{ ... """, Process) - invalid must beInvalid - } - - "return success on parsable json" in { - val valid = NdjsonLoader("com.abc/v1").toCollectorPayload("""{ "key": "value" }""", Process) - valid must beValid - } - - "return success with no content for empty rows" in { - NdjsonLoader("com.abc/v1").toCollectorPayload("\r\n", Process) must beValid(None) - } - - "fail if multiple lines passed in as one line" in { - val line = List("""{"key":"value1"}""", """{"key":"value2"}""").mkString("\n") - NdjsonLoader("com.abc/v1").toCollectorPayload(line, Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "ndjson", f), - Payload.RawPayload(l) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.Fallback( - "expected a single line, found 2" - ) - l must_== line - } - } - } -} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/TsvLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/TsvLoaderSpec.scala deleted file mode 100644 index 758b831c4..000000000 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/TsvLoaderSpec.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common.loaders - -import cats.syntax.option._ -import org.specs2.Specification -import org.specs2.matcher.{DataTables, ValidatedMatchers} -import com.snowplowanalytics.snowplow.badrows._ - -class TsvLoaderSpec extends Specification with DataTables with ValidatedMatchers { - val processor = Processor("TsvLoaderSpec", "v1") - - def is = s2""" - toCollectorPayload should return a CollectorPayload for a normal TSV $e1 - toCollectorPayload should return None for the first two lines of a Cloudfront web distribution access log $e2 - """ - - def e1 = { - val expected = CollectorPayload( - api = CollectorPayload.Api("com.amazon.aws.cloudfront", "wd_access_log"), - querystring = Nil, - body = "a\tb".some, - contentType = None, - source = CollectorPayload.Source("tsv", "UTF-8", None), - context = CollectorPayload.Context(None, None, None, None, Nil, None) - ) - TsvLoader("com.amazon.aws.cloudfront/wd_access_log") - .toCollectorPayload("a\tb", processor) must beValid( - expected.some - ) - } - - def e2 = - TsvLoader("com.amazon.aws.cloudfront/wd_access_log") - .toCollectorPayload("#Version: 1.0", processor) must beValid( - None - ) -} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala index 1ca03b883..4139ea8a1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala @@ -13,16 +13,8 @@ package com.snowplowanalytics.snowplow.enrich.common package utils -import java.util.concurrent.TimeUnit - -import cats.Id -import cats.effect.{Clock => CEClock} +import cats.effect.{Clock => CEClock, IO} object Clock { - implicit val idClock: CEClock[Id] = new CEClock[Id] { - final def realTime(unit: TimeUnit): Id[Long] = - unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) - final def monotonic(unit: TimeUnit): Id[Long] = - unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS) - } + implicit val ioClock: CEClock[IO] = CEClock.create[IO] } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala index 611ef07e3..2c8d55ff3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2014-2023 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -15,22 +15,25 @@ package com.snowplowanalytics.snowplow.enrich.common.utils import org.specs2.mutable.Specification import org.specs2.matcher.ValidatedMatchers -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} -import com.snowplowanalytics.iglu.client.ClientError.{ResolutionError, ValidationError} - -import com.snowplowanalytics.snowplow.badrows._ +import cats.effect.testing.specs2.CatsIO import io.circe.parser.parse import cats.data.NonEmptyList +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.iglu.client.ClientError.{ResolutionError, ValidationError} + +import com.snowplowanalytics.snowplow.badrows._ + import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload -class IgluUtilsSpec extends Specification with ValidatedMatchers { +class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsIO { val raw = RawEvent( CollectorPayload.Api("vendor", "version"), @@ -134,7 +137,8 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { "extractAndValidateUnstructEvent" should { "return None if unstruct_event field is empty" >> { IgluUtils - .extractAndValidateUnstructEvent(new EnrichedEvent, SpecHelpers.client) must beValid(None) + .extractAndValidateUnstructEvent(new EnrichedEvent, SpecHelpers.client) + .map(_ must beValid(None)) } "return a SchemaViolation.NotJson if unstruct_event does not contain a properly formatted JSON string" >> { @@ -142,10 +146,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(notJson) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case _: FailureDetails.SchemaViolation.NotJson => ok - case err => ko(s"[$err] is not NotJson") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case _: FailureDetails.SchemaViolation.NotJson => ok + case err => ko(s"[$err] is not NotJson") + }) } "return a SchemaViolation.NotIglu if unstruct_event contains a properly formatted JSON string that is not self-describing" >> { @@ -153,10 +158,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(notIglu) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case _: FailureDetails.SchemaViolation.NotIglu => ok - case err => ko(s"[$err] is not NotIglu") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case _: FailureDetails.SchemaViolation.NotIglu => ok + case err => ko(s"[$err] is not NotIglu") + }) } "return a SchemaViolation.CriterionMismatch if unstruct_event contains a self-describing JSON but not with the expected schema for unstructured events" >> { @@ -164,10 +170,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(noSchema) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case _: FailureDetails.SchemaViolation.CriterionMismatch => ok - case err => ko(s"[$err] is not CriterionMismatch") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case _: FailureDetails.SchemaViolation.CriterionMismatch => ok + case err => ko(s"[$err] is not CriterionMismatch") + }) } "return a SchemaViolation.NotJson if the JSON in .data is not a JSON" >> { @@ -175,10 +182,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(buildUnstruct(notJson)) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case _: FailureDetails.SchemaViolation.NotJson => ok - case err => ko(s"[$err] is not NotJson") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case _: FailureDetails.SchemaViolation.NotJson => ok + case err => ko(s"[$err] is not NotJson") + }) } "return a SchemaViolation.IgluError containing a ValidationError if the JSON in .data is not self-describing" >> { @@ -186,12 +194,13 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(buildUnstruct(notIglu)) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)) => ok - case ie: FailureDetails.SchemaViolation.IgluError => - ko(s"IgluError [$ie] is not ValidationError") - case err => ko(s"[$err] is not IgluError") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)) => ok + case ie: FailureDetails.SchemaViolation.IgluError => + ko(s"IgluError [$ie] is not ValidationError") + case err => ko(s"[$err] is not IgluError") + }) } "return a SchemaViolation.IgluError containing a ValidationError if the JSON in .data is not a valid SDJ" >> { @@ -199,12 +208,13 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(buildUnstruct(invalidEmailSent)) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)) => ok - case ie: FailureDetails.SchemaViolation.IgluError => - ko(s"IgluError [$ie] is not ValidationError") - case err => ko(s"[$err] is not IgluError") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)) => ok + case ie: FailureDetails.SchemaViolation.IgluError => + ko(s"IgluError [$ie] is not ValidationError") + case err => ko(s"[$err] is not IgluError") + }) } "return a SchemaViolation.IgluError containing a ResolutionError if the schema of the SDJ in .data can't be resolved" >> { @@ -212,12 +222,13 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(buildUnstruct(noSchema)) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beInvalid.like { - case FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_)) => ok - case ie: FailureDetails.SchemaViolation.IgluError => - ko(s"IgluError [$ie] is not ResolutionError") - case err => ko(s"[$err] is not IgluError") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_)) => ok + case ie: FailureDetails.SchemaViolation.IgluError => + ko(s"IgluError [$ie] is not ResolutionError") + case err => ko(s"[$err] is not IgluError") + }) } "return the extracted unstructured event if .data is a valid SDJ" >> { @@ -225,14 +236,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setUnstruct_event(buildUnstruct(emailSent1)) IgluUtils - .extractAndValidateUnstructEvent(input, SpecHelpers.client) must beValid.like { - case Some(IgluUtils.SdjExtractResult(sdj, None)) if sdj.schema == emailSentSchema => ok - case Some(s) => - ko( - s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${emailSentSchema}]" - ) - case None => ko("no unstructured event was extracted") - } + .extractAndValidateUnstructEvent(input, SpecHelpers.client) + .map(_ must beValid.like { + case Some(IgluUtils.SdjExtractResult(sdj, None)) if sdj.schema == emailSentSchema => ok + case Some(s) => + ko( + s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${emailSentSchema}]" + ) + case None => ko("no unstructured event was extracted") + }) } "return the extracted unstructured event when schema is superseded by another schema" >> { @@ -245,32 +257,35 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { val expectedValidationInfo = IgluUtils.ValidationInfo(supersedingExampleSchema100, supersedingExampleSchema101.version) IgluUtils - .extractAndValidateUnstructEvent(input1, SpecHelpers.client) must beValid.like { - case Some(IgluUtils.SdjExtractResult(sdj, Some(`expectedValidationInfo`))) if sdj.schema == supersedingExampleSchema101 => ok - case Some(s) => - ko( - s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${supersedingExampleSchema101}]" - ) - case None => ko("no unstructured event was extracted") - } + .extractAndValidateUnstructEvent(input1, SpecHelpers.client) + .map(_ must beValid.like { + case Some(IgluUtils.SdjExtractResult(sdj, Some(`expectedValidationInfo`))) if sdj.schema == supersedingExampleSchema101 => ok + case Some(s) => + ko( + s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${supersedingExampleSchema101}]" + ) + case None => ko("no unstructured event was extracted") + }) // input2 wouldn't be validated with 1-0-0. It would be validated with 1-0-1 only. IgluUtils - .extractAndValidateUnstructEvent(input2, SpecHelpers.client) must beValid.like { - case Some(IgluUtils.SdjExtractResult(sdj, Some(`expectedValidationInfo`))) if sdj.schema == supersedingExampleSchema101 => ok - case Some(s) => - ko( - s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${supersedingExampleSchema101}]" - ) - case None => ko("no unstructured event was extracted") - } + .extractAndValidateUnstructEvent(input2, SpecHelpers.client) + .map(_ must beValid.like { + case Some(IgluUtils.SdjExtractResult(sdj, Some(`expectedValidationInfo`))) if sdj.schema == supersedingExampleSchema101 => ok + case Some(s) => + ko( + s"unstructured event's schema [${s.sdj.schema}] does not match expected schema [${supersedingExampleSchema101}]" + ) + case None => ko("no unstructured event was extracted") + }) } } "extractAndValidateInputContexts" should { "return Nil if contexts field is empty" >> { IgluUtils - .extractAndValidateInputContexts(new EnrichedEvent, SpecHelpers.client) must beValid(Nil) + .extractAndValidateInputContexts(new EnrichedEvent, SpecHelpers.client) + .map(_ must beValid(Nil)) } "return a SchemaViolation.NotJson if .contexts does not contain a properly formatted JSON string" >> { @@ -278,10 +293,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(notJson) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(_: FailureDetails.SchemaViolation.NotJson, Nil) => ok - case err => ko(s"[$err] is not one NotJson") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(_: FailureDetails.SchemaViolation.NotJson, Nil) => ok + case err => ko(s"[$err] is not one NotJson") + }) } "return a SchemaViolation.NotIglu if .contexts contains a properly formatted JSON string that is not self-describing" >> { @@ -289,10 +305,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(notIglu) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(_: FailureDetails.SchemaViolation.NotIglu, Nil) => ok - case err => ko(s"[$err] is not one NotIglu") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(_: FailureDetails.SchemaViolation.NotIglu, Nil) => ok + case err => ko(s"[$err] is not one NotIglu") + }) } "return a SchemaViolation.CriterionMismatch if .contexts contains a self-describing JSON but not with the right schema" >> { @@ -300,10 +317,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(noSchema) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(_: FailureDetails.SchemaViolation.CriterionMismatch, Nil) => ok - case err => ko(s"[$err] is not one CriterionMismatch") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(_: FailureDetails.SchemaViolation.CriterionMismatch, Nil) => ok + case err => ko(s"[$err] is not one CriterionMismatch") + }) } "return a SchemaViolation.IgluError containing a ValidationError if .data does not contain an array of JSON objects" >> { @@ -313,13 +331,14 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(notArrayContexts) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => - ok - case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => - ko(s"IgluError [$ie] is not ValidationError") - case err => ko(s"[$err] is not one IgluError") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => + ok + case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => + ko(s"IgluError [$ie] is not ValidationError") + case err => ko(s"[$err] is not one IgluError") + }) } "return a SchemaViolation.IgluError containing a ValidationError if .data contains one invalid context" >> { @@ -327,13 +346,14 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(buildInputContexts(List(invalidEmailSent))) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => - ok - case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => - ko(s"IgluError [$ie] is not ValidationError") - case err => ko(s"[$err] is not one IgluError") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => + ok + case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => + ko(s"IgluError [$ie] is not ValidationError") + case err => ko(s"[$err] is not one IgluError") + }) } "return a SchemaViolation.IgluError containing a ResolutionError if .data contains one context whose schema can't be resolved" >> { @@ -341,13 +361,14 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(buildInputContexts(List(noSchema))) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_)), Nil) => - ok - case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => - ko(s"IgluError [$ie] is not ResolutionError") - case err => ko(s"[$err] is not one IgluError") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_)), Nil) => + ok + case NonEmptyList(ie: FailureDetails.SchemaViolation.IgluError, Nil) => + ko(s"IgluError [$ie] is not ResolutionError") + case err => ko(s"[$err] is not one IgluError") + }) } "return 2 expected failures for 2 invalid contexts" >> { @@ -355,14 +376,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(buildInputContexts(List(invalidEmailSent, noSchema))) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList( - FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), - List(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_))) - ) => - ok - case errs => ko(s"[$errs] is not one ValidationError and one ResolutionError") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList( + FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), + List(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_))) + ) => + ok + case errs => ko(s"[$errs] is not one ValidationError and one ResolutionError") + }) } "return an expected failure if one context is valid and the other invalid" >> { @@ -370,10 +392,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(buildInputContexts(List(emailSent1, noSchema))) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beInvalid.like { - case NonEmptyList(_: FailureDetails.SchemaViolation.IgluError, Nil) => ok - case err => ko(s"[$err] is not one IgluError") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beInvalid.like { + case NonEmptyList(_: FailureDetails.SchemaViolation.IgluError, Nil) => ok + case err => ko(s"[$err] is not one IgluError") + }) } "return the extracted SDJs for 2 valid input contexts" >> { @@ -381,36 +404,41 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { input.setContexts(buildInputContexts(List(emailSent1, emailSent2))) IgluUtils - .extractAndValidateInputContexts(input, SpecHelpers.client) must beValid.like { - case sdjs if sdjs.size == 2 && sdjs.forall(i => i.sdj.schema == emailSentSchema && i.validationInfo.isEmpty) => - ok - case res => - ko(s"[$res] are not 2 SDJs with expected schema [${emailSentSchema.toSchemaUri}]") - } + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beValid.like { + case sdjs if sdjs.size == 2 && sdjs.forall(i => i.sdj.schema == emailSentSchema && i.validationInfo.isEmpty) => + ok + case res => + ko(s"[$res] are not 2 SDJs with expected schema [${emailSentSchema.toSchemaUri}]") + }) } "return the extracted SDJ for an input that has a required property set to null if the schema explicitly allows it" >> { val input = new EnrichedEvent input.setContexts(buildInputContexts(List(clientSession))) - IgluUtils.extractAndValidateInputContexts(input, SpecHelpers.client) must beValid.like { - case sdj if sdj.size == 1 && sdj.forall(_.sdj.schema == clientSessionSchema) => - ok - case _ => - ko("$.previousSessionId: is missing but it is required") - } + IgluUtils + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beValid.like { + case sdj if sdj.size == 1 && sdj.forall(_.sdj.schema == clientSessionSchema) => + ok + case _ => + ko("$.previousSessionId: is missing but it is required") + }) } "return the extracted context when schema is superseded by another schema" >> { val input = new EnrichedEvent input.setContexts(buildInputContexts(List(supersedingExample1, supersedingExample2))) - IgluUtils.extractAndValidateInputContexts(input, SpecHelpers.client) must beValid.like { - case sdj if sdj.size == 2 && sdj.forall(_.sdj.schema == supersedingExampleSchema101) => - ok - case _ => - ko("Failed to extract context when schema is superseded by another schema") - } + IgluUtils + .extractAndValidateInputContexts(input, SpecHelpers.client) + .map(_ must beValid.like { + case sdj if sdj.size == 2 && sdj.forall(_.sdj.schema == supersedingExampleSchema101) => + ok + case _ => + ko("Failed to extract context when schema is superseded by another schema") + }) } } @@ -422,21 +450,22 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { IgluUtils .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched) - .value must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - _ - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") - } + .value + .map(_ must beLeft.like { + case BadRow.EnrichmentFailures(_, failures, _) => + failures.messages match { + case NonEmptyList( + FailureDetails.EnrichmentFailure( + _, + FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) + ), + _ + ) => + ok + case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") + } + case br => ko(s"bad row [$br] is not EnrichmentFailures") + }) } "return a BadRow.EnrichmentFailures 2 expected failures for 2 invalid contexts" >> { @@ -447,29 +476,30 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { IgluUtils .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched) - .value must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - List( + .value + .map(_ must beLeft.like { + case BadRow.EnrichmentFailures(_, failures, _) => + failures.messages match { + case NonEmptyList( FailureDetails.EnrichmentFailure( _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ResolutionError(_)) + FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) + ), + List( + FailureDetails.EnrichmentFailure( + _, + FailureDetails.EnrichmentFailureMessage.IgluError(_, ResolutionError(_)) + ) ) - ) - ) => - ok - case errs => - ko( - s"bad row is EnrichmentFailures but [$errs] is not one ValidationError and one ResolutionError" - ) - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") - } + ) => + ok + case errs => + ko( + s"bad row is EnrichmentFailures but [$errs] is not one ValidationError and one ResolutionError" + ) + } + case br => ko(s"bad row [$br] is not EnrichmentFailures") + }) } "return a BadRow.EnrichmentFailures with an expected failure for 1 valid context and one invalid" >> { @@ -480,21 +510,22 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { IgluUtils .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched) - .value must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - Nil - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") - } + .value + .map(_ must beLeft.like { + case BadRow.EnrichmentFailures(_, failures, _) => + failures.messages match { + case NonEmptyList( + FailureDetails.EnrichmentFailure( + _, + FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) + ), + Nil + ) => + ok + case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") + } + case br => ko(s"bad row [$br] is not EnrichmentFailures") + }) } "not return any error for 2 valid contexts" >> { @@ -505,7 +536,8 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { IgluUtils .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched) - .value must beRight + .value + .map(_ must beRight) } } @@ -521,10 +553,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { raw, processor ) - .value must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") - } + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok + case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + }) } "return a SchemaViolations containing 1 error if the input event contains 1 invalid context" >> { @@ -538,10 +571,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { raw, processor ) - .value must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") - } + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok + case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + }) } "return a SchemaViolations containing 2 errors if the input event contains an invalid unstructured event and 1 invalid context" >> { @@ -556,10 +590,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { raw, processor ) - .value must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 2 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 2 errors") - } + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 2 => ok + case br => ko(s"bad row [$br] is not a SchemaViolations containing 2 errors") + }) } "return the extracted unstructured event and the extracted input contexts if they are all valid" >> { @@ -574,17 +609,18 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { raw, processor ) - .value must beRight.like { - case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), validationInfos) - if contexts.size == 2 - && validationInfos.isEmpty - && (unstructEvent :: contexts).forall(_.schema == emailSentSchema) => - ok - case res => - ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" - ) - } + .value + .map(_ must beRight.like { + case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), validationInfos) + if contexts.size == 2 + && validationInfos.isEmpty + && (unstructEvent :: contexts).forall(_.schema == emailSentSchema) => + ok + case res => + ko( + s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + ) + }) } "return the extracted unstructured event and the extracted input contexts when schema is superseded by another schema" >> { @@ -606,19 +642,20 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { raw, processor ) - .value must beRight.like { - case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), List(validationInfo)) - if contexts.size == 2 - && unstructEvent.schema == supersedingExampleSchema101 - && contexts.count(_.schema == supersedingExampleSchema101) == 2 - && validationInfo.schema == IgluUtils.ValidationInfo.schemaKey - && validationInfo.data == expectedValidationInfoContext => - ok - case res => - ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" - ) - } + .value + .map(_ must beRight.like { + case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), List(validationInfo)) + if contexts.size == 2 + && unstructEvent.schema == supersedingExampleSchema101 + && contexts.count(_.schema == supersedingExampleSchema101) == 2 + && validationInfo.schema == IgluUtils.ValidationInfo.schemaKey + && validationInfo.data == expectedValidationInfoContext => + ok + case res => + ko( + s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + ) + }) } } diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/Enrich.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/Enrich.scala deleted file mode 100644 index 0633bb426..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/Enrich.scala +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.io.File -import java.net.URI - -import scala.sys.process._ - -import cats.Id -import cats.implicits._ -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.RemoteAdapter -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, JsonUtils, ShiftExecution} -import com.snowplowanalytics.snowplow.enrich.stream.sources.Source -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import com.typesafe.config.ConfigFactory -import io.circe.Json -import io.circe.syntax._ -import org.slf4j.LoggerFactory -import pureconfig._ -import pureconfig.generic.auto._ -import pureconfig.generic.{FieldCoproductHint, ProductHint} - -import config._ -import model._ -import utils._ - -/** Interface for the entry point for Stream Enrich. */ -trait Enrich { - protected type EitherS[A] = Either[String, A] - - lazy val log = LoggerFactory.getLogger(getClass()) - - val FilepathRegex = "^file:(.+)$".r - private val regexMsg = "'file:[filename]'" - - implicit val creds: Credentials = NoCredentials - - def run(args: Array[String]): Unit = { - val trackerSource: Either[String, (Option[Tracker[Id]], Source)] = for { - config <- parseConfig(args) - (enrichConfig, resolverArg, enrichmentsArg, forceDownload) = config - credsWithRegion <- enrichConfig.streams.sourceSink match { - case c: CloudAgnosticPlatformConfig => (extractCredentials(c), c.region).asRight - case _ => "Configured source/sink is not a cloud agnostic target".asLeft - } - (credentials, awsRegion) = credsWithRegion - client <- parseClient(resolverArg) - enrichmentsConf <- parseEnrichmentRegistry(enrichmentsArg, client)(implicitly) - _ <- cacheFiles( - enrichmentsConf, - forceDownload, - credentials.aws, - credentials.gcp, - awsRegion - ) - tracker = enrichConfig.monitoring.map(c => SnowplowTracking.initializeTracker(c.snowplow)) - enrichmentRegistry <- EnrichmentRegistry.build[Id](enrichmentsConf, BlockerF.noop, ShiftExecution.noop).value - adapterRegistry = new AdapterRegistry(prepareRemoteAdapters(enrichConfig.remoteAdapters)) - processor = Processor(generated.BuildInfo.name, generated.BuildInfo.version) - source <- getSource( - enrichConfig.streams, - enrichConfig.sentry, - client, - adapterRegistry, - enrichmentRegistry, - tracker, - processor - ) - } yield (tracker, source) - - trackerSource match { - case Left(e) => - System.err.println(s"An error occured: $e") - System.exit(1) - case Right((tracker, source)) => - tracker.foreach(SnowplowTracking.initializeSnowplowTracking) - source.run() - } - } - - /** - * Source of events - * @param streamsConfig configuration for the streams - * @param client iglu client - * @param enrichmentRegistry registry of enrichments - * @param tracker optional tracker - * @return a validated source, ready to be read from - */ - def getSource( - streamsConfig: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, sources.Source] - - implicit def hint[T]: ProductHint[T] = ProductHint[T](ConfigFieldMapping(CamelCase, CamelCase)) - implicit val _ = new FieldCoproductHint[TargetPlatformConfig]("enabled") - - /** - * Parses the configuration from cli arguments - * @param args cli arguments - * @return a validated tuple containing the parsed enrich configuration, the resolver argument, - * the optional enrichments argument and the force download flag - */ - def parseConfig(args: Array[String]): Either[String, (EnrichConfig, String, Option[String], Boolean)] = - for { - parsedCliArgs <- parser - .parse(args, FileConfig()) - .toRight("Error while parsing command line arguments") - unparsedConfig = Either - .catchNonFatal(ConfigFactory.parseFile(parsedCliArgs.config).resolve()) - .fold( - t => t.getMessage.asLeft, - c => (c, parsedCliArgs.resolver, parsedCliArgs.enrichmentsDir, parsedCliArgs.forceDownload).asRight - ) - validatedConfig <- unparsedConfig.filterOrElse( - t => t._1.hasPath("enrich"), - "No top-level \"enrich\" could be found in the configuration" - ) - (config, resolverArg, enrichmentsArg, forceDownload) = validatedConfig - parsedConfig <- Either - .catchNonFatal(loadConfigOrThrow[EnrichConfig](config.getConfig("enrich"))) - .map(ec => (ec, resolverArg, enrichmentsArg, forceDownload)) - .leftMap(_.getMessage) - } yield parsedConfig - - /** Cli arguments parser */ - def parser: scopt.OptionParser[FileConfig] - val localParser = - new scopt.OptionParser[FileConfig](generated.BuildInfo.name) with FileConfigOptions { - head(generated.BuildInfo.name, generated.BuildInfo.version) - help("help") - version("version") - configOption() - localResolverOption() - localEnrichmentsOption() - forceCachedFilesDownloadOption() - } - - /** - * Retrieve and parse an iglu resolver from the corresponding cli argument value - * @param resolverArg location of the resolver as a cli argument - * a * @param creds optionally necessary credentials to download the resolver - * @return a validated iglu resolver - */ - def parseClient(resolverArg: String)(implicit creds: Credentials): Either[String, IgluCirceClient[Id]] = - for { - parsedResolver <- extractResolver(resolverArg) - json <- JsonUtils.extractJson(parsedResolver) - client <- IgluCirceClient.parseDefault[Id](json).leftMap(_.toString).value - } yield client - - /** - * Return a JSON string based on the resolver argument - * @param resolverArg location of the resolver - * @param creds optionally necessary credentials to download the resolver - * @return JSON from a local file or stored in DynamoDB - */ - def extractResolver(resolverArg: String)(implicit creds: Credentials): Either[String, String] - val localResolverExtractor = (resolverArgument: String) => - resolverArgument match { - case FilepathRegex(filepath) => - val file = new File(filepath) - if (file.exists) scala.io.Source.fromFile(file).mkString.asRight - else "Iglu resolver configuration file \"%s\" does not exist".format(filepath).asLeft - case _ => s"Resolver argument [$resolverArgument] must match $regexMsg".asLeft - } - - /** - * Retrieve and parse an enrichment registry from the corresponding cli argument value - * @param enrichmentsDirArg location of the enrichments directory as a cli argument - * @param client iglu client - * @param creds optionally necessary credentials to download the enrichments - * @return a validated enrichment registry - */ - def parseEnrichmentRegistry( - enrichmentsDirArg: Option[String], - client: IgluCirceClient[Id] - )( - implicit creds: Credentials - ): Either[String, List[EnrichmentConf]] = - for { - enrichmentConfig <- extractEnrichmentConfigs(enrichmentsDirArg) - reg <- EnrichmentRegistry.parse(enrichmentConfig, client, false).leftMap(_.toList.mkString("; ")).toEither - } yield reg - - /** - * Return an enrichment configuration JSON based on the enrichments argument - * @param enrichmentArgument location of the enrichments directory - * @param creds optionally necessary credentials to download the enrichments - * @return JSON containing configuration for all enrichments - */ - def extractEnrichmentConfigs(enrichmentArgument: Option[String])(implicit creds: Credentials): Either[String, Json] - val localEnrichmentConfigsExtractor = (enrichmentArgument: Option[String]) => { - val jsons: Either[String, List[String]] = enrichmentArgument - .map { - case FilepathRegex(path) => - new File(path).listFiles - .filter(_.getName.endsWith(".json")) - .map(scala.io.Source.fromFile(_).mkString) - .toList - .asRight - case other => s"Enrichments argument [$other] must match $regexMsg".asLeft - } - .getOrElse(Nil.asRight) - - val schemaKey = SchemaKey( - "com.snowplowanalytics.snowplow", - "enrichments", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ) - - jsons - .flatMap(_.map(JsonUtils.extractJson).sequence[EitherS, Json]) - .map(jsons => SelfDescribingData[Json](schemaKey, Json.fromValues(jsons)).asJson) - } - - /** - * Download a file locally - * @param uri of the file to be downloaded - * @param targetFile local file to download to - * @param awsCreds optionally necessary credentials to download the file - * @param gcpCreds optionally necessary credentials to download the file - * @return the return code of the downloading command - */ - def download( - uri: URI, - targetFile: File, - awsCreds: Credentials, - gcpCreds: Credentials, - awsRegion: Option[String] - ): Either[String, Unit] = - uri.getScheme match { - case "http" | "https" => - (uri.toURL #> targetFile).! match { - case 0 => ().asRight - case exitCode => s"http(s) download failed with exit code $exitCode".asLeft - } - case "s3" => - for { - provider <- getAWSCredentialsProvider(awsCreds) - downloadResult <- downloadFromS3(provider, uri, targetFile, awsRegion) - } yield downloadResult - case "gs" => - for { - creds <- getGoogleCredentials(gcpCreds) - downloadResult <- downloadFromGCS(creds, uri, targetFile) - } yield downloadResult - case s => s"Scheme $s for file $uri not supported".asLeft - } - - /** - * Download the IP lookup files locally. - * @param confs List of enrichment configuration - * @param forceDownload CLI flag that invalidates the cached files on each startup - * @param awsCreds optionally necessary aws credentials to cache the files - * @param gcpCreds optionally necessary gcp credentials to cache the files - * @return a list of download command return codes - */ - def cacheFiles( - confs: List[EnrichmentConf], - forceDownload: Boolean, - awsCreds: Credentials, - gcpCreds: Credentials, - awsRegion: Option[String] - ): Either[String, Unit] = - confs - .flatMap(_.filesToCache) - .map { - case (uri, path) => - ( - new URI(uri.toString.replaceAll("(? - forceDownload || targetFile.length == 0L - } - .map { - case (cleanURI, targetFile) => - download(cleanURI, targetFile, awsCreds, gcpCreds, awsRegion).leftMap { err => - s"Attempt to download $cleanURI to $targetFile failed: $err" - } - } - .sequence_ - - /** - * Sets up the Remote adapters for the ETL - * @param remoteAdaptersConfig List of configuration per remote adapter - * @return Mapping of vender-version and the adapter assigned for it - */ - def prepareRemoteAdapters(remoteAdaptersConfig: Option[List[RemoteAdapterConfig]]) = - remoteAdaptersConfig match { - case Some(configList) => - configList.map { config => - val adapter = new RemoteAdapter( - config.url, - config.connectionTimeout, - config.readTimeout - ) - (config.vendor, config.version) -> adapter - }.toMap - case None => Map.empty[(String, String), RemoteAdapter] - } -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/SnowplowTracking.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/SnowplowTracking.scala deleted file mode 100644 index 1ebc62015..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/SnowplowTracking.scala +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2015-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import scala.concurrent.ExecutionContext.Implicits.global - -import cats.Id -import cats.data.NonEmptyList - -import io.circe.Json -import io.circe.JsonObject - -import com.snowplowanalytics.iglu.core._ - -import com.snowplowanalytics.snowplow.scalatracker.idimplicits._ -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import com.snowplowanalytics.snowplow.scalatracker.Emitter.EndpointParams -import com.snowplowanalytics.snowplow.scalatracker.emitters.id.AsyncEmitter - -import model.SnowplowMonitoringConfig - -/** - * Functionality for sending Snowplow events for monitoring purposes - */ -object SnowplowTracking { - - private val HeartbeatInterval = 300000L - - /** - * Configure a Tracker based on the configuration HOCON - * - * @param config The "monitoring.snowplow" section of the HOCON - * @return a new tracker instance - */ - def initializeTracker(config: SnowplowMonitoringConfig): Tracker[Id] = { - val emitter = AsyncEmitter.createAndStart(EndpointParams(config.collectorUri, Some(config.collectorPort))) - new Tracker(NonEmptyList.one(emitter), generated.BuildInfo.name, config.appId) - } - - /** - * If a tracker has been configured, send a sink_write_failed event - * - * @param tracker a Tracker instance - * @param errorType the type of error encountered - * @param errorMessage the error message - * @param streamName the name of the stream in which - * the error occurred - * @param appName the name of the application - * @param retryCount the amount of times we have tried - * to put to the stream - * @param putSize the size in bytes of the put request - */ - def sendFailureEvent( - tracker: Tracker[Id], - errorType: String, - errorMessage: String, - streamName: String, - appName: String, - retryCount: Long, - putSize: Long - ): Unit = - tracker.trackSelfDescribingEvent( - SelfDescribingData[Json]( - SchemaKey( - "com.snowplowanalytics.monitoring.kinesis", - "stream_write_failed", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - Json.obj( - ("errorType", Json.fromString(errorType)), - ("errorMessage", Json.fromString(errorMessage)), - ("streamName", Json.fromString(streamName)), - ("appName", Json.fromString(appName)), - ("retryCount", Json.fromLong(retryCount)), - ("putSize", Json.fromLong(putSize)) - ) - ) - ) - - /** - * Send an initialization event and schedule heartbeat and shutdown events - * - * @param tracker a Tracker instance - */ - def initializeSnowplowTracking(tracker: Tracker[Id]): Unit = { - trackApplicationInitialization(tracker) - - Runtime.getRuntime.addShutdownHook(new Thread() { - override def run(): Unit = - trackApplicationShutdown(tracker) - }) - - val heartbeatThread = new Thread { - override def run(): Unit = - while (true) { - trackApplicationHeartbeat(tracker, HeartbeatInterval) - Thread.sleep(HeartbeatInterval) - } - } - - heartbeatThread.start() - } - - /** - * Send an application_initialized unstructured event - * - * @param tracker a Tracker instance - */ - private def trackApplicationInitialization(tracker: Tracker[Id]): Unit = - tracker.trackSelfDescribingEvent( - SelfDescribingData[Json]( - SchemaKey( - "com.snowplowanalytics.monitoring.kinesis", - "app_initialized", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - Json.fromJsonObject(JsonObject.empty) - ) - ) - - /** - * Send an application_shutdown unstructured event - * - * @param tracker a Tracker instance - */ - def trackApplicationShutdown(tracker: Tracker[Id]): Unit = - tracker.trackSelfDescribingEvent( - SelfDescribingData[Json]( - SchemaKey( - "com.snowplowanalytics.monitoring.kinesis", - "app_shutdown", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - Json.fromJsonObject(JsonObject.empty) - ) - ) - - /** - * Send a warning unstructured event - * - * @param tracker a Tracker instance - * @param message The warning message - */ - def trackApplicationWarning(tracker: Tracker[Id], message: String): Unit = - tracker.trackSelfDescribingEvent( - SelfDescribingData[Json]( - SchemaKey( - "com.snowplowanalytics.monitoring.kinesis", - "app_warning", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - Json.obj(("warning", Json.fromString(message))) - ) - ) - - /** - * Send a heartbeat unstructured event - * - * @param tracker a Tracker instance - * @param heartbeatInterval Time between heartbeats in milliseconds - */ - private def trackApplicationHeartbeat(tracker: Tracker[Id], heartbeatInterval: Long): Unit = - tracker.trackSelfDescribingEvent( - SelfDescribingData[Json]( - SchemaKey( - "com.snowplowanalytics.monitoring.kinesis", - "app_heartbeat", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ), - Json.obj(("interval", Json.fromLong(heartbeatInterval))) - ) - ) -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/config.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/config.scala deleted file mode 100644 index 655d15814..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/config.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics -package snowplow.enrich -package stream - -import java.io.File - -import scopt.{OptionDef, OptionParser} - -object config { - - final case class FileConfig( - config: File = new File("."), - resolver: String = "", - enrichmentsDir: Option[String] = None, - forceDownload: Boolean = false - ) - - trait FileConfigOptions { self: OptionParser[FileConfig] => - - val FilepathRegex = "^file:(.+)$".r - private val regexMsg = "'file:[filename]'" - - def configOption(): OptionDef[File, FileConfig] = - opt[File]("config") - .required() - .valueName("") - .action((f: File, c: FileConfig) => c.copy(config = f)) - .validate(f => - if (f.exists) success - else failure(s"Configuration file $f does not exist") - ) - def localResolverOption(): OptionDef[String, FileConfig] = - opt[String]("resolver") - .required() - .valueName("") - .text(s"Iglu resolver file, $regexMsg") - .action((r: String, c: FileConfig) => c.copy(resolver = r)) - .validate(_ match { - case FilepathRegex(_) => success - case _ => failure(s"Resolver doesn't match accepted uris: $regexMsg") - }) - def localEnrichmentsOption(): OptionDef[String, FileConfig] = - opt[String]("enrichments") - .optional() - .valueName("") - .text(s"Directory of enrichment configuration JSONs, $regexMsg") - .action((e: String, c: FileConfig) => c.copy(enrichmentsDir = Some(e))) - .validate(_ match { - case FilepathRegex(_) => success - case _ => failure(s"Enrichments directory doesn't match accepted uris: $regexMsg") - }) - def forceCachedFilesDownloadOption(): OptionDef[Unit, FileConfig] = - opt[Unit]("force-cached-files-download") - .text("Invalidate the cached IP lookup / IAB database files and download them anew") - .action((_, c) => c.copy(forceDownload = true)) - } -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/model.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/model.scala deleted file mode 100644 index e75bf6501..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/model.scala +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.text.SimpleDateFormat -import java.net.URI - -import cats.syntax.either._ - -object model { - - sealed trait Credentials - case object NoCredentials extends Credentials - final case class AWSCredentials(accessKey: String, secretKey: String) extends Credentials - final case class GCPCredentials(creds: String) extends Credentials - - /** - * Pair of optional AWS Credentials and optional GCP Credentials - * @param aws optional AWS Credentials - * @param gcp optional GCP Credentials - */ - final case class DualCloudCredentialsPair(aws: Credentials, gcp: Credentials) - - // Case classes necessary to the decoding of the configuration - final case class StreamsConfig( - in: InConfig, - out: OutConfig, - sourceSink: TargetPlatformConfig, - buffer: BufferConfig, - appName: String - ) - final case class InConfig(raw: String) - final case class OutConfig( - enriched: String, - pii: Option[String], - bad: String, - partitionKey: String - ) - final case class KinesisBackoffPolicyConfig(minBackoff: Long, maxBackoff: Long) - - /** Represents configurations of all target platforms */ - sealed trait TargetPlatformConfig - - /** - * Represents configurations of all Stream Enrich targets that is AWS native - * Credentials of other cloud providers might be provided in case an enrichment - * requires a private data hosted on that platform - */ - sealed trait AWSNativePlatformConfig extends TargetPlatformConfig { - def aws: AWSCredentials - def gcp: Option[GCPCredentials] - - /** - * Represents AWS region - * - * Note: Stream Enrich has been configured for AWS only until 1.1.0 and - * backward compatibility of the configuration requires not to change - * field name. - */ - def region: String - } - - /** - * Represents configurations of all Stream Enrich targets that is cloud agnostic - * Credentials of any cloud provider might be provided in case an enrichment - * requires a private data hosted on that platform - */ - sealed trait CloudAgnosticPlatformConfig extends TargetPlatformConfig { - def aws: Option[AWSCredentials] - def gcp: Option[GCPCredentials] - - /** - * Represents optional AWS region of the S3 bucket which stores private - * data required for any enrichment - */ - def region: Option[String] - } - - final case class Kinesis( - aws: AWSCredentials, - gcp: Option[GCPCredentials], - region: String, - maxRecords: Int, - initialPosition: String, - initialTimestamp: Option[String], - backoffPolicy: KinesisBackoffPolicyConfig, - customEndpoint: Option[String], - dynamodbCustomEndpoint: Option[String], - disableCloudWatch: Option[Boolean] - ) extends AWSNativePlatformConfig { - val timestamp = initialTimestamp - .toRight("An initial timestamp needs to be provided when choosing AT_TIMESTAMP") - .right - .flatMap { s => - val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'") - Either.catchNonFatal(format.parse(s)).leftMap(_.getMessage) - } - require(initialPosition != "AT_TIMESTAMP" || timestamp.isRight, timestamp.left.getOrElse("")) - - val streamEndpoint = customEndpoint.getOrElse(region match { - case cn @ "cn-north-1" => s"https://kinesis.$cn.amazonaws.com.cn" - case _ => s"https://kinesis.$region.amazonaws.com" - }) - - val dynamodbEndpoint = dynamodbCustomEndpoint.getOrElse(region match { - case cn @ "cn-north-1" => s"https://dynamodb.$cn.amazonaws.com.cn" - case _ => s"https://dynamodb.$region.amazonaws.com" - }) - } - final case class Kafka( - aws: Option[AWSCredentials], - gcp: Option[GCPCredentials], - region: Option[String], - brokers: String, - retries: Int, - consumerConf: Option[Map[String, String]], - producerConf: Option[Map[String, String]] - ) extends CloudAgnosticPlatformConfig - final case class Nsq( - aws: Option[AWSCredentials], - gcp: Option[GCPCredentials], - region: Option[String], - rawChannel: String, - host: String, - port: Int, - lookupHost: String, - lookupPort: Int - ) extends CloudAgnosticPlatformConfig - final case class Stdin( - aws: Option[AWSCredentials], - gcp: Option[GCPCredentials], - region: Option[String] - ) extends CloudAgnosticPlatformConfig - - final case class BufferConfig( - byteLimit: Long, - recordLimit: Long, - timeLimit: Long - ) - final case class MonitoringConfig(snowplow: SnowplowMonitoringConfig) - final case class SnowplowMonitoringConfig( - collectorUri: String, - collectorPort: Int, - appId: String, - method: String - ) - final case class RemoteAdapterConfig( - vendor: String, - version: String, - connectionTimeout: Option[Long], - readTimeout: Option[Long], - url: String - ) - final case class SentryConfig(dsn: URI) - final case class EnrichConfig( - streams: StreamsConfig, - remoteAdapters: Option[List[RemoteAdapterConfig]], - monitoring: Option[MonitoringConfig], - sentry: Option[SentryConfig] - ) -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/Sink.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/Sink.scala deleted file mode 100644 index bbfac2a9c..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/Sink.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich -package stream.sinks - -import org.slf4j.LoggerFactory - -/** - * An interface for all sinks to use to store events. - */ -trait Sink { - - lazy val log = LoggerFactory.getLogger(getClass()) - - /** Side-effecting function to store the EnrichedEvent to the given output stream. */ - def storeEnrichedEvents(events: List[(String, String)]): Boolean - - def flush(): Unit -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala deleted file mode 100644 index 8425e8a91..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import java.nio.ByteBuffer -import java.nio.charset.StandardCharsets.UTF_8 -import java.time.Instant -import java.util.UUID - -import scala.util.Random - -import cats.Id -import cats.data.{Validated, ValidatedNel} -import cats.data.Validated.{Invalid, Valid} -import cats.implicits._ -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.snowplow.enrich.common.EtlPipeline -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.enrich.common.loaders.{CollectorPayload, ThriftLoader} -import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.ConversionUtils -import com.snowplowanalytics.snowplow.enrich.stream.model.SentryConfig -import org.joda.time.DateTime -import org.slf4j.LoggerFactory -import io.sentry.Sentry -import io.sentry.SentryClient -import org.joda.time.DateTime - -import sinks._ -import utils._ - -object Source { - val processor = Processor(generated.BuildInfo.name, generated.BuildInfo.version) - - /** - * If a bad row JSON is too big, reduce it's size - * @param value Bad row which is too large - * @param maxSizeBytes maximum size in bytes - * @param processor current processor - * @return Bad row embedding truncated bad row - */ - def adjustOversizedFailureJson( - badRow: BadRow, - maxSizeBytes: Int, - processor: Processor - ): BadRow.SizeViolation = { - val size = getSizeBr(badRow) - BadRow.SizeViolation( - processor, - Failure.SizeViolation(Instant.now(), maxSizeBytes, size, "bad row exceeded the maximum size"), - Payload.RawPayload(brToString(badRow).take(maxSizeBytes / 10)) - ) - } - - /** - * Convert a too-large successful event to a failure - * @param value Event which passed enrichment but was too large - * @param maxSizeBytes maximum size in bytes - * @param processor current processor - * @return Bad row JSON - */ - def oversizedSuccessToFailure( - value: String, - maxSizeBytes: Int, - processor: Processor - ): BadRow.SizeViolation = { - val size = getSize(value) - val msg = "event passed enrichment but exceeded the maximum allowed size as a result" - BadRow.SizeViolation( - processor, - Failure.SizeViolation(Instant.now(), maxSizeBytes, size, msg), - Payload.RawPayload(value.take(maxSizeBytes / 10)) - ) - } - - val brToString: BadRow => String = br => br.compact - - /** The size of a string in bytes */ - val getSize: String => Int = evt => ByteBuffer.wrap(evt.getBytes(UTF_8)).capacity - - /** The size of a bad row in bytes */ - val getSizeBr: BadRow => Int = - (brToString andThen getSize)(_) -} - -/** Abstract base for the different sources we support. */ -abstract class Source( - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor, - partitionKey: String, - sentryConfig: Option[SentryConfig] -) { - - val sentryClient: Option[SentryClient] = sentryConfig.map(_.dsn.toString).map(Sentry.init) - - val MaxRecordSize: Option[Int] - - lazy val log = LoggerFactory.getLogger(getClass()) - - /** Never-ending processing loop over source stream. */ - def run(): Unit - - val threadLocalGoodSink: ThreadLocal[Sink] - val threadLocalPiiSink: Option[ThreadLocal[Sink]] - val threadLocalBadSink: ThreadLocal[Sink] - - // Iterate through an enriched EnrichedEvent object and tab separate - // the fields to a string. - def tabSeparateEnrichedEvent(output: EnrichedEvent): String = - output.getClass.getDeclaredFields - .filterNot(_.getName.equals("pii")) - .map { field => - field.setAccessible(true) - Option(field.get(output)).getOrElse("") - } - .mkString("\t") - - def getProprertyValue(ee: EnrichedEvent, property: String): Option[String] = - property match { - case "event_id" => Option(ee.event_id) - case "event_fingerprint" => Option(ee.event_fingerprint) - case "domain_userid" => Option(ee.domain_userid) - case "network_userid" => Option(ee.network_userid) - case "user_ipaddress" => Option(ee.user_ipaddress) - case "domain_sessionid" => Option(ee.domain_sessionid) - case "user_fingerprint" => Option(ee.user_fingerprint) - case _ => None - } - - /** - * Convert incoming binary Thrift records to lists of enriched events - * @param binaryData Thrift raw event - * @return List containing failed, successful and, if present, pii events. Successful and failed, each specify a - * partition key. - */ - def enrichEvents(binaryData: Array[Byte]): List[Validated[(BadRow, String), (String, String, Option[String])]] = { - val canonicalInput: ValidatedNel[BadRow, Option[CollectorPayload]] = - ThriftLoader.toCollectorPayload(binaryData, processor) - val featureFlags = EtlPipeline.FeatureFlags( - acceptInvalid = true, // See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 - legacyEnrichmentOrder = false // Stream enrich has always used the correct order - ) - Either.catchNonFatal( - EtlPipeline.processEvents[Id]( - adapterRegistry, - enrichmentRegistry, - client, - processor, - new DateTime(System.currentTimeMillis), - canonicalInput, - featureFlags, - () // See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 - ) - ) match { - case Left(throwable) => - log.error( - s"Problem occured while processing CollectorPayload", - throwable - ) - sentryClient.foreach { client => - client.sendException(throwable) - } - Nil - case Right(processedEvents) => - processedEvents.map(event => - event.bimap( - br => (br, Random.nextInt().toString()), - enriched => - ( - tabSeparateEnrichedEvent(enriched), - getProprertyValue(enriched, partitionKey).getOrElse(UUID.randomUUID().toString), - ConversionUtils.getPiiEvent(processor, enriched).map(tabSeparateEnrichedEvent) - ) - ) - ) - } - } - - /** - * Deserialize and enrich incoming Thrift records and store the results - * in the appropriate sinks. If doing so causes the number of events - * stored in a sink to become sufficiently large, all sinks are flushed - * and we return `true`, signalling that it is time to checkpoint - * @param binaryData Thrift raw event - * @return Whether to checkpoint - */ - def enrichAndStoreEvents(binaryData: List[Array[Byte]]): Boolean = { - val enrichedEvents = binaryData.flatMap(enrichEvents) - val successes = enrichedEvents.collect { case Valid(ee) => ee } - val sizeUnadjustedFailures = enrichedEvents.collect { case Invalid(br) => br } - val failures = sizeUnadjustedFailures.map { - case (value, key) => - MaxRecordSize.flatMap(s => if (Source.getSizeBr(value) >= s) s.some else none) match { - case None => value -> key - case Some(s) => Source.adjustOversizedFailureJson(value, s, processor) -> key - } - } - - val (tooBigSuccesses, smallEnoughSuccesses) = - successes.partition { s => - isTooLarge(s._1) - } - - val sizeBasedFailures = for { - (value, key, _) <- tooBigSuccesses - m <- MaxRecordSize - } yield Source.oversizedSuccessToFailure(value, m, processor) -> key - - val anonymizedSuccesses = smallEnoughSuccesses.map { - case (event, partition, _) => (event, partition) - } - val piiSuccesses = smallEnoughSuccesses.flatMap { - case (_, partition, pii) => pii.map((_, partition)) - } - - val successesTriggeredFlush = threadLocalGoodSink.get.storeEnrichedEvents(anonymizedSuccesses) - val piiTriggeredFlush = - threadLocalPiiSink.map(_.get.storeEnrichedEvents(piiSuccesses)).getOrElse(false) - val allFailures = (failures ++ sizeBasedFailures) - .map { case (br, k) => Source.brToString(br) -> k } - val failuresTriggeredFlush = - threadLocalBadSink.get.storeEnrichedEvents(allFailures) - - if (successesTriggeredFlush == true || failuresTriggeredFlush == true || piiTriggeredFlush == true) { - // Block until the records have been sent to Kinesis - threadLocalGoodSink.get.flush - threadLocalPiiSink.map(_.get.flush) - threadLocalBadSink.get.flush - true - } else - false - } - - private val isTooLarge: String => Boolean = evt => MaxRecordSize.map(Source.getSize(evt) >= _).getOrElse(false) -} diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/utils.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/utils.scala deleted file mode 100644 index f097d154b..000000000 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/utils.scala +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.io.{File, FileInputStream} -import java.net.URI -import java.nio.file.{Files, Paths} -import java.util.UUID -import java.util.concurrent.TimeUnit - -import cats.Id -import cats.effect.Clock -import cats.syntax.either._ -import com.amazonaws.auth.{ - AWSCredentialsProvider, - AWSStaticCredentialsProvider, - BasicAWSCredentials, - DefaultAWSCredentialsProviderChain, - EnvironmentVariableCredentialsProvider, - InstanceProfileCredentialsProvider -} -import com.amazonaws.services.s3.AmazonS3ClientBuilder -import com.amazonaws.services.s3.model.GetObjectRequest -import com.google.auth.oauth2.GoogleCredentials -import com.google.cloud.storage.{BlobId, StorageOptions} -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.enrich.stream.model.{ - AWSCredentials, - CloudAgnosticPlatformConfig, - Credentials, - DualCloudCredentialsPair, - GCPCredentials, - NoCredentials -} -import com.snowplowanalytics.snowplow.scalatracker.UUIDProvider - -object utils { - def emitPii(enrichmentRegistry: EnrichmentRegistry[Id]): Boolean = - enrichmentRegistry.piiPseudonymizer.exists(_.emitIdentificationEvent) - - def validatePii(emitPii: Boolean, streamName: Option[String]): Either[String, Unit] = - (emitPii, streamName) match { - case (true, None) => "PII was configured to emit, but no PII stream name was given".asLeft - case _ => ().asRight - } - - implicit val clockProvider: Clock[Id] = new Clock[Id] { - final def realTime(unit: TimeUnit): Id[Long] = - unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) - final def monotonic(unit: TimeUnit): Id[Long] = - unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS) - } - - implicit val uuidProvider: UUIDProvider[Id] = new UUIDProvider[Id] { - override def generateUUID: Id[UUID] = UUID.randomUUID() - } - - def getAWSCredentialsProvider(creds: Credentials): Either[String, AWSCredentialsProvider] = { - def isDefault(key: String): Boolean = key == "default" - def isIam(key: String): Boolean = key == "iam" - def isEnv(key: String): Boolean = key == "env" - - for { - provider <- creds match { - case NoCredentials => "No AWS credentials provided".asLeft - case _: GCPCredentials => "GCP credentials provided".asLeft - case AWSCredentials(a, s) if isDefault(a) && isDefault(s) => - new DefaultAWSCredentialsProviderChain().asRight - case AWSCredentials(a, s) if isDefault(a) || isDefault(s) => - "accessKey and secretKey must both be set to 'default' or neither".asLeft - case AWSCredentials(a, s) if isIam(a) && isIam(s) => - InstanceProfileCredentialsProvider.getInstance().asRight - case AWSCredentials(a, s) if isIam(a) && isIam(s) => - "accessKey and secretKey must both be set to 'iam' or neither".asLeft - case AWSCredentials(a, s) if isEnv(a) && isEnv(s) => - new EnvironmentVariableCredentialsProvider().asRight - case AWSCredentials(a, s) if isEnv(a) || isEnv(s) => - "accessKey and secretKey must both be set to 'env' or neither".asLeft - case AWSCredentials(a, s) => - new AWSStaticCredentialsProvider(new BasicAWSCredentials(a, s)).asRight - } - } yield provider - } - - /** - * Create GoogleCredentials based on provided service account credentials file - * @param creds path to service account file - * @return Either an error or GoogleCredentials - */ - def getGoogleCredentials(creds: Credentials): Either[String, GoogleCredentials] = { - def createIfRegular(isRegular: Boolean, path: String): Either[String, GoogleCredentials] = - if (isRegular) - Either - .catchNonFatal( - GoogleCredentials - .fromStream(new FileInputStream(path)) - .createScoped("https://www.googleapis.com/auth/cloud-platform") - ) - .leftMap(_.getMessage) - else - "Provided Google Credentials Path isn't valid".asLeft - - creds match { - case NoCredentials => "No GCP Credentials provided".asLeft - case _: AWSCredentials => "AWS credentials provided".asLeft - case GCPCredentials(credsPath) => - for { - path <- Either.catchNonFatal(Paths.get(credsPath)).leftMap(_.getMessage) - isRegular <- Either.catchNonFatal(Files.isRegularFile(path)).leftMap(_.getMessage) - gCreds <- createIfRegular(isRegular, credsPath) - } yield gCreds - } - } - - /** - * Downloads an object from S3 and returns whether or not it was successful. - * @param uri The URI to reconstruct into a signed S3 URL - * @param targetFile The file object to write to - * @param provider necessary credentials to download from S3 - * @return the download result - */ - def downloadFromS3( - provider: AWSCredentialsProvider, - uri: URI, - targetFile: File, - region: Option[String] - ): Either[String, Unit] = - for { - s3Client <- Either - .catchNonFatal( - region - .fold(AmazonS3ClientBuilder.standard().withCredentials(provider).build())(r => - AmazonS3ClientBuilder.standard().withCredentials(provider).withRegion(r).build() - ) - ) - .leftMap(_.getMessage) - bucketName = uri.getHost - key = extractObjectKey(uri) - _ <- Either - .catchNonFatal(s3Client.getObject(new GetObjectRequest(bucketName, key), targetFile)) - .leftMap(_.getMessage) - } yield () - - def downloadFromGCS( - creds: GoogleCredentials, - uri: URI, - targetFile: File - ): Either[String, Unit] = - for { - storage <- Either - .catchNonFatal(StorageOptions.newBuilder().setCredentials(creds).build().getService) - .leftMap(_.getMessage) - bucketName = uri.getHost - key = extractObjectKey(uri) - _ <- Either - .catchNonFatal(storage.get(BlobId.of(bucketName, key)).downloadTo(targetFile.toPath)) - .leftMap(_.getMessage) - } yield () - - /** Remove leading slash from given uri's path, if exists */ - def extractObjectKey(uri: URI): String = - uri.getPath match { - case path if path.length > 0 && path.charAt(0) == '/' => path.substring(1) - case path => path - } - - /** - * Extracts a DualCloudCredentialsPair from given cloud agnostic platform config - * @param config A configuration belonging to a cloud agnostic platform - * @return A DualCloudCredentialsPair instance - */ - def extractCredentials(config: CloudAgnosticPlatformConfig): DualCloudCredentialsPair = - DualCloudCredentialsPair( - config.aws.fold[Credentials](NoCredentials)(identity), - config.gcp.fold[Credentials](NoCredentials)(identity) - ) -} diff --git a/modules/stream/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/referer-tests.json b/modules/stream/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/referer-tests.json deleted file mode 100644 index a876b65e1..000000000 --- a/modules/stream/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/referer-tests.json +++ /dev/null @@ -1,1093 +0,0 @@ -{ - "search": { - "AOL": { - "domains": [ - "search.aol.com", - "search.aol.it", - "aolsearch.aol.com", - "aolsearch.com", - "www.aolrecherche.aol.fr", - "www.aolrecherches.aol.fr", - "www.aolimages.aol.fr", - "aim.search.aol.com", - "www.recherche.aol.fr", - "find.web.aol.com", - "recherche.aol.ca", - "aolsearch.aol.co.uk", - "search.aol.co.uk", - "aolrecherche.aol.fr", - "sucheaol.aol.de", - "suche.aol.de", - "suche.aolsvc.de", - "aolbusqueda.aol.com.mx", - "alicesuche.aol.de", - "alicesuchet.aol.de", - "suchet2.aol.de", - "search.hp.my.aol.com.au", - "search.hp.my.aol.de", - "search.hp.my.aol.it", - "search-intl.netscape.com" - ], - "parameters": [ - "q", - "query" - ] - }, - "Google Images": { - "domains": [ - "google.ac/imgres", - "google.ad/imgres", - "google.ae/imgres", - "google.am/imgres", - "google.as/imgres", - "google.at/imgres", - "google.az/imgres", - "google.ba/imgres", - "google.be/imgres", - "google.bf/imgres", - "google.bg/imgres", - "google.bi/imgres", - "google.bj/imgres", - "google.bs/imgres", - "google.by/imgres", - "google.ca/imgres", - "google.cat/imgres", - "google.cc/imgres", - "google.cd/imgres", - "google.cf/imgres", - "google.cg/imgres", - "google.ch/imgres", - "google.ci/imgres", - "google.cl/imgres", - "google.cm/imgres", - "google.cn/imgres", - "google.co.bw/imgres", - "google.co.ck/imgres", - "google.co.cr/imgres", - "google.co.id/imgres", - "google.co.il/imgres", - "google.co.in/imgres", - "google.co.jp/imgres", - "google.co.ke/imgres", - "google.co.kr/imgres", - "google.co.ls/imgres", - "google.co.ma/imgres", - "google.co.mz/imgres", - "google.co.nz/imgres", - "google.co.th/imgres", - "google.co.tz/imgres", - "google.co.ug/imgres", - "google.co.uk/imgres", - "google.co.uz/imgres", - "google.co.ve/imgres", - "google.co.vi/imgres", - "google.co.za/imgres", - "google.co.zm/imgres", - "google.co.zw/imgres", - "google.com/imgres", - "google.com.af/imgres", - "google.com.ag/imgres", - "google.com.ai/imgres", - "google.com.ar/imgres", - "google.com.au/imgres", - "google.com.bd/imgres", - "google.com.bh/imgres", - "google.com.bn/imgres", - "google.com.bo/imgres", - "google.com.br/imgres", - "google.com.by/imgres", - "google.com.bz/imgres", - "google.com.co/imgres", - "google.com.cu/imgres", - "google.com.cy/imgres", - "google.com.do/imgres", - "google.com.ec/imgres", - "google.com.eg/imgres", - "google.com.et/imgres", - "google.com.fj/imgres", - "google.com.gh/imgres", - "google.com.gi/imgres", - "google.com.gt/imgres", - "google.com.hk/imgres", - "google.com.jm/imgres", - "google.com.kh/imgres", - "google.com.kw/imgres", - "google.com.lb/imgres", - "google.com.lc/imgres", - "google.com.ly/imgres", - "google.com.mt/imgres", - "google.com.mx/imgres", - "google.com.my/imgres", - "google.com.na/imgres", - "google.com.nf/imgres", - "google.com.ng/imgres", - "google.com.ni/imgres", - "google.com.np/imgres", - "google.com.om/imgres", - "google.com.pa/imgres", - "google.com.pe/imgres", - "google.com.ph/imgres", - "google.com.pk/imgres", - "google.com.pr/imgres", - "google.com.py/imgres", - "google.com.qa/imgres", - "google.com.sa/imgres", - "google.com.sb/imgres", - "google.com.sg/imgres", - "google.com.sl/imgres", - "google.com.sv/imgres", - "google.com.tj/imgres", - "google.com.tn/imgres", - "google.com.tr/imgres", - "google.com.tw/imgres", - "google.com.ua/imgres", - "google.com.uy/imgres", - "google.com.vc/imgres", - "google.com.vn/imgres", - "google.cv/imgres", - "google.cz/imgres", - "google.de/imgres", - "google.dj/imgres", - "google.dk/imgres", - "google.dm/imgres", - "google.dz/imgres", - "google.ee/imgres", - "google.es/imgres", - "google.fi/imgres", - "google.fm/imgres", - "google.fr/imgres", - "google.ga/imgres", - "google.gd/imgres", - "google.ge/imgres", - "google.gf/imgres", - "google.gg/imgres", - "google.gl/imgres", - "google.gm/imgres", - "google.gp/imgres", - "google.gr/imgres", - "google.gy/imgres", - "google.hn/imgres", - "google.hr/imgres", - "google.ht/imgres", - "google.hu/imgres", - "google.ie/imgres", - "google.im/imgres", - "google.io/imgres", - "google.iq/imgres", - "google.is/imgres", - "google.it/imgres", - "google.it.ao/imgres", - "google.je/imgres", - "google.jo/imgres", - "google.kg/imgres", - "google.ki/imgres", - "google.kz/imgres", - "google.la/imgres", - "google.li/imgres", - "google.lk/imgres", - "google.lt/imgres", - "google.lu/imgres", - "google.lv/imgres", - "google.md/imgres", - "google.me/imgres", - "google.mg/imgres", - "google.mk/imgres", - "google.ml/imgres", - "google.mn/imgres", - "google.ms/imgres", - "google.mu/imgres", - "google.mv/imgres", - "google.mw/imgres", - "google.ne/imgres", - "google.nl/imgres", - "google.no/imgres", - "google.nr/imgres", - "google.nu/imgres", - "google.pl/imgres", - "google.pn/imgres", - "google.ps/imgres", - "google.pt/imgres", - "google.ro/imgres", - "google.rs/imgres", - "google.ru/imgres", - "google.rw/imgres", - "google.sc/imgres", - "google.se/imgres", - "google.sh/imgres", - "google.si/imgres", - "google.sk/imgres", - "google.sm/imgres", - "google.sn/imgres", - "google.so/imgres", - "google.st/imgres", - "google.td/imgres", - "google.tg/imgres", - "google.tk/imgres", - "google.tl/imgres", - "google.tm/imgres", - "google.to/imgres", - "google.tt/imgres", - "google.us/imgres", - "google.vg/imgres", - "google.vu/imgres", - "images.google.ws", - "images.google.ac", - "images.google.ad", - "images.google.ae", - "images.google.am", - "images.google.as", - "images.google.at", - "images.google.az", - "images.google.ba", - "images.google.be", - "images.google.bf", - "images.google.bg", - "images.google.bi", - "images.google.bj", - "images.google.bs", - "images.google.by", - "images.google.ca", - "images.google.cat", - "images.google.cc", - "images.google.cd", - "images.google.cf", - "images.google.cg", - "images.google.ch", - "images.google.ci", - "images.google.cl", - "images.google.cm", - "images.google.cn", - "images.google.co.bw", - "images.google.co.ck", - "images.google.co.cr", - "images.google.co.id", - "images.google.co.il", - "images.google.co.in", - "images.google.co.jp", - "images.google.co.ke", - "images.google.co.kr", - "images.google.co.ls", - "images.google.co.ma", - "images.google.co.mz", - "images.google.co.nz", - "images.google.co.th", - "images.google.co.tz", - "images.google.co.ug", - "images.google.co.uk", - "images.google.co.uz", - "images.google.co.ve", - "images.google.co.vi", - "images.google.co.za", - "images.google.co.zm", - "images.google.co.zw", - "images.google.com", - "images.google.com.af", - "images.google.com.ag", - "images.google.com.ai", - "images.google.com.ar", - "images.google.com.au", - "images.google.com.bd", - "images.google.com.bh", - "images.google.com.bn", - "images.google.com.bo", - "images.google.com.br", - "images.google.com.by", - "images.google.com.bz", - "images.google.com.co", - "images.google.com.cu", - "images.google.com.cy", - "images.google.com.do", - "images.google.com.ec", - "images.google.com.eg", - "images.google.com.et", - "images.google.com.fj", - "images.google.com.gh", - "images.google.com.gi", - "images.google.com.gt", - "images.google.com.hk", - "images.google.com.jm", - "images.google.com.kh", - "images.google.com.kh", - "images.google.com.kw", - "images.google.com.lb", - "images.google.com.lc", - "images.google.com.ly", - "images.google.com.mt", - "images.google.com.mx", - "images.google.com.my", - "images.google.com.na", - "images.google.com.nf", - "images.google.com.ng", - "images.google.com.ni", - "images.google.com.np", - "images.google.com.om", - "images.google.com.pa", - "images.google.com.pe", - "images.google.com.ph", - "images.google.com.pk", - "images.google.com.pr", - "images.google.com.py", - "images.google.com.qa", - "images.google.com.sa", - "images.google.com.sb", - "images.google.com.sg", - "images.google.com.sl", - "images.google.com.sv", - "images.google.com.tj", - "images.google.com.tn", - "images.google.com.tr", - "images.google.com.tw", - "images.google.com.ua", - "images.google.com.uy", - "images.google.com.vc", - "images.google.com.vn", - "images.google.cv", - "images.google.cz", - "images.google.de", - "images.google.dj", - "images.google.dk", - "images.google.dm", - "images.google.dz", - "images.google.ee", - "images.google.es", - "images.google.fi", - "images.google.fm", - "images.google.fr", - "images.google.ga", - "images.google.gd", - "images.google.ge", - "images.google.gf", - "images.google.gg", - "images.google.gl", - "images.google.gm", - "images.google.gp", - "images.google.gr", - "images.google.gy", - "images.google.hn", - "images.google.hr", - "images.google.ht", - "images.google.hu", - "images.google.ie", - "images.google.im", - "images.google.io", - "images.google.iq", - "images.google.is", - "images.google.it", - "images.google.it.ao", - "images.google.je", - "images.google.jo", - "images.google.kg", - "images.google.ki", - "images.google.kz", - "images.google.la", - "images.google.li", - "images.google.lk", - "images.google.lt", - "images.google.lu", - "images.google.lv", - "images.google.md", - "images.google.me", - "images.google.mg", - "images.google.mk", - "images.google.ml", - "images.google.mn", - "images.google.ms", - "images.google.mu", - "images.google.mv", - "images.google.mw", - "images.google.ne", - "images.google.nl", - "images.google.no", - "images.google.nr", - "images.google.nu", - "images.google.pl", - "images.google.pn", - "images.google.ps", - "images.google.pt", - "images.google.ro", - "images.google.rs", - "images.google.ru", - "images.google.rw", - "images.google.sc", - "images.google.se", - "images.google.sh", - "images.google.si", - "images.google.sk", - "images.google.sm", - "images.google.sn", - "images.google.so", - "images.google.st", - "images.google.td", - "images.google.tg", - "images.google.tk", - "images.google.tl", - "images.google.tm", - "images.google.to", - "images.google.tt", - "images.google.us", - "images.google.vg", - "images.google.vu", - "images.google.ws" - ], - "parameters": [ - "q" - ] - }, - "Yahoo! Images": { - "domains": [ - "image.yahoo.cn", - "images.search.yahoo.com" - ], - "parameters": [ - "p", - "q" - ] - }, - "Yandex Images": { - "domains": [ - "images.yandex.ru", - "images.yandex.ua", - "images.yandex.com" - ], - "parameters": [ - "text" - ] - }, - "Bing Images": { - "domains": [ - "bing.com/images/search", - "www.bing.com/images/search" - ], - "parameters": [ - "q", - "Q" - ] - }, - "Yahoo!": { - "domains": [ - "search.yahoo.com", - "yahoo.com", - "ar.search.yahoo.com", - "ar.yahoo.com", - "au.search.yahoo.com", - "au.yahoo.com", - "br.search.yahoo.com", - "br.yahoo.com", - "cade.searchde.yahoo.com", - "cade.yahoo.com", - "chinese.searchinese.yahoo.com", - "chinese.yahoo.com", - "cn.search.yahoo.com", - "cn.yahoo.com", - "de.search.yahoo.com", - "de.yahoo.com", - "dk.search.yahoo.com", - "dk.yahoo.com", - "es.search.yahoo.com", - "es.yahoo.com", - "espanol.searchpanol.yahoo.com", - "espanol.searchpanol.yahoo.com", - "espanol.yahoo.com", - "espanol.yahoo.com", - "fr.search.yahoo.com", - "fr.yahoo.com", - "ie.search.yahoo.com", - "ie.yahoo.com", - "it.search.yahoo.com", - "it.yahoo.com", - "kr.search.yahoo.com", - "kr.yahoo.com", - "mx.search.yahoo.com", - "mx.yahoo.com", - "no.search.yahoo.com", - "no.yahoo.com", - "nz.search.yahoo.com", - "nz.yahoo.com", - "one.cn.yahoo.com", - "one.searchn.yahoo.com", - "qc.search.yahoo.com", - "qc.search.yahoo.com", - "qc.search.yahoo.com", - "qc.yahoo.com", - "qc.yahoo.com", - "se.search.yahoo.com", - "se.search.yahoo.com", - "se.yahoo.com", - "search.searcharch.yahoo.com", - "search.yahoo.com", - "uk.search.yahoo.com", - "uk.yahoo.com", - "www.yahoo.co.jp", - "search.yahoo.co.jp", - "www.cercato.it", - "search.offerbox.com", - "ys.mirostart.com" - ], - "parameters": [ - "p", - "q" - ] - }, - "Ask": { - "domains": [ - "ask.com", - "www.ask.com", - "web.ask.com", - "int.ask.com", - "mws.ask.com", - "uk.ask.com", - "images.ask.com", - "ask.reference.com", - "www.askkids.com", - "iwon.ask.com", - "www.ask.co.uk", - "www.qbyrd.com", - "search-results.com", - "uk.search-results.com", - "www.search-results.com", - "int.search-results.com" - ], - "parameters": [ - "q" - ] - }, - "Google": { - "domains": [ - "www.google.com", - "www.google.ac", - "www.google.ad", - "www.google.com.af", - "www.google.com.ag", - "www.google.com.ai", - "www.google.am", - "www.google.it.ao", - "www.google.com.ar", - "www.google.as", - "www.google.at", - "www.google.com.au", - "www.google.az", - "www.google.ba", - "www.google.com.bd", - "www.google.be", - "www.google.bf", - "www.google.bg", - "www.google.com.bh", - "www.google.bi", - "www.google.bj", - "www.google.com.bn", - "www.google.com.bo", - "www.google.com.br", - "www.google.bs", - "www.google.co.bw", - "www.google.com.by", - "www.google.by", - "www.google.com.bz", - "www.google.ca", - "www.google.com.kh", - "www.google.cc", - "www.google.cd", - "www.google.cf", - "www.google.cat", - "www.google.cg", - "www.google.ch", - "www.google.ci", - "www.google.co.ck", - "www.google.cl", - "www.google.cm", - "www.google.cn", - "www.google.com.co", - "www.google.co.cr", - "www.google.com.cu", - "www.google.cv", - "www.google.com.cy", - "www.google.cz", - "www.google.de", - "www.google.dj", - "www.google.dk", - "www.google.dm", - "www.google.com.do", - "www.google.dz", - "www.google.com.ec", - "www.google.ee", - "www.google.com.eg", - "www.google.es", - "www.google.com.et", - "www.google.fi", - "www.google.com.fj", - "www.google.fm", - "www.google.fr", - "www.google.ga", - "www.google.gd", - "www.google.ge", - "www.google.gf", - "www.google.gg", - "www.google.com.gh", - "www.google.com.gi", - "www.google.gl", - "www.google.gm", - "www.google.gp", - "www.google.gr", - "www.google.com.gt", - "www.google.gy", - "www.google.com.hk", - "www.google.hn", - "www.google.hr", - "www.google.ht", - "www.google.hu", - "www.google.co.id", - "www.google.iq", - "www.google.ie", - "www.google.co.il", - "www.google.im", - "www.google.co.in", - "www.google.io", - "www.google.is", - "www.google.it", - "www.google.je", - "www.google.com.jm", - "www.google.jo", - "www.google.co.jp", - "www.google.co.ke", - "www.google.com.kh", - "www.google.ki", - "www.google.kg", - "www.google.co.kr", - "www.google.com.kw", - "www.google.kz", - "www.google.la", - "www.google.com.lb", - "www.google.com.lc", - "www.google.li", - "www.google.lk", - "www.google.co.ls", - "www.google.lt", - "www.google.lu", - "www.google.lv", - "www.google.com.ly", - "www.google.co.ma", - "www.google.md", - "www.google.me", - "www.google.mg", - "www.google.mk", - "www.google.ml", - "www.google.mn", - "www.google.ms", - "www.google.com.mt", - "www.google.mu", - "www.google.mv", - "www.google.mw", - "www.google.com.mx", - "www.google.com.my", - "www.google.co.mz", - "www.google.com.na", - "www.google.ne", - "www.google.com.nf", - "www.google.com.ng", - "www.google.com.ni", - "www.google.nl", - "www.google.no", - "www.google.com.np", - "www.google.nr", - "www.google.nu", - "www.google.co.nz", - "www.google.com.om", - "www.google.com.pa", - "www.google.com.pe", - "www.google.com.ph", - "www.google.com.pk", - "www.google.pl", - "www.google.pn", - "www.google.com.pr", - "www.google.ps", - "www.google.pt", - "www.google.com.py", - "www.google.com.qa", - "www.google.ro", - "www.google.rs", - "www.google.ru", - "www.google.rw", - "www.google.com.sa", - "www.google.com.sb", - "www.google.sc", - "www.google.se", - "www.google.com.sg", - "www.google.sh", - "www.google.si", - "www.google.sk", - "www.google.com.sl", - "www.google.sn", - "www.google.sm", - "www.google.so", - "www.google.st", - "www.google.com.sv", - "www.google.td", - "www.google.tg", - "www.google.co.th", - "www.google.com.tj", - "www.google.tk", - "www.google.tl", - "www.google.tm", - "www.google.to", - "www.google.com.tn", - "www.google.com.tr", - "www.google.tt", - "www.google.com.tw", - "www.google.co.tz", - "www.google.com.ua", - "www.google.co.ug", - "www.google.ae", - "www.google.co.uk", - "www.google.us", - "www.google.com.uy", - "www.google.co.uz", - "www.google.com.vc", - "www.google.co.ve", - "www.google.vg", - "www.google.co.vi", - "www.google.com.vn", - "www.google.vu", - "www.google.ws", - "www.google.co.za", - "www.google.co.zm", - "www.google.co.zw", - "google.com", - "google.ac", - "google.ad", - "google.com.af", - "google.com.ag", - "google.com.ai", - "google.am", - "google.it.ao", - "google.com.ar", - "google.as", - "google.at", - "google.com.au", - "google.az", - "google.ba", - "google.com.bd", - "google.be", - "google.bf", - "google.bg", - "google.com.bh", - "google.bi", - "google.bj", - "google.com.bn", - "google.com.bo", - "google.com.br", - "google.bs", - "google.co.bw", - "google.com.by", - "google.by", - "google.com.bz", - "google.ca", - "google.com.kh", - "google.cc", - "google.cd", - "google.cf", - "google.cat", - "google.cg", - "google.ch", - "google.ci", - "google.co.ck", - "google.cl", - "google.cm", - "google.cn", - "google.com.co", - "google.co.cr", - "google.com.cu", - "google.cv", - "google.com.cy", - "google.cz", - "google.de", - "google.dj", - "google.dk", - "google.dm", - "google.com.do", - "google.dz", - "google.com.ec", - "google.ee", - "google.com.eg", - "google.es", - "google.com.et", - "google.fi", - "google.com.fj", - "google.fm", - "google.fr", - "google.ga", - "google.gd", - "google.ge", - "google.gf", - "google.gg", - "google.com.gh", - "google.com.gi", - "google.gl", - "google.gm", - "google.gp", - "google.gr", - "google.com.gt", - "google.gy", - "google.com.hk", - "google.hn", - "google.hr", - "google.ht", - "google.hu", - "google.co.id", - "google.iq", - "google.ie", - "google.co.il", - "google.im", - "google.co.in", - "google.io", - "google.is", - "google.it", - "google.je", - "google.com.jm", - "google.jo", - "google.co.jp", - "google.co.ke", - "google.com.kh", - "google.ki", - "google.kg", - "google.co.kr", - "google.com.kw", - "google.kz", - "google.la", - "google.com.lb", - "google.com.lc", - "google.li", - "google.lk", - "google.co.ls", - "google.lt", - "google.lu", - "google.lv", - "google.com.ly", - "google.co.ma", - "google.md", - "google.me", - "google.mg", - "google.mk", - "google.ml", - "google.mn", - "google.ms", - "google.com.mt", - "google.mu", - "google.mv", - "google.mw", - "google.com.mx", - "google.com.my", - "google.co.mz", - "google.com.na", - "google.ne", - "google.com.nf", - "google.com.ng", - "google.com.ni", - "google.nl", - "google.no", - "google.com.np", - "google.nr", - "google.nu", - "google.co.nz", - "google.com.om", - "google.com.pa", - "google.com.pe", - "google.com.ph", - "google.com.pk", - "google.pl", - "google.pn", - "google.com.pr", - "google.ps", - "google.pt", - "google.com.py", - "google.com.qa", - "google.ro", - "google.rs", - "google.ru", - "google.rw", - "google.com.sa", - "google.com.sb", - "google.sc", - "google.se", - "google.com.sg", - "google.sh", - "google.si", - "google.sk", - "google.com.sl", - "google.sn", - "google.sm", - "google.so", - "google.st", - "google.com.sv", - "google.td", - "google.tg", - "google.co.th", - "google.com.tj", - "google.tk", - "google.tl", - "google.tm", - "google.to", - "google.com.tn", - "google.com.tr", - "google.tt", - "google.com.tw", - "google.co.tz", - "google.com.ua", - "google.co.ug", - "google.ae", - "google.co.uk", - "google.us", - "google.com.uy", - "google.co.uz", - "google.com.vc", - "google.co.ve", - "google.vg", - "google.co.vi", - "google.com.vn", - "google.vu", - "google.ws", - "google.co.za", - "google.co.zm", - "google.co.zw", - "search.avg.com", - "isearch.avg.com", - "www.cnn.com", - "darkoogle.com", - "search.darkoogle.com", - "search.foxtab.com", - "www.gooofullsearch.com", - "search.hiyo.com", - "search.incredimail.com", - "search1.incredimail.com", - "search2.incredimail.com", - "search3.incredimail.com", - "search4.incredimail.com", - "search.incredibar.com", - "search.sweetim.com", - "www.fastweb.it", - "search.juno.com", - "find.tdc.dk", - "searchresults.verizon.com", - "search.walla.co.il", - "search.alot.com", - "www.googleearth.de", - "www.googleearth.fr", - "webcache.googleusercontent.com", - "encrypted.google.com", - "googlesyndicatedsearch.com" - ], - "parameters": [ - "q", - "query", - "Keywords" - ] - }, - "PriceRunner": { - "domains": [ - "www.pricerunner.co.uk" - ], - "parameters": [ - "q" - ] - }, - "IXquick": { - "domains": [ - "ixquick.com", - "www.eu.ixquick.com", - "ixquick.de", - "www.ixquick.de", - "us.ixquick.com", - "s1.us.ixquick.com", - "s2.us.ixquick.com", - "s3.us.ixquick.com", - "s4.us.ixquick.com", - "s5.us.ixquick.com", - "eu.ixquick.com", - "s8-eu.ixquick.com", - "s1-eu.ixquick.de" - ], - "parameters": [ - "query" - ] - }, - "Mail.ru": { - "domains": [ - "go.mail.ru" - ], - "parameters": [ - "q" - ] - }, - "Ask Toolbar": { - "domains": [ - "search.tb.ask.com" - ], - "parameters": [ - "searchfor" - ] - }, - "Voila": { - "domains": [ - "search.ke.voila.fr", - "www.lemoteur.fr" - ], - "parameters": [ - "rdata", - "kw" - ] - }, - "Dalesearch": { - "domains": [ - "www.dalesearch.com" - ], - "parameters": [ - "q" - ] - } - }, - "email": { - "Outlook.com": { - "domains": [ - "mail.live.com" - ] - }, - "Yahoo! Mail": { - "domains": [ - "mail.yahoo.net", - "mail.yahoo.com", - "mail.yahoo.co.uk", - "mail.yahoo.co.jp" - ] - }, - "Orange Webmail": { - "domains": [ - "orange.fr/webmail" - ] - } - }, - "social": { - "Twitter": { - "domains": [ - "twitter.com", - "t.co" - ] - }, - "Facebook": { - "domains": [ - "facebook.com", - "fb.me", - "m.facebook.com", - "l.facebook.com", - "lm.facebook.com" - ] - }, - "Tumblr": { - "domains": [ - "tumblr.com" - ] - }, - "Odnoklassniki": { - "domains": [ - "odnoklassniki.ru" - ] - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/SpecHelpers.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/SpecHelpers.scala deleted file mode 100644 index 908a67096..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/SpecHelpers.scala +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.util.regex.Pattern - -import scala.util.matching.Regex -import cats.Id -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.RemoteAdapter -import com.snowplowanalytics.snowplow.enrich.common.enrichments.{EnrichmentRegistry, MiscEnrichments} -import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, JsonUtils, ShiftExecution} -import com.snowplowanalytics.snowplow.enrich.stream.model.{AWSCredentials, CloudAgnosticPlatformConfig, GCPCredentials, Kafka, Nsq, Stdin} -import org.specs2.matcher.{Expectable, Matcher} -import sources.TestSource -import utils._ - -/** - * Defines some useful helpers for the specs. - */ -object SpecHelpers { - - implicit def stringToJustString(s: String) = JustString(s) - implicit def regexToJustRegex(r: Regex) = JustRegex(r) - - val v_etl = MiscEnrichments.etlVersion(Processor(generated.BuildInfo.name, generated.BuildInfo.version)) - - val TimestampRegex = - "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,2} [0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.\\d{3})?".r - - /** - * The regexp pattern for a Type 4 UUID. - * - * Taken from Gajus Kuizinas's SO answer: - * http://stackoverflow.com/a/14166194/255627 - * - * TODO: should this be a Specs2 contrib? - */ - val Uuid4Regexp = "[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}".r - - val ContextWithUuid4Regexp = - new Regex( - Pattern.quote( - """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"""" - ) + - "[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}" + - Pattern.quote("\"}}]}") - ) - - /** - * The names of the fields written out - */ - lazy val OutputFields = classOf[EnrichedEvent].getDeclaredFields - .map(_.getName) - - /** - * User-friendly wrapper to instantiate - * a BeFieldEqualTo Matcher. - */ - def beFieldEqualTo(expected: StringOrRegex, withIndex: Int) = - new BeFieldEqualTo(expected, withIndex) - - /** - * A Specs2 matcher to check if a EnrichedEvent - * field is correctly set. - * - * A couple of neat tricks: - * - * 1. Applies a regexp comparison if the field is - * only regexpable, not equality-comparable - * 2. On failure, print out the field's name as - * well as the mismatch, to help with debugging - */ - class BeFieldEqualTo(expected: StringOrRegex, index: Int) extends Matcher[String] { - - private val field = OutputFields(index) - - private val regexp = expected match { - case JustRegex(_) => true - case JustString(_) => false - } - - def apply[S <: String](actual: Expectable[S]) = { - - lazy val successMsg = - s"$field: ${actual.description} %s $expected".format(if (regexp) "matches" else "equals") - - lazy val failureMsg = - s"$field: ${actual.description} does not %s $expected" - .format(if (regexp) "match" else "equal") - - result(equalsOrMatches(actual.value, expected), successMsg, failureMsg, actual) - } - - /** - * Checks that the fields equal each other, - * or matches the regular expression as - * required. - * - * @param actual The actual value - * @param expected The expected value, or - * regular expression to match against - * @return true if the actual equals or - * matches expected, false otherwise - */ - private def equalsOrMatches(actual: String, expected: StringOrRegex): Boolean = - expected match { - case JustRegex(r) => r.pattern.matcher(actual).matches - case JustString(s) => actual == s - } - } - - /** - * A TestSource for testing against. - * Built using an inline configuration file - * with both source and sink set to test. - */ - lazy val TestSource = new TestSource(client, adapterRegistry, enrichmentRegistry) - val igluCentralDefaultConfig = - """{ - "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0", - "data": { - "cacheSize": 500, - "repositories": [ - { - "name": "Iglu Central", - "priority": 0, - "vendorPrefixes": [ "com.snowplowanalytics" ], - "connection": { - "http": { - "uri": "http://iglucentral.com" - } - } - }, - { - "name": "referer 2.0", - "priority": 0, - "vendorPrefixes": [ "com.snowplowanalytics" ], - "connection": { - "http": { - "uri": "http://iglucentral-dev.com.s3-website-us-east-1.amazonaws.com/referer-parser-2" - } - } - } - ] - } - } - """ - - val igluConfig = { - val resolverEnvVar = for { - config <- sys.env.get("ENRICH_RESOLVER_CONFIG") - if config.nonEmpty - } yield config - resolverEnvVar.getOrElse(igluCentralDefaultConfig) - } - val validatedResolver = for { - json <- JsonUtils.extractJson(igluConfig) - resolver <- IgluCirceClient.parseDefault[Id](json).leftMap(_.toString).value - } yield resolver - - val client = validatedResolver.fold( - e => throw new RuntimeException(e), - s => s - ) - - // Vendor and name are intentionally tweaked in the first enrichment - // to test that we are no longer validating them (users were confused about such validation) - val enrichmentConfig = - """|{ - |"schema": "iglu:com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0", - |"data": [ - |{ - |"schema": "iglu:com.snowplowanalytics.snowplow/anon_ip/jsonschema/1-0-0", - |"data": { - |"vendor": "com.snowplowanalytics.snowplow_custom", - |"name": "anon_ip_custom", - |"enabled": true, - |"parameters": { - |"anonOctets": 1 - |} - |} - |}, - |{ - |"schema": "iglu:com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-0", - |"data": { - |"vendor": "com.snowplowanalytics.snowplow", - |"name": "campaign_attribution", - |"enabled": true, - |"parameters": { - |"mapping": "static", - |"fields": { - |"mktMedium": ["utm_medium", "medium"], - |"mktSource": ["utm_source", "source"], - |"mktTerm": ["utm_term", "legacy_term"], - |"mktContent": ["utm_content"], - |"mktCampaign": ["utm_campaign", "cid", "legacy_campaign"] - |} - |} - |} - |}, - |{ - |"schema": "iglu:com.snowplowanalytics.snowplow/user_agent_utils_config/jsonschema/1-0-0", - |"data": { - |"vendor": "com.snowplowanalytics.snowplow", - |"name": "user_agent_utils_config", - |"enabled": true, - |"parameters": { - |} - |} - |}, - |{ - |"schema": "iglu:com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0", - |"data": { - |"vendor": "com.snowplowanalytics.snowplow", - |"name": "referer_parser", - |"enabled": true, - |"parameters": { - |"internalDomains": ["www.subdomain1.snowplowanalytics.com"], - |"database": "referer-tests.json", - |"uri": "http://snowplow.com" - |} - |} - |}, - |{ - |"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0", - |"data": { - |"vendor": "com.snowplowanalytics.snowplow.enrichments", - |"name": "pii_enrichment_config", - |"emitEvent": true, - |"enabled": true, - |"parameters": { - |"pii": [ - |{ - |"pojo": { - |"field": "user_id" - |} - |}, - |{ - |"pojo": { - |"field": "user_ipaddress" - |} - |}, - |{ - |"json": { - |"field": "unstruct_event", - |"schemaCriterion": "iglu:com.mailgun/message_delivered/jsonschema/1-0-*", - |"jsonPath": "$$['recipient']" - |} - |}, - |{ - |"json": { - |"field": "unstruct_event", - |"schemaCriterion": "iglu:com.mailchimp/subscribe/jsonschema/1-*-*", - |"jsonPath": "$$.data.['email', 'ip_opt']" - |} - |} - |], - |"strategy": { - |"pseudonymize": { - |"hashFunction": "SHA-1", - |"salt": "pepper123" - |} - |} - |} - |} - |} - |] - |}""".stripMargin.replaceAll("[\n\r]", "").stripMargin.replaceAll("[\n\r]", "") - - val enrichmentRegistry = (for { - registryConfig <- JsonUtils.extractJson(enrichmentConfig) - confs <- EnrichmentRegistry.parse(registryConfig, client, true).leftMap(_.toString).toEither - reg <- EnrichmentRegistry.build[Id](confs, BlockerF.noop, ShiftExecution.noop).value - } yield reg) fold ( - e => throw new RuntimeException(e), - s => s - ) - - // Init AdapterRegistry with one RemoteAdapter used for integration tests - val adapterRegistry = new AdapterRegistry( - Map(("remoteVendor", "v42") -> new RemoteAdapter("http://localhost:9090/", None, None)) - ) - - val kafkaConfig: CloudAgnosticPlatformConfig = - Kafka(Some(AWSCredentials("access1", "secret1")), None, None, "", 0, None, None) - val nsqConfigWithoutCreds: CloudAgnosticPlatformConfig = Nsq(None, None, None, "", "", 0, "", 0) - val nsqConfigWithCreds: CloudAgnosticPlatformConfig = Nsq( - Some(AWSCredentials("access2", "secret2")), - Some(GCPCredentials("credsPath1")), - None, - "", - "", - 0, - "", - 0 - ) - val stdinConfig: CloudAgnosticPlatformConfig = - Stdin(None, Some(GCPCredentials("credsPath2")), None) -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/UtilsSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/UtilsSpec.scala deleted file mode 100644 index a35254ebf..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/UtilsSpec.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import com.snowplowanalytics.snowplow.enrich.stream.model.{AWSCredentials, DualCloudCredentialsPair, GCPCredentials, NoCredentials} -import org.specs2.mutable.Specification - -class UtilsSpec extends Specification { - "validatePii" should { - "return left if the enrichment is on and there is no stream name" in { - utils.validatePii(true, None) must beLeft( - "PII was configured to emit, but no PII stream name was given" - ) - } - - "return right otherwise" in { - utils.validatePii(true, Some("s")) must beRight(()) - utils.validatePii(false, Some("s")) must beRight(()) - utils.validatePii(false, None) must beRight(()) - } - } - - "emitPii" should { - "return true if the emit event enrichment setting is true" in { - utils.emitPii(SpecHelpers.enrichmentRegistry) must_== true - } - } - - "extractCredentials" should { - "extract optional AWS and GCP credential from cloud agnostic configuration" in { - utils.extractCredentials(SpecHelpers.kafkaConfig) mustEqual DualCloudCredentialsPair( - AWSCredentials("access1", "secret1"), - NoCredentials - ) - utils.extractCredentials(SpecHelpers.nsqConfigWithoutCreds) mustEqual DualCloudCredentialsPair( - NoCredentials, - NoCredentials - ) - utils.extractCredentials(SpecHelpers.nsqConfigWithCreds) mustEqual DualCloudCredentialsPair( - AWSCredentials("access2", "secret2"), - GCPCredentials("credsPath1") - ) - utils.extractCredentials(SpecHelpers.stdinConfig) mustEqual DualCloudCredentialsPair( - NoCredentials, - GCPCredentials("credsPath2") - ) - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/bad/InvalidEnrichedEventSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/bad/InvalidEnrichedEventSpec.scala deleted file mode 100644 index 02903fedf..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/bad/InvalidEnrichedEventSpec.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package bad - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification - -import SpecHelpers._ - -object InvalidEnrichedEventSpec { - - val raw = - "CgABAAABSVEMALYLABQAAAAQc3NjLTAuMi4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAAA1lPW5vbmV4aXN0ZW50AAsALQAAAAlsb2NhbGhvc3QLADIAAABjY3VybC83LjIyLjAgKHg4Nl82NC1wYy1saW51eC1nbnUpIGxpYmN1cmwvNy4yMi4wIE9wZW5TU0wvMS4wLjEgemxpYi8xLjIuMy40IGxpYmlkbi8xLjIzIGxpYnJ0bXAvMi4zDwBGCwAAAAMAAAALQWNjZXB0OiAqLyoAAAAUSG9zdDogbG9jYWxob3N0OjgwODAAAABvVXNlci1BZ2VudDogY3VybC83LjIyLjAgKHg4Nl82NC1wYy1saW51eC1nbnUpIGxpYmN1cmwvNy4yMi4wIE9wZW5TU0wvMS4wLjEgemxpYi8xLjIuMy40IGxpYmlkbi8xLjIzIGxpYnJ0bXAvMi4zCwBQAAAAJGE1YWUwMWQ1LTQ5NTctNGQyZS1iMzRjLTM4YTU1ZDExNGZlMQA=" -} - -class InvalidEnrichedEventSpec extends Specification { - - // TODO: update this after https://github.com/snowplow/snowplow/issues/463 - "Stream Enrich" should { - - "return None for a valid SnowplowRawEvent which fails enrichment" in { - - val rawEvent = Base64.decodeBase64(InvalidEnrichedEventSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isInvalid must beTrue - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PagePingWithContextSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PagePingWithContextSpec.scala deleted file mode 100644 index f104c91d6..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PagePingWithContextSpec.scala +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object PagePingWithContextSpec { - - val raw = - "CgABAAABQ/RDKr8LABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAAC/WU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmN4PWV5SnpZMmhsYldFaU9pSnBaMngxT21OdmJTNXpibTkzY0d4dmQyRnVZV3g1ZEdsamN5NXpibTkzY0d4dmR5OWpiMjUwWlhoMGN5OXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2laR0YwWVNJNlczc2ljMk5vWlcxaElqb2lhV2RzZFRwamIyMHVjMjV2ZDNCc2IzZGhibUZzZVhScFkzTXVjMjV2ZDNCc2IzY3ZkMlZpWDNCaFoyVXZhbk52Ym5OamFHVnRZUzh4TFRBdE1DSXNJbVJoZEdFaU9uc2lhV1FpT2lKaU1EVmlNekZqTXkwNE1XRmpMVFJoWmpVdE9USmtNUzB4TVRNeE16TTVOamcyTlRVaWZYMWRmUSZkdG09MTM5MTM3MjQ3OTMyOSZ0aWQ9NTc2NjY4JnZwPTE2ODB4NDE1JmRzPTE2ODB4NDE1JnZpZD0yNSZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD13ZWImdHY9anMtMC4xMy4xJmZwPTE4MDQ5NTQ3OTAmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0xOTIweDEwODAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA5MzE1MjEuMTM5MTExMDU4Mi43OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjcuMTM5MTExMTgxOS4xMzkwOTMxNTQ1OyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IHNwPTc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NDsgd2NzaWQ9S1JoaGs0SEVMcDJBaXBxTDdNNVZvbkNQT1B5QW5GMUo7IF9va2x2PTEzOTExMTE3NzkzMjglMkNLUmhoazRIRUxwMkFpcHFMN001Vm9uQ1BPUHlBbkYxSjsgX191dG1jPTExMTg3MjI4MTsgX29rYms9Y2Q0JTNEdHJ1ZSUyQ3ZpNSUzRDAlMkN2aTQlM0QxMzkxMTEwNTg1NDkwJTJDdmkzJTNEYWN0aXZlJTJDdmkyJTNEZmFsc2UlMkN2aTElM0RmYWxzZSUyQ2NkOCUzRGNoYXQlMkNjZDYlM0QwJTJDY2Q1JTNEYXdheSUyQ2NkMyUzRGZhbHNlJTJDY2QyJTNEMCUyQ2NkMSUzRDAlMkM7IF9vaz05NzUyLTUwMy0xMC01MjI3AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 20:21:19.167", - "2014-02-02 20:21:19.329", - "page_ping", - Uuid4Regexp, - "576668", - "", // No tracker name - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "25", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "Asynchronous website/webapp examples for snowplow.js", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/web_page/jsonschema/1-0-0","data":{"id":"b05b31c3-81ac-4af5-92d1-113133968655"}}]}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "0", - "0", - "0", - "0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 20:21:19.167", - "com.snowplowanalytics.snowplow", - "page_ping", - "jsonschema", - "1-0-0", - "", - "" - ) - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 20:21:19.167", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) - -} - -class PagePingWithContextSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid page ping with context" in { - - val rawEvent = Base64.decodeBase64(PagePingWithContextSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - fields.size must beEqualTo(PagePingWithContextSpec.expected.size) - piiFields.size must beEqualTo(PagePingWithContextSpec.pii.size) - Result.unit { - for (idx <- PagePingWithContextSpec.expected.indices) - fields(idx) must beFieldEqualTo(PagePingWithContextSpec.expected(idx), withIndex = idx) - for (idx <- PagePingWithContextSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(PagePingWithContextSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PageViewWithContextSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PageViewWithContextSpec.scala deleted file mode 100644 index 40878a317..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/PageViewWithContextSpec.scala +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object PageViewWithContextSpec { - - val raw = - "CgABAAABQ/RBpSMLABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACx2U9cHYmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJmN4PWV5SnpZMmhsYldFaU9pSnBaMngxT21OdmJTNXpibTkzY0d4dmQyRnVZV3g1ZEdsamN5NXpibTkzY0d4dmR5OWpiMjUwWlhoMGN5OXFjMjl1YzJOb1pXMWhMekV0TUMwd0lpd2laR0YwWVNJNlczc2ljMk5vWlcxaElqb2lhV2RzZFRwamIyMHVjMjV2ZDNCc2IzZGhibUZzZVhScFkzTXVjMjV2ZDNCc2IzY3ZkMlZpWDNCaFoyVXZhbk52Ym5OamFHVnRZUzh4TFRBdE1DSXNJbVJoZEdFaU9uc2lhV1FpT2lKaU1EVmlNekZqTXkwNE1XRmpMVFJoWmpVdE9USmtNUzB4TVRNeE16TTVOamcyTlRVaWZYMWRmUSZ0aWQ9MTI1NTQ2JnZwPTE2ODB4NDE1JmRzPTE2ODB4NDE1JnZpZD0yNSZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD13ZWImdHY9anMtMC4xMy4xJmZwPTE4MDQ5NTQ3OTAmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0xOTIweDEwODAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA5MzE1MjEuMTM5MTExMDU4Mi43OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjcuMTM5MTExMTgxOS4xMzkwOTMxNTQ1OyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IHNwPTc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NDsgd2NzaWQ9S1JoaGs0SEVMcDJBaXBxTDdNNVZvbkNQT1B5QW5GMUo7IF9va2x2PTEzOTExMTE3NzkzMjglMkNLUmhoazRIRUxwMkFpcHFMN001Vm9uQ1BPUHlBbkYxSjsgX191dG1jPTExMTg3MjI4MTsgX29rYms9Y2Q0JTNEdHJ1ZSUyQ3ZpNSUzRDAlMkN2aTQlM0QxMzkxMTEwNTg1NDkwJTJDdmkzJTNEYWN0aXZlJTJDdmkyJTNEZmFsc2UlMkN2aTElM0RmYWxzZSUyQ2NkOCUzRGNoYXQlMkNjZDYlM0QwJTJDY2Q1JTNEYXdheSUyQ2NkMyUzRGZhbHNlJTJDY2QyJTNEMCUyQ2NkMSUzRDAlMkM7IF9vaz05NzUyLTUwMy0xMC01MjI3AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 20:19:39.427", - "", - "page_view", - Uuid4Regexp, // Regexp match - "125546", - "", - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "25", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "Asynchronous website/webapp examples for snowplow.js", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/web_page/jsonschema/1-0-0","data":{"id":"b05b31c3-81ac-4af5-92d1-113133968655"}}]}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 20:19:39.427", - "com.snowplowanalytics.snowplow", - "page_view", - "jsonschema", - "1-0-0", - "", - "" - ) - - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 20:19:39.427", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) -} - -class PageViewWithContextSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid page view with context" in { - - val rawEvent = Base64.decodeBase64(PageViewWithContextSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.size must beEqualTo(PageViewWithContextSpec.expected.size) - enrichedEvent.toOption.get._3 must not beNone - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - piiFields.size must beEqualTo(PageViewWithContextSpec.pii.size) - Result.unit { - for (idx <- PageViewWithContextSpec.expected.indices) - fields(idx) must beFieldEqualTo(PageViewWithContextSpec.expected(idx), withIndex = idx) - for (idx <- PageViewWithContextSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(PageViewWithContextSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/RemoteAdapterIntegrationTest.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/RemoteAdapterIntegrationTest.scala deleted file mode 100644 index 91be3ef4e..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/RemoteAdapterIntegrationTest.scala +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import java.io.InputStream -import java.net.InetSocketAddress - -import cats.syntax.either._ -import cats.syntax.option._ -import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.{CollectorPayload => CollectorPayload1} -import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} -import io.circe.generic.auto._ -import io.circe.parser._ -import io.circe.syntax._ -import org.apache.thrift.TSerializer -import org.specs2.execute.Result -import org.specs2.mutable.Specification -import org.specs2.specification.BeforeAfterAll - -import SpecHelpers._ - -final case class Payload( - queryString: Map[String, String], - headers: List[String], - body: Option[String], - contentType: Option[String] -) - -final case class Response(events: Option[List[Map[String, String]]], error: Option[String]) - -// This integration test instantiates an HTTP server acting like a remote adapter -// and creates payloads to be sent to it. -object RemoteAdapterIntegrationTest { - - val transactionId = "123456" // added to the event by the remote adapter - - def localHttpAdapter(tcpPort: Int, basePath: String = ""): HttpServer = { - def _handle(body: String): String = - (for { - json <- parse(body).leftMap(_ => "not json") - payload <- json.as[Payload].leftMap(_ => "doesn't match payload format") - } yield List(payload.queryString ++ Map("tid" -> transactionId))) - .fold( - f => Response(None, f.some), - l => Response(l.some, None) - ) - .asJson - .noSpaces - - def inputStreamToString(is: InputStream): String = { - val s = new java.util.Scanner(is).useDelimiter("\\A") - if (s.hasNext) s.next() else "" - } - - val localAdapter = HttpServer.create(new InetSocketAddress(tcpPort), 0) - localAdapter.createContext( - s"/$basePath", - new HttpHandler { - def handle(exchange: HttpExchange): Unit = { - val bodyStr = inputStreamToString(exchange.getRequestBody) - val response = _handle(bodyStr) - exchange.sendResponseHeaders(200, 0) - exchange.getResponseBody.write(response.getBytes) - exchange.getResponseBody.close() - } - } - ) - localAdapter - } -} - -class RemoteAdapterIntegrationTest extends Specification with BeforeAfterAll { - import RemoteAdapterIntegrationTest._ - - val localAdapter: HttpServer = localHttpAdapter(9090) - - def beforeAll() = localAdapter.start() - - def afterAll() = localAdapter.stop(0) - - val ThriftSerializer = new ThreadLocal[TSerializer] { - override def initialValue = new TSerializer() - } - val serializer = ThriftSerializer.get() - - val e = new CollectorPayload1( - "iglu:com.snowplowanalytics.snowplow/CollectorPayload/thrift/1-0-0", - "79.213.165.223", - System.currentTimeMillis, - "UTF-8", - "cloudfront" - ) - e.path = "/remoteVendor/v42" // path that will lead to remote adapter, same as in SpecHelpers - e.querystring = "&e=pp&pp_mix=0&pp_max=7&pp_miy=0&pp_may=746" // page ping event - - sequential - "Stream Enrich" should { - - "be able to send payloads to a remote HTTP adapter and the enriched events should contain fields added by the remote adapter" in { - - e.body = "{}" // required by remote adapter - val goodPayload = serializer.serialize(e) - - val expected = List[StringOrRegex]( - "", - "", - TimestampRegex, - TimestampRegex, - "", - "page_ping", - Uuid4Regexp, - "123456", - "", - "", - "cloudfront", - v_etl, - "", - "fbc9cb674bbaeb0dfe13b743bc043790928931e1", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "0", - "7", - "0", - "746", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "page_ping", - "jsonschema", - "1-0-0", - "", - "" - ) - - val enrichedEvent = TestSource.enrichEvents(goodPayload)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.contains(transactionId) must beTrue // added by remote adapter - fields.size must beEqualTo(StructEventSpec.expected.size) - Result.unit { - for (idx <- StructEventSpec.expected.indices) - fields(idx) must beFieldEqualTo(expected(idx), withIndex = idx) - } - } - - "be able to send payloads to a remote HTTP adapter and handle a problem on the remote adapter" in { - e.body = null // required by the remote adapter - val badPayload = serializer.serialize(e) - - val enrichedEvent = TestSource.enrichEvents(badPayload)(0) - enrichedEvent.isValid must beFalse - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventSpec.scala deleted file mode 100644 index 29649146e..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventSpec.scala +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object StructEventSpec { - - val raw = - "CgABAAABQ/Sevy0LABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAABvGU9c2Umc2VfY2E9TWl4ZXMmc2VfYWM9UGxheSZzZV9sYT1NUkMvZmFicmljLTA1MDMtbWl4JnNlX3ZhPTAuMCZkdG09MTM5MTM3ODQ4MTA3MSZ0aWQ9MzQ0MjE0JnZwPTE2ODB4NDE1JmRzPTE2ODB4NDE1JnZpZD0yNiZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD13ZWImdHY9anMtMC4xMy4xJmZwPTE4MDQ5NTQ3OTAmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0xOTIweDEwODAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA5MzE1MjEuMTM5MTExMDU4Mi43OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjcuMTM5MTExMTgxOS4xMzkwOTMxNTQ1OyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IHNwPTc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NDsgd2NzaWQ9S1JoaGs0SEVMcDJBaXBxTDdNNVZvbkNQT1B5QW5GMUo7IF9va2x2PTEzOTExMTE3NzkzMjglMkNLUmhoazRIRUxwMkFpcHFMN001Vm9uQ1BPUHlBbkYxSjsgX191dG1jPTExMTg3MjI4MTsgX29rYms9Y2Q0JTNEdHJ1ZSUyQ3ZpNSUzRDAlMkN2aTQlM0QxMzkxMTEwNTg1NDkwJTJDdmkzJTNEYWN0aXZlJTJDdmkyJTNEZmFsc2UlMkN2aTElM0RmYWxzZSUyQ2NkOCUzRGNoYXQlMkNjZDYlM0QwJTJDY2Q1JTNEYXdheSUyQ2NkMyUzRGZhbHNlJTJDY2QyJTNEMCUyQ2NkMSUzRDAlMkM7IF9vaz05NzUyLTUwMy0xMC01MjI3AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 22:01:20.941", - "2014-02-02 22:01:21.071", - "struct", - Uuid4Regexp, // Regexp match - "344214", - "", - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "26", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "Mixes", - "Play", - "MRC/fabric-0503-mix", - "", - "0.0", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 22:01:20.941", - "com.google.analytics", - "event", - "jsonschema", - "1-0-0", - "", - "" - ) - - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 22:01:20.941", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) - -} - -class StructEventSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid structured event" in { - - val rawEvent = Base64.decodeBase64(StructEventSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.size must beEqualTo(StructEventSpec.expected.size) - enrichedEvent.toOption.get._3 must not beNone - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - piiFields.size must beEqualTo(StructEventSpec.pii.size) - Result.unit { - for (idx <- StructEventSpec.expected.indices) - fields(idx) must beFieldEqualTo(StructEventSpec.expected(idx), withIndex = idx) - for (idx <- StructEventSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(StructEventSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventWithContextSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventWithContextSpec.scala deleted file mode 100644 index 2d1689130..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/StructEventWithContextSpec.scala +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object StructEventWithContextSpec { - - val raw = - "CgABAAABQ/SgCPELABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAAC4GU9c2Umc2VfY2E9Q2hlY2tvdXQmc2VfYWM9QWRkJnNlX2xhPUFTTzAxMDQzJnNlX3ByPWJsdWU6eHhsJnNlX3ZhPTIuMCZjeD1leUp6WTJobGJXRWlPaUpwWjJ4MU9tTnZiUzV6Ym05M2NHeHZkMkZ1WVd4NWRHbGpjeTV6Ym05M2NHeHZkeTlqYjI1MFpYaDBjeTlxYzI5dWMyTm9aVzFoTHpFdE1DMHdJaXdpWkdGMFlTSTZXM3NpYzJOb1pXMWhJam9pYVdkc2RUcGpiMjB1YzI1dmQzQnNiM2RoYm1Gc2VYUnBZM011YzI1dmQzQnNiM2N2ZDJWaVgzQmhaMlV2YW5OdmJuTmphR1Z0WVM4eExUQXRNQ0lzSW1SaGRHRWlPbnNpYVdRaU9pSmlNRFZpTXpGak15MDRNV0ZqTFRSaFpqVXRPVEprTVMweE1UTXhNek01TmpnMk5UVWlmWDFkZlEmZHRtPTEzOTEzNzg1NjU0OTMmdGlkPTc4MjQ0OSZ2cD0xNjgweDQxNSZkcz0xNjgweDQxNSZ2aWQ9MjYmZHVpZD0zYzE3NTc1NDRlMzliY2E0JnA9d2ViJnR2PWpzLTAuMTMuMSZmcD0xODA0OTU0NzkwJmFpZD1DRmUyM2EmbGFuZz1lbi1VUyZjcz1VVEYtOCZ0ej1FdXJvcGUvTG9uZG9uJnVpZD1hbGV4KzEyMyZmX3BkZj0wJmZfcXQ9MSZmX3JlYWxwPTAmZl93bWE9MCZmX2Rpcj0wJmZfZmxhPTEmZl9qYXZhPTAmZl9nZWFycz0wJmZfYWc9MCZyZXM9MTkyMHgxMDgwJmNkPTI0JmNvb2tpZT0xJnVybD1maWxlOi8vZmlsZTovLy9Vc2Vycy9hbGV4L0RldmVsb3BtZW50L2Rldi1lbnZpcm9ubWVudC9kZW1vLzEtdHJhY2tlci9ldmVudHMuaHRtbC9vdmVycmlkZGVuLXVybC8ACwAtAAAACWxvY2FsaG9zdAsAMgAAAFFNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAPAEYLAAAABwAAABZDb25uZWN0aW9uOiBrZWVwLWFsaXZlAAACcENvb2tpZTogX191dG1hPTExMTg3MjI4MS44NzgwODQ0ODcuMTM5MDIzNzEwNy4xMzkwOTMxNTIxLjEzOTExMTA1ODIuNzsgX191dG16PTExMTg3MjI4MS4xMzkwMjM3MTA3LjEuMS51dG1jc3I9KGRpcmVjdCl8dXRtY2NuPShkaXJlY3QpfHV0bWNtZD0obm9uZSk7IF9zcF9pZC4xZmZmPWI4OWE2ZmE2MzFlZWZhYzIuMTM5MDIzNzEwNy43LjEzOTExMTE4MTkuMTM5MDkzMTU0NTsgaGJsaWQ9Q1BqanVodkYwNXprdFA3SjdNNVZvM05JR1BMSnkxU0Y7IG9sZnNrPW9sZnNrNTYyOTIzNjM1NjE3NTU0OyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQ7IHdjc2lkPUtSaGhrNEhFTHAyQWlwcUw3TTVWb25DUE9QeUFuRjFKOyBfb2tsdj0xMzkxMTExNzc5MzI4JTJDS1JoaGs0SEVMcDJBaXBxTDdNNVZvbkNQT1B5QW5GMUo7IF9fdXRtYz0xMTE4NzIyODE7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MTExMDU4NTQ5MCUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBfb2s9OTc1Mi01MDMtMTAtNTIyNwAAAB5BY2NlcHQtRW5jb2Rpbmc6IGd6aXAsIGRlZmxhdGUAAAAaQWNjZXB0LUxhbmd1YWdlOiBlbi1VUywgZW4AAAArQWNjZXB0OiBpbWFnZS9wbmcsIGltYWdlLyo7cT0wLjgsICovKjtxPTAuNQAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 22:02:45.361", - "2014-02-02 22:02:45.493", - "struct", - Uuid4Regexp, // Regexp match - "782449", - "", - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "26", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/web_page/jsonschema/1-0-0","data":{"id":"b05b31c3-81ac-4af5-92d1-113133968655"}}]}""", - "Checkout", - "Add", - "ASO01043", - "blue:xxl", - "2.0", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 22:02:45.361", - "com.google.analytics", - "event", - "jsonschema", - "1-0-0", - "", - "" - ) - - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 22:02:45.361", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) -} - -class StructEventWithContextSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid structured event with context" in { - - val rawEvent = Base64.decodeBase64(StructEventWithContextSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.size must beEqualTo(StructEventWithContextSpec.expected.size) - enrichedEvent.toOption.get._3 must not beNone - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - piiFields.size must beEqualTo(StructEventWithContextSpec.pii.size) - Result.unit { - for (idx <- StructEventWithContextSpec.expected.indices) - fields(idx) must beFieldEqualTo( - StructEventWithContextSpec.expected(idx), - withIndex = idx - ) - for (idx <- StructEventWithContextSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(StructEventWithContextSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala deleted file mode 100644 index 7e378f3b3..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object TransactionItemSpec { - - val raw = - "CgABAAABQ/SiVfkLABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAAByWU9dGkmdGlfaWQ9b3JkZXItMTIzJnRpX3NrPTEwMDImdGlfbmE9UmVkK3Nob2VzJnRpX3ByPTQwMDAmdGlfcXU9MSZ0aV9jdT1KUFkmZHRtPTEzOTEzNzg3MTYyNzUmdGlkPTQwMDAxNyZ2cD0xNjgweDQxNSZkcz0xNjgweDQxNSZ2aWQ9MjYmZHVpZD0zYzE3NTc1NDRlMzliY2E0JnA9d2ViJnR2PWpzLTAuMTMuMSZmcD0xODA0OTU0NzkwJmFpZD1DRmUyM2EmbGFuZz1lbi1VUyZjcz1VVEYtOCZ0ej1FdXJvcGUvTG9uZG9uJnVpZD1hbGV4KzEyMyZmX3BkZj0wJmZfcXQ9MSZmX3JlYWxwPTAmZl93bWE9MCZmX2Rpcj0wJmZfZmxhPTEmZl9qYXZhPTAmZl9nZWFycz0wJmZfYWc9MCZyZXM9MTkyMHgxMDgwJmNkPTI0JmNvb2tpZT0xJnVybD1maWxlOi8vZmlsZTovLy9Vc2Vycy9hbGV4L0RldmVsb3BtZW50L2Rldi1lbnZpcm9ubWVudC9kZW1vLzEtdHJhY2tlci9ldmVudHMuaHRtbC9vdmVycmlkZGVuLXVybC8ACwAtAAAACWxvY2FsaG9zdAsAMgAAAFFNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAPAEYLAAAABwAAABZDb25uZWN0aW9uOiBrZWVwLWFsaXZlAAACcENvb2tpZTogX191dG1hPTExMTg3MjI4MS44NzgwODQ0ODcuMTM5MDIzNzEwNy4xMzkwOTMxNTIxLjEzOTExMTA1ODIuNzsgX191dG16PTExMTg3MjI4MS4xMzkwMjM3MTA3LjEuMS51dG1jc3I9KGRpcmVjdCl8dXRtY2NuPShkaXJlY3QpfHV0bWNtZD0obm9uZSk7IF9zcF9pZC4xZmZmPWI4OWE2ZmE2MzFlZWZhYzIuMTM5MDIzNzEwNy43LjEzOTExMTE4MTkuMTM5MDkzMTU0NTsgaGJsaWQ9Q1BqanVodkYwNXprdFA3SjdNNVZvM05JR1BMSnkxU0Y7IG9sZnNrPW9sZnNrNTYyOTIzNjM1NjE3NTU0OyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQ7IHdjc2lkPUtSaGhrNEhFTHAyQWlwcUw3TTVWb25DUE9QeUFuRjFKOyBfb2tsdj0xMzkxMTExNzc5MzI4JTJDS1JoaGs0SEVMcDJBaXBxTDdNNVZvbkNQT1B5QW5GMUo7IF9fdXRtYz0xMTE4NzIyODE7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MTExMDU4NTQ5MCUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBfb2s9OTc1Mi01MDMtMTAtNTIyNwAAAB5BY2NlcHQtRW5jb2Rpbmc6IGd6aXAsIGRlZmxhdGUAAAAaQWNjZXB0LUxhbmd1YWdlOiBlbi1VUywgZW4AAAArQWNjZXB0OiBpbWFnZS9wbmcsIGltYWdlLyo7cT0wLjgsICovKjtxPTAuNQAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 22:05:16.153", - "2014-02-02 22:05:16.275", - "transaction_item", - Uuid4Regexp, // Regexp match - "400017", - "", - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "26", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "order-123", - "1002", - "Red shoes", - "", - "4000", - "1", - "", - "", - "", - "", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "", - "", - "", - "", - "JPY", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 22:05:16.153", - "com.snowplowanalytics.snowplow", - "transaction_item", - "jsonschema", - "1-0-0", - "", - "" - ) - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 22:05:16.153", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) -} - -class TransactionItemSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid transaction item" in { - - val rawEvent = Base64.decodeBase64(TransactionItemSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.size must beEqualTo(TransactionItemSpec.expected.size) - enrichedEvent.toOption.get._3 must not beNone - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - piiFields.size must beEqualTo(TransactionItemSpec.pii.size) - Result.unit { - for (idx <- TransactionItemSpec.expected.indices) - fields(idx) must beFieldEqualTo(TransactionItemSpec.expected(idx), withIndex = idx) - - for (idx <- TransactionItemSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(TransactionItemSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala deleted file mode 100644 index c11901f99..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package good - -import org.apache.commons.codec.binary.Base64 -import org.specs2.mutable.Specification -import org.specs2.execute.Result - -import SpecHelpers._ - -object TransactionSpec { - - val raw = - "CgABAAABQ/SiVe8LABQAAAAQc3NjLTAuMS4wLXN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAABpmU9dHImdHJfaWQ9b3JkZXItMTIzJnRyX3R0PTgwMDAmdHJfY3U9SlBZJmR0bT0xMzkxMzc4NzE2MjcxJnRpZD02MzYyMzkmdnA9MTY4MHg0MTUmZHM9MTY4MHg0MTUmdmlkPTI2JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPXdlYiZ0dj1qcy0wLjEzLjEmZnA9MTgwNDk1NDc5MCZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTE5MjB4MTA4MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDkzMTUyMS4xMzkxMTEwNTgyLjc7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNy4xMzkxMTExODE5LjEzOTA5MzE1NDU7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2MjkyMzYzNTYxNzU1NDsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0OyB3Y3NpZD1LUmhoazRIRUxwMkFpcHFMN001Vm9uQ1BPUHlBbkYxSjsgX29rbHY9MTM5MTExMTc3OTMyOCUyQ0tSaGhrNEhFTHAyQWlwcUw3TTVWb25DUE9QeUFuRjFKOyBfX3V0bWM9MTExODcyMjgxOyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTExMTA1ODU0OTAlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgX29rPTk3NTItNTAzLTEwLTUyMjcAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXRlAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" - - val expected = List[StringOrRegex]( - "CFe23a", - "web", - TimestampRegex, - "2014-02-02 22:05:16.143", - "2014-02-02 22:05:16.271", - "transaction", - Uuid4Regexp, // Regexp match - "636239", - "", - "js-0.13.1", - "ssc-0.1.0-stdout", - v_etl, - "d1a21f2589511b4ed04ee297d88d950efb2612dc", - "850474a1f035479d332a5c2d2ad6fe4d07a3f722", - "1804954790", - "3c1757544e39bca4", - "26", - "75a13583-5c99-40e3-81fc-541084dfc784", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "file", - "file", - "80", - "///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "order-123", - "", - "8000", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Firefox 26", - "Firefox", - "26.0", - "Browser", - "GECKO", - "en-US", - "0", - "1", - "0", - "0", - "1", - "0", - "0", - "0", - "0", - "1", - "24", - "1680", - "415", - "Mac OS X", - "Mac OS X", - "Apple Inc.", - "Europe/London", - "Computer", - "0", - "1920", - "1080", - "UTF-8", - "1680", - "415", - "JPY", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "2014-02-02 22:05:16.143", - "com.snowplowanalytics.snowplow", - "transaction", - "jsonschema", - "1-0-0", - "", - "" - ) - val pii = List[StringOrRegex]( - "CFe23a", - "srv", - TimestampRegex, - "2014-02-02 22:05:16.143", - "", - "pii_transformation", - Uuid4Regexp, // Regexp match - "", - "", - "", - "", - v_etl, - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ContextWithUuid4Regexp, - "", - "", - "", - "", - "", - """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"10.0.2.x","modifiedValue":"850474a1f035479d332a5c2d2ad6fe4d07a3f722"},{"fieldName":"user_id","originalValue":"alex 123","modifiedValue":"d1a21f2589511b4ed04ee297d88d950efb2612dc"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-1"}}}}}""", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - TimestampRegex, - "com.snowplowanalytics.snowplow", - "pii_transformation", - "jsonschema", - "1-0-0", - "", - TimestampRegex - ) -} - -class TransactionSpec extends Specification { - - "Stream Enrich" should { - - "enrich a valid transaction" in { - - val rawEvent = Base64.decodeBase64(TransactionSpec.raw) - - val enrichedEvent = TestSource.enrichEvents(rawEvent)(0) - enrichedEvent.isValid must beTrue - - // "-1" prevents empty strings from being discarded from the end of the array - val fields = enrichedEvent.toOption.get._1.split("\t", -1) - fields.size must beEqualTo(TransactionSpec.expected.size) - enrichedEvent.toOption.get._3 must not beNone - val piiFields = enrichedEvent.toOption.get._3.get.split("\t", -1) - piiFields.size must beEqualTo(TransactionSpec.pii.size) - Result.unit { - for (idx <- TransactionSpec.expected.indices) - fields(idx) must beFieldEqualTo(TransactionSpec.expected(idx), withIndex = idx) - for (idx <- TransactionSpec.pii.indices) - piiFields(idx) must beFieldEqualTo(TransactionSpec.pii(idx), withIndex = idx) - } - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/package.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/package.scala deleted file mode 100644 index 2acedd40a..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/package.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich - -import scala.util.matching.Regex - -package object stream { - type StringOrRegex = Either[String, Regex] - val JustString = Left - val JustRegex = Right -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/SourceSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/SourceSpec.scala deleted file mode 100644 index 9a7d30847..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/SourceSpec.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import java.time.Instant - -import org.specs2.mutable.Specification -import com.snowplowanalytics.snowplow.badrows._ - -class SourceSpec extends Specification { - - "getSize" should { - "get the size of a string of ASCII characters" in { - Source.getSize("abcdefg") must_== 7 - } - - "get the size of a string containing non-ASCII characters" in { - Source.getSize("™®字") must_== 8 - } - } - - "adjustOversizedFailureJson" should { - "truncate the original bad row" in { - val processor = Processor("se", "1.0.0") - val original = BadRow.CPFormatViolation( - processor, - Failure.CPFormatViolation( - Instant.ofEpochSecond(12), - "tsv", - FailureDetails.CPFormatViolationMessage.Fallback("ah") - ), - Payload.RawPayload("ah") - ) - val res = Source.adjustOversizedFailureJson(original, 200, processor) - res.schemaKey must_== Schemas.SizeViolation - val failure = res.failure - failure.actualSizeBytes must_== 267 - failure.maximumAllowedSizeBytes must_== 200 - failure.expectation must_== "bad row exceeded the maximum size" - res.payload must_== Payload.RawPayload("""{"schema":"iglu:com.""") - res.processor must_== processor - } - } - - "oversizedSuccessToFailure" should { - "create a bad row JSON from an oversized success" in { - val processor = Processor("se", "1.0.0") - val res = - Source.oversizedSuccessToFailure("abcdefghijklmnopqrstuvwxy", 10, processor) - res.schemaKey must_== Schemas.SizeViolation - val failure = res.failure - failure.actualSizeBytes must_== 25 - failure.maximumAllowedSizeBytes must_== 10 - failure.expectation must_== "event passed enrichment but exceeded the maximum allowed size as a result" - res.payload must_== Payload.RawPayload("a") - res.processor must_== processor - } - } -} diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/TestSource.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/TestSource.scala deleted file mode 100644 index ea6ea7453..000000000 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/TestSource.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ - -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import cats.Id -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry - -import sinks.Sink - -/** - * Source to allow the testing framework to enrich events - * using the same methods from AbstractSource as the other - * sources. - */ -class TestSource( - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id] -) extends Source(client, adapterRegistry, enrichmentRegistry, Processor(generated.BuildInfo.name, generated.BuildInfo.version), "", None) { - - override val MaxRecordSize = None - - override val threadLocalGoodSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = null - } - override val threadLocalPiiSink: Option[ThreadLocal[Sink]] = Some(new ThreadLocal[Sink] { - override def initialValue: Sink = null - }) - override val threadLocalBadSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = null - } - - override def run(): Unit = - throw new RuntimeException("run() should not be called on TestSource") -} diff --git a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaEnrich.scala b/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaEnrich.scala deleted file mode 100644 index 6ebf9f25a..000000000 --- a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaEnrich.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import cats.Id -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import io.circe.Json - -import config.FileConfig -import model.{Credentials, SentryConfig, StreamsConfig} -import sources.{KafkaSource, Source} - -/** The main entry point for Stream Enrich for Kafka. */ -object KafkaEnrich extends Enrich { - - def main(args: Array[String]): Unit = run(args) - - override def getSource( - streamsConfig: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, Source] = - KafkaSource.create( - streamsConfig, - sentryConfig, - client, - adapterRegistry, - enrichmentRegistry, - processor - ) - - override val parser: scopt.OptionParser[FileConfig] = localParser - - override def extractResolver(resolverArgument: String)(implicit creds: Credentials): Either[String, String] = - localResolverExtractor(resolverArgument) - - override def extractEnrichmentConfigs(enrichmentArg: Option[String])(implicit creds: Credentials): Either[String, Json] = - localEnrichmentConfigsExtractor(enrichmentArg) -} diff --git a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala b/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala deleted file mode 100644 index 80cf0db38..000000000 --- a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow -package enrich.stream -package sinks - -import java.util.Properties - -import cats.syntax.either._ -import org.apache.kafka.clients.producer._ - -import model.{BufferConfig, Kafka} - -/** KafkaSink companion object with factory method */ -object KafkaSink { - def validateAndCreateProducer(kafkaConfig: Kafka, bufferConfig: BufferConfig): Either[String, KafkaProducer[String, String]] = - createProducer(kafkaConfig, bufferConfig).asRight - - /** - * Instantiates a producer on an existing topic with the given configuration options. - * This can fail if the producer can't be created. - * @return a Kafka producer - */ - private def createProducer(kafkaConfig: Kafka, bufferConfig: BufferConfig): KafkaProducer[String, String] = { - val properties = createProperties(kafkaConfig, bufferConfig) - kafkaConfig.producerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) } - new KafkaProducer[String, String](properties) - } - - private def createProperties(kafkaConfig: Kafka, bufferConfig: BufferConfig): Properties = { - val props = new Properties() - props.put("bootstrap.servers", kafkaConfig.brokers) - props.put("acks", "all") - props.put("retries", kafkaConfig.retries.toString) - props.put("buffer.memory", bufferConfig.byteLimit.toString) - props.put("linger.ms", bufferConfig.timeLimit.toString) - props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") - props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") - props - } -} - -/** Kafka Sink for Scala enrichment */ -class KafkaSink(kafkaProducer: KafkaProducer[String, String], topicName: String) extends Sink { - - /** - * Side-effecting function to store the EnrichedEvent to the given output stream. - * EnrichedEvent takes the form of a tab-delimited String until such time as - * https://github.com/snowplow/snowplow/issues/211 is implemented. - * @param events List of events together with their partition keys - * @return whether to send the stored events to Kafka - */ - override def storeEnrichedEvents(events: List[(String, String)]): Boolean = { - for ((value, key) <- events) - kafkaProducer.send( - new ProducerRecord(topicName, key, value), - new Callback { - override def onCompletion(metadata: RecordMetadata, e: Exception): Unit = - if (e != null) log.error(s"Sending event failed: ${e.getMessage}") - } - ) - true - } - - /** Blocking method to send all buffered records to Kafka. */ - override def flush(): Unit = kafkaProducer.flush() - -} diff --git a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala b/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala deleted file mode 100644 index 7df4671fc..000000000 --- a/modules/stream/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import java.util.Properties - -import scala.collection.JavaConverters._ - -import cats.Id -import cats.syntax.either._ -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import org.apache.kafka.clients.consumer.KafkaConsumer -import org.apache.kafka.clients.producer._ - -import model.{Kafka, SentryConfig, StreamsConfig} -import sinks.{KafkaSink, Sink} -import java.time.Duration - -/** KafkaSubSource companion object with factory method */ -object KafkaSource { - def create( - config: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor - ): Either[String, KafkaSource] = - for { - kafkaConfig <- config.sourceSink match { - case c: Kafka => c.asRight - case _ => "Configured source/sink is not Kafka".asLeft - } - goodProducer <- KafkaSink.validateAndCreateProducer(kafkaConfig, config.buffer) - emitPii = utils.emitPii(enrichmentRegistry) - _ <- utils.validatePii(emitPii, config.out.pii) - piiProducer <- config.out.pii match { - case Some(_) => - KafkaSink - .validateAndCreateProducer(kafkaConfig, config.buffer) - .map(Some(_)) - case None => None.asRight - } - badProducer <- KafkaSink.validateAndCreateProducer(kafkaConfig, config.buffer) - } yield new KafkaSource( - goodProducer, - piiProducer, - badProducer, - client, - adapterRegistry, - enrichmentRegistry, - processor, - config, - kafkaConfig, - sentryConfig - ) -} - -/** Source to read events from a Kafka topic */ -class KafkaSource private ( - goodProducer: KafkaProducer[String, String], - piiProducer: Option[KafkaProducer[String, String]], - badProducer: KafkaProducer[String, String], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor, - config: StreamsConfig, - kafkaConfig: Kafka, - sentryConfig: Option[SentryConfig] -) extends Source(client, adapterRegistry, enrichmentRegistry, processor, config.out.partitionKey, sentryConfig) { - - override val MaxRecordSize = None - - override val threadLocalGoodSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = - new KafkaSink(goodProducer, config.out.enriched) - } - - override val threadLocalPiiSink: Option[ThreadLocal[Sink]] = piiProducer.flatMap { somePiiProducer => - config.out.pii.map { piiTopicName => - new ThreadLocal[Sink] { - override def initialValue: Sink = - new KafkaSink(somePiiProducer, piiTopicName) - } - } - } - - override val threadLocalBadSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = - new KafkaSink(badProducer, config.out.bad) - } - - /** Never-ending processing loop over source stream. */ - override def run(): Unit = { - val consumer = createConsumer(kafkaConfig.brokers, config.appName) - - log.info(s"Running Kafka consumer group: ${config.appName}.") - log.info(s"Processing raw input Kafka topic: ${config.in.raw}") - - consumer.subscribe(List(config.in.raw).asJava) - while (true) { - val recordValues = consumer - .poll(Duration.ofMillis(100)) // Wait 100 ms if data is not available - .asScala - .toList - .map(_.value) // Get the values - - enrichAndStoreEvents(recordValues) - } - } - - private def createConsumer(brokers: String, groupId: String): KafkaConsumer[String, Array[Byte]] = { - val properties = createProperties(brokers, groupId) - kafkaConfig.consumerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) } - new KafkaConsumer[String, Array[Byte]](properties) - } - - private def createProperties(brokers: String, groupId: String): Properties = { - val props = new Properties() - props.put("bootstrap.servers", brokers) - props.put("group.id", groupId) - props.put("enable.auto.commit", "true") - props.put("auto.commit.interval.ms", "1000") - props.put("auto.offset.reset", "earliest") - props.put("session.timeout.ms", "30000") - props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") - props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") - props - } -} diff --git a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KinesisEnrich.scala b/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KinesisEnrich.scala deleted file mode 100644 index 14899a20e..000000000 --- a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/KinesisEnrich.scala +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.io.File - -import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.io.Source -import cats.Id -import cats.implicits._ -import com.amazonaws.auth._ -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration -import com.amazonaws.services.dynamodbv2.model.{AttributeValue, ScanRequest} -import com.amazonaws.services.dynamodbv2.document.{DynamoDB, Item} -import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.iglu.core._ -import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.enrich.common.utils.{BlockerF, JsonUtils, ShiftExecution} -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import io.circe.Json -import io.circe.syntax._ -import config._ -import model.{Credentials, DualCloudCredentialsPair, Kinesis, NoCredentials, SentryConfig, StreamsConfig} -import sources.KinesisSource -import utils.{clockProvider, getAWSCredentialsProvider} - -/** The main entry point for Stream Enrich for Kinesis. */ -object KinesisEnrich extends Enrich { - - val DynamoDBRegex = "^dynamodb:([^/]*)/([^/]*)/([^/]*)$".r - private val regexMsg = "'file:[filename]' or 'dynamodb:[region/table/key]'" - - def main(args: Array[String]): Unit = { - val trackerSource = for { - config <- parseConfig(args) - (enrichConfig, resolverArg, enrichmentsArg, forceDownload) = config - credsWithRegion <- enrichConfig.streams.sourceSink match { - case k: Kinesis => - ( - DualCloudCredentialsPair(k.aws, k.gcp.fold[Credentials](NoCredentials)(identity)), - k.region - ).asRight - case _ => "Configured source/sink is not Kinesis".asLeft - } - (credentials, awsRegion) = credsWithRegion - client <- parseClient(resolverArg)(credsWithRegion._1.aws) - enrichmentsConf <- parseEnrichmentRegistry(enrichmentsArg, client)(credsWithRegion._1.aws) - _ <- cacheFiles( - enrichmentsConf, - forceDownload, - credentials.aws, - credentials.gcp, - Option(awsRegion) - ) - enrichmentRegistry <- EnrichmentRegistry.build[Id](enrichmentsConf, BlockerF.noop, ShiftExecution.noop).value - tracker = enrichConfig.monitoring.map(c => SnowplowTracking.initializeTracker(c.snowplow)) - adapterRegistry = new AdapterRegistry(prepareRemoteAdapters(enrichConfig.remoteAdapters)) - processor = Processor(generated.BuildInfo.name, generated.BuildInfo.version) - source <- getSource( - enrichConfig.streams, - enrichConfig.sentry, - client, - adapterRegistry, - enrichmentRegistry, - tracker, - processor - ) - } yield (tracker, source) - - trackerSource match { - case Left(e) => - System.err.println(e) - System.exit(1) - case Right((tracker, source)) => - tracker.foreach(SnowplowTracking.initializeSnowplowTracking) - source.run() - } - } - - override def getSource( - streamsConfig: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, sources.Source] = - KinesisSource.createAndInitialize( - streamsConfig, - sentryConfig, - client, - adapterRegistry, - enrichmentRegistry, - tracker, - processor - ) - - override lazy val parser: scopt.OptionParser[FileConfig] = - new scopt.OptionParser[FileConfig](generated.BuildInfo.name) with FileConfigOptions { - head(generated.BuildInfo.name, generated.BuildInfo.version) - help("help") - version("version") - configOption() - opt[String]("resolver") - .required() - .valueName("") - .text(s"Iglu resolver file, $regexMsg") - .action((r: String, c: FileConfig) => c.copy(resolver = r)) - .validate(_ match { - case FilepathRegex(_) | DynamoDBRegex(_, _, _) => success - case _ => failure(s"Resolver doesn't match accepted uris: $regexMsg") - }) - opt[String]("enrichments") - .optional() - .valueName("") - .text(s"Directory of enrichment configuration JSONs, $regexMsg") - .action((e: String, c: FileConfig) => c.copy(enrichmentsDir = Some(e))) - .validate(_ match { - case FilepathRegex(_) | DynamoDBRegex(_, _, _) => success - case _ => failure(s"Enrichments directory doesn't match accepted uris: $regexMsg") - }) - forceCachedFilesDownloadOption() - } - - override def extractResolver(resolverArgument: String)(implicit creds: Credentials): Either[String, String] = - resolverArgument match { - case FilepathRegex(filepath) => - val file = new File(filepath) - if (file.exists) Source.fromFile(file).mkString.asRight - else "Iglu resolver configuration file \"%s\" does not exist".format(filepath).asLeft - case DynamoDBRegex(region, table, key) => - for { - provider <- getAWSCredentialsProvider(creds) - resolver <- lookupDynamoDBResolver(provider, region, table, key) - } yield resolver - case _ => s"Resolver argument [$resolverArgument] must match $regexMsg".asLeft - } - - /** - * Fetch configuration from DynamoDB, assumes the primary key is "id" and the configuration key is - * "json" - * @param provider aws credentials provider - * @param region DynamoDB region, e.g. "eu-west-1" - * @param table DynamoDB table containing the resolver - * @param key The value of the primary key for the configuration - * @return The JSON stored in DynamoDB - */ - private def lookupDynamoDBResolver( - provider: AWSCredentialsProvider, - region: String, - table: String, - key: String - ): Either[String, String] = { - val dynamoDBClient = AmazonDynamoDBClientBuilder - .standard() - .withCredentials(provider) - .withEndpointConfiguration(new EndpointConfiguration(getDynamodbEndpoint(region), region)) - .build() - val dynamoDB = new DynamoDB(dynamoDBClient) - for { - // getTable doesn't involve any IO apparently so it's safe to chain - item <- Option(dynamoDB.getTable(table).getItem("id", key)) - .fold(s"Key $key doesn't exist in DynamoDB table $table".asLeft[Item])(_.asRight[String]) - json <- Option(item.getString("json")) - .fold(s"""Field "json" not found at key $key in DynamoDB table $table""".asLeft[String])( - _.asRight[String] - ) - } yield json - } - - override def extractEnrichmentConfigs(enrichmentArg: Option[String])(implicit creds: Credentials): Either[String, Json] = { - val jsons: Either[String, List[String]] = enrichmentArg - .map { - case FilepathRegex(dir) => - new File(dir).listFiles - .filter(_.getName.endsWith(".json")) - .map(scala.io.Source.fromFile(_).mkString) - .toList - .asRight - case DynamoDBRegex(region, table, keyNamePrefix) => - for { - provider <- getAWSCredentialsProvider(creds) - enrichmentList = lookupDynamoDBEnrichments(provider, region, table, keyNamePrefix) - enrichments <- enrichmentList match { - case Nil => s"No enrichments found with prefix $keyNamePrefix".asLeft - case js => js.asRight - } - } yield enrichments - case other => s"Enrichments argument [$other] must match $regexMsg".asLeft - } - .getOrElse(Nil.asRight) - - val schemaKey = SchemaKey( - "com.snowplowanalytics.snowplow", - "enrichments", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ) - - jsons - .flatMap(_.map(JsonUtils.extractJson).sequence[EitherS, Json]) - .map(jsons => SelfDescribingData[Json](schemaKey, Json.fromValues(jsons)).asJson) - } - - /** - * Get a list of enrichment JSONs from DynamoDB - * @param provider aws credentials provider - * @param region DynamoDB region, e.g. "eu-west-1" - * @param table - * @param keyNamePrefix Primary key prefix, e.g. "enrichments-" - * @return List of JSONs - */ - private def lookupDynamoDBEnrichments( - provider: AWSCredentialsProvider, - region: String, - table: String, - keyNamePrefix: String - ): List[String] = { - val dynamoDBClient = AmazonDynamoDBClientBuilder - .standard() - .withCredentials(provider) - .withEndpointConfiguration(new EndpointConfiguration(getDynamodbEndpoint(region), region)) - .build() - - // Each scan can only return up to 1MB - // See http://techtraits.com/cloud/nosql/2012/06/27/Amazon-DynamoDB--Understanding-Query-and-Scan-operations/ - @tailrec - def partialScan( - sofar: List[Map[String, String]], - lastEvaluatedKey: java.util.Map[String, AttributeValue] = null - ): List[Map[String, String]] = { - val scanRequest = new ScanRequest().withTableName(table) - scanRequest.setExclusiveStartKey(lastEvaluatedKey) - val lastResult = dynamoDBClient.scan(scanRequest) - val combinedResults = sofar ++ - lastResult.getItems.asScala.map(_.asScala.toMap.mapValues(_.getS)) - lastResult.getLastEvaluatedKey match { - case null => combinedResults - case startKey => partialScan(combinedResults, startKey) - } - } - val allItems = partialScan(Nil) - allItems - .filter { item => - item.get("id") match { - case Some(value) if value.startsWith(keyNamePrefix) => true - case _ => false - } - } - .flatMap(_.get("json")) - } - - private def getDynamodbEndpoint(region: String): String = - region match { - case cn @ "cn-north-1" => s"https://dynamodb.$cn.amazonaws.com.cn" - case _ => s"https://dynamodb.$region.amazonaws.com" - } -} diff --git a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KinesisSink.scala b/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KinesisSink.scala deleted file mode 100644 index 6332f846e..000000000 --- a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KinesisSink.scala +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow -package enrich -package stream -package sinks - -import java.nio.ByteBuffer -import java.nio.charset.StandardCharsets.UTF_8 - -import scala.collection.JavaConverters._ -import scala.concurrent.{Await, Future} -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.duration._ -import scala.util.control.NonFatal - -import cats.Id -import cats.syntax.either._ -import com.amazonaws.services.kinesis.model._ -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration -import com.amazonaws.services.kinesis.{AmazonKinesis, AmazonKinesisClientBuilder} - -import model._ -import scalatracker.Tracker -import utils.getAWSCredentialsProvider - -/** KinesisSink companion object with factory method */ -object KinesisSink { - def validate(kinesisConfig: Kinesis, streamName: String): Either[String, Unit] = - for { - provider <- getAWSCredentialsProvider(kinesisConfig.aws) - endpointConfiguration = new EndpointConfiguration( - kinesisConfig.streamEndpoint, - kinesisConfig.region - ) - client = AmazonKinesisClientBuilder - .standard() - .withCredentials(provider) - .withEndpointConfiguration(endpointConfiguration) - .build() - _ <- streamExists(client, streamName) - .leftMap(_.getMessage) - .ensure(s"Kinesis stream $streamName doesn't exist")(_ == true) - } yield () - - /** - * Check whether a Kinesis stream exists - * @param name Name of the stream - * @return Whether the stream exists - */ - private def streamExists(client: AmazonKinesis, name: String): Either[Throwable, Boolean] = - Either.catchNonFatal { - val describeStreamResult = client.describeStream(name) - val status = describeStreamResult.getStreamDescription.getStreamStatus - status == "ACTIVE" || status == "UPDATING" - } -} - -/** Kinesis Sink for Scala enrichment */ -class KinesisSink( - client: AmazonKinesis, - backoffPolicy: KinesisBackoffPolicyConfig, - buffer: BufferConfig, - streamName: String, - tracker: Option[Tracker[Id]] -) extends Sink { - - /** Kinesis records must not exceed 1MB */ - private val MaxBytes = 1000000L - - private val maxBackoff = backoffPolicy.maxBackoff - private val minBackoff = backoffPolicy.minBackoff - private val randomGenerator = new java.util.Random() - - val ByteThreshold = buffer.byteLimit - val RecordThreshold = buffer.recordLimit - val TimeThreshold = buffer.timeLimit - var nextRequestTime = 0L - - /** - * Object to store events while waiting for the ByteThreshold, RecordThreshold, or TimeThreshold to be reached - */ - object EventStorage { - // Each complete batch is the contents of a single PutRecords API call - var completeBatches = List[List[(ByteBuffer, String)]]() - // The batch currently under constructon - var currentBatch = List[(ByteBuffer, String)]() - // Length of the current batch - var eventCount = 0 - // Size in bytes of the current batch - var byteCount = 0 - - /** - * Finish work on the current batch and create a new one. - */ - def sealBatch(): Unit = { - completeBatches = currentBatch :: completeBatches - eventCount = 0 - byteCount = 0 - currentBatch = Nil - } - - /** - * Add a new event to the current batch. - * If this would take the current batch above ByteThreshold bytes, - * first seal the current batch. - * If this takes the current batch up to RecordThreshold records, - * seal the current batch and make a new batch. - * - * @param event New event - */ - def addEvent(event: (ByteBuffer, String)): Unit = { - val newBytes = event._1.capacity - - if (newBytes >= MaxBytes) { - val original = new String(event._1.array, UTF_8) - log.error(s"Dropping record with size $newBytes bytes: [$original]") - } else { - - if (byteCount + newBytes >= ByteThreshold) - sealBatch() - - byteCount += newBytes - - eventCount += 1 - currentBatch = event :: currentBatch - - if (eventCount == RecordThreshold) - sealBatch() - } - } - - /** - * Reset everything. - */ - def clear(): Unit = { - completeBatches = Nil - currentBatch = Nil - eventCount = 0 - byteCount = 0 - } - } - - /** - * Side-effecting function to store the EnrichedEvent - * to the given output stream. - * - * EnrichedEvent takes the form of a tab-delimited - * String until such time as https://github.com/snowplow/snowplow/issues/211 - * is implemented. - * - * This method blocks until the request has finished. - * - * @param events List of events together with their partition keys - * @return whether to send the stored events to Kinesis - */ - override def storeEnrichedEvents(events: List[(String, String)]): Boolean = { - val wrappedEvents = events.map(e => ByteBuffer.wrap(e._1.getBytes(UTF_8)) -> e._2) - wrappedEvents.foreach(EventStorage.addEvent(_)) - if (!EventStorage.currentBatch.isEmpty && System.currentTimeMillis() > nextRequestTime) { - nextRequestTime = System.currentTimeMillis() + TimeThreshold - true - } else - !EventStorage.completeBatches.isEmpty - } - - /** - * Blocking method to send all stored records to Kinesis - * Splits the stored records into smaller batches (by byte size or record number) if necessary - */ - override def flush(): Unit = { - EventStorage.sealBatch() - // Send events in the order they were received - EventStorage.completeBatches.reverse.foreach(b => sendBatch(b.reverse)) - EventStorage.clear() - } - - /** - * Send a single batch of events in one blocking PutRecords API call - * Loop until all records have been sent successfully - * Cannot be made tail recursive (http://stackoverflow.com/questions/8233089/why-wont-scala-optimize-tail-call-with-try-catch) - * - * @param batch Events to send - */ - def sendBatch(batch: List[(ByteBuffer, String)]): Unit = - if (!batch.isEmpty) { - log.info(s"Writing ${batch.size} records to Kinesis stream $streamName") - var unsentRecords = batch - var backoffTime = minBackoff - var sentBatchSuccessfully = false - var attemptNumber = 0 - while (!sentBatchSuccessfully) { - attemptNumber += 1 - - val putData = for { - p <- multiPut(streamName, unsentRecords) - } yield p - - try { - val results = Await.result(putData, 10.seconds).getRecords.asScala.toList - val failurePairs = unsentRecords zip results filter { _._2.getErrorMessage != null } - log.info( - s"Successfully wrote ${unsentRecords.size - failurePairs.size} out of ${unsentRecords.size} records" - ) - if (failurePairs.nonEmpty) { - val (failedRecords, failedResults) = failurePairs.unzip - unsentRecords = failedRecords - logErrorsSummary(getErrorsSummary(failedResults)) - backoffTime = getNextBackoff(backoffTime) - log.error(s"Retrying all failed records in $backoffTime milliseconds...") - - val err = s"Failed to send ${failurePairs.size} events" - val putSize: Long = unsentRecords.foldLeft(0L)((a, b) => a + b._1.capacity) - - tracker match { - case Some(t) => - SnowplowTracking.sendFailureEvent( - t, - "PUT Failure", - err, - streamName, - "snowplow-stream-enrich", - attemptNumber.toLong, - putSize - ) - case _ => None - } - - Thread.sleep(backoffTime) - } else - sentBatchSuccessfully = true - } catch { - case NonFatal(f) => - backoffTime = getNextBackoff(backoffTime) - log.error(s"Writing failed.", f) - log.error(s" + Retrying in $backoffTime milliseconds...") - - val putSize: Long = unsentRecords.foldLeft(0L)((a, b) => a + b._1.capacity) - - tracker match { - case Some(t) => - SnowplowTracking.sendFailureEvent( - t, - "PUT Failure", - f.toString, - streamName, - "snowplow-stream-enrich", - attemptNumber.toLong, - putSize - ) - case _ => None - } - - Thread.sleep(backoffTime) - } - } - } - - private def multiPut(name: String, batch: List[(ByteBuffer, String)]): Future[PutRecordsResult] = - Future { - val putRecordsRequest = { - val prr = new PutRecordsRequest() - prr.setStreamName(name) - val putRecordsRequestEntryList = batch.map { - case (b, s) => - val prre = new PutRecordsRequestEntry() - prre.setPartitionKey(s) - prre.setData(b) - prre - } - prr.setRecords(putRecordsRequestEntryList.asJava) - prr - } - client.putRecords(putRecordsRequest) - } - - private[sinks] def getErrorsSummary(badResponses: List[PutRecordsResultEntry]): Map[String, (Long, String)] = - badResponses.foldLeft(Map[String, (Long, String)]())((counts, r) => - if (counts.contains(r.getErrorCode)) - counts + (r.getErrorCode -> (counts(r.getErrorCode)._1 + 1 -> r.getErrorMessage)) - else - counts + (r.getErrorCode -> ((1, r.getErrorMessage))) - ) - - private[sinks] def logErrorsSummary(errorsSummary: Map[String, (Long, String)]): Unit = - for ((errorCode, (count, sampleMessage)) <- errorsSummary) - log.error( - s"$count records failed with error code ${errorCode}. Example error message: ${sampleMessage}" - ) - - /** - * How long to wait before sending the next request - * - * @param lastBackoff The previous backoff time - * @return Minimum of maxBackoff and a random number between minBackoff and three times lastBackoff - */ - private def getNextBackoff(lastBackoff: Long): Long = { - val offset: Long = (randomGenerator.nextDouble() * (lastBackoff * 3 - minBackoff)).toLong - val sum: Long = minBackoff + offset - sum min maxBackoff - } -} diff --git a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KinesisSource.scala b/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KinesisSource.scala deleted file mode 100644 index d1df45dcd..000000000 --- a/modules/stream/kinesis/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KinesisSource.scala +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import java.net.InetAddress -import java.util.{List, UUID} - -import scala.util.control.Breaks._ -import scala.collection.JavaConverters._ -import scala.util.control.NonFatal - -import cats.Id -import cats.syntax.either._ -import com.amazonaws.auth.AWSCredentialsProvider -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration -import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder -import com.amazonaws.services.kinesis.clientlibrary.interfaces._ -import com.amazonaws.services.kinesis.clientlibrary.exceptions._ -import com.amazonaws.services.kinesis.clientlibrary.lib.worker._ -import com.amazonaws.services.kinesis.model.Record -import com.amazonaws.services.kinesis.metrics.impl.NullMetricsFactory -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.scalatracker.Tracker - -import model.{Kinesis, SentryConfig, StreamsConfig} -import sinks._ -import utils.getAWSCredentialsProvider - -/** KinesisSource companion object with factory method */ -object KinesisSource { - def createAndInitialize( - config: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, KinesisSource] = - for { - kinesisConfig <- config.sourceSink match { - case c: Kinesis => c.asRight - case _ => "Configured source/sink is not Kinesis".asLeft - } - emitPii = utils.emitPii(enrichmentRegistry) - _ <- KinesisSink.validate(kinesisConfig, config.out.enriched) - _ <- utils.validatePii(emitPii, config.out.pii) - _ <- KinesisSink.validate(kinesisConfig, config.out.bad) - provider <- getAWSCredentialsProvider(kinesisConfig.aws) - } yield new KinesisSource( - client, - adapterRegistry, - enrichmentRegistry, - tracker, - processor, - config, - kinesisConfig, - sentryConfig, - provider - ) -} - -/** Source to read events from a Kinesis stream */ -class KinesisSource private ( - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor, - config: StreamsConfig, - kinesisConfig: Kinesis, - sentryConfig: Option[SentryConfig], - provider: AWSCredentialsProvider -) extends Source(client, adapterRegistry, enrichmentRegistry, processor, config.out.partitionKey, sentryConfig) { - - override val MaxRecordSize = Some(1000000) - - private val kClient = { - val endpointConfiguration = - new EndpointConfiguration(kinesisConfig.streamEndpoint, kinesisConfig.region) - AmazonKinesisClientBuilder - .standard() - .withCredentials(provider) - .withEndpointConfiguration(endpointConfiguration) - .build() - } - - override val threadLocalGoodSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = - new KinesisSink( - kClient, - kinesisConfig.backoffPolicy, - config.buffer, - config.out.enriched, - tracker - ) - } - override val threadLocalPiiSink: Option[ThreadLocal[Sink]] = { - val emitPii = utils.emitPii(enrichmentRegistry) - utils - .validatePii(emitPii, config.out.pii) - .toOption - .flatMap { _ => - config.out.pii.map { piiStreamName => - new ThreadLocal[Sink] { - override def initialValue: Sink = - new KinesisSink( - kClient, - kinesisConfig.backoffPolicy, - config.buffer, - piiStreamName, - tracker - ) - } - } - } - } - - override val threadLocalBadSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = - new KinesisSink(kClient, kinesisConfig.backoffPolicy, config.buffer, config.out.bad, tracker) - } - - /** Never-ending processing loop over source stream. */ - override def run(): Unit = { - val workerId = InetAddress.getLocalHost().getCanonicalHostName() + ":" + UUID.randomUUID() - log.info("Using workerId: " + workerId) - - val kinesisClientLibConfiguration = { - // https://github.com/awslabs/amazon-kinesis-client/issues/737 - @annotation.nowarn("msg=constructor KinesisClientLibConfiguration in class KinesisClientLibConfiguration is deprecated") - val kclc = new KinesisClientLibConfiguration( - config.appName, - config.in.raw, - provider, - workerId - ).withKinesisEndpoint(kinesisConfig.streamEndpoint) - .withMaxRecords(kinesisConfig.maxRecords) - .withRegionName(kinesisConfig.region) - // If the record list is empty, we still check whether it is time to flush the buffer - .withCallProcessRecordsEvenForEmptyRecordList(true) - .withDynamoDBEndpoint(kinesisConfig.dynamodbEndpoint) - - val position = InitialPositionInStream.valueOf(kinesisConfig.initialPosition) - kinesisConfig.timestamp.right.toOption - .filter(_ => position == InitialPositionInStream.AT_TIMESTAMP) - .map(kclc.withTimestampAtInitialPositionInStream(_)) - .getOrElse(kclc.withInitialPositionInStream(position)) - } - - log.info(s"Running: ${config.appName}.") - log.info(s"Processing raw input stream: ${config.in.raw}") - - val rawEventProcessorFactory = new RawEventProcessorFactory() - val worker = kinesisConfig.disableCloudWatch match { - case Some(true) => - new Worker.Builder() - .recordProcessorFactory(rawEventProcessorFactory) - .config(kinesisClientLibConfiguration) - .metricsFactory(new NullMetricsFactory()) - .build() - case _ => - new Worker.Builder() - .recordProcessorFactory(rawEventProcessorFactory) - .config(kinesisClientLibConfiguration) - .build() - } - - worker.run() - } - - // Factory needed by the Amazon Kinesis Consumer library to - // create a processor. - class RawEventProcessorFactory extends IRecordProcessorFactory { - override def createProcessor: IRecordProcessor = new RawEventProcessor() - } - - // Process events from a Kinesis stream. - class RawEventProcessor extends IRecordProcessor { - private var kinesisShardId: String = _ - - // Backoff and retry settings. - private val BACKOFF_TIME_IN_MILLIS = 3000L - private val NUM_RETRIES = 10 - - override def initialize(shardId: String) = { - log.info("Initializing record processor for shard: " + shardId) - this.kinesisShardId = shardId - } - - override def processRecords(records: List[Record], checkpointer: IRecordProcessorCheckpointer) = { - - if (!records.isEmpty) - log.info(s"Processing ${records.size} records from $kinesisShardId") - val shouldCheckpoint = processRecordsWithRetries(records) - - if (shouldCheckpoint) - checkpoint(checkpointer) - } - - private def processRecordsWithRetries(records: List[Record]): Boolean = - try enrichAndStoreEvents(records.asScala.map(_.getData.array).toList) - catch { - case NonFatal(e) => - // TODO: send an event when something goes wrong here - log.error(s"Caught throwable while processing records $records", e) - false - } - - override def shutdown(checkpointer: IRecordProcessorCheckpointer, reason: ShutdownReason) = { - log.info(s"Shutting down record processor for shard: $kinesisShardId") - if (reason == ShutdownReason.TERMINATE) - checkpoint(checkpointer) - } - - private def checkpoint(checkpointer: IRecordProcessorCheckpointer) = { - log.info(s"Checkpointing shard $kinesisShardId") - breakable { - for (i <- 0 to NUM_RETRIES - 1) { - try { - checkpointer.checkpoint() - break - } catch { - case se: ShutdownException => - log.error("Caught shutdown exception, skipping checkpoint.", se) - break - case e: ThrottlingException => - if (i >= (NUM_RETRIES - 1)) - log.error(s"Checkpoint failed after ${i + 1} attempts.", e) - else - log.info( - s"Transient issue when checkpointing - attempt ${i + 1} of " - + NUM_RETRIES, - e - ) - case e: InvalidStateException => - log.error( - "Cannot save checkpoint to the DynamoDB table used by " + - "the Amazon Kinesis Client Library.", - e - ) - break - } - Thread.sleep(BACKOFF_TIME_IN_MILLIS) - } - } - } - } -} diff --git a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/NsqEnrich.scala b/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/NsqEnrich.scala deleted file mode 100644 index 662338bbc..000000000 --- a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/NsqEnrich.scala +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import cats.Id -import cats.syntax.either._ -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import io.circe.Json - -import config.FileConfig -import model.{Credentials, SentryConfig, StreamsConfig} -import sources.NsqSource - -/** The main entry point for Stream Enrich for NSQ. */ -object NsqEnrich extends Enrich { - - def main(args: Array[String]): Unit = run(args) - - def getSource( - streamsConfig: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, sources.Source] = - NsqSource - .create(streamsConfig, sentryConfig, client, adapterRegistry, enrichmentRegistry, processor) - .leftMap(_.getMessage) - - override val parser: scopt.OptionParser[FileConfig] = localParser - - override def extractResolver(resolverArgument: String)(implicit creds: Credentials): Either[String, String] = - localResolverExtractor(resolverArgument) - - override def extractEnrichmentConfigs(enrichmentArg: Option[String])(implicit creds: Credentials): Either[String, Json] = - localEnrichmentConfigsExtractor(enrichmentArg) -} diff --git a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/NsqSink.scala b/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/NsqSink.scala deleted file mode 100644 index fd9ee0306..000000000 --- a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/NsqSink.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sinks - -import java.nio.charset.StandardCharsets.UTF_8 - -import scala.collection.JavaConverters._ - -import cats.syntax.either._ -import com.snowplowanalytics.client.nsq.NSQProducer - -import model.Nsq - -/** NsqSink companion object with factory method */ -object NsqSink { - def validateAndCreateProducer(nsqConfig: Nsq): Either[Throwable, NSQProducer] = - new NSQProducer().addAddress(nsqConfig.host, nsqConfig.port).asRight -} - -/** - * NSQSink for Scala enrichment - */ -class NsqSink(nsqProducer: NSQProducer, topicName: String) extends Sink { - val producer = nsqProducer.start() - - /** - * @param events Sequence of enriched events and (unused) partition keys - * @return Whether to checkpoint - */ - override def storeEnrichedEvents(events: List[(String, String)]): Boolean = { - val msgList = events.unzip._1.map(_.getBytes(UTF_8)).asJava - producer.produceMulti(topicName, msgList) - !events.isEmpty - } - - override def flush(): Unit = () -} diff --git a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/NsqSource.scala b/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/NsqSource.scala deleted file mode 100644 index dd03672f7..000000000 --- a/modules/stream/nsq/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/NsqSource.scala +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ - -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import cats.Id -import cats.syntax.either._ -import com.snowplowanalytics.client.nsq._ -import com.snowplowanalytics.client.nsq.callbacks._ -import com.snowplowanalytics.client.nsq.exceptions.NSQException -import com.snowplowanalytics.client.nsq.lookup.DefaultNSQLookup -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry - -import model.{Nsq, SentryConfig, StreamsConfig} -import sinks.{NsqSink, Sink} - -/** NsqSource companion object with factory method */ -object NsqSource { - def create( - config: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor - ): Either[Throwable, NsqSource] = - for { - nsqConfig <- config.sourceSink match { - case c: Nsq => c.asRight - case _ => new IllegalArgumentException("Configured source/sink is not Nsq").asLeft - } - goodProducer <- NsqSink.validateAndCreateProducer(nsqConfig) - emitPii = utils.emitPii(enrichmentRegistry) - _ <- utils - .validatePii(emitPii, config.out.pii) - .leftMap(new IllegalArgumentException(_)) - piiProducer <- config.out.pii match { - case Some(_) => NsqSink.validateAndCreateProducer(nsqConfig).map(Some(_)) - case None => None.asRight - } - badProducer <- NsqSink.validateAndCreateProducer(nsqConfig) - } yield new NsqSource( - goodProducer, - piiProducer, - badProducer, - client, - adapterRegistry, - enrichmentRegistry, - processor, - config, - nsqConfig, - sentryConfig - ) -} - -/** Source to read raw events from NSQ. */ -class NsqSource private ( - goodProducer: NSQProducer, - piiProducer: Option[NSQProducer], - badProducer: NSQProducer, - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor, - config: StreamsConfig, - nsqConfig: Nsq, - sentryConfig: Option[SentryConfig] -) extends Source(client, adapterRegistry, enrichmentRegistry, processor, config.out.partitionKey, sentryConfig) { - - override val MaxRecordSize = None - - override val threadLocalGoodSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = new NsqSink(goodProducer, config.out.enriched) - } - - override val threadLocalPiiSink: Option[ThreadLocal[Sink]] = piiProducer.flatMap { somePiiProducer => - config.out.pii.map { piiTopicName => - new ThreadLocal[Sink] { - override def initialValue: Sink = new NsqSink(somePiiProducer, piiTopicName) - } - } - } - - override val threadLocalBadSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = new NsqSink(badProducer, config.out.bad) - } - - /** Consumer will be started to wait new message. */ - override def run(): Unit = { - - val nsqCallback = new NSQMessageCallback { - override def message(msg: NSQMessage): Unit = { - val bytes = msg.getMessage() - enrichAndStoreEvents(List(bytes)) match { - case true => msg.finished() - case false => log.error(s"Error while enriching the event") - } - } - } - - val errorCallback = new NSQErrorCallback { - override def error(e: NSQException): Unit = - log.error(s"Exception while consuming topic ${config.in.raw}", e) - } - - // use NSQLookupd - val lookup = new DefaultNSQLookup - lookup.addLookupAddress(nsqConfig.lookupHost, nsqConfig.lookupPort) - val consumer = new NSQConsumer( - lookup, - config.in.raw, - nsqConfig.rawChannel, - nsqCallback, - new NSQConfig(), - errorCallback - ) - consumer.start() - () - } -} diff --git a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/StdinEnrich.scala b/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/StdinEnrich.scala deleted file mode 100644 index 0821d3456..000000000 --- a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/StdinEnrich.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream - -import cats.Id -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import io.circe.Json - -import config.FileConfig -import model.{Credentials, SentryConfig, StreamsConfig} -import sources.{Source, StdinSource} - -/** The main entry point for Stream Enrich for stdin/out. */ -object StdinEnrich extends Enrich { - - def main(args: Array[String]): Unit = run(args) - - override def getSource( - streamsConfig: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]], - processor: Processor - ): Either[String, Source] = - StdinSource.create( - streamsConfig, - sentryConfig, - client, - adapterRegistry, - enrichmentRegistry, - processor - ) - - override val parser: scopt.OptionParser[FileConfig] = localParser - - override def extractResolver(resolverArgument: String)(implicit creds: Credentials): Either[String, String] = - localResolverExtractor(resolverArgument) - - override def extractEnrichmentConfigs(enrichmentArg: Option[String])(implicit creds: Credentials): Either[String, Json] = - localEnrichmentConfigsExtractor(enrichmentArg) -} diff --git a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StderrSink.scala b/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StderrSink.scala deleted file mode 100644 index 69f246b80..000000000 --- a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StderrSink.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sinks - -/** Stdout Sink for Scala enrichment */ -class StderrSink() extends Sink { - - /** - * Side-effecting function to store the EnrichedEvent - * to the given output stream. - * EnrichedEvent takes the form of a tab-delimited - * String until such time as https://github.com/snowplow/snowplow/issues/211 - * is implemented. - * @param events Sequence of enriched events and (unused) partition keys - * @return Whether to checkpoint - */ - override def storeEnrichedEvents(events: List[(String, String)]): Boolean = { - events.foreach(e => System.err.println(e._1)) - !events.isEmpty - } - - override def flush(): Unit = () -} diff --git a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StdoutSink.scala b/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StdoutSink.scala deleted file mode 100644 index 3b96b2773..000000000 --- a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/StdoutSink.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.stream -package sinks - -/** Stdout Sink for Scala enrichment */ -class StdoutSink() extends Sink { - - /** - * Side-effecting function to store the EnrichedEvent - * to the given output stream. - * EnrichedEvent takes the form of a tab-delimited - * String until such time as https://github.com/snowplow/snowplow/issues/211 - * is implemented. - * @param events Sequence of enriched events and (unused) partition keys - * @return Whether to checkpoint - */ - override def storeEnrichedEvents(events: List[(String, String)]): Boolean = { - events.foreach(e => println(e._1)) - !events.isEmpty - } - - override def flush(): Unit = () -} diff --git a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/StdinSource.scala b/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/StdinSource.scala deleted file mode 100644 index 45da1aa77..000000000 --- a/modules/stream/stdin/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/StdinSource.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2013-2022 Snowplow Analytics Ltd. - * All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache - * License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. - * - * See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. - */ - -package com.snowplowanalytics.snowplow.enrich.stream -package sources - -import org.apache.commons.codec.binary.Base64 - -import cats.Id -import cats.syntax.either._ -import com.snowplowanalytics.iglu.client.IgluCirceClient -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry - -import model.{SentryConfig, Stdin, StreamsConfig} -import sinks.{Sink, StderrSink, StdoutSink} - -/** StdinSource companion object with factory method */ -object StdinSource { - def create( - config: StreamsConfig, - sentryConfig: Option[SentryConfig], - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor - ): Either[String, StdinSource] = - for { - _ <- config.sourceSink match { - case Stdin(_, _, _) => ().asRight - case _ => "Configured source/sink is not Stdin".asLeft - } - } yield new StdinSource( - client, - adapterRegistry, - enrichmentRegistry, - processor, - config.out.partitionKey, - sentryConfig - ) -} - -/** Source to decode raw events (in base64) from stdin. */ -class StdinSource private ( - client: IgluCirceClient[Id], - adapterRegistry: AdapterRegistry, - enrichmentRegistry: EnrichmentRegistry[Id], - processor: Processor, - partitionKey: String, - sentryConfig: Option[SentryConfig] -) extends Source(client, adapterRegistry, enrichmentRegistry, processor, partitionKey, sentryConfig) { - - override val MaxRecordSize = None - - override val threadLocalGoodSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = new StdoutSink() - } - override val threadLocalPiiSink: Option[ThreadLocal[Sink]] = Some(new ThreadLocal[Sink] { - override def initialValue: Sink = new StdoutSink() - }) - - override val threadLocalBadSink: ThreadLocal[Sink] = new ThreadLocal[Sink] { - override def initialValue: Sink = new StderrSink() - } - - /** Never-ending processing loop over source stream. */ - override def run(): Unit = - for (ln <- scala.io.Source.stdin.getLines) { - val bytes = Base64.decodeBase64(ln) - enrichAndStoreEvents(List(bytes)) - } -} diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index 3a54d1a6c..1f293816c 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -44,38 +44,6 @@ object BuildSettings { description := "Common functionality for enriching raw Snowplow events" ) - lazy val streamCommonProjectSettings = projectSettings ++ Seq( - name := "snowplow-stream-enrich", - moduleName := "snowplow-stream-enrich", - description := "Common functionality for legacy streaming enrich applications", - buildInfoKeys := Seq[BuildInfoKey](organization, name, version, description), - buildInfoPackage := "com.snowplowanalytics.snowplow.enrich.stream.generated" - ) - - lazy val streamKinesisProjectSettings = projectSettings ++ Seq( - name := "snowplow-stream-enrich-kinesis", - moduleName := "snowplow-stream-enrich-kinesis", - description := "Legacy streaming enrich app with Kinesis source" - ) - - lazy val streamKafkaProjectSettings = projectSettings ++ Seq( - name := "snowplow-stream-enrich-kafka", - moduleName := "snowplow-stream-enrich-kafka", - description := "Legacy streaming enrich app with Kafka source" - ) - - lazy val streamNsqProjectSettings = projectSettings ++ Seq( - name := "snowplow-stream-enrich-nsq", - moduleName := "snowplow-stream-enrich-nsq", - description := "Legacy streaming enrich app with NSQ source" - ) - - lazy val streamStdinProjectSettings = projectSettings ++ Seq( - name := "snowplow-stream-enrich-stdin", - moduleName := "snowplow-stream-enrich-stdin", - description := "Legacy streaming enrich app with stdin source (for testing)" - ) - lazy val commonFs2ProjectSettings = projectSettings ++ Seq( name := "snowplow-enrich-common-fs2", moduleName := "snowplow-enrich-common-fs2", @@ -244,51 +212,6 @@ object BuildSettings { scoverageSettings ++ noParallelTestExecution } - lazy val streamCommonBuildSettings = { - // Project - streamCommonProjectSettings ++ buildSettings ++ - // Tests - scoverageSettings ++ - Seq(coverageMinimumStmtTotal := 20) // override value from scoverageSettings - } - - lazy val streamKinesisBuildSettings = { - // Project - streamKinesisProjectSettings ++ buildSettings ++ - // Build and publish - assemblySettings ++ dockerSettingsFocal ++ - Seq(Docker / packageName := "stream-enrich-kinesis") - } - - lazy val streamKinesisDistrolessBuildSettings = streamKinesisBuildSettings.diff(dockerSettingsFocal) ++ dockerSettingsDistroless - - lazy val streamKafkaBuildSettings = { - // Project - streamKafkaProjectSettings ++ buildSettings ++ - // Build and publish - assemblySettings ++ dockerSettingsFocal ++ - Seq(Docker / packageName := "stream-enrich-kafka") - } - - lazy val streamKafkaDistrolessBuildSettings = streamKafkaBuildSettings.diff(dockerSettingsFocal) ++ dockerSettingsDistroless - - lazy val streamNsqBuildSettings = { - // Project - streamNsqProjectSettings ++ buildSettings ++ - // Build and publish - assemblySettings ++ dockerSettingsFocal ++ - Seq(Docker / packageName := "stream-enrich-nsq") - } - - lazy val streamNsqDistrolessBuildSettings = streamNsqBuildSettings.diff(dockerSettingsFocal) ++ dockerSettingsDistroless - - lazy val streamStdinBuildSettings = { - // Project - streamStdinProjectSettings ++ buildSettings ++ - // Build and publish - assemblySettings - } - lazy val commonFs2BuildSettings = { // Project commonFs2ProjectSettings ++ buildSettings ++ diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 67912f556..c24d450f7 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -46,7 +46,6 @@ object Dependencies { val log4jToSlf4j = "2.18.0" val guava = "28.1-jre" val slf4j = "2.0.3" - val log4j = "2.17.0" // CVE-2021-44228 val thrift = "0.15.0" // override transitive dependency to mitigate security vulnerabilities val sprayJson = "1.3.6" // override transitive dependency to mitigate security vulnerabilities val netty = "4.1.87.Final" // override transitive dependency to mitigate security vulnerabilities @@ -71,15 +70,12 @@ object Dependencies { val awsSdk = "1.12.129" val gcpSdk = "2.14.0" - val kinesisClient = "1.14.5" val awsSdk2 = "2.18.7" val kinesisClient2 = "2.4.3" val kafka = "2.8.2" val mskAuth = "1.1.4" val nsqClient = "1.3.0" val jackson = "2.13.4.2" - val jacksonCbor = "2.13.4" - val config = "1.3.4" val decline = "1.0.0" val fs2 = "2.5.5" @@ -95,8 +91,6 @@ object Dependencies { val eventGen = "0.2.0" val fs2RabbitMQ = "3.0.1" // latest version without CE3 - val scopt = "3.7.1" - val pureconfig = "0.11.0" val snowplowTracker = "1.0.0" val specs2 = "4.17.0" @@ -104,11 +98,8 @@ object Dependencies { val specs2CE = "0.4.1" val scalacheck = "1.14.0" val testcontainers = "0.40.10" - val jinJava = "2.5.0" val parserCombinators = "2.1.1" val sentry = "1.7.30" - val grpc = "1.32.2" - val macros = "2.1.1" val betterMonadicFor = "0.3.1" } @@ -130,16 +121,12 @@ object Dependencies { val yauaa = "nl.basjes.parse.useragent" % "yauaa" % V.yauaa val log4jToSlf4j = "org.apache.logging.log4j" % "log4j-to-slf4j" % V.log4jToSlf4j val guava = "com.google.guava" % "guava" % V.guava - val log4j = "org.apache.logging.log4j" % "log4j-core" % V.log4j - val log4jApi = "org.apache.logging.log4j" % "log4j-api" % V.log4j val circeCore = "io.circe" %% "circe-core" % V.circe val circeGeneric = "io.circe" %% "circe-generic" % V.circe val circeExtras = "io.circe" %% "circe-generic-extras" % V.circe val circeParser = "io.circe" %% "circe-parser" % V.circe val circeLiteral = "io.circe" %% "circe-literal" % V.circe - val circeJava8 = "io.circe" %% "circe-java8" % V.circe - val circeJawn = "io.circe" %% "circe-jawn" % V.circe val circeConfig = "io.circe" %% "circe-config" % V.circeConfig val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics val circeJackson = "io.circe" %% "circe-jackson210" % V.circeJackson @@ -175,26 +162,15 @@ object Dependencies { val parserCombinators = "org.scala-lang.modules" %% "scala-parser-combinators" % V.parserCombinators % Test val testContainersIt = "com.dimafeng" %% "testcontainers-scala-core" % V.testcontainers % IntegrationTest - // Stream val kinesisSdk = "com.amazonaws" % "aws-java-sdk-kinesis" % V.awsSdk val dynamodbSdk = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.awsSdk - val s3Sdk = "com.amazonaws" % "aws-java-sdk-s3" % V.awsSdk - val kinesisClient = "com.amazonaws" % "amazon-kinesis-client" % V.kinesisClient val sts = "com.amazonaws" % "aws-java-sdk-sts" % V.awsSdk % Runtime val gcs = "com.google.cloud" % "google-cloud-storage" % V.gcpSdk val kafkaClients = "org.apache.kafka" % "kafka-clients" % V.kafka val mskAuth = "software.amazon.msk" % "aws-msk-iam-auth" % V.mskAuth % Runtime - val jacksonCbor = "com.fasterxml.jackson.dataformat" % "jackson-dataformat-cbor" % V.jacksonCbor - val config = "com.typesafe" % "config" % V.config - val log4jOverSlf4j = "org.slf4j" % "log4j-over-slf4j" % V.slf4j - val scopt = "com.github.scopt" %% "scopt" % V.scopt - val pureconfig = "com.github.pureconfig" %% "pureconfig" % V.pureconfig val nsqClient = "com.snowplowanalytics" % "nsq-java-client" % V.nsqClient val catsEffect = "org.typelevel" %% "cats-effect" % V.catsEffect - val snowplowTracker = "com.snowplowanalytics" %% "snowplow-scala-tracker-emitter-id" % V.snowplowTracker val scalacheck = "org.scalacheck" %% "scalacheck" % V.scalacheck % Test - val kafka = "org.apache.kafka" %% "kafka" % V.kafka % Test - val jinJava = "com.hubspot.jinjava" % "jinjava" % V.jinJava % Test // FS2 val decline = "com.monovore" %% "decline" % V.decline @@ -268,43 +244,11 @@ object Dependencies { specs2Cats, specs2Scalacheck, specs2Mock, + specs2CE, circeLiteral % Test, parserCombinators ) - val streamCommonDependencies = Seq( - config, - sentry, - slf4j, - log4jOverSlf4j, - s3Sdk, - gcs, - scopt, - pureconfig, - snowplowTracker, - jacksonCbor, - specs2, - scalacheck - ) - - val streamKinesisDependencies = streamCommonDependencies ++ Seq( - kinesisClient, - kinesisSdk, - dynamodbSdk, - sts - ) - - val streamKafkaDependencies = streamCommonDependencies ++ Seq( - kafkaClients, - mskAuth - ) - - val streamNsqDependencies = streamCommonDependencies ++ Seq( - log4j, - log4jApi, - nsqClient - ) - val commonFs2Dependencies = Seq( decline, circeExtras,