new ppl-spark logical plan translation #31

Closed
wants to merge 26 commits into from

Changes from 13 commits
26 commits
62c12cc
adding support for containerized flint with spark / Livy docker-compo…
YANG-DB Aug 23, 2023
9e6ecfc
adding support for containerized flint with spark / Livy docker-compo…
YANG-DB Aug 23, 2023
0808ea5
adding support for containerized flint with spark / Livy docker-compo…
YANG-DB Sep 1, 2023
1b2ae06
Merge branch 'main' into containerize_flint
YANG-DB Sep 1, 2023
91defa0
adding support for containerized flint with spark / Livy docker-compo…
YANG-DB Sep 1, 2023
0febc09
update ppl ast builder
YANG-DB Sep 1, 2023
18cd83f
add ppl ast components
YANG-DB Sep 1, 2023
605f1bf
populate ppl test suite covering different types of PPL queries
YANG-DB Sep 1, 2023
d54530d
update additional tests
YANG-DB Sep 1, 2023
72dc5f7
separate ppl-spark code into a dedicated module
YANG-DB Sep 6, 2023
d953b19
add ppl translation of simple filter and data-type literal expression
YANG-DB Sep 6, 2023
9fce31e
remove unused ppl ast builder
YANG-DB Sep 6, 2023
a299bdf
add log-plan test results validation
YANG-DB Sep 6, 2023
019f690
add support for multiple table selection using union
YANG-DB Sep 6, 2023
0c7ccec
add support for multiple table selection using union
YANG-DB Sep 7, 2023
14fa7e5
update sbt with new IT test suite for PPL module
YANG-DB Sep 7, 2023
d55b774
update ppl IT suite test
YANG-DB Sep 7, 2023
8bbe0d9
update ppl IT suite dependencies
YANG-DB Sep 7, 2023
af065f7
add tests for ppl IT with
YANG-DB Sep 7, 2023
5819dc7
update literal transformations according to catalyst's convention
YANG-DB Sep 7, 2023
7db7213
separate unit-tests into a dedicated file per test category
YANG-DB Sep 7, 2023
32573ab
add IT tests for additional filters
YANG-DB Sep 7, 2023
eec0e4a
mark unsatisfied tests as ignored until supporting code is ready
YANG-DB Sep 7, 2023
3f9d9d1
add README.md design and implementation details
YANG-DB Sep 8, 2023
2ae65f4
Merge branch 'main' into containerize_flint
YANG-DB Oct 4, 2023
6e3437d
update docker-compose.yml and different docs relating to the local spa…
YANG-DB Oct 4, 2023
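
The commit log above culminates in translating simple PPL filter queries into Catalyst logical plans. As a rough sketch of the intended output (not the PR's actual builder code; the table name t and field a are placeholders), a PPL query like source=t | where a > 1 | fields a would map to an unresolved Catalyst plan along these lines:

import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation}
import org.apache.spark.sql.catalyst.expressions.{GreaterThan, Literal}
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}

// source=t -- scan the source table; names stay unresolved until Spark's analyzer runs
val relation = UnresolvedRelation(Seq("t"))
// where a > 1 -- the PPL comparison becomes a Catalyst predicate wrapped in a Filter node
val filtered = Filter(GreaterThan(UnresolvedAttribute("a"), Literal(1)), relation)
// fields a -- PPL field projection becomes a Project node over the filtered relation
val plan: LogicalPlan = Project(Seq(UnresolvedAttribute("a")), filtered)

The "multiple table selection using union" commits suggest that a query over several sources would analogously wrap two such relations in a Catalyst Union node.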
3 changes: 3 additions & 0 deletions .env
@@ -0,0 +1,3 @@
# version for opensearch & opensearch-dashboards docker image
VERSION=2.9.0

41 changes: 39 additions & 2 deletions build.sbt
@@ -43,7 +43,7 @@ lazy val commonSettings = Seq(
   Test / test := ((Test / test) dependsOn testScalastyle).value)
 
 lazy val root = (project in file("."))
-  .aggregate(flintCore, flintSparkIntegration)
+  .aggregate(flintCore, flintSparkIntegration, pplSparkIntegration)
   .disablePlugins(AssemblyPlugin)
   .settings(name := "flint", publish / skip := true)
 
@@ -99,10 +99,47 @@ lazy val flintSparkIntegration = (project in file("flint-spark-integration"))
       oldStrategy(x)
     },
     assembly / test := (Test / test).value)
+lazy val pplSparkIntegration = (project in file("ppl-spark-integration"))
+  .enablePlugins(AssemblyPlugin, Antlr4Plugin)
+  .settings(
+    commonSettings,
+    name := "ppl-spark-integration",
+    scalaVersion := scala212,
+    libraryDependencies ++= Seq(
+      "com.amazonaws" % "aws-java-sdk" % "1.12.397" % "provided"
+        exclude ("com.fasterxml.jackson.core", "jackson-databind"),
+      "org.scalactic" %% "scalactic" % "3.2.15" % "test",
+      "org.scalatest" %% "scalatest" % "3.2.15" % "test",
+      "org.scalatest" %% "scalatest-flatspec" % "3.2.15" % "test",
+      "org.scalatestplus" %% "mockito-4-6" % "3.2.15.0" % "test",
+      "com.stephenn" %% "scalatest-json-jsonassert" % "0.2.5" % "test",
+      "com.github.sbt" % "junit-interface" % "0.13.3" % "test"),
+    libraryDependencies ++= deps(sparkVersion),
+    // ANTLR settings
+    Antlr4 / antlr4Version := "4.8",
+    Antlr4 / antlr4PackageName := Some("org.opensearch.flint.spark.ppl"),
+    Antlr4 / antlr4GenListener := true,
+    Antlr4 / antlr4GenVisitor := true,
+    // Assembly settings
+    assemblyPackageScala / assembleArtifact := false,
+    assembly / assemblyOption ~= {
+      _.withIncludeScala(false)
+    },
+    assembly / assemblyMergeStrategy := {
+      case PathList(ps @ _*) if ps.last endsWith ("module-info.class") =>
+        MergeStrategy.discard
+      case PathList("module-info.class") => MergeStrategy.discard
+      case PathList("META-INF", "versions", xs @ _, "module-info.class") =>
+        MergeStrategy.discard
+      case x =>
+        val oldStrategy = (assembly / assemblyMergeStrategy).value
+        oldStrategy(x)
+    },
+    assembly / test := (Test / test).value)
 
 // Test assembly package with integration test.
 lazy val integtest = (project in file("integ-test"))
-  .dependsOn(flintSparkIntegration % "test->test")
+  .dependsOn(flintSparkIntegration % "test->test", pplSparkIntegration % "test->test")
   .settings(
     commonSettings,
     name := "integ-test",
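
With the new module aggregated into the root project, the usual sbt targets apply to it; for example (standard sbt project-scoped invocations implied by the build definition above, not commands spelled out in this PR):

sbt pplSparkIntegration/test        # unit tests for the PPL-to-Catalyst translation module
sbt pplSparkIntegration/assembly    # standalone jar; Scala classes excluded per the assemblyOption above
sbt integtest/test                  # integration suite, which now also depends on pplSparkIntegration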
133 changes: 133 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,133 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
version: '3.9'
x-default-logging: &logging
  driver: "json-file"
  options:
    max-size: "5m"
    max-file: "2"

volumes:
  opensearch-data:

services:
  spark-master:
    image: our-own-apache-spark:3.4.0
    ports:
      - "9090:8080"
      - "7077:7077"
    volumes:
      - ./apps:/opt/spark-apps
      - ./data:/opt/spark-data
    environment:
      - SPARK_LOCAL_IP=spark-master
      - SPARK_WORKLOAD=master
  spark-worker-1:
    image: our-own-apache-spark:3.4.0
    ports:
      - "9091:8080"
      - "7000:7000"
    depends_on:
      - spark-master
    environment:
      - SPARK_MASTER=spark://spark-master:7077
      - SPARK_WORKER_CORES=1
      - SPARK_WORKER_MEMORY=1G
      - SPARK_DRIVER_MEMORY=1G
      - SPARK_EXECUTOR_MEMORY=1G
      - SPARK_WORKLOAD=worker
      - SPARK_LOCAL_IP=spark-worker-1
    volumes:
      - ./apps:/opt/spark-apps
      - ./data:/opt/spark-data
  spark-worker-2:
    image: our-own-apache-spark:3.4.0
    ports:
      - "9092:8080"
      - "7001:7000"
    depends_on:
      - spark-master
    environment:
      - SPARK_MASTER=spark://spark-master:7077
      - SPARK_WORKER_CORES=1
      - SPARK_WORKER_MEMORY=1G
      - SPARK_DRIVER_MEMORY=1G
      - SPARK_EXECUTOR_MEMORY=1G
      - SPARK_WORKLOAD=worker
      - SPARK_LOCAL_IP=spark-worker-2
    volumes:
      - ./apps:/opt/spark-apps
      - ./data:/opt/spark-data

  livy-server:
    container_name: livy_server
    build: ./docker/livy/
    command: ["sh", "-c", "/opt/bitnami/livy/bin/livy-server"]
    user: root
    volumes:
      - type: bind
        source: ./docker/livy/conf/
        target: /opt/bitnami/livy/conf/
      - type: bind
        source: ./docker/livy/target/
        target: /target/
      - type: bind
        source: ./docker/livy/data/
        target: /data/
    ports:
      - '8998:8998'
    networks:
      - net
    depends_on:
      - spark-master
      - spark-worker-1
      - spark-worker-2
  # OpenSearch store - node (not for production - no security - only for test purposes)
  opensearch:
    image: opensearchstaging/opensearch:${VERSION}
    container_name: opensearch
    environment:
      - cluster.name=opensearch-cluster
      - node.name=opensearch
      - discovery.seed_hosts=opensearch
      - cluster.initial_cluster_manager_nodes=opensearch
      - bootstrap.memory_lock=true
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
      - "DISABLE_INSTALL_DEMO_CONFIG=true"
      - "DISABLE_SECURITY_PLUGIN=true"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536
        hard: 65536
    volumes:
      - opensearch-data:/usr/share/opensearch/data # Creates the opensearch-data volume and mounts it into the container
    ports:
      - 9200:9200
      - 9600:9600
    expose:
      - "9200"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health?wait_for_status=yellow"]
      interval: 20s
      timeout: 10s
      retries: 10
  # OpenSearch store - dashboard
  opensearch-dashboards:
    image: opensearchproject/opensearch-dashboards:${VERSION}
    container_name: opensearch-dashboards
    ports:
      - 5601:5601 # Map host port 5601 to container port 5601
    expose:
      - "5601" # Expose port 5601 for web access to OpenSearch Dashboards
    environment:
      OPENSEARCH_HOSTS: '["http://opensearch:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query
    depends_on:
      - opensearch

networks:
  net:
    driver: bridge
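
With a .env file supplying VERSION (see above) and the our-own-apache-spark:3.4.0 image built locally beforehand (the compose file references it but does not build it), a plausible smoke test of the stack uses standard Docker Compose and curl:

docker compose up -d
curl -f "http://localhost:9200/_cluster/health?wait_for_status=yellow"   # same probe the opensearch healthcheck runs
curl -s http://localhost:8998/sessions                                   # Livy REST API answering on the mapped port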
15 changes: 15 additions & 0 deletions docker/livy/Dockerfile
@@ -0,0 +1,15 @@
FROM docker.io/bitnami/spark:2

USER root
ENV LIVY_HOME /opt/bitnami/livy
WORKDIR /opt/bitnami/

RUN install_packages unzip \
    && curl "https://downloads.apache.org/incubator/livy/0.7.1-incubating/apache-livy-0.7.1-incubating-bin.zip" -O \
    && unzip "apache-livy-0.7.1-incubating-bin" \
    && rm -rf "apache-livy-0.7.1-incubating-bin.zip" \
    && mv "apache-livy-0.7.1-incubating-bin" $LIVY_HOME \
    && mkdir $LIVY_HOME/logs \
    && chown -R 1001:1001 $LIVY_HOME

USER 1001
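
Because docker-compose.yml points its livy-server service at this directory via build:, the image can also be rebuilt in isolation with a standard compose command:

docker compose build livy-server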
34 changes: 34 additions & 0 deletions docker/livy/conf/livy-env.sh
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# LIVY ENVIRONMENT VARIABLES
#
# - JAVA_HOME              Java runtime to use. By default use "java" from PATH.
# - HADOOP_CONF_DIR        Directory containing the Hadoop / YARN configuration to use.
# - SPARK_HOME             Spark which you would like to use in Livy.
# - SPARK_CONF_DIR         Optional directory where the Spark configuration lives.
#                          (Default: $SPARK_HOME/conf)
# - LIVY_LOG_DIR           Where log files are stored. (Default: ${LIVY_HOME}/logs)
# - LIVY_PID_DIR           Where the pid file is stored. (Default: /tmp)
# - LIVY_SERVER_JAVA_OPTS  Java Opts for running livy server (You can set jvm related setting here,
#                          like jvm memory/gc algorithm and etc.)
# - LIVY_IDENT_STRING      A name that identifies the Livy server instance, used to generate log file
#                          names. (Default: name of the user starting Livy).
# - LIVY_MAX_LOG_FILES     Max number of log file to keep in the log directory. (Default: 5.)
# - LIVY_NICENESS          Niceness of the Livy server process when running in the background. (Default: 0.)

export SPARK_HOME=/opt/bitnami/spark/
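
Once the container is up, Livy's stock REST API (standard Livy 0.7 behavior, nothing added by this PR) can drive the Spark cluster; for example, opening an interactive Scala session and running a statement against it:

curl -s -X POST -H 'Content-Type: application/json' \
     -d '{"kind": "spark"}' http://localhost:8998/sessions
# once the returned session reaches state "idle" (replace 0 with its id):
curl -s -X POST -H 'Content-Type: application/json' \
     -d '{"code": "sc.parallelize(1 to 10).count()"}' http://localhost:8998/sessions/0/statements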