From 551d74d2a1d9ad276694ea3e924a2c5913cb5b21 Mon Sep 17 00:00:00 2001 From: Grigory Date: Tue, 3 May 2022 17:39:39 -0400 Subject: [PATCH] Make OptimizeSpatial outputLocation optional (#20) --- .../analyticstoolbox/index/H3_ToParent.scala | 2 +- .../index/h3/H3CoreV3Producer.scala | 16 ++++++++++++ .../spark/spatial/OptimizeSpatial.scala | 9 ++++++- .../spark/spatial/package.scala | 26 ++++++++++--------- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/com/carto/analyticstoolbox/index/H3_ToParent.scala b/core/src/main/scala/com/carto/analyticstoolbox/index/H3_ToParent.scala index 6e693c0..847e09e 100644 --- a/core/src/main/scala/com/carto/analyticstoolbox/index/H3_ToParent.scala +++ b/core/src/main/scala/com/carto/analyticstoolbox/index/H3_ToParent.scala @@ -1,5 +1,5 @@ /* - * Copyright 2021 Azavea + * Copyright 2022 Azavea * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/core/src/main/scala/com/carto/analyticstoolbox/index/h3/H3CoreV3Producer.scala b/core/src/main/scala/com/carto/analyticstoolbox/index/h3/H3CoreV3Producer.scala index b8112c6..f48ce44 100644 --- a/core/src/main/scala/com/carto/analyticstoolbox/index/h3/H3CoreV3Producer.scala +++ b/core/src/main/scala/com/carto/analyticstoolbox/index/h3/H3CoreV3Producer.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2022 Azavea + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.carto.analyticstoolbox.index.h3 import com.uber.h3core.H3CoreV3 diff --git a/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/OptimizeSpatial.scala b/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/OptimizeSpatial.scala index c891ce0..fce7a5e 100644 --- a/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/OptimizeSpatial.scala +++ b/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/OptimizeSpatial.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.types.BinaryType object OptimizeSpatial extends Serializable { - def apply( sourceTable: String, outputTable: String, @@ -123,4 +122,12 @@ object OptimizeSpatial extends Serializable { math.max(dfc.head.getLong(0) * 10 / 2, blockSizeDefault) } + + /** Optimization function defaults. */ + val DEFAULT_OUTPUT_LOCATION: String = "/FileStore/tables/carto_default/" + val DEFAULT_GEOM_COLUMN: String = "geom" + val DEFAULT_ZOOM: Int = 8 + val DEFAULT_BLOCK_SIZE: Long = 2097000 + val DEFAULT_COMPRESSION: String = "lz4" + val DEFAULT_MAX_RECORDS_PER_FILE: Int = 0 } diff --git a/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/package.scala b/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/package.scala index 7e7fa89..5986304 100644 --- a/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/package.scala +++ b/core/src/main/scala/com/carto/analyticstoolbox/spark/spatial/package.scala @@ -20,26 +20,28 @@ import org.apache.spark.sql.SparkSession package object spatial extends Serializable { implicit class SparkSessionOps(val ssc: SparkSession) extends AnyVal { + import OptimizeSpatial._ + def optimizeSpatial( sourceTable: String, outputTable: String, - outputLocation: String, - geomColumn: String = "geom", - zoom: Int = 8, - blockSizeDefault: Long = 2097000, - compression: String = "lz4", - maxRecordsPerFile: Int = 0 + outputLocation: String = DEFAULT_OUTPUT_LOCATION, + geomColumn: String = DEFAULT_GEOM_COLUMN, + zoom: Int = DEFAULT_ZOOM, + blockSizeDefault: Long = DEFAULT_BLOCK_SIZE, + compression: String = DEFAULT_COMPRESSION, + maxRecordsPerFile: Int = DEFAULT_MAX_RECORDS_PER_FILE ): Unit = OptimizeSpatial(sourceTable, outputTable, outputLocation, geomColumn, zoom, blockSizeDefault, compression, maxRecordsPerFile)(ssc) def optimizeSpatialManual( sourceTable: String, outputTable: String, - outputLocation: String, - geomColumn: String = "geom", - zoom: Int = 8, - blockSize: Long = 2097000, - compression: String = "lz4", - maxRecordsPerFile: Int = 0 + outputLocation: String = DEFAULT_OUTPUT_LOCATION, + geomColumn: String = DEFAULT_GEOM_COLUMN, + zoom: Int = DEFAULT_ZOOM, + blockSize: Long = DEFAULT_BLOCK_SIZE, + compression: String = DEFAULT_COMPRESSION, + maxRecordsPerFile: Int = DEFAULT_MAX_RECORDS_PER_FILE ): Unit = OptimizeSpatial(sourceTable, outputTable, outputLocation, geomColumn, zoom, _ => blockSize, compression, maxRecordsPerFile)(ssc) } }