Add sourceQuery in metadata cache #988

Merged · 4 commits · Dec 12, 2024 · diff below shows changes from 3 commits
@@ -23,6 +23,8 @@ case class FlintMetadataCache(
     refreshInterval: Option[Int],
     /** Source table names for building the Flint index. */
     sourceTables: Array[String],
+    /** Source query for MV */
+    sourceQuery: Option[String],
     /** Timestamp when Flint index is last refreshed. Unit: milliseconds */
     lastRefreshTime: Option[Long]) {

@@ -64,13 +66,22 @@ object FlintMetadataCache
       case MV_INDEX_TYPE => getSourceTablesFromMetadata(metadata)
       case _ => Array(metadata.source)
     }
+    val sourceQuery = metadata.kind match {
+      case MV_INDEX_TYPE => Some(metadata.source)
+      case _ => None
+    }
     val lastRefreshTime: Option[Long] = metadata.latestLogEntry.flatMap { entry =>
       entry.lastRefreshCompleteTime match {
         case FlintMetadataLogEntry.EMPTY_TIMESTAMP => None
         case timestamp => Some(timestamp)
       }
     }

-    FlintMetadataCache(metadataCacheVersion, refreshInterval, sourceTables, lastRefreshTime)
+    FlintMetadataCache(
+      metadataCacheVersion,
+      refreshInterval,
+      sourceTables,
+      sourceQuery,
+      lastRefreshTime)
   }
 }
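
For intuition, here is a minimal self-contained sketch of the derivation above. The object and method names are illustrative, and it assumes MV_INDEX_TYPE is the string "mv" (as the test fixtures below suggest); the point is that for a materialized view the defining SQL query already travels in metadata.source, so it only needs to be carried into the new field.

// Minimal sketch; the "mv" constant and deriveSourceQuery are assumptions
// for illustration, not the actual Flint API.
object SourceQuerySketch {
  val MV_INDEX_TYPE = "mv"

  // For an MV, source holds the defining SQL query; for skipping and
  // covering indexes it holds a table name, so no query is cached.
  def deriveSourceQuery(kind: String, source: String): Option[String] =
    kind match {
      case MV_INDEX_TYPE => Some(source)
      case _ => None
    }

  def main(args: Array[String]): Unit = {
    println(deriveSourceQuery("mv", "SELECT 1 FROM spark_catalog.default.test_table"))
    // Some(SELECT 1 FROM spark_catalog.default.test_table)
    println(deriveSourceQuery("skipping", "spark_catalog.default.test_table"))
    // None
  }
}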
@@ -5,18 +5,18 @@

 package org.opensearch.flint.spark.metadatacache

-import java.util
+import java.util.{HashMap, Map => JMap}

 import scala.collection.JavaConverters._

 import org.opensearch.client.RequestOptions
+import org.opensearch.client.indices.GetIndexRequest
 import org.opensearch.client.indices.PutMappingRequest
 import org.opensearch.common.xcontent.XContentType
-import org.opensearch.flint.common.metadata.{FlintIndexMetadataService, FlintMetadata}
+import org.opensearch.flint.common.metadata.FlintMetadata
 import org.opensearch.flint.core.{FlintOptions, IRestHighLevelClient}
-import org.opensearch.flint.core.metadata.FlintIndexMetadataServiceBuilder
 import org.opensearch.flint.core.metadata.FlintJsonHelper._
-import org.opensearch.flint.core.storage.{FlintOpenSearchIndexMetadataService, OpenSearchClientUtils}
+import org.opensearch.flint.core.storage.OpenSearchClientUtils

 import org.apache.spark.internal.Logging

@@ -27,27 +27,20 @@ class FlintOpenSearchMetadataCacheWriter(options: FlintOptions)
     extends FlintMetadataCacheWriter
     with Logging {

-  /**
-   * Since metadata cache shares the index mappings _meta field with OpenSearch index metadata
-   * storage, this flag is to allow for preserving index metadata that is already stored in _meta
-   * when updating metadata cache.
-   */
-  private val includeSpec: Boolean =
-    FlintIndexMetadataServiceBuilder
-      .build(options)
-      .isInstanceOf[FlintOpenSearchIndexMetadataService]
-
   override def updateMetadataCache(indexName: String, metadata: FlintMetadata): Unit = {
-    logInfo(s"Updating metadata cache for $indexName with $metadata");
+    logInfo(s"Updating metadata cache for $indexName");
     val osIndexName = OpenSearchClientUtils.sanitizeIndexName(indexName)
     var client: IRestHighLevelClient = null
     try {
       client = OpenSearchClientUtils.createClient(options)
-      val request = new PutMappingRequest(osIndexName)
-      val serialized = serialize(metadata)
-      logInfo(s"Serialized: $serialized")
-      request.source(serialized, XContentType.JSON)
-      client.updateIndexMapping(request, RequestOptions.DEFAULT)
+      val existingMapping = getIndexMapping(client, osIndexName)
+      val metadataCacheProperties = FlintMetadataCache(metadata).toMap.asJava
+      val mergedMapping = mergeMapping(existingMapping, metadataCacheProperties)
+      val serialized = buildJson(builder => {
+        builder.field("_meta", mergedMapping.get("_meta"))
+        builder.field("properties", mergedMapping.get("properties"))
+      })
+      updateIndexMapping(client, osIndexName, serialized)
     } catch {
       case e: Exception =>
         throw new IllegalStateException(
Expand All @@ -59,50 +52,40 @@ class FlintOpenSearchMetadataCacheWriter(options: FlintOptions)
}
}

/**
* Serialize FlintMetadataCache from FlintMetadata. Modified from {@link
* FlintOpenSearchIndexMetadataService}
*/
private[metadatacache] def serialize(metadata: FlintMetadata): String = {
try {
buildJson(builder => {
objectField(builder, "_meta") {
// If _meta is used as index metadata storage, preserve them.
if (includeSpec) {
builder
.field("version", metadata.version.version)
.field("name", metadata.name)
.field("kind", metadata.kind)
.field("source", metadata.source)
.field("indexedColumns", metadata.indexedColumns)

if (metadata.latestId.isDefined) {
builder.field("latestId", metadata.latestId.get)
}
optionalObjectField(builder, "options", metadata.options)
}

optionalObjectField(builder, "properties", buildPropertiesMap(metadata))
}
builder.field("properties", metadata.schema)
})
} catch {
case e: Exception =>
throw new IllegalStateException("Failed to jsonify cache metadata", e)
}
private[metadatacache] def getIndexMapping(
client: IRestHighLevelClient,
osIndexName: String): JMap[String, AnyRef] = {
val request = new GetIndexRequest(osIndexName)
val response = client.getIndex(request, RequestOptions.DEFAULT)
response.getMappings.get(osIndexName).sourceAsMap()
}

/**
* Since _meta.properties is shared by both index metadata and metadata cache, here we merge the
* two maps.
* Merge existing mapping with metadata cache properties. Metadata cache is written into
* _meta.properties field of index mapping.
*/
private def buildPropertiesMap(metadata: FlintMetadata): util.Map[String, AnyRef] = {
val metadataCacheProperties = FlintMetadataCache(metadata).toMap
private def mergeMapping(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wonder is it easier to do merge at existingMapping._meta.properties in place directly?

existingMapping: JMap[String, AnyRef],
metadataCacheProperties: JMap[String, AnyRef]): JMap[String, AnyRef] = {
val meta =
existingMapping.getOrDefault("_meta", Map.empty.asJava).asInstanceOf[JMap[String, AnyRef]]
val properties =
meta.getOrDefault("properties", Map.empty.asJava).asInstanceOf[JMap[String, AnyRef]]
val updatedProperties = new HashMap[String, AnyRef](properties)
updatedProperties.putAll(metadataCacheProperties)
val updatedMeta = new HashMap[String, AnyRef](meta)
updatedMeta.put("properties", updatedProperties)
val result = new HashMap[String, AnyRef](existingMapping)
result.put("_meta", updatedMeta)
result
}

if (includeSpec) {
(metadataCacheProperties ++ metadata.properties.asScala).asJava
} else {
metadataCacheProperties.asJava
}
private[metadatacache] def updateIndexMapping(
client: IRestHighLevelClient,
osIndexName: String,
mappingSource: String): Unit = {
val request = new PutMappingRequest(osIndexName)
request.source(mappingSource, XContentType.JSON)
client.updateIndexMapping(request, RequestOptions.DEFAULT)
}
}
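
To make the read-merge-write step concrete, here is a standalone demo of the mergeMapping semantics over plain Java maps; the sample mapping content is hypothetical. It also bears on the reviewer's question: mutating the nested maps of existingMapping in place would likely work too, but copying each level leaves the fetched mapping untouched, which is the more defensive choice.

// Self-contained demo of the merge semantics; the sample _meta content below
// is made up for illustration.
import java.util.{HashMap, Map => JMap}

import scala.collection.JavaConverters._

object MergeMappingDemo {

  // Same logic as mergeMapping in the diff, reproduced so the demo runs on its own.
  def mergeMapping(
      existingMapping: JMap[String, AnyRef],
      metadataCacheProperties: JMap[String, AnyRef]): JMap[String, AnyRef] = {
    val meta =
      existingMapping.getOrDefault("_meta", Map.empty.asJava).asInstanceOf[JMap[String, AnyRef]]
    val properties =
      meta.getOrDefault("properties", Map.empty.asJava).asInstanceOf[JMap[String, AnyRef]]
    val updatedProperties = new HashMap[String, AnyRef](properties)
    updatedProperties.putAll(metadataCacheProperties)
    val updatedMeta = new HashMap[String, AnyRef](meta)
    updatedMeta.put("properties", updatedProperties)
    val result = new HashMap[String, AnyRef](existingMapping)
    result.put("_meta", updatedMeta)
    result
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical fetched mapping: index spec fields already live under _meta.
    val existing: JMap[String, AnyRef] = Map[String, AnyRef](
      "_meta" -> Map[String, AnyRef](
        "kind" -> "mv",
        "properties" -> Map[String, AnyRef]("custom" -> "kept").asJava).asJava).asJava
    val cacheProps: JMap[String, AnyRef] =
      Map[String, AnyRef]("sourceQuery" -> "SELECT 1 FROM t").asJava

    val merged = mergeMapping(existing, cacheProps)
    println(merged.get("_meta"))
    // e.g. {kind=mv, properties={custom=kept, sourceQuery=SELECT 1 FROM t}}
    // (HashMap iteration order may vary); pre-existing _meta entries survive.
  }
}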
@@ -83,11 +83,13 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
   }

   it should "construct from materialized view FlintMetadata" in {
+    val testQuery =
+      "SELECT 1 FROM spark_catalog.default.test_table UNION SELECT 1 FROM spark_catalog.default.another_table"
     val content =
       s""" {
          |   "_meta": {
          |     "kind": "$MV_INDEX_TYPE",
-         |     "source": "spark_catalog.default.wrong_table",
+         |     "source": "$testQuery",
          |     "options": {
          |       "auto_refresh": "true",
          |       "refresh_interval": "10 Minutes"
@@ -116,6 +118,7 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
     metadataCache.sourceTables shouldBe Array(
       "spark_catalog.default.test_table",
       "spark_catalog.default.another_table")
+    metadataCache.sourceQuery.get shouldBe testQuery
     metadataCache.lastRefreshTime.get shouldBe 1234567890123L
   }

@@ -145,6 +148,7 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
     metadataCache.metadataCacheVersion shouldBe FlintMetadataCache.metadataCacheVersion
     metadataCache.refreshInterval shouldBe empty
     metadataCache.sourceTables shouldBe Array("spark_catalog.default.test_table")
+    metadataCache.sourceQuery shouldBe empty
     metadataCache.lastRefreshTime shouldBe empty
   }
 }
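
Putting the pieces together, the mapping body that updateMetadataCache would PUT for the MV exercised above looks roughly like the following. This is a hand-written illustration in the style of the test fixtures, not output captured from the code; the exact field layout is an assumption and elided values are marked with "...".

// Illustrative shape of the merged mapping only; "..." marks elided values.
val illustrativeMappingBody =
  """{
    |  "_meta": {
    |    "kind": "mv",
    |    "source": "SELECT 1 FROM spark_catalog.default.test_table UNION ...",
    |    "properties": {
    |      "metadataCacheVersion": "...",
    |      "refreshInterval": ...,
    |      "sourceTables": ["spark_catalog.default.test_table", "spark_catalog.default.another_table"],
    |      "sourceQuery": "SELECT 1 FROM spark_catalog.default.test_table UNION ...",
    |      "lastRefreshTime": 1234567890123
    |    }
    |  },
    |  "properties": { "..." : "..." }
    |}""".stripMargin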