Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sourceQuery in metadata cache #988

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ case class FlintMetadataCache(
refreshInterval: Option[Int],
/** Source table names for building the Flint index. */
sourceTables: Array[String],
/** Source query for MV */
sourceQuery: Option[String],
/** Timestamp when Flint index is last refreshed. Unit: milliseconds */
lastRefreshTime: Option[Long]) {

Expand Down Expand Up @@ -64,13 +66,22 @@ object FlintMetadataCache {
case MV_INDEX_TYPE => getSourceTablesFromMetadata(metadata)
case _ => Array(metadata.source)
}
val sourceQuery = metadata.kind match {
case MV_INDEX_TYPE => Some(metadata.source)
case _ => None
}
val lastRefreshTime: Option[Long] = metadata.latestLogEntry.flatMap { entry =>
entry.lastRefreshCompleteTime match {
case FlintMetadataLogEntry.EMPTY_TIMESTAMP => None
case timestamp => Some(timestamp)
}
}

FlintMetadataCache(metadataCacheVersion, refreshInterval, sourceTables, lastRefreshTime)
FlintMetadataCache(
metadataCacheVersion,
refreshInterval,
sourceTables,
sourceQuery,
lastRefreshTime)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,13 @@ class FlintOpenSearchMetadataCacheWriter(options: FlintOptions)
.isInstanceOf[FlintOpenSearchIndexMetadataService]

override def updateMetadataCache(indexName: String, metadata: FlintMetadata): Unit = {
logInfo(s"Updating metadata cache for $indexName with $metadata");
logInfo(s"Updating metadata cache for $indexName");
val osIndexName = OpenSearchClientUtils.sanitizeIndexName(indexName)
var client: IRestHighLevelClient = null
try {
client = OpenSearchClientUtils.createClient(options)
val request = new PutMappingRequest(osIndexName)
val serialized = serialize(metadata)
logInfo(s"Serialized: $serialized")
request.source(serialized, XContentType.JSON)
client.updateIndexMapping(request, RequestOptions.DEFAULT)
} catch {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,13 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
}

it should "construct from materialized view FlintMetadata" in {
val testQuery =
"SELECT 1 FROM spark_catalog.default.test_table UNION SELECT 1 FROM spark_catalog.default.another_table"
val content =
s""" {
| "_meta": {
| "kind": "$MV_INDEX_TYPE",
| "source": "spark_catalog.default.wrong_table",
| "source": "$testQuery",
| "options": {
| "auto_refresh": "true",
| "refresh_interval": "10 Minutes"
Expand Down Expand Up @@ -116,6 +118,7 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
metadataCache.sourceTables shouldBe Array(
"spark_catalog.default.test_table",
"spark_catalog.default.another_table")
metadataCache.sourceQuery.get shouldBe testQuery
metadataCache.lastRefreshTime.get shouldBe 1234567890123L
}

Expand Down Expand Up @@ -145,6 +148,7 @@ class FlintMetadataCacheSuite extends AnyFlatSpec with Matchers {
metadataCache.metadataCacheVersion shouldBe FlintMetadataCache.metadataCacheVersion
metadataCache.refreshInterval shouldBe empty
metadataCache.sourceTables shouldBe Array("spark_catalog.default.test_table")
metadataCache.sourceQuery shouldBe empty
metadataCache.lastRefreshTime shouldBe empty
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

package org.opensearch.flint.spark.metadatacache

import java.util.{Base64, List}
import java.util.Base64

import scala.collection.JavaConverters._

Expand Down Expand Up @@ -139,7 +139,7 @@ class FlintOpenSearchMetadataCacheWriterITSuite extends FlintSparkSuite with Mat
}

Seq(SKIPPING_INDEX_TYPE, COVERING_INDEX_TYPE).foreach { case kind =>
test(s"write metadata cache to $kind index mappings with source tables") {
test(s"write metadata cache to $kind index mappings with source tables for non mv index") {
val content =
s""" {
| "_meta": {
Expand All @@ -164,10 +164,11 @@ class FlintOpenSearchMetadataCacheWriterITSuite extends FlintSparkSuite with Mat
.get("sourceTables")
.asInstanceOf[java.util.ArrayList[String]] should contain theSameElementsAs Array(
testTable)
properties should not contain key("sourceQuery")
}
}

test("write metadata cache with source tables from index metadata") {
test("write metadata cache with source tables and query from mv index metadata") {
val mv = FlintSparkMaterializedView(
"spark_catalog.default.mv",
s"SELECT 1 FROM $testTable",
Expand All @@ -182,9 +183,12 @@ class FlintOpenSearchMetadataCacheWriterITSuite extends FlintSparkSuite with Mat
properties
.get("sourceTables")
.asInstanceOf[java.util.ArrayList[String]] should contain theSameElementsAs Array(testTable)
properties
.get("sourceQuery")
.asInstanceOf[String] shouldBe s"SELECT 1 FROM $testTable"
}

test("write metadata cache with source tables from deserialized metadata") {
test("write metadata cache with source tables and query from deserialized mv metadata") {
val testTable2 = "spark_catalog.default.metadatacache_test2"
val content =
s""" {
Expand Down Expand Up @@ -272,11 +276,7 @@ class FlintOpenSearchMetadataCacheWriterITSuite extends FlintSparkSuite with Mat
flintMetadataCacheWriter.updateMetadataCache(testFlintIndex, metadata)

val properties = flintIndexMetadataService.getIndexMetadata(testFlintIndex).properties
properties should have size 3
properties should contain allOf (Entry(
"metadataCacheVersion",
FlintMetadataCache.metadataCacheVersion),
Entry("lastRefreshTime", testLastRefreshCompleteTime))
properties should not contain key("refreshInterval")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change because it's actually what this case is supposed to test. Test for other fields is covered in other cases

}

test("exclude last refresh time in metadata cache when index has not been refreshed") {
Expand All @@ -287,9 +287,7 @@ class FlintOpenSearchMetadataCacheWriterITSuite extends FlintSparkSuite with Mat
flintMetadataCacheWriter.updateMetadataCache(testFlintIndex, metadata)

val properties = flintIndexMetadataService.getIndexMetadata(testFlintIndex).properties
properties should have size 2
properties should contain(
Entry("metadataCacheVersion", FlintMetadataCache.metadataCacheVersion))
properties should not contain key("lastRefreshTime")
}

test("write metadata cache to index mappings and preserve other index metadata") {
Expand Down
Loading