diff --git a/datasets/pgc/package/PgcDemStripsRaster.cpp b/datasets/pgc/package/PgcDemStripsRaster.cpp index 015c57e7..ea524c26 100644 --- a/datasets/pgc/package/PgcDemStripsRaster.cpp +++ b/datasets/pgc/package/PgcDemStripsRaster.cpp @@ -189,12 +189,13 @@ void PgcDemStripsRaster::getIndexFile(const std::vector* points, s files.push_back(newFile); } } - mlog(INFO, "Found %zu geojson files with %zu points", files.size(), points->size()); /* Remove any duplicate files */ std::sort(files.begin(), files.end()); files.erase(std::unique(files.begin(), files.end()), files.end()); + mlog(INFO, "Found %zu geojson files with %zu points", files.size(), points->size()); + /* If we have only one file, use it as the index file */ if(files.size() == 1) { diff --git a/datasets/pgc/systests/arcticdem_mosaic_serial_vs_batch_perf.lua b/datasets/pgc/systests/arcticdem_mosaic_serial_vs_batch_perf.lua index 34eff2c8..95ce61c7 100644 --- a/datasets/pgc/systests/arcticdem_mosaic_serial_vs_batch_perf.lua +++ b/datasets/pgc/systests/arcticdem_mosaic_serial_vs_batch_perf.lua @@ -36,8 +36,8 @@ local lat = 66.34 -- Arctic Circle latitude local height = 0 -- Always zero for this test local startLon = 100.0 -local stopLon = 160.0 -local maxPoints = 10^6 -- Total number of points +local stopLon = 110.0 +local maxPoints = 10^5 -- Total number of points local lons, lats, heights = generatePointArrays(startLon, stopLon, lat, height, maxPoints) @@ -81,7 +81,7 @@ print(string.format("Serial sampling: %d points read, time: %f, failed reads: %d local batchResults = {} local starttimeBatch = time.latch() -print(string.format("\n------------------------------------\nMosaics bathc reading %d points along arctic circle, longitude range (%.2f to %.2f)\n------------------------------------", maxPoints, startLon, stopLon)) +print(string.format("\n------------------------------------\nMosaics batch reading %d points along arctic circle, longitude range (%.2f to %.2f)\n------------------------------------", maxPoints, startLon, stopLon)) local tbl, err = dem:batchsample(lons, lats, heights) if err ~= 0 then print("Batch sampling failed") @@ -134,7 +134,7 @@ end print("\n------------------------------------\nPerformance Summary\n------------------------------------") print(string.format("Total Points: %d", maxPoints)) print(string.format("Serial Sampling Time: %6.2f seconds", dtimeSerial)) -print(string.format("Batch Sampling Time: %6.2f seconds", dtimeBatch)) +print(string.format("Batch Sampling Time: %6.2f seconds", dtimeBatch)) print(string.format("Speedup (Serial/Batch): %.2fx", dtimeSerial / dtimeBatch)) print(string.format("Failed Points (Serial): %d", totalFailedSerial)) print(string.format("Failed Points (Batch): %d", totalFailedBatch)) diff --git a/datasets/pgc/systests/arcticdem_strips_serial_vs_batch_perf.lua b/datasets/pgc/systests/arcticdem_strips_serial_vs_batch_perf.lua new file mode 100644 index 00000000..7b7cac93 --- /dev/null +++ b/datasets/pgc/systests/arcticdem_strips_serial_vs_batch_perf.lua @@ -0,0 +1,171 @@ +local console = require("console") +local asset = require("asset") +local csv = require("csv") +local json = require("json") + +-- console.monitor:config(core.LOG, core.DEBUG) +-- sys.setlvl(core.LOG, core.DEBUG) + +local assets = asset.loaddir() + +-- Setup -- + +local failedSamples = 0 + +-- Generate arrays for lons, lats, and heights +local function generatePointArrays(startLon, stopLon, lat, height, maxPoints) + local lons = {} + local lats = {} + local heights = {} + + local step = (stopLon - startLon) / (maxPoints - 1) + for i = 1, maxPoints do + table.insert(lons, startLon + (i - 1) * step) + table.insert(lats, lat) + table.insert(heights, height) + end + + return lons, lats, heights +end + +local verbose = true +local failedSamplesSerial = 0 +local failedSamplesBatch = 0 + +local lat = 66.34 -- Arctic Circle latitude +local height = 0 -- Always zero for this test + +local startLon = 100.0 +local stopLon = 110.0 +local maxPoints = 10^5 -- Total number of points + +local lons, lats, heights = generatePointArrays(startLon, stopLon, lat, height, maxPoints) + +print(string.format("\n------------------------------------\nStrips serial reading %d points along arctic circle, longitude range (%.2f to %.2f)\n------------------------------------", maxPoints, startLon, stopLon)) + +-- Capture results from serial sampling +local serialResults = {} +local dem = geo.raster(geo.parms({ asset = "arcticdem-strips", algorithm = "NearestNeighbour", radius = 0, sort_by_index = true })) + +local starttimeSerial = time.latch() +local intervaltime = starttimeSerial +local modulovalue = maxPoints / 20 + +for i = 1, maxPoints do + local tbl, err = dem:sample(lons[i], lats[i], heights[i]) + if err ~= 0 then + failedSamplesSerial = failedSamplesSerial + 1 + print(string.format("Serial: Point: %d, (%.2f, %.2f) ======> FAILED to read", i, lons[i], lats[i])) + table.insert(serialResults, nil) + else + table.insert(serialResults, tbl) + end + + if verbose then + if (i % modulovalue == 0) then + local midtime = time.latch() + local dtime = midtime - intervaltime + local firstSample = tbl[1] + local el = firstSample["value"] + print(string.format("Point: %7d sampled at (%.2f, %.2f), strips: %3d, first strip elevation: %7.2fm, %d points interval time: %5.2f", i, lons[i], lats[i], #tbl, el, modulovalue, dtime)) + intervaltime = time.latch() + end + end +end + +local stoptimeSerial = time.latch() +local dtimeSerial = stoptimeSerial - starttimeSerial +print(string.format("Serial sampling: %d points read, time: %f, failed reads: %d", maxPoints, dtimeSerial, failedSamplesSerial)) + +-- Capture results from batch sampling +local batchResults = {} +local starttimeBatch = time.latch() + +print(string.format("\n------------------------------------\nStrips batch reading %d points along arctic circle, longitude range (%.2f to %.2f)\n------------------------------------", maxPoints, startLon, stopLon)) +local tbl, err = dem:batchsample(lons, lats, heights) +if err ~= 0 then + print("Batch sampling failed") +else + batchResults = tbl +end + +local stoptimeBatch = time.latch() +local dtimeBatch = stoptimeBatch - starttimeBatch +print(string.format("Batch sampling: %d points read, time: %f, failed reads: %d", maxPoints, dtimeBatch, failedSamplesBatch)) + + +-- Helper function to check if two numbers are equal, considering NaN +local function areEqual(num1, num2) + if num1 ~= num1 and num2 ~= num2 then + -- Both are NaN + return true + else + -- Regular comparison + return math.abs(num1 - num2) <= 1e-6 + end +end + + +-- Compare serial and batch results +print("\n------------------------------------\nComparing Serial and Batch Results\n------------------------------------") +local totalMismatches = 0 +local totalFailedSerial = 0 +local totalFailedBatch = 0 + +for i = 1, maxPoints do + local serialSample = serialResults[i] + local batchSample = batchResults[i] + + if (serialSample == nil and batchSample == nil) then + -- Both failed + totalFailedSerial = totalFailedSerial + 1 + totalFailedBatch = totalFailedBatch + 1 + print(string.format("Point %d: Both methods failed", i)) + elseif (serialSample == nil) then + -- Serial failed + totalFailedSerial = totalFailedSerial + 1 + totalMismatches = totalMismatches + 1 + print(string.format("Point %d: Serial failed, Batch succeeded", i)) + elseif (batchSample == nil) then + -- Batch failed + totalFailedBatch = totalFailedBatch + 1 + totalMismatches = totalMismatches + 1 + print(string.format("Point %d: Batch failed, Serial succeeded", i)) + else + -- Compare number of samples + if #serialSample > #batchSample then + totalMismatches = totalMismatches + 1 + print(string.format("Point %d: (%.2f, %.2f), Error: Serial samples (%d) > Batch samples (%d)", i, lons[i], lats[i], #serialSample, #batchSample)) + else + -- Check if each serial sample matches any batch sample + local mismatchFound = false + for j = 1, #serialSample do + local foundMatch = false + for k = 1, #batchSample do + if areEqual(serialSample[j]["value"], batchSample[k]["value"]) then + foundMatch = true + break + end + end + + if not foundMatch then + mismatchFound = true + totalMismatches = totalMismatches + 1 + print(string.format("Point %d, Serial Sample %d: %.6f has no match in Batch samples", i, j, serialSample[j]["value"])) + end + end + end + end +end + +-- Print summary +print("\n------------------------------------\nPerformance Summary\n------------------------------------") +print(string.format("Total Points: %d", maxPoints)) +print(string.format("Serial Sampling Time: %6.2f seconds", dtimeSerial)) +print(string.format("Batch Sampling Time: %6.2f seconds", dtimeBatch)) +print(string.format("Speedup (Serial/Batch): %.2fx", dtimeSerial / dtimeBatch)) +print(string.format("Failed Points (Serial): %d", totalFailedSerial)) +print(string.format("Failed Points (Batch): %d", totalFailedBatch)) +print(string.format("Mismatched Points: %d", totalMismatches)) + +sys.quit() \ No newline at end of file diff --git a/packages/geo/GeoIndexedRasterBatch.cpp b/packages/geo/GeoIndexedRasterBatch.cpp index f2c5c836..c50eeea7 100644 --- a/packages/geo/GeoIndexedRasterBatch.cpp +++ b/packages/geo/GeoIndexedRasterBatch.cpp @@ -367,7 +367,8 @@ void* GeoIndexedRaster::batchReaderThread(void *param) ur->rinfo->fileId, ur->rinfo->elevationBandNum, ur->rinfo->flagsBandNum, - breader->obj->crscb); + breader->obj->crscb, + &breader->obj->bbox); CHECKPTR(raster); @@ -755,6 +756,19 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector& uniqueRa } } +#if 0 + /* For each unique raster, print its name and points in it */ + mlog(DEBUG, "Unique rasters:"); + for(unique_raster_t* ur : uniqueRasters) + { + mlog(DEBUG, "Unique raster: %s", fileDict.get(ur->rinfo->fileId)); + for(point_sample_t& ps : ur->pointSamples) + { + mlog(DEBUG, "Point index: %ld, (%.2lf, %.2lf)", ps.pointIndex, ps.point.getX(), ps.point.getY()); + } + } +#endif + /* Reduce memory usage */ uniqueRasters.shrink_to_fit(); status = true; diff --git a/targets/slideruleearth-aws/Makefile b/targets/slideruleearth-aws/Makefile index ce9e71cd..2b209701 100644 --- a/targets/slideruleearth-aws/Makefile +++ b/targets/slideruleearth-aws/Makefile @@ -340,6 +340,7 @@ STRIPS_PERFORMANCE_TEST ?= $(ROOT)/datasets/pgc/systests/arcticdem_strips_perf. SUBSET_PERFORMANCE_TEST ?= $(ROOT)/scripts/systests/subset_perf_test.lua MOSAICS_SERIAL_VS_BATCH_PERFORMANCE_TEST ?= $(ROOT)/datasets/pgc/systests/arcticdem_mosaic_serial_vs_batch_perf.lua +STRIPS_SERIAL_VS_BATCH_PERFORMANCE_TEST ?= $(ROOT)/datasets/pgc/systests/arcticdem_strips_serial_vs_batch_perf.lua selftest: ## run the self test on the server code $(SLIDERULE_STAGE_DIR)/bin/sliderule $(TEST) @@ -356,9 +357,12 @@ perfstest: ## run strips performance test on the server code perfsubsetest: ## run subset performance test $(SLIDERULE_STAGE_DIR)/bin/sliderule $(SUBSET_PERFORMANCE_TEST) -perfmsbtest: ## run mosaics serial and batch performance test for 1 million points comparing results +perfmsbtest: ## run mosaics serial and batch performance test comparing results $(SLIDERULE_STAGE_DIR)/bin/sliderule $(MOSAICS_SERIAL_VS_BATCH_PERFORMANCE_TEST) +perfssbtest: ## run strips serial and batch performance test comparing results + $(SLIDERULE_STAGE_DIR)/bin/sliderule $(STRIPS_SERIAL_VS_BATCH_PERFORMANCE_TEST) + OCEANEYES_SHARE=/data/AppServer.1 oceaneyes-test: ## run oceaneyes docker run -it --rm -v /data:/data -v $(OCEANEYES_SHARE):/share --name oceaneyes $(ECR)/oceaneyes:$(VERSION) /bin/bash /runner.sh $(OCEANEYES_SHARE)/settings.json