From e30d765691150711aca627bcc36f53865b47ce33 Mon Sep 17 00:00:00 2001
From: Andrew Byrd <andrew@fastmail.net>
Date: Wed, 3 Apr 2024 11:10:03 +0900
Subject: [PATCH] add check for specific wrong number of csv columns

---
 .../analysis/results/CsvResultWriter.java     |  2 ++
 .../analysis/results/PathCsvResultWriter.java | 20 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/src/main/java/com/conveyal/analysis/results/CsvResultWriter.java b/src/main/java/com/conveyal/analysis/results/CsvResultWriter.java
index ca0cf09c5..1bd9ff9ac 100644
--- a/src/main/java/com/conveyal/analysis/results/CsvResultWriter.java
+++ b/src/main/java/com/conveyal/analysis/results/CsvResultWriter.java
@@ -100,6 +100,8 @@ public void writeOneWorkResult (RegionalWorkResult workResult) throws Exception
         // CsvWriter is not threadsafe and multiple threads may call this, so after values are generated,
         // the actual writing is synchronized (TODO confirm)
         // Is result row generation slow enough to bother synchronizing only the following block?
+        // This first dimension check is specific to each subclass. The check in the loop below is more general,
+        // applying to all subclasses (after the subclass-specific rowValues method may have added some columns).
         checkDimension(workResult);
         Iterable<String[]> rows = rowValues(workResult);
         synchronized (this) {
diff --git a/src/main/java/com/conveyal/analysis/results/PathCsvResultWriter.java b/src/main/java/com/conveyal/analysis/results/PathCsvResultWriter.java
index 0dadb4337..6a7c9ffc7 100644
--- a/src/main/java/com/conveyal/analysis/results/PathCsvResultWriter.java
+++ b/src/main/java/com/conveyal/analysis/results/PathCsvResultWriter.java
@@ -42,6 +42,21 @@ public Iterable<String[]> rowValues (RegionalWorkResult workResult) {
         return rows;
     }
 
+    // Around 2024-04 we wanted to expand the number of CSV columns and needed to update the dimension checks below.
+    // The number of columns is checked twice, once in this specific CsvResultWriter implementation and once in the
+    // abstract superclass.
+    // We don't want to introduce a column count check with a tolerance applied separately to each row, because that
+    // would not catch a whole class of problems where the worker instances are not producing a consistent number
+    // of columns across origins.
+    // We do ideally want to allow experimental workers that add an unknown number of columns, but they should add those
+    // columns to every row. This requires some kind of negotiated, flexible protocol between the backend and workers,
+    // or some system where the first worker response received sets expectations and all other responses must match.
+    // We thought this through and decided it was too big a change to introduce immediately.
+    // So we only accept one specific quantity of CSV columns, but fail with a very specific message when we see a
+    // number of CSV columns that we recognize as coming from an obsolete worker version. Breaking backward
+    // compatibility is acceptable here because CSV paths are still considered an experimental feature.
+    // Ideally this very case-specific check and error message will be removed when some more general system is added.
+
     @Override
     protected void checkDimension (RegionalWorkResult workResult) {
         // Path CSV output only supports a single freeform pointset for now.
@@ -53,6 +68,11 @@ protected void checkDimension (RegionalWorkResult workResult) {
         for (ArrayList<String[]> oneDestination : workResult.pathResult) {
             // Number of distinct paths per destination is variable, don't validate it.
             for (String[] iterationDetails : oneDestination) {
+                if (iterationDetails.length == 10) {
+                    throw new IllegalArgumentException(
+                            "Please use worker version newer than v7.1. CSV columns in path results have changed."
+                    );
+                }
                 checkDimension(workResult, "columns", iterationDetails.length, PathResult.DATA_COLUMNS.length);
             }
         }