From c7c06ab9d2ebbc75526076f02c3caa5f921af676 Mon Sep 17 00:00:00 2001
From: Kyle Moore <github@kylemoore.com>
Date: Thu, 30 Oct 2014 23:02:46 +0900
Subject: [PATCH 1/5] Adding test case for variable-width Japanese characters

---
 .../formatter/PrettyFormatterTest.java        | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
diff --git a/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java b/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java
index 4b2aff01..4a31b88b 100644
--- a/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java
+++ b/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java
@@ -99,6 +99,63 @@ public void shouldFormatAsDesigned() throws IOException {
 
     }
 
+    /**
+     * CJK and other character sets often contain 'fullwidth' characters.
+     * <p>
+     * Fullwidth characters are only counted as a single character but occupy twice
+     * the space of a typical ASCII character when using a fixed-width font.
+     * <p>
+     * For example, in Japanese this text is 2 characters, padded with 5 spaces to match the longer column below:
+     * <ul>
+     * <li>{@code 新機     |}
+     * <li>{@code 123456 |}
+     * </ul>
+     *
+     * <p>
+     * The net result is that '|' and '#' characters will be misaligned when
+     * printing {@code --i18n ja} or a feature containing a mix of normal and fullwidth characters.
+     *
+     * The only way to resolve this is to keep a running tally of half- and full-width characters,
+     * then padding the output with a mix of trailing half- (u+0020) and full-width (u+3000) spaces.
+     * <p>
+     * Repeating the above example, we now have:
+     * <ul>
+     * <li>{@code 新機      |} (2 full-width chars + 6 half-width spaces)
+     * <li>{@code 123456　　|} (6 half-width chars + 2 full-width spaces)
+     * </ul>
+     *
+     * @see "http://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms"
+     *
+     * @throws IOException
+     */
+    @Test
+    public void shouldFormatAsDesignedWithFullWidthCharacters() throws IOException {
+
+        StringBuilder featureBuilder = new StringBuilder();
+        featureBuilder.append("# language: ja\n");
+        featureBuilder.append("機能: PrettyFormatter with Japanese\n");
+        featureBuilder.append("シナリオ: Formmat beautifully\n");
+        featureBuilder.append("もしI have this table:\n");
+        featureBuilder.append("\t|名前|?の値1|\n");
+        featureBuilder.append("\t|ab12ａｂ１２|ﾊﾝｶｸ|\n");
+        featureBuilder.append("ならばshould formatt beautifully.\n");
+        String feature = featureBuilder.toString();
+
+        List<String> lines = doFormatter(feature);
+
+        assertEquals("Formatter produces unexpected quantity of lines. ", 8, lines.size());
+
+        assertEquals("# language: ja", lines.get(0));
+        assertEquals("機能: PrettyFormatter with Japanese", lines.get(1));
+        assertEquals("", lines.get(2));
+        assertEquals("  シナリオ: Formmat beautifully", lines.get(3));
+        assertEquals("    もしI have this table:", lines.get(4));
+        assertEquals("      | 名前　　     | ?の値1   |", lines.get(5));
+        assertEquals("      | ab12ａｂ１２ | ﾊﾝｶｸ　　 |", lines.get(6));
+        assertEquals("    ならばshould formatt beautifully.", lines.get(7));
+
+    }
+
     @Test
     public void shouldAppendOnlyCompleteLinesAndFlushBetween() throws IOException {
 

From 122875864409fb12ccbfc1ee860080a865c7d4e7 Mon Sep 17 00:00:00 2001
From: Kyle Moore <github@kylemoore.com>
Date: Fri, 31 Oct 2014 01:39:29 +0900
Subject: [PATCH 2/5] Fixed pretty formatting for Chinese/Japanese text *
 Passes PrettyFormatterTest#shouldFormatAsDesignedWithFullWidthCharacters *
 Swapped gherkin jar into cucumber-jvm; tested with cucumber -i118n * Results
 look good for zh-CH and ja; ko was still a little bit off * Not crazy about
 using multi-dimensional arrays.  Probably need to think of a better way.

---
 .../gherkin/formatter/PrettyFormatter.java    | 92 +++++++++++++++++--
 1 file changed, 83 insertions(+), 9 deletions(-)

diff --git a/java/src/main/java/gherkin/formatter/PrettyFormatter.java b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
index 060e519d..4ec2fed1 100644
--- a/java/src/main/java/gherkin/formatter/PrettyFormatter.java
+++ b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
@@ -19,6 +19,7 @@
 import gherkin.util.Mapper;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
@@ -27,6 +28,7 @@
 
 import static gherkin.util.FixJava.join;
 import static gherkin.util.FixJava.map;
+import static java.lang.Character.UnicodeBlock;
 
 /**
  * This class pretty prints feature files like they were in the source, only
@@ -48,8 +50,8 @@ public String map(Tag tag) {
     };
     private Formats formats;
     private Match match;
-    private int[][] cellLengths;
-    private int[] maxLengths;
+    private int[][][] cellLengths;
+    private int[][] maxLengths;
     private int rowIndex;
     private List<? extends Row> rows;
     private Integer rowHeight = null;
@@ -296,16 +298,28 @@ private void prepareTable(List<? extends Row> rows) {
             }
         }
 
-        cellLengths = new int[rows.size()][columnCount];
-        maxLengths = new int[columnCount];
+        cellLengths = new int[rows.size()][columnCount][2];
+        maxLengths = new int[columnCount][2];
         for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
             Row row = rows.get(rowIndex);
             final List<String> cells = row.getCells();
             for (int colIndex = 0; colIndex < columnCount; colIndex++) {
                 final String cell = getCellSafely(cells, colIndex);
-                final int length = escapeCell(cell).length();
-                cellLengths[rowIndex][colIndex] = length;
-                maxLengths[colIndex] = Math.max(maxLengths[colIndex], length);
+                final char[] chars = escapeCell(cell).toCharArray();
+                int numNormalChars = 0;
+                int numFullWidthChars = 0;
+                for(char ch : chars) {
+                    if(isFullWidthChar(ch)) {
+                        numFullWidthChars++;
+                    } else {
+                        numNormalChars++;
+                    }
+
+                }
+                cellLengths[rowIndex][colIndex][0] = numNormalChars;
+                cellLengths[rowIndex][colIndex][1] = numFullWidthChars;
+                maxLengths[colIndex][0] = Math.max(maxLengths[colIndex][0], numNormalChars);
+                maxLengths[colIndex][1] = Math.max(maxLengths[colIndex][1], numFullWidthChars);
             }
         }
         rowIndex = 0;
@@ -315,6 +329,57 @@ private String getCellSafely(final List<String> cells, final int colIndex) {
         return (colIndex < cells.size()) ? cells.get(colIndex) : "";
     }
 
+    private static final List<UnicodeBlock> LATIN = Arrays.asList(
+            UnicodeBlock.BASIC_LATIN,
+            UnicodeBlock.LATIN_1_SUPPLEMENT,
+            UnicodeBlock.LATIN_EXTENDED_A,
+            UnicodeBlock.LATIN_EXTENDED_B,
+            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
+    );
+
+    private static final List<UnicodeBlock> CJK = Arrays.asList(
+            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
+            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
+            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
+            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
+            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
+            UnicodeBlock.CJK_COMPATIBILITY,
+            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
+            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
+            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
+            UnicodeBlock.HANGUL_SYLLABLES,
+            UnicodeBlock.HANGUL_JAMO,
+            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
+            UnicodeBlock.KATAKANA,
+            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
+            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
+            UnicodeBlock.HIRAGANA
+    );
+
+    /**
+     * The range U+FF61~U+FFDC is a special case; do not count these as full-width
+     *
+     * @param c a character
+     * @return True if half-width katakana (ｱｶｻﾀ等), false otherwise
+     */
+    private boolean isHalfWidthKatakana(char c) {
+        return '\uFF61' <= c &&
+               '\uFFDC' >= c;
+    }
+
+    private boolean isFullWidthChar(char c) {
+        final UnicodeBlock block = UnicodeBlock.of(c);
+
+        if(LATIN.contains(block)) {
+            return false;
+        }
+        if(CJK.contains(block) && !isHalfWidthKatakana(c)) {
+            return true;
+        }
+
+        return false;
+    }
+
     public void row(List<CellResult> cellResults) {
         StringBuilder buffer = new StringBuilder();
         Row row = rows.get(rowIndex);
@@ -358,7 +423,11 @@ public void row(List<CellResult> cellResults) {
             }
             Format format = formats.get(status);
             buffer.append(format.text(cellText));
-            int padding = maxLengths[colIndex] - cellLengths[rowIndex][colIndex];
+            int padding = Math.max(maxLengths[colIndex][0] - cellLengths[rowIndex][colIndex][0], 0);
+            int fullWidthPadding = Math.max(maxLengths[colIndex][1] - cellLengths[rowIndex][colIndex][1], 0);
+            // rpad with full-width spaces first, then normal spaces.
+            // the order is not significant but this way prevents padding of single spaces, followed by full-width, then followed with the space and pipe
+            padSpace(buffer, fullWidthPadding, true);
             padSpace(buffer, padding);
             if (colIndex < maxLengths.length - 1) {
                 buffer.append(" | ");
@@ -440,8 +509,13 @@ private void calculateLocationIndentations() {
     }
 
     private void padSpace(StringBuilder buffer, int indent) {
+        padSpace(buffer, indent, false);
+    }
+
+    private void padSpace(StringBuilder buffer, int indent, boolean useFullWidth) {
+        char whitespace = useFullWidth ? '\u3000' : ' ';
         for (int i = 0; i < indent; i++) {
-            buffer.append(" ");
+            buffer.append(whitespace);
         }
     }
 

From 017c5146c5017449fa3819d7f480d11a165f23b4 Mon Sep 17 00:00:00 2001
From: Kyle Moore <github@kylemoore.com>
Date: Fri, 31 Oct 2014 01:48:15 +0900
Subject: [PATCH 3/5] Remove unnecessary rounding

---
 java/src/main/java/gherkin/formatter/PrettyFormatter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/src/main/java/gherkin/formatter/PrettyFormatter.java b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
index 4ec2fed1..b85958d3 100644
--- a/java/src/main/java/gherkin/formatter/PrettyFormatter.java
+++ b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
@@ -423,8 +423,8 @@ public void row(List<CellResult> cellResults) {
             }
             Format format = formats.get(status);
             buffer.append(format.text(cellText));
-            int padding = Math.max(maxLengths[colIndex][0] - cellLengths[rowIndex][colIndex][0], 0);
-            int fullWidthPadding = Math.max(maxLengths[colIndex][1] - cellLengths[rowIndex][colIndex][1], 0);
+            int padding = maxLengths[colIndex][0] - cellLengths[rowIndex][colIndex][0];
+            int fullWidthPadding = maxLengths[colIndex][1] - cellLengths[rowIndex][colIndex][1];
             // rpad with full-width spaces first, then normal spaces.
             // the order is not significant but this way prevents padding of single spaces, followed by full-width, then followed with the space and pipe
             padSpace(buffer, fullWidthPadding, true);

From b11feb333d631c78da8882f5bc955239b012553d Mon Sep 17 00:00:00 2001
From: Kyle Moore <github@kylemoore.com>
Date: Sat, 1 Nov 2014 14:36:22 +0900
Subject: [PATCH 4/5] Use descriptive getters/setters instead of
 incomprehensible three-dimensional array references

---
 .../gherkin/formatter/PrettyFormatter.java    | 47 ++++++++++++++++---
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/java/src/main/java/gherkin/formatter/PrettyFormatter.java b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
index b85958d3..6d2d5814 100644
--- a/java/src/main/java/gherkin/formatter/PrettyFormatter.java
+++ b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
@@ -316,10 +316,10 @@ private void prepareTable(List<? extends Row> rows) {
                     }
 
                 }
-                cellLengths[rowIndex][colIndex][0] = numNormalChars;
-                cellLengths[rowIndex][colIndex][1] = numFullWidthChars;
-                maxLengths[colIndex][0] = Math.max(maxLengths[colIndex][0], numNormalChars);
-                maxLengths[colIndex][1] = Math.max(maxLengths[colIndex][1], numFullWidthChars);
+                setNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex], numNormalChars);
+                setNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex], numFullWidthChars);
+                updateMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex], numNormalChars);
+                updateMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex], numFullWidthChars);
             }
         }
         rowIndex = 0;
@@ -329,6 +329,38 @@ private String getCellSafely(final List<String> cells, final int colIndex) {
         return (colIndex < cells.size()) ? cells.get(colIndex) : "";
     }
 
+    private int getNumberOfNormalWidthCharsInCell(int[] cellLength) {
+        return cellLength[0];
+    }
+
+    private void setNumberOfNormalWidthCharsInCell(int[] cellLength, int numNormalChars) {
+        cellLength[0] = numNormalChars;
+    }
+
+    private int getNumberOfFullWidthCharsInCell(int[] cellLength) {
+        return cellLength[1];
+    }
+
+    private void setNumberOfFullWidthCharsInCell(int[] cellLength, int numFullWidthChars) {
+        cellLength[1] = numFullWidthChars;
+    }
+
+    private int getMaxLengthOfNormalWidthCharsForColumn(int[] maxLength) {
+        return maxLength[0];
+    }
+
+    private void updateMaxLengthOfNormalWidthCharsForColumn(int[] maxLength, int numNormalChars) {
+        maxLength[0] = Math.max(maxLength[0], numNormalChars);
+    }
+
+    private int getMaxLengthOfFullWidthCharsForColumn(int[] maxLength) {
+        return maxLength[1];
+    }
+
+    private void updateMaxLengthOfFullWidthCharsForColumn(int[] maxLength, int numFullWidthChars) {
+        maxLength[1] = Math.max(maxLength[1], numFullWidthChars);
+    }
+
     private static final List<UnicodeBlock> LATIN = Arrays.asList(
             UnicodeBlock.BASIC_LATIN,
             UnicodeBlock.LATIN_1_SUPPLEMENT,
@@ -423,10 +455,11 @@ public void row(List<CellResult> cellResults) {
             }
             Format format = formats.get(status);
             buffer.append(format.text(cellText));
-            int padding = maxLengths[colIndex][0] - cellLengths[rowIndex][colIndex][0];
-            int fullWidthPadding = maxLengths[colIndex][1] - cellLengths[rowIndex][colIndex][1];
+            int padding = getMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex]);
+            int fullWidthPadding = getMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex]);
             // rpad with full-width spaces first, then normal spaces.
-            // the order is not significant but this way prevents padding of single spaces, followed by full-width, then followed with the space and pipe
+            // the order is not significant but this way prevents inconsistend padding
+            // such as: single spaces, followed by full-width spaces, then followed with a final single space and pipe delimiter
             padSpace(buffer, fullWidthPadding, true);
             padSpace(buffer, padding);
             if (colIndex < maxLengths.length - 1) {

From 7ce982077b9d5fb0efdca3004ada8a9450f25152 Mon Sep 17 00:00:00 2001
From: Kyle Moore <github@kylemoore.com>
Date: Sat, 1 Nov 2014 14:36:54 +0900
Subject: [PATCH 5/5] Simply full-width detection logic

---
 .../gherkin/formatter/PrettyFormatter.java     | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/java/src/main/java/gherkin/formatter/PrettyFormatter.java b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
index 6d2d5814..57e8c9c6 100644
--- a/java/src/main/java/gherkin/formatter/PrettyFormatter.java
+++ b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
@@ -399,17 +399,17 @@ private boolean isHalfWidthKatakana(char c) {
                '\uFFDC' >= c;
     }
 
+    /**
+     * The majority of characters passed in will be in the LATIN collection.
+     * Therefore we check there first, short-circuit and return as soon as possible.
+     *
+     * @param c The char to evaluate
+     * @return True if it is a Full-Width character, false otherwise
+     */
     private boolean isFullWidthChar(char c) {
         final UnicodeBlock block = UnicodeBlock.of(c);
-
-        if(LATIN.contains(block)) {
-            return false;
-        }
-        if(CJK.contains(block) && !isHalfWidthKatakana(c)) {
-            return true;
-        }
-
-        return false;
+        return(!LATIN.contains(block) &&
+              (CJK.contains(block) && !isHalfWidthKatakana(c)));
     }
 
     public void row(List<CellResult> cellResults) {