cucumber-attic · DPUkyle · Oct 30, 2014 · Oct 30, 2014 · Oct 30, 2014 · Nov 1, 2014
diff --git a/java/src/main/java/gherkin/formatter/PrettyFormatter.java b/java/src/main/java/gherkin/formatter/PrettyFormatter.java
@@ -19,6 +19,7 @@
 import gherkin.util.Mapper;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
@@ -27,6 +28,7 @@
 
 import static gherkin.util.FixJava.join;
 import static gherkin.util.FixJava.map;
+import static java.lang.Character.UnicodeBlock;
 
 /**
  * This class pretty prints feature files like they were in the source, only
@@ -48,8 +50,8 @@ public String map(Tag tag) {
  };
  private Formats formats;
  private Match match;
- private int[][] cellLengths;
- private int[] maxLengths;
+ private int[][][] cellLengths;
+ private int[][] maxLengths;
  private int rowIndex;
  private List<? extends Row> rows;
  private Integer rowHeight = null;
@@ -296,16 +298,28 @@ private void prepareTable(List<? extends Row> rows) {
  }
  }
 
- cellLengths = new int[rows.size()][columnCount];
- maxLengths = new int[columnCount];
+ cellLengths = new int[rows.size()][columnCount][2];
+ maxLengths = new int[columnCount][2];
  for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
  Row row = rows.get(rowIndex);
  final List<String> cells = row.getCells();
  for (int colIndex = 0; colIndex < columnCount; colIndex++) {
  final String cell = getCellSafely(cells, colIndex);
- final int length = escapeCell(cell).length();
- cellLengths[rowIndex][colIndex] = length;
- maxLengths[colIndex] = Math.max(maxLengths[colIndex], length);
+ final char[] chars = escapeCell(cell).toCharArray();
+ int numNormalChars = 0;
+ int numFullWidthChars = 0;
+ for(char ch : chars) {
+ if(isFullWidthChar(ch)) {
+ numFullWidthChars++;
+ } else {
+ numNormalChars++;
+ }
+
+ }
+ setNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex], numNormalChars);
+ setNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex], numFullWidthChars);
+ updateMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex], numNormalChars);
+ updateMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex], numFullWidthChars);
  }
  }
  rowIndex = 0;
@@ -315,6 +329,89 @@ private String getCellSafely(final List<String> cells, final int colIndex) {
  return (colIndex < cells.size()) ? cells.get(colIndex) : "";
  }
 
+ private int getNumberOfNormalWidthCharsInCell(int[] cellLength) {
+ return cellLength[0];
+ }
+
+ private void setNumberOfNormalWidthCharsInCell(int[] cellLength, int numNormalChars) {
+ cellLength[0] = numNormalChars;
+ }
+
+ private int getNumberOfFullWidthCharsInCell(int[] cellLength) {
+ return cellLength[1];
+ }
+
+ private void setNumberOfFullWidthCharsInCell(int[] cellLength, int numFullWidthChars) {
+ cellLength[1] = numFullWidthChars;
+ }
+
+ private int getMaxLengthOfNormalWidthCharsForColumn(int[] maxLength) {
+ return maxLength[0];
+ }
+
+ private void updateMaxLengthOfNormalWidthCharsForColumn(int[] maxLength, int numNormalChars) {
+ maxLength[0] = Math.max(maxLength[0], numNormalChars);
+ }
+
+ private int getMaxLengthOfFullWidthCharsForColumn(int[] maxLength) {
+ return maxLength[1];
+ }
+
+ private void updateMaxLengthOfFullWidthCharsForColumn(int[] maxLength, int numFullWidthChars) {
+ maxLength[1] = Math.max(maxLength[1], numFullWidthChars);
+ }
+
+ private static final List<UnicodeBlock> LATIN = Arrays.asList(
+ UnicodeBlock.BASIC_LATIN,
+ UnicodeBlock.LATIN_1_SUPPLEMENT,
+ UnicodeBlock.LATIN_EXTENDED_A,
+ UnicodeBlock.LATIN_EXTENDED_B,
+ UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
+ );
+
+ private static final List<UnicodeBlock> CJK = Arrays.asList(
+ UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
+ UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
+ UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
+ UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
+ UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
+ UnicodeBlock.CJK_COMPATIBILITY,
+ UnicodeBlock.CJK_COMPATIBILITY_FORMS,
+ UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
+ UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
+ UnicodeBlock.HANGUL_SYLLABLES,
+ UnicodeBlock.HANGUL_JAMO,
+ UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
+ UnicodeBlock.KATAKANA,
+ UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
+ UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
+ UnicodeBlock.HIRAGANA
+ );
+
+ /**
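+ * The range U+FF61~U+FFDC is a special case: it lies in the Halfwidth and Fullwidth Forms block but holds half-width (narrow) forms; do not count these as full-width
+ *
+ * @param c a character
+ * @return True if c is in that range (half-width CJK punctuation, half-width katakana such as ｱｶｻﾀ, or half-width Hangul), false otherwise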
+ */
+ private boolean isHalfWidthKatakana(char c) {
+ return '\uFF61' <= c &&
+ '\uFFDC' >= c;
+ }
+
+ /**
+ * The majority of characters passed in will be in the LATIN collection.
+ * Therefore we check there first, short-circuit and return as soon as possible.
+ *
+ * @param c The char to evaluate
+ * @return True if it is a Full-Width character, false otherwise
+ */
+ private boolean isFullWidthChar(char c) {
+ final UnicodeBlock block = UnicodeBlock.of(c);
+ return(!LATIN.contains(block) &&
+ (CJK.contains(block) && !isHalfWidthKatakana(c)));
+ }
+
  public void row(List<CellResult> cellResults) {
  StringBuilder buffer = new StringBuilder();
  Row row = rows.get(rowIndex);
@@ -358,7 +455,12 @@ public void row(List<CellResult> cellResults) {
  }
  Format format = formats.get(status);
  buffer.append(format.text(cellText));
- int padding = maxLengths[colIndex] - cellLengths[rowIndex][colIndex];
+ int padding = getMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex]);
+ int fullWidthPadding = getMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex]);
+ // rpad with full-width spaces first, then normal spaces.
+ // the order is not significant but this way prevents inconsistent padding
+ // such as: single spaces, followed by full-width spaces, then followed with a final single space and pipe delimiter
+ padSpace(buffer, fullWidthPadding, true);
  padSpace(buffer, padding);
  if (colIndex < maxLengths.length - 1) {
  buffer.append(" | ");
@@ -440,8 +542,13 @@ private void calculateLocationIndentations() {
  }
 
  private void padSpace(StringBuilder buffer, int indent) {
+ padSpace(buffer, indent, false);
+ }
+
+ private void padSpace(StringBuilder buffer, int indent, boolean useFullWidth) {
+ char whitespace = useFullWidth ? '\u3000' : ' ';
  for (int i = 0; i < indent; i++) {
- buffer.append(" ");
+ buffer.append(whitespace);
  }
  }
 

diff --git a/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java b/java/src/test/java/gherkin/formatter/PrettyFormatterTest.java
@@ -99,6 +99,63 @@ public void shouldFormatAsDesigned() throws IOException {
 
  }
 
+ /**
+ * CJK and other character sets often contain 'fullwidth' characters.
+ * <p>
+ * Fullwidth characters are only counted as a single character but occupy twice
+ * the space of a typical ASCII character when using a fixed-width font.
+ * <p>
+ * For example, in Japanese this text is 2 characters, padded with 5 spaces to match the longer column below:
+ * <ul>
+ * <li>{@code 新機 |}
+ * <li>{@code 123456 |}
+ * </ul>
+ *
+ * <p>
+ * The net result is that '|' and '#' characters will be misaligned when
+ * printing {@code --i18n ja} or a feature containing a mix of normal and fullwidth characters.
+ *
+ * The only way to resolve this is to keep a running tally of half- and full-width characters,
+ * and then pad the output with a mix of trailing half-width (U+0020) and full-width (U+3000) spaces.
+ * <p>
+ * Repeating the above example, we now have:
+ * <ul>
+ * <li>{@code 新機 |} (2 full-width chars + 6 half-width spaces)
+ * <li>{@code 123456　　|} (6 half-width chars + 2 full-width spaces)
+ * </ul>
+ *
+ * @see "http://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms"
+ *
+ * @throws IOException
+ */
+ @Test
+ public void shouldFormatAsDesignedWithFullWidthCharacters() throws IOException {
+
+ StringBuilder featureBuilder = new StringBuilder();
+ featureBuilder.append("# language: ja\n");
+ featureBuilder.append("機能: PrettyFormatter with Japanese\n");
+ featureBuilder.append("シナリオ: Formmat beautifully\n");
+ featureBuilder.append("もしI have this table:\n");
+ featureBuilder.append("\t|名前|?の値1|\n");
+ featureBuilder.append("\t|ab12ａｂ１２|ﾊﾝｶｸ|\n");
+ featureBuilder.append("ならばshould formatt beautifully.\n");
+ String feature = featureBuilder.toString();
+
+ List<String> lines = doFormatter(feature);
+
+ assertEquals("Formatter produces unexpected quantity of lines. ", 8, lines.size());
+
+ assertEquals("# language: ja", lines.get(0));
+ assertEquals("機能: PrettyFormatter with Japanese", lines.get(1));
+ assertEquals("", lines.get(2));
+ assertEquals(" シナリオ: Formmat beautifully", lines.get(3));
+ assertEquals(" もしI have this table:", lines.get(4));
+ assertEquals(" | 名前　　 | ?の値1 |", lines.get(5));
+ assertEquals(" | ab12ａｂ１２ | ﾊﾝｶｸ　　 |", lines.get(6));
+ assertEquals(" ならばshould formatt beautifully.", lines.get(7));
+
+ }
+
  @Test
  public void shouldAppendOnlyCompleteLinesAndFlushBetween() throws IOException {