Skip to content
This repository has been archived by the owner on May 28, 2019. It is now read-only.

Pretty CJK formatting #324

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 116 additions & 9 deletions java/src/main/java/gherkin/formatter/PrettyFormatter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import gherkin.util.Mapper;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
Expand All @@ -27,6 +28,7 @@

import static gherkin.util.FixJava.join;
import static gherkin.util.FixJava.map;
import static java.lang.Character.UnicodeBlock;

/**
* This class pretty prints feature files like they were in the source, only
Expand All @@ -48,8 +50,8 @@ public String map(Tag tag) {
};
private Formats formats;
private Match match;
private int[][] cellLengths;
private int[] maxLengths;
private int[][][] cellLengths;
private int[][] maxLengths;
private int rowIndex;
private List<? extends Row> rows;
private Integer rowHeight = null;
Expand Down Expand Up @@ -296,16 +298,28 @@ private void prepareTable(List<? extends Row> rows) {
}
}

cellLengths = new int[rows.size()][columnCount];
maxLengths = new int[columnCount];
cellLengths = new int[rows.size()][columnCount][2];
maxLengths = new int[columnCount][2];
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
Row row = rows.get(rowIndex);
final List<String> cells = row.getCells();
for (int colIndex = 0; colIndex < columnCount; colIndex++) {
final String cell = getCellSafely(cells, colIndex);
final int length = escapeCell(cell).length();
cellLengths[rowIndex][colIndex] = length;
maxLengths[colIndex] = Math.max(maxLengths[colIndex], length);
final char[] chars = escapeCell(cell).toCharArray();
int numNormalChars = 0;
int numFullWidthChars = 0;
for(char ch : chars) {
if(isFullWidthChar(ch)) {
numFullWidthChars++;
} else {
numNormalChars++;
}

}
setNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex], numNormalChars);
setNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex], numFullWidthChars);
updateMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex], numNormalChars);
updateMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex], numFullWidthChars);
}
}
rowIndex = 0;
Expand All @@ -315,6 +329,89 @@ private String getCellSafely(final List<String> cells, final int colIndex) {
return (colIndex < cells.size()) ? cells.get(colIndex) : "";
}

/**
 * Reads the normal-width character count from a per-cell counter pair.
 *
 * @param cellLength two-slot counter array for one cell; slot 0 holds the normal-width count
 * @return the value stored in slot 0
 */
private int getNumberOfNormalWidthCharsInCell(int[] cellLength) {
    final int normalWidthCount = cellLength[0];
    return normalWidthCount;
}

/**
 * Stores the normal-width character count into a per-cell counter pair.
 *
 * @param cellLength     two-slot counter array for one cell; slot 0 is overwritten
 * @param numNormalChars the count to store in slot 0
 */
private void setNumberOfNormalWidthCharsInCell(int[] cellLength, int numNormalChars) {
    final int normalWidthSlot = 0;
    cellLength[normalWidthSlot] = numNormalChars;
}

/**
 * Reads the full-width character count from a per-cell counter pair.
 *
 * @param cellLength two-slot counter array for one cell; slot 1 holds the full-width count
 * @return the value stored in slot 1
 */
private int getNumberOfFullWidthCharsInCell(int[] cellLength) {
    final int fullWidthCount = cellLength[1];
    return fullWidthCount;
}

/**
 * Stores the full-width character count into a per-cell counter pair.
 *
 * @param cellLength        two-slot counter array for one cell; slot 1 is overwritten
 * @param numFullWidthChars the count to store in slot 1
 */
private void setNumberOfFullWidthCharsInCell(int[] cellLength, int numFullWidthChars) {
    final int fullWidthSlot = 1;
    cellLength[fullWidthSlot] = numFullWidthChars;
}

/**
 * Reads the largest normal-width character count recorded for a column.
 *
 * @param maxLength two-slot maximum array for one column; slot 0 holds the normal-width maximum
 * @return the value stored in slot 0
 */
private int getMaxLengthOfNormalWidthCharsForColumn(int[] maxLength) {
    final int normalWidthMax = maxLength[0];
    return normalWidthMax;
}

/**
 * Raises the column's normal-width maximum when the given count exceeds it.
 *
 * @param maxLength      two-slot maximum array for one column; slot 0 may be overwritten
 * @param numNormalChars the normal-width count of the cell being considered
 */
private void updateMaxLengthOfNormalWidthCharsForColumn(int[] maxLength, int numNormalChars) {
    if (numNormalChars > maxLength[0]) {
        maxLength[0] = numNormalChars;
    }
}

/**
 * Reads the largest full-width character count recorded for a column.
 *
 * @param maxLength two-slot maximum array for one column; slot 1 holds the full-width maximum
 * @return the value stored in slot 1
 */
private int getMaxLengthOfFullWidthCharsForColumn(int[] maxLength) {
    final int fullWidthMax = maxLength[1];
    return fullWidthMax;
}

/**
 * Raises the column's full-width maximum when the given count exceeds it.
 *
 * @param maxLength         two-slot maximum array for one column; slot 1 may be overwritten
 * @param numFullWidthChars the full-width count of the cell being considered
 */
private void updateMaxLengthOfFullWidthCharsForColumn(int[] maxLength, int numFullWidthChars) {
    if (numFullWidthChars > maxLength[1]) {
        maxLength[1] = numFullWidthChars;
    }
}

/**
 * Unicode blocks whose characters are always counted as normal (single) width.
 * isFullWidthChar consults this list before the CJK list so that the common
 * Latin case is rejected first.
 */
private static final List<UnicodeBlock> LATIN = Arrays.asList(
UnicodeBlock.BASIC_LATIN,
UnicodeBlock.LATIN_1_SUPPLEMENT,
UnicodeBlock.LATIN_EXTENDED_A,
UnicodeBlock.LATIN_EXTENDED_B,
UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
);

/**
 * Unicode blocks whose characters are candidates for being counted as full-width.
 * Membership alone is not sufficient: isFullWidthChar additionally excludes the
 * characters accepted by isHalfWidthKatakana.
 * <p>
 * NOTE(review): CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B and
 * CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT lie outside the Basic Multilingual Plane.
 * isFullWidthChar looks blocks up from a single char, so these two entries
 * presumably never match there; confirm whether code-point based lookup is wanted.
 */
private static final List<UnicodeBlock> CJK = Arrays.asList(
UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
UnicodeBlock.CJK_COMPATIBILITY,
UnicodeBlock.CJK_COMPATIBILITY_FORMS,
UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
UnicodeBlock.HANGUL_SYLLABLES,
UnicodeBlock.HANGUL_JAMO,
UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
UnicodeBlock.KATAKANA,
UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
UnicodeBlock.HIRAGANA
);

/**
 * Tells whether a character is one of the half-width members of the Halfwidth and
 * Fullwidth Forms block. Such characters occupy a single column and must not be
 * counted as full-width.
 * <p>
 * Despite the method name, the half-width ranges cover more than katakana:
 * U+FF61..U+FFDC holds half-width CJK punctuation, katakana and hangul, and
 * U+FFE8..U+FFEE holds the half-width symbol variants (light vertical, arrows,
 * black square, white circle).
 *
 * @param c a character
 * @return true if {@code c} lies in one of the half-width ranges, false otherwise
 */
private boolean isHalfWidthKatakana(char c) {
    final boolean halfWidthLettersAndPunctuation = '\uFF61' <= c && c <= '\uFFDC';
    // U+FFE8..U+FFEE are classified as half-width too (East Asian Width "H");
    // without this range they would be padded as if they took two columns.
    final boolean halfWidthSymbols = '\uFFE8' <= c && c <= '\uFFEE';
    return halfWidthLettersAndPunctuation || halfWidthSymbols;
}

/**
 * Decides whether a character is counted as full-width.
 * <p>
 * A character qualifies when its Unicode block is not listed in LATIN, is listed
 * in CJK, and the character is not accepted by isHalfWidthKatakana. LATIN is
 * consulted first because most input is expected to be Latin, so the common case
 * returns after a single lookup.
 *
 * @param c The char to evaluate
 * @return True if it is a Full-Width character, false otherwise
 */
private boolean isFullWidthChar(char c) {
    final UnicodeBlock unicodeBlock = UnicodeBlock.of(c);
    if (LATIN.contains(unicodeBlock)) {
        return false;
    }
    if (!CJK.contains(unicodeBlock)) {
        return false;
    }
    return !isHalfWidthKatakana(c);
}

public void row(List<CellResult> cellResults) {
StringBuilder buffer = new StringBuilder();
Row row = rows.get(rowIndex);
Expand Down Expand Up @@ -358,7 +455,12 @@ public void row(List<CellResult> cellResults) {
}
Format format = formats.get(status);
buffer.append(format.text(cellText));
int padding = maxLengths[colIndex] - cellLengths[rowIndex][colIndex];
int padding = getMaxLengthOfNormalWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfNormalWidthCharsInCell(cellLengths[rowIndex][colIndex]);
int fullWidthPadding = getMaxLengthOfFullWidthCharsForColumn(maxLengths[colIndex]) - getNumberOfFullWidthCharsInCell(cellLengths[rowIndex][colIndex]);
// Right-pad with full-width spaces first, then with normal spaces.
// The order does not affect alignment, but doing it this way avoids inconsistent padding
// such as single spaces, followed by full-width spaces, followed by a final single space and the pipe delimiter.
padSpace(buffer, fullWidthPadding, true);
padSpace(buffer, padding);
if (colIndex < maxLengths.length - 1) {
buffer.append(" | ");
Expand Down Expand Up @@ -440,8 +542,13 @@ private void calculateLocationIndentations() {
}

/**
 * Appends padding made of normal-width spaces by delegating to the three-argument
 * overload with full-width padding switched off.
 *
 * @param buffer the builder to append to
 * @param indent the amount of padding requested
 */
private void padSpace(StringBuilder buffer, int indent) {
    final boolean useFullWidth = false;
    padSpace(buffer, indent, useFullWidth);
}

/**
 * Appends {@code indent} padding characters to {@code buffer}.
 * <p>
 * Exactly one character is appended per unit of padding: the ideographic space
 * U+3000 when {@code useFullWidth} is true, otherwise an ASCII space. A zero or
 * negative {@code indent} appends nothing.
 *
 * @param buffer       the builder to append to
 * @param indent       how many padding characters to append
 * @param useFullWidth whether to pad with full-width spaces instead of ASCII spaces
 */
private void padSpace(StringBuilder buffer, int indent, boolean useFullWidth) {
    final char whitespace = useFullWidth ? '\u3000' : ' ';
    for (int i = 0; i < indent; i++) {
        // Only the selected padding character is appended; an extra literal " "
        // here would double the padding and mix ASCII spaces into full-width padding.
        buffer.append(whitespace);
    }
}

Expand Down
57 changes: 57 additions & 0 deletions java/src/test/java/gherkin/formatter/PrettyFormatterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,63 @@ public void shouldFormatAsDesigned() throws IOException {

}

/**
* CJK and other character sets often contain 'fullwidth' characters.
* <p>
* Fullwidth characters are only counted as a single character but occupy twice
* the space of a typical ASCII character when using a fixed-width font.
* <p>
* For example, in Japanese this text is 2 characters, padded with 5 spaces to match the longer column below:
* <ul>
* <li>{@code 新機 |}
* <li>{@code 123456 |}
* </ul>
*
* <p>
* The net result is that '|' and '#' characters will be misaligned when
* printing {@code --i18n ja} or a feature containing a mix of normal and fullwidth characters.
*
* The only way to resolve this is to keep a running tally of half- and full-width characters,
* and then pad the output with a mix of trailing half-width (U+0020) and full-width (U+3000) spaces.
* <p>
* Repeating the above example, we now have:
* <ul>
* <li>{@code 新機 |} (2 full-width chars + 6 half-width spaces)
* <li>{@code 123456  |} (6 half-width chars + 2 full-width spaces)
* </ul>
*
* @see "http://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms"
*
* @throws IOException
*/
@Test
public void shouldFormatAsDesignedWithFullWidthCharacters() throws IOException {

StringBuilder featureBuilder = new StringBuilder();
featureBuilder.append("# language: ja\n");
featureBuilder.append("機能: PrettyFormatter with Japanese\n");
featureBuilder.append("シナリオ: Formmat beautifully\n");
featureBuilder.append("もしI have this table:\n");
featureBuilder.append("\t|名前|?の値1|\n");
featureBuilder.append("\t|ab12ab12|ハンカク|\n");
featureBuilder.append("ならばshould formatt beautifully.\n");
String feature = featureBuilder.toString();

List<String> lines = doFormatter(feature);

assertEquals("Formatter produces unexpected quantity of lines. ", 8, lines.size());

assertEquals("# language: ja", lines.get(0));
assertEquals("機能: PrettyFormatter with Japanese", lines.get(1));
assertEquals("", lines.get(2));
assertEquals(" シナリオ: Formmat beautifully", lines.get(3));
assertEquals(" もしI have this table:", lines.get(4));
assertEquals(" | 名前   | ?の値1 |", lines.get(5));
assertEquals(" | ab12ab12 | ハンカク   |", lines.get(6));
assertEquals(" ならばshould formatt beautifully.", lines.get(7));

}

@Test
public void shouldAppendOnlyCompleteLinesAndFlushBetween() throws IOException {

Expand Down