Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Long method breakdown and reducing Cyclomatic Complexity in FixedWidthParser.java #500

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 41 additions & 87 deletions src/main/java/com/univocity/parsers/csv/CsvWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
*/
public class CsvWriter extends AbstractWriter<CsvWriterSettings> {

private char delimiter;
private char[] multiDelimiter;
private char quoteChar;
private char escapeChar;
Expand Down Expand Up @@ -143,8 +142,8 @@ protected final void initialize(CsvWriterSettings settings) {
CsvFormat format = settings.getFormat();
this.multiDelimiter = format.getDelimiterString().toCharArray();
if (multiDelimiter.length == 1) {
delimiter = multiDelimiter[0];
multiDelimiter = null;
//delimiter = multiDelimiter[0];
//multiDelimiter = null;
}
this.quoteChar = format.getQuote();
this.escapeChar = format.getQuoteEscape();
Expand Down Expand Up @@ -209,11 +208,7 @@ protected void processRow(Object[] row) {
}
for (int i = 0; i < row.length; i++) {
if (i != 0) {
if (multiDelimiter == null) {
appendToRow(delimiter);
} else {
appendToRow(multiDelimiter);
}
appendToRow(multiDelimiter);
}

if (dontProcessNormalizedNewLines) {
Expand Down Expand Up @@ -270,36 +265,18 @@ private boolean matchMultiDelimiter(String element, int from) {

private boolean quoteElement(int start, String element) {
final int length = element.length();
if (multiDelimiter == null) {
if (maxTrigger == 0) {
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if (nextChar == delimiter || nextChar == newLine) {
return true;
}
}
} else {
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if (nextChar == delimiter || nextChar < maxTrigger && quotationTriggers[nextChar]) {
return true;
}
if (maxTrigger == 0) {
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if (delimiterChecker(nextChar, element, i)|| nextChar == newLine) {
return true;
}
}
} else {
if (maxTrigger == 0) {
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if ((nextChar == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || nextChar == newLine) {
return true;
}
}
} else {
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if ((nextChar == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || nextChar < maxTrigger && quotationTriggers[nextChar]) {
return true;
}
for (int i = start; i < length; i++) {
char nextChar = element.charAt(i);
if (delimiterChecker(nextChar, element, i)|| nextChar < maxTrigger && quotationTriggers[nextChar]) {
return true;
}
}
}
Expand Down Expand Up @@ -342,63 +319,32 @@ private boolean append(int columnIndex, boolean isElementQuoted, boolean allowTr
int i = start;
char ch = '\0';

if (multiDelimiter == null) {
for (; i < length; i++) {
ch = element.charAt(i);
if (ch == quoteChar || ch == delimiter || ch == escapeChar || (ch < maxTrigger && quotationTriggers[ch])) {
appender.append(element, start, i);
start = i + 1;

if (ch == quoteChar || ch == escapeChar) {
if (quoteElement(i, element)) {
appendQuoted(i, allowTrim, element);
return true;
} else if (escapeUnquoted) {
appendQuoted(i, allowTrim, element);
} else {
appender.append(element, i, length);
if (allowTrim && ignoreTrailing && element.charAt(length - 1) <= ' ' && whitespaceRangeStart < element.charAt(length - 1)) {
appender.updateWhitespace();
}
}
return isElementQuoted;
} else if (ch == escapeChar && inputNotEscaped && escapeEscape != '\0' && escapeUnquoted) {
appender.append(escapeEscape);
} else if (ch == delimiter || ch < maxTrigger && quotationTriggers[ch]) {
for (; i < length; i++) {
ch = element.charAt(i);
if (ch == quoteChar || delimiterChecker(ch, element, i) || ch == escapeChar || (ch < maxTrigger && quotationTriggers[ch])) {
appender.append(element, start, i);
start = i + 1;

if (ch == quoteChar || ch == escapeChar) {
if (quoteElement(i, element)) {
appendQuoted(i, allowTrim, element);
return true;
}
appender.append(ch);
}
}
} else {
for (; i < length; i++) {
ch = element.charAt(i);
if (ch == quoteChar || (ch == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || ch == escapeChar || (ch < maxTrigger && quotationTriggers[ch])) {
appender.append(element, start, i);
start = i + 1;

if (ch == quoteChar || ch == escapeChar) {
if (quoteElement(i, element)) {
appendQuoted(i, allowTrim, element);
return true;
} else if (escapeUnquoted) {
appendQuoted(i, allowTrim, element);
} else {
appender.append(element, i, length);
if (allowTrim && ignoreTrailing && element.charAt(length - 1) <= ' ' && whitespaceRangeStart < element.charAt(length - 1)) {
appender.updateWhitespace();
}
}
return isElementQuoted;
} else if (ch == escapeChar && inputNotEscaped && escapeEscape != '\0' && escapeUnquoted) {
appender.append(escapeEscape);
} else if ((ch == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || ch < maxTrigger && quotationTriggers[ch]) {
} else if (escapeUnquoted) {
appendQuoted(i, allowTrim, element);
return true;
} else {
appender.append(element, i, length);
if (allowTrim && ignoreTrailing && element.charAt(length - 1) <= ' ' && whitespaceRangeStart < element.charAt(length - 1)) {
appender.updateWhitespace();
}
}
appender.append(ch);
return isElementQuoted;
} else if (ch == escapeChar && inputNotEscaped && escapeEscape != '\0' && escapeUnquoted) {
appender.append(escapeEscape);
} else if (delimiterChecker(ch, element, i)|| ch < maxTrigger && quotationTriggers[ch]) {
appendQuoted(i, allowTrim, element);
return true;
}
appender.append(ch);
}
}

Expand All @@ -409,6 +355,14 @@ private boolean append(int columnIndex, boolean isElementQuoted, boolean allowTr
return isElementQuoted;
}

/**
 * Tests whether the configured delimiter starts at position {@code index} of {@code element}.
 *
 * <p>The first delimiter character is compared against {@code ch} before anything else, so
 * {@link #matchMultiDelimiter(String, int)} is only invoked for positions that can actually
 * begin a delimiter, rather than once for every character inspected.
 *
 * @param ch      the character of {@code element} found at {@code index}
 * @param element the value being inspected
 * @param index   the position of {@code ch} within {@code element}
 * @return {@code true} if {@code ch} equals the first delimiter character and, when the
 *         delimiter is longer than one character, {@code matchMultiDelimiter(element, index + 1)}
 *         also returns {@code true}
 */
private boolean delimiterChecker(char ch, String element, int index) {
    if (ch != multiDelimiter[0]) {
        return false;
    }
    // Single-character delimiter: the first-character comparison is the whole test.
    return multiDelimiter.length == 1 || matchMultiDelimiter(element, index + 1);
}

private void appendQuoted(int start, boolean allowTrim, String element) {
final int length = element.length();
int i = start;
Expand Down
127 changes: 77 additions & 50 deletions src/main/java/com/univocity/parsers/fixed/FixedWidthParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,64 +149,92 @@ protected void parseRecord() {
lookaheadInput.lookahead(maxLookupLength);

if (lookaheadFormats != null) {
for (int i = 0; i < lookaheadFormats.length; i++) {
if (lookaheadInput.matches(ch, lookaheadFormats[i].value, wildcard)) {
lengths = lookaheadFormats[i].lengths;
alignments = lookaheadFormats[i].alignments;
paddings = lookaheadFormats[i].paddings;
ignore = lookaheadFormats[i].ignore;
keepPaddingFlags = lookaheadFormats[i].keepPaddingFlags;
lookupFormat = lookaheadFormats[i];
matched = true;
break;
}
}
lookAheadFormatsNotNull(lookaheadInput, matched);
matched = true;

if (lookbehindFormats != null && matched) {
lookbehindFormat = null;
for (int i = 0; i < lookbehindFormats.length; i++) {
if (lookaheadInput.matches(ch, lookbehindFormats[i].value, wildcard)) {
lookbehindFormat = lookbehindFormats[i];
break;
}
}
lookBehindFormatsNotNull(lookaheadInput);
}
} else {
for (int i = 0; i < lookbehindFormats.length; i++) {
if (lookaheadInput.matches(ch, lookbehindFormats[i].value, wildcard)) {
lookbehindFormat = lookbehindFormats[i];
matched = true;
lengths = rootLengths;
ignore = rootIgnore;
keepPaddingFlags = rootKeepPaddingFlags;
break;
}
}
lookAheadFormatsNull(lookaheadInput, matched);
}

if (!matched) {
if (lookbehindFormat == null) {
if (rootLengths == null) {
throw new TextParsingException(context, "Cannot process input with the given configuration. No default field lengths defined and no lookahead/lookbehind value match '" + lookaheadInput.getLookahead(ch) + '\'');
}
lengths = rootLengths;
alignments = rootAlignments;
paddings = rootPaddings;
ignore = rootIgnore;
keepPaddingFlags = rootKeepPaddingFlags;
lookupFormat = null;
} else {
lengths = lookbehindFormat.lengths;
alignments = lookbehindFormat.alignments;
paddings = lookbehindFormat.paddings;
ignore = lookbehindFormat.ignore;
keepPaddingFlags = lookbehindFormat.keepPaddingFlags;
lookupFormat = lookbehindFormat;
}
unmatched();
}
}

process();
}

/**
 * Chooses the field layout for a record when the caller found no matching lookup value.
 *
 * <p>When {@code lookbehindFormat} is set, its lengths, alignments, paddings, ignore flags and
 * keep-padding flags become the active layout and it is stored as {@code lookupFormat}.
 * Otherwise the root layout is applied and {@code lookupFormat} is cleared.
 *
 * @throws TextParsingException if {@code lookbehindFormat} is {@code null} and no root
 *                              lengths are defined
 */
private void unmatched() {
    if (lookbehindFormat != null) {
        // Fall back to the layout of the lookbehind format that is currently set.
        lookupFormat = lookbehindFormat;
        lengths = lookbehindFormat.lengths;
        alignments = lookbehindFormat.alignments;
        paddings = lookbehindFormat.paddings;
        ignore = lookbehindFormat.ignore;
        keepPaddingFlags = lookbehindFormat.keepPaddingFlags;
        return;
    }

    if (rootLengths == null) {
        throw new TextParsingException(context, "Cannot process input with the given configuration. No default field lengths defined and no lookahead/lookbehind value match '" + lookaheadInput.getLookahead(ch) + '\'');
    }

    // No lookbehind format available: use the root (default) layout.
    lookupFormat = null;
    lengths = rootLengths;
    alignments = rootAlignments;
    paddings = rootPaddings;
    ignore = rootIgnore;
    keepPaddingFlags = rootKeepPaddingFlags;
}

/**
 * Searches {@code lookaheadFormats}, in order, for the first entry for which
 * {@code lookaheadInput.matches(ch, value, wildcard)} returns {@code true}, and makes that
 * entry the active layout (lengths, alignments, paddings, ignore flags, keep-padding flags
 * and {@code lookupFormat}).
 *
 * <p>Java passes {@code boolean} arguments by value, so an assignment to {@code matched}
 * inside this method can never be observed by the caller. The outcome is therefore reported
 * through the return value; callers should assign it to their own flag instead of assuming
 * that a match occurred.
 *
 * @param lookaheadInput the reader whose {@code matches} method is used to test each format
 * @param matched        ignored; retained only so existing call sites keep compiling
 * @return {@code true} if a lookahead format matched and was applied, {@code false} if none
 *         matched (in which case no field is modified)
 */
private boolean lookAheadFormatsNotNull(LookaheadCharInputReader lookaheadInput, boolean matched) {
    for (int i = 0; i < lookaheadFormats.length; i++) {
        if (lookaheadInput.matches(ch, lookaheadFormats[i].value, wildcard)) {
            lengths = lookaheadFormats[i].lengths;
            alignments = lookaheadFormats[i].alignments;
            paddings = lookaheadFormats[i].paddings;
            ignore = lookaheadFormats[i].ignore;
            keepPaddingFlags = lookaheadFormats[i].keepPaddingFlags;
            lookupFormat = lookaheadFormats[i];
            return true;
        }
    }
    return false;
}

/**
 * Resets {@code lookbehindFormat} and then sets it to the first entry of
 * {@code lookbehindFormats} for which {@code lookaheadInput.matches(ch, value, wildcard)}
 * returns {@code true}. If no entry matches, {@code lookbehindFormat} stays {@code null}.
 *
 * @param lookaheadInput the reader whose {@code matches} method is used to test each format
 */
private void lookBehindFormatsNotNull(LookaheadCharInputReader lookaheadInput) {
    lookbehindFormat = null;
    for (int pos = 0; pos < lookbehindFormats.length; pos++) {
        if (!lookaheadInput.matches(ch, lookbehindFormats[pos].value, wildcard)) {
            continue;
        }
        lookbehindFormat = lookbehindFormats[pos];
        return;
    }
}

/**
 * Searches {@code lookbehindFormats}, in order, for the first entry for which
 * {@code lookaheadInput.matches(ch, value, wildcard)} returns {@code true}. On a match the
 * entry is stored in {@code lookbehindFormat} and the lengths, ignore flags and keep-padding
 * flags are reset to the root values; alignments and paddings are left untouched.
 *
 * <p>Java passes {@code boolean} arguments by value, so an assignment to {@code matched}
 * inside this method can never be observed by the caller. The outcome is therefore reported
 * through the return value; callers should assign it to their own flag.
 *
 * @param lookaheadInput the reader whose {@code matches} method is used to test each format
 * @param matched        ignored; retained only so existing call sites keep compiling
 * @return {@code true} if a lookbehind format matched, {@code false} if none matched
 *         (in which case no field is modified)
 */
private boolean lookAheadFormatsNull(LookaheadCharInputReader lookaheadInput, boolean matched) {
    for (int i = 0; i < lookbehindFormats.length; i++) {
        if (lookaheadInput.matches(ch, lookbehindFormats[i].value, wildcard)) {
            lookbehindFormat = lookbehindFormats[i];
            lengths = rootLengths;
            ignore = rootIgnore;
            keepPaddingFlags = rootKeepPaddingFlags;
            return true;
        }
    }
    return false;
}

int i;
for (i = 0; i < lengths.length; i++) {
//Extract Method
private void process(){
for (int i = 0; i < lengths.length; i++) {
final boolean ignorePadding = keepPaddingFlags[i] == null ? !keepPadding : !keepPaddingFlags[i];
length = lengths[i];
if (paddings != null) {
Expand Down Expand Up @@ -249,7 +277,6 @@ protected void parseRecord() {
skipToNewLine();
}
useDefaultPadding = false;

}

private void skipToNewLine() {
Expand Down
22 changes: 22 additions & 0 deletions src/test/java/com/univocity/parsers/csv/CsvParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@

public class CsvParserTest extends ParserTestCase {


/**
 * Parses the same four-line input twice, first with {@code setSkipEmptyRecords(false)} and
 * then with {@code setSkipEmptyRecords(true)}, and checks the number of records returned.
 */
// NOTE(review): header extraction is enabled, so the first line is presumably consumed as
// headers; confirm that 4 (and then 3) are the intended record counts.
// NOTE(review): skipEmptyRecords is flipped after the parser was constructed; confirm the
// parser picks up settings changes made after construction.
@Test
public void FilterOutRowsWithNoValues() {
    // The second line (",,,") is a row whose columns are all empty.
    final String input = "v11, v12, v13\n"
            + ",,,\n"
            + "v31, v32, v33\n"
            + "v41, v42, v43";
    final byte[] inputBytes = input.getBytes();

    CsvParserSettings settings = new CsvParserSettings();
    settings.setSkipEmptyLines(true);
    settings.setSkipEmptyRecords(false);
    settings.setHeaderExtractionEnabled(true);
    CsvParser csvParser = new CsvParser(settings);

    List<Record> records = csvParser.parseAllRecords(new ByteArrayInputStream(inputBytes));
    assertEquals(records.size(), 4);

    settings.setSkipEmptyRecords(true);
    records = csvParser.parseAllRecords(new ByteArrayInputStream(inputBytes));
    assertEquals(records.size(), 3);
}

@DataProvider(name = "testProvider")
public Object[][] testProvider() {
return new Object[][]{
Expand Down Expand Up @@ -362,14 +380,18 @@ public void testReadEmptyValue() {
CsvParser parser = new CsvParser(settings);

parser.beginParsing(new StringReader("a,b,,c,\"\",\r\n"));
//parser.parse(new StringReader("a,b,,c,\"\",\r\n"));
String[] row = parser.parseNext();
List<String[]> rows = processor.getRows();

assertEquals(row[0], "a");
assertEquals(row[1], "b");
assertEquals(row[2], null);
assertEquals(row[3], "c");
assertEquals(row[4], "");
assertEquals(row[5], null);


}

@DataProvider
Expand Down