diff --git a/.gitignore b/.gitignore index d596fae..a6e6f0e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ build/ /.metadata/ .classpath .project +*.iml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ed726b8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +0.0.1 (2016-03-04) +================== +- first release! diff --git a/README.md b/README.md index 5905779..1a6e0a5 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,76 @@ # To Csv filter plugin for Embulk -TODO: Write short description here and build.gradle file. +Convert a record to CSV. ## Overview * **Plugin type**: filter ## Configuration +cf. http://www.embulk.org/docs/built-in.html#csv-formatter-plugin -- **option1**: description (integer, required) -- **option2**: description (string, default: `"myvalue"`) -- **option3**: description (string, default: `null`) +|name|type|description|required?| +|:---|:---|:---|:---| +|column_name|string|Column name used when converting to a single value| `"payload"` by default| +|delimiter|string|Delimiter character such as `,` for CSV, `"\t"` for TSV, `"\|"` or any single-byte character| `,` by default| +|quote|string|The character surrounding a quoted value| `"` by default| +|quote_policy|enum|Policy for quote ( `ALL`, `MINIMAL`, `NONE`) (see below)| `MINIMAL` by default| +|escape|string|Escape character to escape quote character|same as the quote default (\*1)| +|header_line|boolean|If true, write the header line with column names as the first line| `false` by default| +|null_string|string|Expression of `NULL` values|empty by default| +|newline|enum|Newline character ( `CRLF`, `LF` or `CR`)| `CRLF` by default| +|newline_in_field|enum|Newline character in each field ( `CRLF`, `LF`, `CR`)| `LF` by default| +|charset|enum|Character encoding (e.g. `ISO-8859-1`, `UTF-8`)| `UTF-8` by default| +|default_timezone|string|Time zone of timestamp columns. 
This can be overridden for each column using `column_options`| `UTC` by default| +|column_options|hash|See below|optional| + +(\*1): if quote_policy is `NONE`, the quote option is ignored, and the default escape is `\`. + +The quote_policy option determines which fields are quoted. + +|name|description| +|:---|:---| +| `ALL`|Quote all fields| +| `MINIMAL`|Only quote those fields which contain delimiter, quote or any of the characters in lineterminator| +| `NONE`|Never quote fields. When the delimiter occurs in field, escape with escape char| + +The column_options option is a map whose keys are column names and whose values are configurations with the following parameters: + +|name|type|description|required?| +|:---|:---|:---|:---| +|timezone|string|Time zone if type of this column is timestamp. If not set, default_timezone is used.|optional| +|format|string|Timestamp format if type of this column is timestamp.| `%Y-%m-%d %H:%M:%S.%6N %z` by default| + +## Why is this needed? +Some output plugins cannot use a formatter plugin because they do not inherit from `FileOutputPlugin`, but sometimes they still need a formatter. +In that case, this plugin is useful. 
For example, [embulk-output-bigquery](https://github.com/sonots/embulk-output-bigquery/blob/ruby/README.md#formatter-performance-issue) ## Example ```yaml filters: - type: to_csv - option1: example1 - option2: example2 + column_name: payload + delimiter: "\t" + newline: CRLF + newline_in_field: LF + charset: UTF-8 + quote_policy: MINIMAL + quote: '"' + escape: "\\" + null_string: "\\N" + default_timezone: 'UTC' + column_options: + time: {format: '%Y-%m-%d %H:%M:%S', timezone: 'America/Los_Angeles'} + ``` +## Run the example + +``` +$ ./gradlew classpath +$ embulk run example/config.yml -Ilib +``` ## Build diff --git a/build.gradle b/build.gradle index 298916f..5f4e45a 100644 --- a/build.gradle +++ b/build.gradle @@ -13,14 +13,14 @@ configurations { provided } -version = "0.1.0" +version = "0.0.1" sourceCompatibility = 1.7 targetCompatibility = 1.7 dependencies { - compile "org.embulk:embulk-core:0.8.6" - provided "org.embulk:embulk-core:0.8.6" + compile "org.embulk:embulk-core:0.8.+" + provided "org.embulk:embulk-core:0.8.+" // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION" testCompile "junit:junit:4.+" } @@ -78,7 +78,7 @@ Gem::Specification.new do |spec| spec.description = %[To Csv] spec.email = ["civitaspo@gmail.com"] spec.licenses = ["MIT"] - # TODO set this: spec.homepage = "https://github.com/civitaspo/embulk-filter-to_csv" + spec.homepage = "https://github.com/civitaspo/embulk-filter-to_csv" spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] spec.test_files = spec.files.grep(%r"^(test|spec)/") diff --git a/example/config.yml b/example/config.yml new file mode 100644 index 0000000..8cbe219 --- /dev/null +++ b/example/config.yml @@ -0,0 +1,31 @@ +in: + type: file + path_prefix: example/data.csv + parser: + type: csv + charset: UTF-8 + newline: CRLF + null_string: 'NULL' + skip_header_lines: 1 + comment_line_marker: '#' + columns: + - {name: time, type: timestamp, format: "%Y-%m-%d"} + - {name: id, type: 
long} + - {name: name, type: string} + - {name: score, type: double} +filters: + - type: to_csv + column_name: payload + delimiter: "\t" + newline: CRLF + newline_in_field: LF + charset: UTF-8 + quote_policy: MINIMAL + quote: '"' + escape: "\\" + null_string: "\\N" + default_timezone: 'UTC' + column_options: + time: {format: '%Y-%m-%d %H:%M:%S', timezone: 'America/Los_Angeles'} +out: + type: stdout diff --git a/example/data.csv b/example/data.csv new file mode 100644 index 0000000..bfdf207 --- /dev/null +++ b/example/data.csv @@ -0,0 +1,100 @@ +time,id,name,score +2015-07-13,0,Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY,1370 +2015-07-13,1,VmjbjAA0tOoSEPv_vKAGMtD_0aXZji0abGe7_VXHmUQ,3962 +2015-07-13,2,C40P5H1WcBx-aWFDJCI8th6QPEI2DOUgupt_gB8UutE,7323 +2015-07-13,3,Prr0_u_T1ts4myUofBorOJFpCYcOTLOmNBMuRmKIPJU,5905 +2015-07-13,4,AEGIhHVW5cV6Xlb62uvx3TVl3kmh3Do8AvvtLDS7MDw,8378 +2015-07-13,5,eupqWLrnCHr_1UaX4dUInLRxx5Q_cyQ4t0oSJBcw0MA,275 +2015-07-13,6,BN8cQ47EXRb_oCGOoN96bhBldoiyoCp5O_vGHwg0XCg,9303 +2015-07-13,7,RvV35-6jY6MC9_Wnm4nPsmyyfqcr-hlnBt88sXtn1nU,6130 +2015-07-13,8,6OZiuPiJKjWNLMPgiEbJarB0F80lTPYkkePP8LMliv0,6652 +2015-07-13,9,13CgEU_ApAMVE6Ll6Y-mSu-aubskNgHbynj2rj8f8oE,6822 +2015-07-13,10,j1evoWRzKrJR0sfo014ZxhZtKigWDkRip5FwpAHAsmU,1311 +2015-07-13,11,4vBBBcArfMGhediXV5Sn80hj4KkI4nUCllECNKxNgnI,4748 +2015-07-13,12,6LSLQGjv46TWsvXrxYCfM5yIz4JGiGd1eEQI4TC-4yc,43 +2015-07-13,13,bgLJeacIPOMH6sDb5tEmca1oYyaMdfqZomGEI2uby7k,1214 +2015-07-13,14,bRHc-42RqKVv3ORxhVCA4T4dLEXyBzBCQoed8VOrDCo,7048 +2015-07-13,15,ysiB3w-K5jb3FxpQY61OHYTlK9qklz3nW84RLvBnh9s,8795 +2015-07-13,16,Rvn7-tMbQM3q0yWQD8AUdURhFB0ZkzLGdIiDg-AJokM,7838 +2015-07-13,17,FDEI99QVJ8xRTOiQ-UDVlPMOBfuA0IwIAbJ872XnKOo,9507 +2015-07-13,18,lZUazYHDEGbQbzN7vEFeLjmnzp1wsjR0D8r8f7Cs6x0,3378 +2015-07-13,19,WmDFEQsDPSnVs8AiAdO3QJqlSFer1K0I8z7F0cl_WRk,1661 +2015-07-13,20,OEDSi7YIj4OjMNqTw12EA04BNtNuVWva6YRhokxL4xQ,5934 +2015-07-13,21,fXYhm19m2FsbWcRQGqJvVOSl2ZIRSNhWTfke-iG8e7Q,680 
+2015-07-13,22,LK59zfxizCwr5CI2Wu88B8gY8-G4OeyAXZobplwGzKk,8758 +2015-07-13,23,8i5TVZorCp4YATsaxgybkdOHcmDywvb35Sf-Eb-sl9E,8392 +2015-07-13,24,MrM9vy1U-9_OEYOQAxbshenvvUGdCZfqjx7l3KKBQ2I,8708 +2015-07-13,25,miVWwEwur_7baTxIBHUT9y351AU3tnAcCXgBzvyUR5I,2843 +2015-07-13,26,_vxViqC02KVb7RRBeDGYs9VZ52KB8QmvguzSXUYGfwI,6681 +2015-07-13,27,Ui6BqkQDipo5kQEeVUuC2OFFIB1O4T8ALlM2GI_zvtk,7542 +2015-07-13,28,OT3VLH-RdK0sIgQM3f6LIbBa_rt0YzCD5YOw4qpu6p8,5791 +2015-07-13,29,vassmNeEo_jbn88g7QP58mTxH-b1jhHfwFhy-FL6T8c,9613 +2015-07-13,30,VjzTphngC6V5fphi9fkGeYGCPIQNpDajfkHxrJopF6k,3064 +2015-07-13,31,aqw27tMVvSsLJ8EEY3hphHMb0BRLm-LZysjVV3aX7pQ,7862 +2015-07-13,32,ZXepGbCv7Yw_ejNQyAPjrqG_VwNH_RZoG8lKODl-f9c,397 +2015-07-13,33,-yRoubVSa0oPfg0E1Gh7zYBQfBO8dIxZvQH9c5OsZAU,5003 +2015-07-13,34,UkhBEKU7G0rV58Urs6JTAgC0UF5Y2kP-dffmE6H4nGs,6514 +2015-07-13,35,ktLO3RTpHLZon7AhE9XMwPPh0t_GiOpS8vwCCqoPPnk,8634 +2015-07-13,36,3ktjc_W87j3S8qLOJ0CVEVSSpz_nUAEQVBsqOMabrp0,3679 +2015-07-13,37,KscV-oPqhG_CZXYUgdCmekKdR9FIT5tSt7rd3wpQDcU,1013 +2015-07-13,38,VFiC8YyBk6zZk5bpfZG8s1a3kYfMA1zvnbs6DDSplGY,1556 +2015-07-13,39,s0bxCQyW048GkhEAoEzXYGcTV8BZo6MLnRhL62nepYk,2844 +2015-07-13,40,aWbMyvSxxTqrVONKAeQVvqi_bGqROu9UeR5NqPPlI4A,8035 +2015-07-13,41,qfjEvEY8XSgMEmc-vIZLinOeIdIz6xprQbsYAe0i2WU,3205 +2015-07-13,42,NK2ddaghTrUTS6Y7U1e-l57922ccVOKnqlODcA6lyBQ,302 +2015-07-13,43,JRQpF1luRmNk2stUaZzDQDj93hy4RSW_iWybVgsgzJA,6534 +2015-07-13,44,lz7bs1xZi4qdWLE7fQwpykWDNgp_o9oUuCZXipSLSqw,9250 +2015-07-13,45,TxcwVGwelHKJws_6Q0Nk6I4Eeo9sSThM7M9KorqIGhA,5549 +2015-07-13,46,u_uy6k3TgUIp3NWMFJ8EOH1mKtFhozGBD208z9um88s,3624 +2015-07-13,47,RaI9xr82f0D7Jjuc4QY8Rz-UlCg3V5tw7KgJtczEo44,5278 +2015-07-13,48,u6Nqudxl6vrbKGemO8xXgYojhtBGK3SQkTRPSYcaZuI,9588 +2015-07-13,49,r-IgfD4fE9TiQWarsVxO_4AdieYIUZ9cczPD44_snQ4,4795 +2015-07-13,50,KIiUpd04d3zYDul1mFlcJ3934AYvA_YeXDYG089ub-M,4344 +2015-07-13,51,zZs0iuqm7liPKKHHn8wz-kNvd2zLCqRdXAng4B3gL0A,2116 +2015-07-13,52,Rg7T2IsH0-HIvhgq0mNRC-4q5JoZ5Rcjq4tP7dz_3Ew,5323 
+2015-07-13,53,uBNgdXPL6kZGXP-gTic2N-uDRCxAtmI-KixkJWgrObA,9 +2015-07-13,54,fQ_TLG3oByt3sDqM3Kruo69fBd1qLMXbbg10myfFXkQ,2471 +2015-07-13,55,0uNd4TrRpEA1lY_zWikyELZ3MmCTzON_5ftfi-45wic,9831 +2015-07-13,56,Jfp4VCtsFElA6UzzZyPxOwegfGqsYwrimSFp59YshTs,3177 +2015-07-13,57,KAHSwcCwblbPRysuImbzUxx0SLAMIMb6LmMAXJBjUww,1182 +2015-07-13,58,wuyDbV5ljr5275eGWhAe8wkElCzd2d_gRW3SpBkLIyY,183 +2015-07-13,59,R3KTTvKRvPn6vu4qtooBbqYmwdOCC9vjmcsnf_fyu5g,5001 +2015-07-13,60,Pgsf32JIv2cUMdTE9Vydh2Y36B_Xi4T1ufIy7QiKFSU,6182 +2015-07-13,61,EZmz-tWhPPAsXsDZms_HHsDLKBOuZisUDotr72xXQnI,5228 +2015-07-13,62,mk4y32O73DU2z65dFuW1PvIokdB7bB7btUnCoDlSVxM,8094 +2015-07-13,63,fs1HvYjpOvAHnT5W1rCPU9A3k8_Px2XwfprrLrkQibM,5849 +2015-07-13,64,x8WAAde6AqG2YaOEIpCFMzItRrfUXqgc8bwcoWSiMEo,6076 +2015-07-13,65,zuvlwNyn8AgPEvg6qIxzkUp_ClPkMn5A__YyksWbxTo,6439 +2015-07-13,66,ZWWjbJAqVtZz3AzCpacgEabm7SMloLHPBTlS3NMk7GA,6531 +2015-07-13,67,wdHfAVpHp9rFaGhZOC81AusTsZX0KHxTf5RkFBw6gpI,8088 +2015-07-13,68,hw8HUkIQMSS-2gAT7rvA2kgdhXfhHlySxKtssINvcFc,7808 +2015-07-13,69,x1_SLENL6-M1y0n5qmfBF1-GCslEHpVM4Fo1Rdz9Ofg,3617 +2015-07-13,70,E2Uj3TGAwd_B3FOS6KQ1Gjyql2YpoNtbdzWBTUOWmxY,8401 +2015-07-13,71,WkpwSIP4fA42gYd1H3ohw7EmtqdQSqh4ooA7aX8v_7o,1309 +2015-07-13,72,xDdMCHpSKFSZWQBJJgNzNh1R4hXouCsUfKFZpio5cgY,7867 +2015-07-13,73,l0QVMlih2NmGSajDXytku9Em9p61erNKe1LEyk1VZ-Q,7964 +2015-07-13,74,R8G5juHaD9sit1oujjp4FoXzXJT7hdIjEY3Lhu-ep6o,5680 +2015-07-13,75,Ckpy3y166odB33VVWb27XNG_Wi65_qyikeL7dGHceSE,8603 +2015-07-13,76,elFu5tPgUNzhuyswgr3QS7TXR2fInI4PWVZIEffxq6c,4972 +2015-07-13,77,kz663CgkMh9VfcJrfMZb735vJJWYUPAuaskNeg7xRDk,8396 +2015-07-13,78,evuBVl0RR1XQfJHN4jxSBpLcKxjZ7RtpDGYrU2ONYZA,6433 +2015-07-13,79,ZbIJwmWRWscOurtrCam-iLB2mIqREwQwGFRfVYzGxwk,2917 +2015-07-13,80,mzCWiiJFzo1R_anxGFALosK0eKvGfv_RT7iRGZnL790,3162 +2015-07-13,81,JyrXoXLq5RpRwwXNpiW1NFK6ZkVmS55hJsNBGsuY7xY,2385 +2015-07-13,82,fO7A_MQGh3Zojp6HlVZayvJHLu_RQ082ix3Y6BlRCu0,5965 +2015-07-13,83,ib-pOMBLU1sN5fyyJbAElIdWEJgkoqRcBuwo6CVVYsk,3265 
+2015-07-13,84,X_6Ren6P7TpqyiWViO72kEwIulMqbTU_v8eAGfEo8k0,8049 +2015-07-13,85,hNI30i9IYx7EreMyG7rI56Y-ZtrRe4sBYjzKMnSrL5I,9222 +2015-07-13,86,kzokOacUOXELAeIHfPbnl-Er8rnHYq2JnksqN1roOSQ,2972 +2015-07-13,87,qKIfkhQObWMadIi5vshcDRv95je4TYcAPSYITfwVTRk,5390 +2015-07-13,88,9xKf3bfWj8Gr1NNocYHZuL0kIkAVD750LCMYDZ-R1tA,4759 +2015-07-13,89,ohbmpvNy7aaaIVZ74SlHSfm0ffdwV-AqJP1bfDSjNUU,2279 +2015-07-13,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,9034 +2015-07-13,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,3945 +2015-07-13,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,8109 +2015-07-13,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,4774 +2015-07-13,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,7253 +2015-07-13,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,3166 +2015-07-13,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,3695 +2015-07-13,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,5065 +,,,9170 diff --git a/src/main/java/org/embulk/filter/to_csv/ToCsvFilterPlugin.java b/src/main/java/org/embulk/filter/to_csv/ToCsvFilterPlugin.java index 0a8efd5..05f0446 100644 --- a/src/main/java/org/embulk/filter/to_csv/ToCsvFilterPlugin.java +++ b/src/main/java/org/embulk/filter/to_csv/ToCsvFilterPlugin.java @@ -1,56 +1,339 @@ package org.embulk.filter.to_csv; import com.google.common.base.Optional; +import com.google.common.collect.ImmutableList; import org.embulk.config.Config; import org.embulk.config.ConfigDefault; -import org.embulk.config.ConfigDiff; import org.embulk.config.ConfigSource; import org.embulk.config.Task; import org.embulk.config.TaskSource; import org.embulk.spi.Column; +import org.embulk.spi.ColumnVisitor; +import org.embulk.spi.Exec; import org.embulk.spi.FilterPlugin; +import org.embulk.spi.Page; +import org.embulk.spi.PageBuilder; import org.embulk.spi.PageOutput; +import org.embulk.spi.PageReader; import org.embulk.spi.Schema; +import org.embulk.spi.time.Timestamp; +import org.embulk.spi.time.TimestampFormatter; +import org.embulk.spi.type.Type; +import 
org.embulk.spi.type.Types;
+import org.embulk.spi.util.Newline;
+import org.embulk.spi.util.Timestamps;
+import org.msgpack.value.Value;
+import org.slf4j.Logger;
+
+import java.util.Map;
+
+// Ported from https://github.com/embulk/embulk/blob/c37ddc30520c10760f93f1e489890ab639859dd4/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
 
+/**
+ * Filter plugin that serializes every input record into one delimiter-separated line
+ * and emits that line as the single string column of the output schema.
+ */
 public class ToCsvFilterPlugin
         implements FilterPlugin
 {
+    /**
+     * Quoting strategy applied to each formatted field.
+     */
+    public enum QuotePolicy
+    {
+        /** Surround every field with the quote character. */
+        ALL("ALL"),
+        /** Quote only fields that contain a quote, delimiter or newline, or that equal the null string. */
+        MINIMAL("MINIMAL"),
+        /** Never quote; delimiters and newlines are prefixed with the escape character instead. */
+        NONE("NONE");
+
+        private final String string;
+
+        QuotePolicy(String string)
+        {
+            this.string = string;
+        }
+
+        /**
+         * @return the textual name of this policy
+         */
+        public String getString()
+        {
+            return string;
+        }
+    }
+
+    /**
+     * Per-column timestamp settings, used as the value type of {@code column_options}.
+     */
+    public interface TimestampColumnOption
+            extends Task, TimestampFormatter.TimestampColumnOption
+    {
+    }
+
     public interface PluginTask
-            extends Task
+            extends Task, TimestampFormatter.Task
     {
-        // configuration option 1 (required integer)
-        @Config("option1")
-        public int getOption1();
+        /** Name of the single output column that carries the formatted line. */
+        @Config("column_name")
+        @ConfigDefault("\"payload\"")
+        String getColumnName();
+
+        /** Whether a record holding the input column names is emitted before the data. */
+        @Config("header_line")
+        @ConfigDefault("false")
+        boolean getHeaderLine();
+
+        /** Character placed between fields. */
+        @Config("delimiter")
+        @ConfigDefault("\",\"")
+        char getDelimiterChar();
 
-        // configuration option 2 (optional string, null is not allowed)
-        @Config("option2")
-        @ConfigDefault("\"myvalue\"")
-        public String getOption2();
+        /** Character that surrounds a quoted field. */
+        @Config("quote")
+        @ConfigDefault("\"\\\"\"")
+        char getQuoteChar();
 
-        // configuration option 3 (optional string, null is allowed)
-        @Config("option3")
+        /** Policy deciding which fields get quoted. */
+        @Config("quote_policy")
+        @ConfigDefault("\"MINIMAL\"")
+        QuotePolicy getQuotePolicy();
+
+        /** Escape character; when absent, {@code open} falls back to a policy-dependent default. */
+        @Config("escape")
         @ConfigDefault("null")
-        public Optional<String> getOption3();
+        Optional<Character> getEscapeChar();
+
+        /** Text written in place of a null value. */
+        @Config("null_string")
+        @ConfigDefault("\"\"")
+        String getNullString();
+
+        /** Newline sequence that replaces CR, LF and CRLF found inside a field. */
+        @Config("newline_in_field")
+        @ConfigDefault("\"LF\"")
+        Newline getNewlineInField();
+
+        /** Timestamp options keyed by input column name. */
+        @Config("column_options")
+        @ConfigDefault("{}")
+        Map<String, TimestampColumnOption> getColumnOptions();
     }
 
+    private final Logger logger = Exec.getLogger(ToCsvFilterPlugin.class);
+    // The output schema consists of exactly one string column at index 0.
+    private static final int INDEX = 0;
+    private static final Type TYPE = Types.STRING;
+
+
+    /**
+     * Loads the task, checks that every key of {@code column_options} names an input
+     * column, and runs the control with a one-column string output schema.
+     */
     @Override
     public void transaction(ConfigSource config, Schema inputSchema,
             FilterPlugin.Control control)
     {
         PluginTask task = config.loadConfig(PluginTask.class);
 
-        Schema outputSchema = inputSchema;
+        // validate column_options
+        for (String columnName : task.getColumnOptions().keySet()) {
+            inputSchema.lookupColumn(columnName);  // throws SchemaConfigException
+        }
+
+        Schema outputSchema = new Schema(ImmutableList.of(new Column(INDEX, task.getColumnName(), TYPE)));
+
+        logger.debug("output schema: {}", outputSchema);
 
         control.run(task.dump(), outputSchema);
     }
 
+    /**
+     * Returns a {@link PageOutput} that formats each record of every added page into
+     * one line and writes it to {@code output} through a page builder.
+     */
     @Override
-    public PageOutput open(TaskSource taskSource, Schema inputSchema,
-            Schema outputSchema, PageOutput output)
+    public PageOutput open(TaskSource taskSource, final Schema inputSchema,
+            final Schema outputSchema, final PageOutput output)
+    {
+        final PluginTask task = taskSource.loadTask(PluginTask.class);
+        final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, inputSchema, task.getColumnOptions());
+        final char delimiter = task.getDelimiterChar();
+        final QuotePolicy quotePolicy = task.getQuotePolicy();
+        // A NUL quote character falls back to the double quote.
+        final char quote = task.getQuoteChar() != '\0' ? task.getQuoteChar() : '"';
+        // Default escape: backslash when nothing is quoted, otherwise the quote character itself.
+        final char escape = task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : quote);
+        final String nullString = task.getNullString();
+        final String newlineInField = task.getNewlineInField().getString();
+        final boolean writeHeaderLine = task.getHeaderLine();
+
+        return new PageOutput() {
+            // Cleared after the header record has been emitted once by this PageOutput.
+            private boolean shouldWriteHeaderLine = writeHeaderLine;
+
+            private final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
+            private final PageReader pageReader = new PageReader(inputSchema);
+            private final Column outputColumn = outputSchema.getColumn(INDEX);
+            private final String delimiterString = String.valueOf(delimiter);
+            // Accumulates the fields of the record currently being formatted.
+            private final StringBuilder lineBuilder = new StringBuilder();
+            private final ColumnVisitor visitor = new ColumnVisitor() {
+                @Override
+                public void booleanColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        addValue(Boolean.toString(pageReader.getBoolean(column)));
+                    } else {
+                        addNullString();
+                    }
+                }
+
+                @Override
+                public void longColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        addValue(Long.toString(pageReader.getLong(column)));
+                    } else {
+                        addNullString();
+                    }
+                }
+
+                @Override
+                public void doubleColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        addValue(Double.toString(pageReader.getDouble(column)));
+                    } else {
+                        addNullString();
+                    }
+                }
+
+                @Override
+                public void stringColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        addValue(pageReader.getString(column));
+                    } else {
+                        addNullString();
+                    }
+                }
+
+                @Override
+                public void timestampColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        Timestamp value = pageReader.getTimestamp(column);
+                        addValue(timestampFormatters[column.getIndex()].format(value));
+                    } else {
+                        addNullString();
+                    }
+                }
+
+                @Override
+                public void jsonColumn(Column column)
+                {
+                    addDelimiter(column);
+                    if (!pageReader.isNull(column)) {
+                        Value value = pageReader.getJson(column);
+                        addValue(value.toJson());
+                    } else {
+                        addNullString();
+                    }
+                }
+
+
+                // Every column except the first is preceded by the delimiter.
+                private void addDelimiter(Column column)
+                {
+                    if (column.getIndex() != 0) {
+                        lineBuilder.append(delimiterString);
+                    }
+                }
+
+                private void addValue(String v)
+                {
+                    lineBuilder.append(setEscapeAndQuoteValue(v, delimiter, quotePolicy, quote, escape, newlineInField, nullString));
+                }
+
+                // Null values are written verbatim, without escaping or quoting.
+                private void addNullString()
+                {
+                    lineBuilder.append(nullString);
+                }
+            };
+
+            @Override
+            public void add(Page page)
+            {
+                writeHeader();
+
+                pageReader.setPage(page);
+                while (pageReader.nextRecord()) {
+                    pageReader.getSchema().visitColumns(visitor);
+                    addRecord();
+                }
+            }
+
+            @Override
+            public void finish()
+            {
+                pageBuilder.finish();
+            }
+
+            @Override
+            public void close()
+            {
+                // Close the reader as well so the page it still holds is not leaked;
+                // the builder is closed even if closing the reader fails.
+                try {
+                    pageReader.close();
+                }
+                finally {
+                    pageBuilder.close();
+                }
+            }
+
+            // Emits the accumulated line as one output record and resets the buffer.
+            private void addRecord()
+            {
+                pageBuilder.setString(outputColumn, lineBuilder.toString());
+                pageBuilder.addRecord();
+                clearLineBuilder();
+            }
+
+            private void clearLineBuilder()
+            {
+                // cf. http://ameblo.jp/wataru420/entry-10962037844.html
+                lineBuilder.setLength(0);
+            }
+
+            // Emits the input column names as one record, at most once per PageOutput.
+            private void writeHeader()
+            {
+                if (!shouldWriteHeaderLine) {
+                    return;
+                }
+
+                for (Column column : pageReader.getSchema().getColumns()) {
+                    if (column.getIndex() != 0) {
+                        lineBuilder.append(delimiterString);
+                    }
+                    lineBuilder.append(setEscapeAndQuoteValue(column.getName(), delimiter, quotePolicy, quote, escape, newlineInField, nullString));
+                }
+                addRecord();
+                shouldWriteHeaderLine = false;
+            }
+        };
+    }
+
+    /**
+     * Escapes and, depending on the policy, quotes a single field value.
+     *
+     * <p>Unless the policy is {@code NONE}, each quote character is prefixed with the
+     * escape character. CR, LF and CRLF are each replaced by {@code newline}. Under
+     * {@code NONE}, delimiters and newlines are prefixed with the escape character and
+     * the value is never quoted. Otherwise the value is quoted when the policy is
+     * {@code ALL}, when it equals {@code nullString} under {@code MINIMAL}, or when it
+     * contains a quote, delimiter or newline.
+     *
+     * @param v field value to format (non-null)
+     * @param delimiter field delimiter
+     * @param policy quoting policy
+     * @param quote quote character
+     * @param escape escape character
+     * @param newline replacement for newlines inside the value
+     * @param nullString text used for null values
+     * @return the formatted field
+     */
+    private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
+    {
+        StringBuilder escapedValue = new StringBuilder();
+        char previousChar = ' ';
+
+        boolean isRequireQuote = (policy == QuotePolicy.ALL || policy == QuotePolicy.MINIMAL && v.equals(nullString));
+
+        for (int i = 0; i < v.length(); i++) {
+            char c = v.charAt(i);
+
+            if (policy != QuotePolicy.NONE && c == quote) {
+                escapedValue.append(escape);
+                escapedValue.append(c);
+                isRequireQuote = true;
+            } else if (c == '\r') {
+                if (policy == QuotePolicy.NONE) {
+                    escapedValue.append(escape);
+                }
+                escapedValue.append(newline);
+                isRequireQuote = true;
+            } else if (c == '\n') {
+                // The LF of a CRLF pair was already handled when the CR was seen.
+                if (previousChar != '\r') {
+                    if (policy == QuotePolicy.NONE) {
+                        escapedValue.append(escape);
+                    }
+                    escapedValue.append(newline);
+                    isRequireQuote = true;
+                }
+            } else if (c == delimiter) {
+                if (policy == QuotePolicy.NONE) {
+                    escapedValue.append(escape);
+                }
+                escapedValue.append(c);
+                isRequireQuote = true;
+            } else {
+                escapedValue.append(c);
+            }
+            previousChar = c;
+        }
+
+        if (policy != QuotePolicy.NONE && isRequireQuote) {
+            return setQuoteValue(escapedValue.toString(), quote);
+        } else {
+            return escapedValue.toString();
+        }
+    }
+
+    /**
+     * Surrounds {@code v} with the quote character on both sides.
+     */
+    private String setQuoteValue(String v, char quote)
     {
-        PluginTask task = taskSource.loadTask(PluginTask.class);
-        // Write your code here :)
-        throw new UnsupportedOperationException("ToCsvFilterPlugin.open method is not implemented yet");
+        return String.valueOf(quote) + v + quote;
     }
 }