Skip to content

Commit fa0a023

Browse files
committed
Support enclosing character #1
1 parent d449b46 commit fa0a023

File tree

9 files changed

+251
-89
lines changed

9 files changed

+251
-89
lines changed

README.md

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,19 @@ Support following features.
1111
* Updating columns by specified expression(fixed value or dynamic value)
1212
* Ordering columns by specified order
1313

14+
## Related libraries document
15+
16+
* [SpEL provided by Spring Framework](https://docs.spring.io/spring-framework/docs/current/reference/html/core.html#expressions)
17+
* [FlatFileItemReader/FlatFileItemWriter provided by Spring Batch](https://docs.spring.io/spring-batch/docs/current/reference/html/readersAndWriters.html#flatFiles)
18+
1419
## Support CSV file specifications
1520

1621
* The first line is header
1722
* The Record separator is LF or CRLF (Writing separator is OS dependent character)
18-
* No support an enclosing character (no support include a comma and record separator in column value)
23+
* Support an enclosing character(`"`) (a column value can include the delimiter(`,`), the enclosing character and record separators)
24+
* Support custom delimiter character(e.g. `\t`)
1925
* The default encoding is UTF-8 (can be changed to any encoding using a command line argument)
2026

21-
> **NOTE:**
22-
>
23-
> Will have a plan to supporting various csv format at future.
24-
2527
## How to specify target files
2628

2729
Search files that matches conditions specified by `--dir` and `--files`.
@@ -95,6 +97,10 @@ Search files that matches conditions specified by `--dir` and `--files`.
9597
bar:
9698
"10": "2"
9799
"20": "1"
100+
--delimiter
101+
delimiter character (default: ",")
102+
--ignore-escaped-enclosure
103+
whether to skip escaping of enclosing characters contained in a column value on writing (default: false)
98104
--h (--help)
99105
print help
100106
@@ -166,3 +172,9 @@ $ ./mvnw clean verify -DskipTests
166172
```
167173
$ java -jar target/csv-bulk-commands.jar
168174
```
175+
176+
### How to specify a tab character as the delimiter with bash
177+
178+
```
179+
$ java -jar target/csv-bulk-commands.jar ... --delimiter=$'\t'
180+
```

pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
<groupId>org.springframework.boot</groupId>
2222
<artifactId>spring-boot-starter</artifactId>
2323
</dependency>
24+
<dependency>
25+
<groupId>org.springframework.batch</groupId>
26+
<artifactId>spring-batch-infrastructure</artifactId>
27+
</dependency>
2428

2529
<dependency>
2630
<groupId>org.springframework.boot</groupId>
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package com.example.tools;
2+
3+
import org.slf4j.Logger;
4+
import org.slf4j.LoggerFactory;
5+
import org.springframework.batch.item.ExecutionContext;
6+
import org.springframework.batch.item.ItemStreamReader;
7+
import org.springframework.batch.item.ItemStreamWriter;
8+
import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
9+
import org.springframework.batch.item.file.builder.FlatFileItemWriterBuilder;
10+
import org.springframework.batch.item.file.separator.DefaultRecordSeparatorPolicy;
11+
import org.springframework.batch.item.file.transform.DefaultFieldSet;
12+
import org.springframework.batch.item.file.transform.ExtractorLineAggregator;
13+
import org.springframework.batch.item.file.transform.FieldSet;
14+
import org.springframework.beans.factory.InitializingBean;
15+
import org.springframework.core.io.FileSystemResource;
16+
import org.springframework.util.StringUtils;
17+
18+
import java.nio.charset.Charset;
19+
import java.nio.file.Path;
20+
import java.util.ArrayList;
21+
import java.util.Arrays;
22+
import java.util.List;
23+
import java.util.Optional;
24+
import java.util.stream.Collectors;
25+
26+
public abstract class AbstractColumnProcessor {
27+
28+
protected final Logger logger = LoggerFactory.getLogger(getClass());
29+
30+
protected List<String> readHeaderColumns(Path file, Charset encoding, String delimiter) throws Exception {
31+
List<String> headerColumns = new ArrayList<>();
32+
ItemStreamReader<String[]> headerReader = new FlatFileItemReaderBuilder<String[]>()
33+
.recordSeparatorPolicy(new DefaultRecordSeparatorPolicy())
34+
.lineTokenizer(x -> new DefaultFieldSet(StringUtils.delimitedListToStringArray(x, Optional.ofNullable(delimiter).orElse(","))))
35+
.fieldSetMapper(FieldSet::getValues)
36+
.encoding(encoding.name())
37+
.resource(new FileSystemResource(file.toFile()))
38+
.name("default")
39+
.linesToSkip(0)
40+
.maxItemCount(1)
41+
.build();
42+
try {
43+
headerReader.open(new ExecutionContext());
44+
String[] line = headerReader.read();
45+
if (line == null) {
46+
return null;
47+
}
48+
headerColumns.addAll(Arrays.asList(line));
49+
} finally {
50+
headerReader.close();
51+
}
52+
return headerColumns;
53+
}
54+
55+
protected List<String[]> readDataColumns(String[] columnNames, Path file, Charset encoding, String delimiter) throws Exception {
56+
ItemStreamReader<String[]> reader = new FlatFileItemReaderBuilder<String[]>()
57+
.delimited().delimiter(Optional.ofNullable(delimiter).orElse(","))
58+
.names(columnNames)
59+
.fieldSetMapper(FieldSet::getValues)
60+
.recordSeparatorPolicy(new DefaultRecordSeparatorPolicy())
61+
.encoding(encoding.name())
62+
.resource(new FileSystemResource(file.toFile()))
63+
.name("default")
64+
.linesToSkip(1).build();
65+
List<String[]> lines = new ArrayList<>();
66+
reader.open(new ExecutionContext());
67+
try {
68+
String[] items;
69+
while ((items = reader.read()) != null) {
70+
lines.add(items);
71+
}
72+
} finally {
73+
reader.close();
74+
}
75+
return lines;
76+
}
77+
78+
protected void writeLines(List<String[]> lines, Path file, Charset encoding, String delimiter, Boolean ignoreEscapedEnclosure) throws Exception {
79+
EnclosableDelimitedLineAggregator<String[]> aggregator = new EnclosableDelimitedLineAggregator<>();
80+
aggregator.setIgnoreEscapedEnclosure(Optional.ofNullable(ignoreEscapedEnclosure).orElse(false));
81+
aggregator.setDelimiter(Optional.ofNullable(delimiter).orElse(",").toCharArray()[0]);
82+
aggregator.afterPropertiesSet();
83+
ItemStreamWriter<String[]> itemWriter = new FlatFileItemWriterBuilder<String[]>()
84+
.lineAggregator(aggregator)
85+
.encoding(encoding.name())
86+
.resource(new FileSystemResource(file.toFile()))
87+
.name("default")
88+
.shouldDeleteIfEmpty(false)
89+
.transactional(false)
90+
.build();
91+
itemWriter.open(new ExecutionContext());
92+
try {
93+
itemWriter.write(lines);
94+
} finally {
95+
itemWriter.close();
96+
}
97+
}
98+
99+
private static class EnclosableDelimitedLineAggregator<T> extends ExtractorLineAggregator<T> implements InitializingBean {
100+
protected boolean ignoreEscapedEnclosure = false;
101+
private final String enclosure = "\"";
102+
private final String escapedEnclosure;
103+
private String delimiter;
104+
105+
public EnclosableDelimitedLineAggregator() {
106+
this.escapedEnclosure = this.enclosure + this.enclosure;
107+
this.delimiter = ",";
108+
}
109+
110+
public void setIgnoreEscapedEnclosure(boolean ignoreEscapedEnclosure) {
111+
this.ignoreEscapedEnclosure = ignoreEscapedEnclosure;
112+
}
113+
114+
public void setDelimiter(char delimiter) {
115+
this.delimiter = String.valueOf(delimiter);
116+
}
117+
118+
public void afterPropertiesSet() {
119+
if (this.enclosure.equals(this.delimiter)) {
120+
throw new IllegalStateException("the delimiter and enclosure must be different. [value:" + this.enclosure + "]");
121+
}
122+
}
123+
124+
protected String doAggregate(Object[] fields) {
125+
return Arrays.stream(fields)
126+
.map(Object::toString)
127+
.map((field) -> this.hasTargetChar(field) ? this.encloseAndEscape(field) : field)
128+
.collect(Collectors.joining(this.delimiter));
129+
}
130+
131+
private boolean hasTargetChar(String field) {
132+
return field.contains(this.delimiter) || field.contains(this.enclosure) || this.containsCrlf(field);
133+
}
134+
135+
private boolean containsCrlf(String field) {
136+
return field.contains("\r") || field.contains("\n");
137+
}
138+
139+
private String encloseAndEscape(String field) {
140+
return this.enclosure + (ignoreEscapedEnclosure ? field : field.replace(this.enclosure, this.escapedEnclosure)) + this.enclosure;
141+
}
142+
}
143+
144+
}
Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
package com.example.tools;
22

3-
import org.slf4j.Logger;
4-
import org.slf4j.LoggerFactory;
53
import org.springframework.expression.Expression;
64
import org.springframework.expression.ExpressionParser;
75
import org.springframework.expression.spel.standard.SpelExpressionParser;
86
import org.springframework.expression.spel.support.StandardEvaluationContext;
9-
import org.springframework.util.StringUtils;
107

118
import java.io.IOException;
129
import java.io.UncheckedIOException;
1310
import java.nio.charset.Charset;
14-
import java.nio.file.Files;
1511
import java.nio.file.Path;
1612
import java.util.ArrayList;
1713
import java.util.Arrays;
@@ -20,24 +16,23 @@
2016
import java.util.Map;
2117
import java.util.Objects;
2218

23-
public class AddingColumnProcessor {
19+
public class AddingColumnProcessor extends AbstractColumnProcessor {
2420

2521
static final AddingColumnProcessor INSTANCE = new AddingColumnProcessor();
26-
private static final Logger LOGGER = LoggerFactory.getLogger(AddingColumnProcessor.class);
2722
private static final ExpressionParser EXPRESSION_PARSER = new SpelExpressionParser();
2823

2924
private AddingColumnProcessor() {
3025
// NOP
3126
}
3227

33-
void execute(List<String> columnNames, List<String> columnValues, Path file, Charset encoding, Map<String, Object> valueMappings) {
28+
void execute(List<String> columnNames, List<String> columnValues, Path file, Charset encoding, Map<String, Object> valueMappings, String delimiter, Boolean ignoreEscapedEnclosure) {
3429
try {
35-
List<String> lines = Files.readAllLines(file, encoding);
36-
if (lines.isEmpty()) {
37-
LOGGER.warn("Skip adding because file is empty. file:{}", file);
30+
final List<String> originalHeaderColumns = readHeaderColumns(file, encoding, delimiter);
31+
if (originalHeaderColumns == null) {
32+
logger.warn("Skip adding because file is empty. file:{}", file);
3833
return;
3934
}
40-
List<String> headerColumns = new ArrayList<>(Arrays.asList(StringUtils.commaDelimitedListToStringArray(lines.remove(0))));
35+
final List<String> headerColumns = new ArrayList<>(originalHeaderColumns);
4136
Map<String, Integer> headerIndexMap = new LinkedHashMap<>();
4237
for (String column : headerColumns) {
4338
headerIndexMap.put(column, headerIndexMap.size());
@@ -56,23 +51,26 @@ void execute(List<String> columnNames, List<String> columnValues, Path file, Cha
5651
validColumnValues.add(columnValues.get(entry.getKey()));
5752
}
5853
}
59-
List<String> saveLines = new ArrayList<>();
60-
saveLines.add(StringUtils.collectionToCommaDelimitedString(headerColumns));
61-
for (String line : lines) {
62-
List<String> valueColumns = new ArrayList<>(Arrays.asList(StringUtils.commaDelimitedListToStringArray(line)));
54+
List<String[]> lines = readDataColumns(originalHeaderColumns.toArray(new String[0]), file, encoding, delimiter);
55+
List<String[]> saveLines = new ArrayList<>();
56+
saveLines.add(headerColumns.toArray(new String[0]));
57+
for (String[] items : lines) {
58+
List<String> valueColumns = new ArrayList<>(Arrays.asList(items));
6359
StandardEvaluationContext context = new StandardEvaluationContext();
6460
context.setVariable("_valueMappings", valueMappings);
6561
headerIndexMap.forEach((name, index) -> context.setVariable(name, valueColumns.get(index)));
6662
for (String value : validColumnValues) {
6763
Expression expression = EXPRESSION_PARSER.parseExpression(value);
6864
valueColumns.add(Objects.toString(expression.getValue(context)));
6965
}
70-
saveLines.add(StringUtils.collectionToCommaDelimitedString(valueColumns));
66+
saveLines.add(valueColumns.toArray(new String[0]));
7167
}
72-
73-
Files.write(file, saveLines, encoding);
68+
writeLines(saveLines, file, encoding, delimiter, ignoreEscapedEnclosure);
7469
} catch (IOException e) {
7570
throw new UncheckedIOException(e);
71+
} catch (Exception e) {
72+
throw new IllegalStateException(e);
7673
}
7774
}
75+
7876
}

src/main/java/com/example/tools/CsvBulkCommandsApplicationRunner.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ public void run(ApplicationArguments args) throws IOException {
5454
System.out.println(" bar:");
5555
System.out.println(" \"10\": \"2\"");
5656
System.out.println(" \"20\": \"1\"");
57+
System.out.println(" --delimiter");
58+
System.out.println(" delimiter character (default: \",\")");
59+
System.out.println(" --ignore-escaped-enclosure");
60+
System.out.println(" whether escape a enclosing character on writing (default: false)");
5761
System.out.println(" --h (--help)");
5862
System.out.println(" print help");
5963
System.out.println();
@@ -155,30 +159,38 @@ public void run(ApplicationArguments args) throws IOException {
155159
valueMappings = Collections.emptyMap();
156160
}
157161

158-
LOGGER.info("Start. command:{} dir:{} files:{} column-names:{} column-values:{} encoding:{} value-mappings:{}", command, dir, files, columnNames, columnValues, encoding, valueMappings);
162+
String delimiter = args.containsOption("delimiter") ?
163+
args.getOptionValues("delimiter").stream().findFirst().orElse(",") :
164+
",";
165+
166+
Boolean ignoreEscapedEnclosure = args.containsOption("ignore-escaped-enclosure") &&
167+
Boolean.parseBoolean(args.getOptionValues("ignore-escaped-enclosure").stream().findFirst().orElse(null));
168+
169+
LOGGER.info("Start. command:{} dir:{} files:{} column-names:{} column-values:{} encoding:{} value-mappings:{} delimiter:{} ignore-escaped-enclosure:{}",
170+
command, dir, files, columnNames, columnValues, encoding, valueMappings, delimiter, ignoreEscapedEnclosure);
159171

160172
Files.walk(Paths.get(dir))
161173
.filter(Files::isRegularFile)
162174
.filter(file -> files.stream().anyMatch(x -> file.toString().replace('\\', '/').endsWith(x)))
163-
.sorted().forEach(file -> execute(command, columnNames, columnValues, file, encoding, valueMappings));
175+
.sorted().forEach(file -> execute(command, columnNames, columnValues, file, encoding, valueMappings, delimiter, ignoreEscapedEnclosure));
164176

165177
LOGGER.info("End.");
166178
}
167179

168-
private void execute(String command, List<String> columnNames, List<String> columnValues, Path file, Charset encoding, Map<String, Object> valueMappings) {
180+
private void execute(String command, List<String> columnNames, List<String> columnValues, Path file, Charset encoding, Map<String, Object> valueMappings, String delimiter, Boolean ignoreEscapedEnclosure) {
169181
LOGGER.info("processing file:{}", file);
170182
switch (command) {
171183
case "adding-columns":
172-
AddingColumnProcessor.INSTANCE.execute(columnNames, columnValues, file, encoding, valueMappings);
184+
AddingColumnProcessor.INSTANCE.execute(columnNames, columnValues, file, encoding, valueMappings, delimiter, ignoreEscapedEnclosure);
173185
break;
174186
case "deleting-columns":
175-
DeletingColumnProcessor.INSTANCE.execute(columnNames, file, encoding);
187+
DeletingColumnProcessor.INSTANCE.execute(columnNames, file, encoding, delimiter, ignoreEscapedEnclosure);
176188
break;
177189
case "updating-columns":
178-
UpdatingColumnProcessor.INSTANCE.execute(columnNames, columnValues, file, encoding, valueMappings);
190+
UpdatingColumnProcessor.INSTANCE.execute(columnNames, columnValues, file, encoding, valueMappings, delimiter, ignoreEscapedEnclosure);
179191
break;
180192
case "ordering-columns":
181-
OrderingColumnProcessor.INSTANCE.execute(columnNames, file, encoding);
193+
OrderingColumnProcessor.INSTANCE.execute(columnNames, file, encoding, delimiter, ignoreEscapedEnclosure);
182194
break;
183195
default:
184196
throw new UnsupportedOperationException(String.format("'%s' command not support. valid-commands:%s", command, "[adding-columns, deleting-columns, updating-columns, ordering-columns]"));

0 commit comments

Comments
 (0)