Skip to content
This repository has been archived by the owner on Jan 22, 2019. It is now read-only.

Commit

Permalink
Implement #26
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed May 21, 2014
1 parent 8479018 commit 579de90
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 24 deletions.
2 changes: 2 additions & 0 deletions release-notes/VERSION
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Project: jackson-dataformat-csv
Version: 2.4.0 (xx-xxx-2014)

#26: Inconsistent quoting of headers, values; add
`CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING` to allow more optimal checks.
#32: Allow disabling of quoteChar
(suggested by Jason D)
#40: Allow (re)ordering of columns of Schema, using `CsvSchema.sortedBy(...)`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,7 @@ protected Writer _createWriter(OutputStream out, JsonEncoding enc, IOContext ctx

protected CsvGenerator _createGenerator(IOContext ctxt, Writer out) throws IOException
{
int feats = _csvGeneratorFeatures;
CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, feats,
CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, _csvGeneratorFeatures,
_objectCodec, out,
_cfgColumnSeparator, _cfgQuoteCharacter, _cfgLineSeparator);
// any other initializations? No?
Expand All @@ -434,7 +433,7 @@ protected Reader _createReader(InputStream in, JsonEncoding enc, IOContext ctxt)
if (enc == null || enc == JsonEncoding.UTF8) {
// 28-May-2012, tatu: Custom UTF8 reader should be faster, esp for small input:
// return new InputStreamReader(in, UTF8);
boolean autoClose = ctxt.isResourceManaged() || this.isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
boolean autoClose = ctxt.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
return new UTF8Reader(ctxt, in, autoClose);
}
return new InputStreamReader(in, enc.getJavaName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,26 @@ public class CsvGenerator extends GeneratorBase
*/
public enum Feature {
/**
* Placeholder until we have actual features
* Feature that determines how much work is done before determining that
* a column value requires quoting: when set as <code>true</code>, full
* check is made to only use quoting when it is strictly necessary;
* but when <code>false</code>, a faster but more conservative check
* is made, and possibly quoting is used for values that might not need it.
* Trade-offs is basically between optimal/minimal quoting (true), and
* faster handling (false).
* Faster check involves only checking first N characters of value, as well
* as possible looser checks.
*<p>
* Note, however, that regardless setting, all values that need to be quoted
* will be: it is just that when set to <code>false</code>, other values may
* also be quoted (to avoid having to do more expensive checks).
*<p>
* Default value is <code>false</code> for "loose" (approximate, conservative)
* checking.
*
* @since 2.4
*/
BOGUS(false) // placeholder
STRICT_CHECK_FOR_QUOTING(false),
;

protected final boolean _defaultState;
Expand All @@ -41,15 +58,16 @@ public static int collectDefaults()
}
return flags;
}

private Feature(boolean defaultState) {
_defaultState = defaultState;
_mask = (1 << ordinal());
}

public boolean enabledIn(int flags) { return (flags & getMask()) != 0; }
public boolean enabledByDefault() { return _defaultState; }
public int getMask() { return _mask; }
};
}

protected final static long MIN_INT_AS_LONG = Integer.MIN_VALUE;
protected final static long MAX_INT_AS_LONG = Integer.MAX_VALUE;
Expand Down Expand Up @@ -112,7 +130,7 @@ public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
char columnSeparator, char quoteChar, char[] linefeed)
{
this(ctxt, jsonFeatures, csvFeatures, codec,
new CsvWriter(ctxt, out, columnSeparator, quoteChar, linefeed));
new CsvWriter(ctxt, csvFeatures, out, columnSeparator, quoteChar, linefeed));
}

public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
Expand Down Expand Up @@ -259,6 +277,7 @@ private final void _writeFieldName(String name)

public CsvGenerator enable(Feature f) {
_csvFeatures |= f.getMask();
_writer.setFeatures(_csvFeatures);
return this;
}

Expand All @@ -273,11 +292,9 @@ public final boolean isEnabled(Feature f) {

public CsvGenerator configure(Feature f, boolean state) {
if (state) {
enable(f);
} else {
disable(f);
return enable(f);
}
return this;
return disable(f);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ public final CsvFactory getFactory() {
* Definition will not be strictly typed (that is, all columns are
* just defined to be exposed as String tokens).
*/
public CsvSchema schemaFor(JavaType pojoType)
{
public CsvSchema schemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _untypedSchemas, false);
}

Expand All @@ -126,8 +125,7 @@ public final CsvSchema schemaFor(TypeReference<?> pojoTypeRef) {
* determine type limitations which may make parsing more efficient
* (especially for numeric types like java.lang.Integer).
*/
public CsvSchema typedSchemaFor(JavaType pojoType)
{
public CsvSchema typedSchemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _typedSchemas, true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.dataformat.csv.CsvGenerator;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

import java.io.IOException;
Expand All @@ -22,7 +23,7 @@ public class CsvWriter
/* Also: only do check for optional quotes for short
* values; longer ones will always be quoted.
*/
final protected static int MAX_QUOTE_CHECK = 20;
final protected static int MAX_QUOTE_CHECK = 24;

final protected BufferedValue[] NO_BUFFERED = new BufferedValue[0];

Expand All @@ -36,7 +37,7 @@ public class CsvWriter
*/

final protected IOContext _ioContext;

/**
* Underlying {@link Writer} used for output.
*/
Expand All @@ -57,6 +58,14 @@ public class CsvWriter
* quotes around value
*/
final protected int _cfgMinSafeChar;

protected int _csvFeatures;

/**
* Marker flag used to determine if to do optimal (aka "strict") quoting
* checks or not (looser conservative check)
*/
protected boolean _cfgOptimalQuoting;

/*
/**********************************************************
Expand Down Expand Up @@ -123,10 +132,22 @@ public class CsvWriter
/**********************************************************
*/

@Deprecated // since 2.4, remove in 2.5
public CsvWriter(IOContext ctxt, Writer out,
char columnSeparator, char quoteChar, char[] linefeed)
{
this(ctxt, CsvGenerator.Feature.collectDefaults(),
out, columnSeparator, quoteChar, linefeed);

}

public CsvWriter(IOContext ctxt, int csvFeatures, Writer out,
char columnSeparator, char quoteChar, char[] linefeed)
{
_ioContext = ctxt;
_csvFeatures = csvFeatures;
_cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);

_outputBuffer = ctxt.allocConcatBuffer();
_bufferRecyclable = true;
_outputEnd = _outputBuffer.length;
Expand All @@ -145,6 +166,9 @@ public CsvWriter(IOContext ctxt, Writer out,
public CsvWriter(CsvWriter base, CsvSchema newSchema)
{
_ioContext = base._ioContext;
_csvFeatures = base._csvFeatures;
_cfgOptimalQuoting = base._cfgOptimalQuoting;

_outputBuffer = base._outputBuffer;
_bufferRecyclable = base._bufferRecyclable;
_outputEnd = base._outputEnd;
Expand Down Expand Up @@ -172,6 +196,14 @@ public CsvWriter withSchema(CsvSchema schema) {
return new CsvWriter(this, schema);
}

public CsvWriter setFeatures(int feat) {
if (feat != _csvFeatures) {
_csvFeatures = feat;
_cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(feat);
}
return this;
}

/*
/**********************************************************
/* Read-access to output state
Expand Down Expand Up @@ -583,17 +615,49 @@ protected boolean _mayNeedQuotes(String value, int length)
if (_cfgQuoteCharacter < 0) {
return false;
}
// let's not bother checking long Strings, just quote already:
// may skip checks unless we want exact checking
if (_cfgOptimalQuoting) {
return _needsQuotingStrict(value);
}
if (length > _cfgMaxQuoteCheckChars) {
return true;
}
for (int i = 0; i < length; ++i) {
return _needsQuotingLoose(value);
}

/**
*<p>
* NOTE: final since checking is not expected to be changed here; override
* calling method (<code>_mayNeedQuotes</code>) instead, if necessary.
*
* @since 2.4
*/
protected final boolean _needsQuotingLoose(String value)
{
for (int i = 0, len = value.length(); i < len; ++i) {
if (value.charAt(i) < _cfgMinSafeChar) {
return true;
}
}
return false;
}

/**
* @since 2.4
*/
protected boolean _needsQuotingStrict(String value)
{
for (int i = 0, len = value.length(); i < len; ++i) {
char c = value.charAt(i);
if (c < _cfgMinSafeChar) {
if (c == _cfgColumnSeparator || c == _cfgQuoteCharacter
|| c == '\r' || c == '\n') {
return true;
}
}
}
return false;
}

protected void _buffer(int index, BufferedValue v)
{
Expand Down
30 changes: 27 additions & 3 deletions src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,21 @@ public void testSimpleExplicit() throws Exception
.addColumn("userImage")
.addColumn("verified")
.build();
ObjectReader r = mapper.reader(schema);
_testSimpleExplicit(r, false);
_testSimpleExplicit(r, true);
}

FiveMinuteUser user = mapper.reader(schema).withType(FiveMinuteUser.class).readValue("Bob,Robertson,MALE,AQIDBAU=,false\n");
private void _testSimpleExplicit(ObjectReader r, boolean useBytes) throws Exception
{
r = r.withType(FiveMinuteUser.class);
FiveMinuteUser user;
final String INPUT = "Bob,Robertson,MALE,AQIDBAU=,false\n";
if (useBytes) {
user = r.readValue(INPUT);
} else {
user = r.readValue(INPUT.getBytes("UTF-8"));
}
assertEquals("Bob", user.firstName);
assertEquals("Robertson", user.lastName);
assertEquals(FiveMinuteUser.Gender.MALE, user.getGender());
Expand Down Expand Up @@ -79,7 +92,12 @@ public void testSimpleAsMaps() throws Exception
}

// Test for [Issue#10]
public void testMapsWithLinefeeds() throws Exception
public void testMapsWithLinefeeds() throws Exception {
_testMapsWithLinefeeds(false);
_testMapsWithLinefeeds(true);
}

private void _testMapsWithLinefeeds(boolean useBytes) throws Exception
{
CsvMapper mapper = mapperForCsv();
String CSV = "A,B,C\n"
Expand All @@ -91,7 +109,13 @@ public void testMapsWithLinefeeds() throws Exception
CsvSchema cs = CsvSchema.emptySchema().withHeader();
ObjectReader or = mapper.reader(HashMap.class).with(cs);

MappingIterator<Map<String,String>> mi = or.readValues(CSV);
MappingIterator<Map<String,String>> mi;

if (useBytes) {
mi = or.readValues(CSV.getBytes("UTF-8"));
} else {
mi = or.readValues(CSV);
}

assertTrue(mi.hasNext());
Map<String,String> map = mi.nextValue();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

public class TestParserQuotes extends ModuleTestBase
{
@JsonPropertyOrder({"age, name"})
@JsonPropertyOrder({"age", "name"})
protected static class AgeName {
public int age;
public String name;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.fasterxml.jackson.dataformat.csv;

import com.fasterxml.jackson.annotation.JsonPropertyOrder;

// Tests for [Issue#26]
public class TestParserStrictQuoting extends ModuleTestBase
{
@JsonPropertyOrder({"a", "b"})
protected static class AB {
public String a, b;

public AB() { }
public AB(String a, String b) {
this.a = a;
this.b = b;
}
}

/*
/**********************************************************************
/* Test methods
/**********************************************************************
*/

public void testStrictQuoting() throws Exception
{
final String NUMS = "12345 6789";
final String LONG = NUMS + NUMS + NUMS + NUMS; // 40 chars should do it

CsvMapper mapper = mapperForCsv();

assertFalse(mapper.getFactory().isEnabled(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING));
CsvSchema schema = mapper.schemaFor(AB.class).withoutHeader();

final AB input = new AB("x", LONG);

// with non-strict, should quote
String csv = mapper.writer(schema).writeValueAsString(input);
assertEquals(aposToQuotes("x,'"+LONG+"'"), csv.trim());

// should be possible to hot-swap
// and with strict/optimal, no quoting
mapper.configure(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING, true);
csv = mapper.writer(schema).writeValueAsString(input);
assertEquals(aposToQuotes("x,"+LONG), csv.trim());
}
}

0 comments on commit 579de90

Please sign in to comment.