Skip to content

Commit

Permalink
csv: delimiter supports at most 2 characters (#9970)
Browse files Browse the repository at this point in the history
close #9969
  • Loading branch information
zhangjinpeng87 authored Dec 2, 2023
1 parent 24ce8c9 commit 5480006
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
18 changes: 12 additions & 6 deletions pkg/config/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,9 @@ func (s *SinkConfig) MaskSensitiveData() {

// CSVConfig defines a series of configuration items for csv codec.
type CSVConfig struct {
// delimiter between fields
// delimiter between fields; it must be 1 or 2 characters long.
// It must not contain CR or LF.
// It must not share any characters with quote.
Delimiter string `toml:"delimiter" json:"delimiter"`
// quoting character
Quote string `toml:"quote" json:"quote"`
Expand Down Expand Up @@ -217,20 +219,24 @@ func (c *CSVConfig) validateAndAdjust() error {
case 0:
return cerror.WrapError(cerror.ErrSinkInvalidConfig,
errors.New("csv config delimiter cannot be empty"))
case 1:
case 1, 2:
if strings.ContainsRune(c.Delimiter, CR) || strings.ContainsRune(c.Delimiter, LF) {
return cerror.WrapError(cerror.ErrSinkInvalidConfig,
errors.New("csv config delimiter contains line break characters"))
}
default:
return cerror.WrapError(cerror.ErrSinkInvalidConfig,
errors.New("csv config delimiter contains more than one character, note that escape "+
errors.New("csv config delimiter contains more than two character, note that escape "+
"sequences can only be used in double quotes in toml configuration items."))
}

if len(c.Quote) > 0 && strings.Contains(c.Delimiter, c.Quote) {
return cerror.WrapError(cerror.ErrSinkInvalidConfig,
errors.New("csv config quote and delimiter cannot be the same"))
if len(c.Quote) > 0 {
for _, r := range c.Delimiter {
if strings.ContainsRune(c.Quote, r) {
return cerror.WrapError(cerror.ErrSinkInvalidConfig,
errors.New("csv config quote and delimiter has common characters which is not allowed"))
}
}
}

// validate binary encoding method
Expand Down
26 changes: 22 additions & 4 deletions pkg/config/sink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,15 @@ func TestValidateAndAdjustCSVConfig(t *testing.T) {
},
wantErr: "",
},
{
name: "valid delimiter with 2 characters",
config: &CSVConfig{
Quote: "\"",
Delimiter: "FE",
BinaryEncodingMethod: BinaryEncodingHex,
},
wantErr: "",
},
{
name: "delimiter is empty",
config: &CSVConfig{
Expand All @@ -350,20 +359,29 @@ func TestValidateAndAdjustCSVConfig(t *testing.T) {
wantErr: "csv config delimiter contains line break characters",
},
{
name: "delimiter contains more than one character",
name: "delimiter contains more than two characters",
config: &CSVConfig{
Quote: "'",
Delimiter: "\r\t",
Delimiter: "FEF",
},
wantErr: "csv config delimiter contains more than one character",
wantErr: "csv config delimiter contains more than two character, note that escape " +
"sequences can only be used in double quotes in toml configuration items.",
},
{
name: "delimiter and quote are same",
config: &CSVConfig{
Quote: "'",
Delimiter: "'",
},
wantErr: "csv config quote and delimiter cannot be the same",
wantErr: "csv config quote and delimiter has common characters which is not allowed",
},
{
name: "delimiter and quote contain common characters",
config: &CSVConfig{
Quote: "E",
Delimiter: "FE",
},
wantErr: "csv config quote and delimiter has common characters which is not allowed",
},
{
name: "invalid binary encoding method",
Expand Down

0 comments on commit 5480006

Please sign in to comment.