Skip to content

Commit bc43115

Browse files
author
Joseph Sirianni
authored
Csv header delimiter (#370)
* support optionally setting the header delimiter
* perform type assertion once
* changelog for csv parser: support optionally setting the header delimiter
* update error test cases. add HeaderDelimiter test cases
1 parent 291404b commit bc43115

File tree

4 files changed

+105
-20
lines changed

4 files changed

+105
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
- File input: Added optional labels for resolved symlink file name and path [PR 364](https://github.com/observIQ/stanza/pull/364)
11+
- CSV Parser: Added optional configuration field `header_delimiter` [PR 370](https://github.com/observIQ/stanza/pull/370)
1112

1213
## 1.1.5 - 2021-07-15
1314

docs/operators/csv_parser.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The `csv_parser` operator parses the string-type field selected by `parse_from`
99
| `id` | `csv_parser` | A unique identifier for the operator |
1010
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries |
1111
| `header` | required | A string of delimited field names. The values in the delimited header will be used as keys |
12+
| `header_delimiter` | value of `delimiter` | A character that will be used as a delimiter for the header. Values `\r` and `\n` cannot be used as a delimiter |
1213
| `delimiter` | `,` | A character that will be used as a delimiter. Values `\r` and `\n` cannot be used as a delimiter |
1314
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed |
1415
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed into |
@@ -144,6 +145,50 @@ Configuration:
144145
}
145146
```
146147

148+
</td>
149+
</tr>
150+
</table>
151+
152+
#### Parse the field `message` with differing delimiters for header and fields
153+
154+
Configuration:
155+
156+
```yaml
157+
- type: csv_parser
158+
parse_from: message
159+
delimiter: "+"
160+
header_delimiter: ","
161+
header: 'id,severity,message'
162+
```
163+
164+
<table>
165+
<tr><td> Input record </td> <td> Output record </td></tr>
166+
<tr>
167+
<td>
168+
169+
```json
170+
{
171+
"timestamp": "",
172+
"record": {
173+
"message": "1+debug+\"\"Debug Message\"\""
174+
}
175+
}
176+
```
177+
178+
</td>
179+
<td>
180+
181+
```json
182+
{
183+
"timestamp": "",
184+
"record": {
185+
"id": "1",
186+
"severity": "debug",
187+
"message": "\"Debug Message\""
188+
}
189+
}
190+
```
191+
147192
</td>
148193
</tr>
149194
</table>

operator/builtin/parser/csv/csv.go

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ func NewCSVParserConfig(operatorID string) *CSVParserConfig {
2727
type CSVParserConfig struct {
2828
helper.ParserConfig `yaml:",inline"`
2929

30-
Header string `json:"header" yaml:"header"`
31-
FieldDelimiter string `json:"delimiter,omitempty" yaml:"delimiter,omitempty"`
30+
Header string `json:"header" yaml:"header"`
31+
HeaderDelimiter string `json:"header_delimiter,omitempty" yaml:"header_delimiter,omitempty"`
32+
FieldDelimiter string `json:"delimiter,omitempty" yaml:"delimiter,omitempty"`
3233
}
3334

3435
// Build will build a csv parser operator.
@@ -52,17 +53,24 @@ func (c CSVParserConfig) Build(context operator.BuildContext) ([]operator.Operat
5253

5354
fieldDelimiter := []rune(c.FieldDelimiter)[0]
5455

55-
if !strings.Contains(c.Header, c.FieldDelimiter) {
56-
return nil, fmt.Errorf("missing field delimiter in header")
56+
if c.HeaderDelimiter == "" {
57+
c.HeaderDelimiter = c.FieldDelimiter
5758
}
5859

59-
numFields := len(strings.Split(c.Header, c.FieldDelimiter))
60+
headerDelimiter := []rune(c.HeaderDelimiter)[0]
61+
62+
if !strings.Contains(c.Header, c.HeaderDelimiter) {
63+
return nil, fmt.Errorf("missing header delimiter in header")
64+
}
65+
66+
numFields := len(strings.Split(c.Header, c.HeaderDelimiter))
6067

6168
csvParser := &CSVParser{
62-
ParserOperator: parserOperator,
63-
header: c.Header,
64-
fieldDelimiter: fieldDelimiter,
65-
numFields: numFields,
69+
ParserOperator: parserOperator,
70+
header: c.Header,
71+
headerDelimiter: headerDelimiter,
72+
fieldDelimiter: fieldDelimiter,
73+
numFields: numFields,
6674
}
6775

6876
return []operator.Operator{csvParser}, nil
@@ -71,9 +79,10 @@ func (c CSVParserConfig) Build(context operator.BuildContext) ([]operator.Operat
7179
// CSVParser is an operator that parses csv in an entry.
7280
type CSVParser struct {
7381
helper.ParserOperator
74-
header string
75-
fieldDelimiter rune
76-
numFields int
82+
header string
83+
headerDelimiter rune
84+
fieldDelimiter rune
85+
numFields int
7786
}
7887

7988
// Process will parse an entry for csv.
@@ -84,17 +93,15 @@ func (r *CSVParser) Process(ctx context.Context, entry *entry.Entry) error {
8493
// parse will parse a value using the supplied csv header.
8594
func (r *CSVParser) parse(value interface{}) (interface{}, error) {
8695
var csvLine string
87-
switch value.(type) {
96+
switch t := value.(type) {
8897
case string:
89-
csvLine += value.(string)
98+
csvLine += t
9099
case []byte:
91-
csvLine += string(value.([]byte))
100+
csvLine += string(t)
92101
default:
93102
return nil, fmt.Errorf("type '%T' cannot be parsed as csv", value)
94103
}
95104

96-
delimiterStr := string([]rune{r.fieldDelimiter})
97-
98105
reader := csvparser.NewReader(strings.NewReader(csvLine))
99106
reader.Comma = r.fieldDelimiter
100107
reader.FieldsPerRecord = r.numFields
@@ -110,7 +117,7 @@ func (r *CSVParser) parse(value interface{}) (interface{}, error) {
110117
return nil, err
111118
}
112119

113-
for i, key := range strings.Split(r.header, delimiterStr) {
120+
for i, key := range strings.Split(r.header, string([]rune{r.headerDelimiter})) {
114121
parsedValues[key] = record[i]
115122
}
116123
}

operator/builtin/parser/csv/csv_test.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,19 @@ func TestParserCSV(t *testing.T) {
169169
"position": "agent",
170170
},
171171
},
172+
{
173+
"header-delimiter",
174+
func(p *CSVParserConfig) {
175+
p.Header = "name+sev+msg"
176+
p.HeaderDelimiter = "+"
177+
},
178+
"stanza,INFO,started agent",
179+
map[string]interface{}{
180+
"name": "stanza",
181+
"sev": "INFO",
182+
"msg": "started agent",
183+
},
184+
},
172185
}
173186

174187
for _, tc := range cases {
@@ -270,7 +283,7 @@ func TestBuildParserCSV(t *testing.T) {
270283
c.Header = "name"
271284
_, err := c.Build(testutil.NewBuildContext(t))
272285
require.Error(t, err)
273-
require.Contains(t, err.Error(), "missing field delimiter in header")
286+
require.Contains(t, err.Error(), "missing header delimiter in header")
274287
})
275288

276289
t.Run("InvalidHeaderFieldWrongDelimiter", func(t *testing.T) {
@@ -286,6 +299,25 @@ func TestBuildParserCSV(t *testing.T) {
286299
c.FieldDelimiter = ":"
287300
_, err := c.Build(testutil.NewBuildContext(t))
288301
require.Error(t, err)
289-
require.Contains(t, err.Error(), "missing field delimiter in header")
302+
require.Contains(t, err.Error(), "missing header delimiter in header")
303+
})
304+
305+
t.Run("HeaderDelimiter", func(t *testing.T) {
306+
c := newBasicCSVParser()
307+
c.Header = "name+position+number"
308+
c.HeaderDelimiter = "+"
309+
c.FieldDelimiter = ":"
310+
_, err := c.Build(testutil.NewBuildContext(t))
311+
require.NoError(t, err)
312+
})
313+
314+
t.Run("InvalidHeaderDelimiter", func(t *testing.T) {
315+
c := newBasicCSVParser()
316+
c.Header = "name,position,number"
317+
c.HeaderDelimiter = "+"
318+
c.FieldDelimiter = ":"
319+
_, err := c.Build(testutil.NewBuildContext(t))
320+
require.Error(t, err)
321+
require.Contains(t, err.Error(), "missing header delimiter in header")
290322
})
291323
}

0 commit comments

Comments
 (0)