-
-
Notifications
You must be signed in to change notification settings - Fork 285
feat(plugins): import data from CSV and TSV files into a table (#1568) #1578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cd014d0
856b29d
006fb24
5dfafaf
8578ce1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| // | ||
| // CSVImportOptions.swift | ||
| // CSVImportPlugin | ||
| // | ||
|
|
||
| import Foundation | ||
| import TableProPluginKit | ||
|
|
||
| /// User-adjustable settings for a CSV/TSV import. | ||
| /// | ||
| /// The nested enums are string-backed; their raw values are what `Codable` | ||
| /// persists and what `detectionSignature` emits. | ||
| struct CSVImportOptions: Equatable, Codable { | ||
| /// Field separator choice. `auto` has no byte of its own. | ||
| enum Delimiter: String, Codable, CaseIterable, Identifiable { | ||
| case auto, comma, semicolon, tab, pipe | ||
|
|
||
| var id: String { rawValue } | ||
|
|
||
| /// ASCII byte of the separator, or `nil` for `auto`. | ||
| var byte: UInt8? { | ||
| switch self { | ||
| case .auto: | ||
| return nil | ||
| case .comma: | ||
| return UInt8(ascii: ",") | ||
| case .semicolon: | ||
| return UInt8(ascii: ";") | ||
| case .tab: | ||
| return UInt8(ascii: "\t") | ||
| case .pipe: | ||
| return UInt8(ascii: "|") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Character that encloses quoted fields. | ||
| enum QuoteCharacter: String, Codable, CaseIterable, Identifiable { | ||
| case doubleQuote, singleQuote | ||
|
|
||
| var id: String { rawValue } | ||
|
|
||
| /// ASCII byte of the quote character. | ||
| var byte: UInt8 { | ||
| switch self { | ||
| case .doubleQuote: | ||
| return UInt8(ascii: "\"") | ||
| case .singleQuote: | ||
| return UInt8(ascii: "'") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Text encoding choice. `auto` maps to no fixed encoding. | ||
| enum TextEncoding: String, Codable, CaseIterable, Identifiable { | ||
| case auto, utf8, isoLatin1, windowsCP1252 | ||
|
|
||
| var id: String { rawValue } | ||
|
|
||
| /// Matching Foundation encoding, or `nil` for `auto`. | ||
| var stringEncoding: String.Encoding? { | ||
| switch self { | ||
| case .auto: | ||
| return nil | ||
| case .utf8: | ||
| return .utf8 | ||
| case .isoLatin1: | ||
| return .isoLatin1 | ||
| case .windowsCP1252: | ||
| return .windowsCP1252 | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Parsing options. | ||
| var delimiter: Delimiter = .auto | ||
| var quoteCharacter: QuoteCharacter = .doubleQuote | ||
| var encoding: TextEncoding = .auto | ||
| var hasHeaderRow: Bool = true | ||
| var trimWhitespace: Bool = false | ||
| var emptyAsNull: Bool = true | ||
| var nullString: String = "" | ||
| // Write options; these are not part of `detectionSignature`. | ||
| var errorHandling: ImportErrorHandling = .stopAndRollback | ||
| var wrapInTransaction: Bool = true | ||
| var deleteExistingRows: Bool = false | ||
|
|
||
| /// The parsing options joined with `|` into one string. | ||
| /// | ||
| /// Covers delimiter, quote character, encoding, the header/trim/empty-as-NULL | ||
| /// flags and the NULL text. NOTE(review): presumably compared by callers to | ||
| /// decide when field detection must be re-run - confirm against call sites. | ||
| var detectionSignature: String { | ||
| func flag(_ prefix: String, _ isOn: Bool) -> String { | ||
| prefix + (isOn ? "1" : "0") | ||
| } | ||
| let components = [ | ||
| delimiter.rawValue, | ||
| quoteCharacter.rawValue, | ||
| encoding.rawValue, | ||
| flag("h", hasHeaderRow), | ||
| flag("t", trimWhitespace), | ||
| flag("n", emptyAsNull), | ||
| nullString | ||
| ] | ||
| return components.joined(separator: "|") | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| // | ||
| // CSVImportOptionsView.swift | ||
| // CSVImportPlugin | ||
| // | ||
|
|
||
| import SwiftUI | ||
| import TableProPluginKit | ||
|
|
||
| /// Options form for the CSV import plugin: a grid of pickers plus the NULL-text | ||
| /// field on the left, and a column of toggles on the right. | ||
| struct CSVImportOptionsView: View { | ||
| /// Width shared by every control in the left-hand grid. | ||
| private static let controlWidth: CGFloat = 170 | ||
|
|
||
| let plugin: CSVImportPlugin | ||
|
|
||
| var body: some View { | ||
| // One binding to the plugin's settings; each control projects the member it edits. | ||
| let settings = Bindable(plugin).settings | ||
| // The transaction toggle is disabled while errors are skipped. | ||
| let skipsErrors = plugin.settings.errorHandling == .skipAndContinue | ||
|
|
||
| HStack(alignment: .top, spacing: 32) { | ||
| Grid(alignment: .leading, horizontalSpacing: 8, verticalSpacing: 10) { | ||
| GridRow { | ||
| Text("Delimiter:") | ||
| .gridColumnAlignment(.trailing) | ||
| menuPicker(settings.delimiter) { | ||
| Text("Auto-detect").tag(CSVImportOptions.Delimiter.auto) | ||
| Text("Comma (,)").tag(CSVImportOptions.Delimiter.comma) | ||
| Text("Semicolon (;)").tag(CSVImportOptions.Delimiter.semicolon) | ||
| Text("Tab").tag(CSVImportOptions.Delimiter.tab) | ||
| Text("Pipe (|)").tag(CSVImportOptions.Delimiter.pipe) | ||
| } | ||
| } | ||
|
|
||
| GridRow { | ||
| Text("Quote character:") | ||
| menuPicker(settings.quoteCharacter) { | ||
| Text("Double quote (\")").tag(CSVImportOptions.QuoteCharacter.doubleQuote) | ||
| Text("Single quote (')").tag(CSVImportOptions.QuoteCharacter.singleQuote) | ||
| } | ||
| } | ||
|
|
||
| GridRow { | ||
| Text("Encoding:") | ||
| menuPicker(settings.encoding) { | ||
| Text("Auto-detect").tag(CSVImportOptions.TextEncoding.auto) | ||
| Text("UTF-8").tag(CSVImportOptions.TextEncoding.utf8) | ||
| Text("ISO Latin 1").tag(CSVImportOptions.TextEncoding.isoLatin1) | ||
| Text("Windows-1252").tag(CSVImportOptions.TextEncoding.windowsCP1252) | ||
| } | ||
| } | ||
|
|
||
| GridRow { | ||
| Text("On error:") | ||
| menuPicker(settings.errorHandling) { | ||
| Text("Stop and Rollback").tag(ImportErrorHandling.stopAndRollback) | ||
| Text("Stop and Commit").tag(ImportErrorHandling.stopAndCommit) | ||
| Text("Skip and Continue").tag(ImportErrorHandling.skipAndContinue) | ||
| } | ||
| } | ||
|
|
||
| GridRow { | ||
| Text("NULL text:") | ||
| TextField("", text: settings.nullString, prompt: Text(verbatim: "\\N")) | ||
| .textFieldStyle(.roundedBorder) | ||
| .frame(width: Self.controlWidth) | ||
| .help("An extra value that should be imported as NULL, for example \\N.") | ||
| } | ||
| } | ||
|
|
||
| VStack(alignment: .leading, spacing: 10) { | ||
| Toggle("First row is a header", isOn: settings.hasHeaderRow) | ||
| .help("Use the first row as column names. Turn off to import every row as data.") | ||
|
|
||
| Toggle("Trim leading and trailing spaces", isOn: settings.trimWhitespace) | ||
|
|
||
| Toggle("Treat empty values as NULL", isOn: settings.emptyAsNull) | ||
| .help("Insert NULL for empty fields instead of an empty string.") | ||
|
|
||
| Toggle("Wrap in transaction (BEGIN/COMMIT)", isOn: settings.wrapInTransaction) | ||
| .disabled(skipsErrors) | ||
| .help(skipsErrors | ||
| ? String(localized: "Not available in skip-and-continue mode") | ||
| : String(localized: "Insert all rows in a single transaction. If any row fails, all changes are rolled back.")) | ||
|
|
||
| Toggle("Delete existing rows before import", isOn: settings.deleteExistingRows) | ||
| .help("Remove every row from the target table before inserting the imported rows.") | ||
| } | ||
| } | ||
| .font(.system(size: 13)) | ||
| } | ||
|
|
||
| /// Builds a label-less picker with the menu style and fixed width used by every picker in this form. | ||
| private func menuPicker<Value: Hashable, Options: View>( | ||
| _ selection: Binding<Value>, | ||
| @ViewBuilder options: () -> Options | ||
| ) -> some View { | ||
| Picker("", selection: selection, content: options) | ||
| .pickerStyle(.menu) | ||
| .labelsHidden() | ||
| .frame(width: Self.controlWidth) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| // | ||
| // CSVImportParsing.swift | ||
| // CSVImportPlugin | ||
| // | ||
| // Pure CSV row extraction, NULL handling, and field inference. Kept free of the | ||
| // plugin's loadable-bundle and SwiftUI surface so it can be compiled into the | ||
| // test target directly (a loadable .tableplugin cannot be linked by tests). | ||
| // The RFC 4180 tokenizer itself lives in TableProPluginKit (CSVStreamingParser), | ||
| // shared with the CSV inspector. | ||
| // | ||
|
|
||
| import Foundation | ||
| import TableProPluginKit | ||
|
|
||
| enum CSVImportParsing { | ||
| static let detectionSampleLimit = 200 | ||
|
|
||
| /// Detects a dialect from `data`, then applies the user's explicit choices on top. | ||
| /// | ||
| /// The quote character always comes from `options`. The delimiter and encoding | ||
| /// replace the detected values only when the matching option is not `auto`. | ||
| static func resolveDialect(in data: Data, options: CSVImportOptions) -> CSVDialect { | ||
| var resolved = CSVDialect.detect(from: data) | ||
| resolved.quoteChar = options.quoteCharacter.byte | ||
| resolved.delimiter = options.delimiter.byte ?? resolved.delimiter | ||
| resolved.encoding = options.encoding.stringEncoding ?? resolved.encoding | ||
| return resolved | ||
| } | ||
|
|
||
| /// Fallback name for the column at zero-based `index`, numbered from 1 ("Column 1", "Column 2", ...). | ||
| static func defaultColumnName(_ index: Int) -> String { | ||
| let position = index + 1 | ||
| return "Column " + String(position) | ||
| } | ||
|
|
||
| /// Builds `columnCount` column names from an optional header row. | ||
| /// | ||
| /// Each header cell is trimmed of whitespace and newlines. A missing or empty | ||
| /// cell falls back to `defaultColumnName(_:)`. A name that repeats an earlier | ||
| /// one gets " 2", " 3", ... appended until it is free. | ||
| /// | ||
| /// Names are compared case-insensitively, so "Name" and "name" do not both | ||
| /// survive unchanged: SQL engines commonly treat unquoted column identifiers | ||
| /// as case-insensitive, and such a pair would collide there. | ||
| /// | ||
| /// - Parameters: | ||
| ///   - header: Parsed header fields, or `nil` when the file has no header row. | ||
| ///   - columnCount: Number of names to produce; zero or less yields an empty array. | ||
| /// - Returns: `columnCount` names that are unique ignoring case. | ||
| static func columnNames(header: [String]?, columnCount: Int) -> [String] { | ||
| // `0..<columnCount` would trap on a negative count. | ||
| guard columnCount > 0 else { return [] } | ||
| var names: [String] = [] | ||
| names.reserveCapacity(columnCount) | ||
| // Lowercased forms of every name handed out so far. | ||
| var usedKeys = Set<String>() | ||
| for index in 0..<columnCount { | ||
| let raw = header.flatMap { index < $0.count ? $0[index] : nil } ?? "" | ||
| let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) | ||
| let base = trimmed.isEmpty ? defaultColumnName(index) : trimmed | ||
| var unique = base | ||
| var suffix = 2 | ||
| while !usedKeys.insert(unique.lowercased()).inserted { | ||

Comment on lines
+37
to
+44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a CSV header contains names that differ only by case (for example Useful? React with 👍 / 👎. |
||
| unique = "\(base) \(suffix)" | ||
| suffix += 1 | ||
| } | ||
| names.append(unique) | ||
| } | ||
| return names | ||
| } | ||
|
|
||
| /// Converts one raw field into the value to insert. | ||
| /// | ||
| /// Trims spaces first when `trimWhitespace` is on. The result is `.null` when | ||
| /// the value is empty and `emptyAsNull` is on, or when it equals a non-empty | ||
| /// `nullString`; otherwise it is `.text` with the (possibly trimmed) value. | ||
| static func cellValue(from raw: String, options: CSVImportOptions) -> PluginCellValue { | ||
| let value = options.trimWhitespace ? raw.trimmingCharacters(in: .whitespaces) : raw | ||
| let isEmptyNull = options.emptyAsNull && value.isEmpty | ||
| let isNullMarker = !options.nullString.isEmpty && value == options.nullString | ||
| return (isEmptyNull || isNullMarker) ? .null : .text(value) | ||
| } | ||
|
|
||
| /// Returns the field's text for sampling, or `nil` when `cellValue(from:options:)` | ||
| /// yields something other than non-empty text. | ||
| static func sampleText(from raw: String, options: CSVImportOptions) -> String? { | ||
| switch cellValue(from: raw, options: options) { | ||
| case .text(let value) where !value.isEmpty: | ||
| return value | ||
| default: | ||
| return nil | ||
| } | ||
| } | ||
|
|
||
| /// Maps one parsed row onto the column names. | ||
| /// | ||
| /// A column with no matching field is treated as an empty string. Fields | ||
| /// beyond the last column name are ignored. | ||
| static func row(fields: [String], columnNames: [String], options: CSVImportOptions) -> [String: PluginCellValue] { | ||
| var cells: [String: PluginCellValue] = [:] | ||
| cells.reserveCapacity(columnNames.count) | ||
| for position in columnNames.indices { | ||
| let raw = fields.indices.contains(position) ? fields[position] : "" | ||
| cells[columnNames[position]] = cellValue(from: raw, options: options) | ||
| } | ||
| return cells | ||
| } | ||
|
|
||
| /// Translates an inferred CSV type into the plugin's import field type. | ||
| /// Dates, plain text and any unknown future case are all imported as text. | ||
| static func importFieldType(for type: CSVTypeInferrer.InferredType) -> PluginImportFieldType { | ||
| switch type { | ||
| case .integer: | ||
| return .integer | ||
| case .real: | ||
| return .real | ||
| case .boolean: | ||
| return .boolean | ||
| case .date, .text: | ||
| return .text | ||
| @unknown default: | ||
| return .text | ||
| } | ||
| } | ||
|
|
||
| /// Whether the row has no non-empty field. An empty field list counts as blank. | ||
| static func isBlank(_ fields: [String]) -> Bool { | ||
| !fields.contains { !$0.isEmpty } | ||
| } | ||
|
|
||
| /// Samples the start of a CSV file and describes each column. | ||
| /// | ||
| /// Resolves the dialect, indexes the rows, optionally takes the first row as | ||
| /// the header, then reads up to `limit` non-blank data rows. Blank rows are | ||
| /// skipped and do not count toward `limit`. For each column it collects the | ||
| /// non-empty sample texts and remembers the first one. | ||
| /// | ||
| /// - Parameters: | ||
| ///   - data: Raw file bytes. | ||
| ///   - options: Import options that drive dialect resolution and NULL handling. | ||
| ///   - limit: Maximum number of non-blank data rows to sample. | ||
| /// - Returns: One field per column with its name, first sample value (cut to | ||
| ///   80 characters) and inferred type; empty when there are no rows or columns. | ||
| static func detectFields( | ||
| in data: Data, | ||
| options: CSVImportOptions, | ||
| limit: Int = detectionSampleLimit | ||
| ) -> [PluginImportField] { | ||
| let parser = CSVStreamingParser(dialect: resolveDialect(in: data, options: options)) | ||
|
|
||
| return data.withUnsafeBytes { bytes -> [PluginImportField] in | ||
| let buffer = bytes.bindMemory(to: UInt8.self) | ||
| // Empty data has no base address. | ||
| guard buffer.baseAddress != nil else { return [] } | ||
| let rowRanges = parser.indexRows(buffer) | ||
| guard !rowRanges.isEmpty else { return [] } | ||
|
|
||
| // With a header row, the first range is the header and the rest is data. | ||
| var dataRanges = rowRanges[...] | ||
| var header: [String]? | ||
| if options.hasHeaderRow, let headerRange = dataRanges.popFirst() { | ||
| header = parser.parseRow(buffer, range: headerRange) | ||
| } | ||
|
|
||
| // NOTE(review): the column count comes from the header, or else from the | ||
| // first data row alone; later rows with more fields do not widen it. | ||
| let columnCount: Int | ||
| if let header { | ||
| columnCount = header.count | ||
| } else if let firstRange = dataRanges.first { | ||
| columnCount = parser.parseRow(buffer, range: firstRange).count | ||
| } else { | ||
| columnCount = 0 | ||
| } | ||

Comment on lines
+118
to
+120
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When Useful? React with 👍 / 👎. |
||
| guard columnCount > 0 else { return [] } | ||
|
|
||
| let names = columnNames(header: header, columnCount: columnCount) | ||
| var samples = [[String]](repeating: [], count: columnCount) | ||
| var firstValues = [String?](repeating: nil, count: columnCount) | ||
| var sampledRows = 0 | ||
|
|
||
| for range in dataRanges { | ||
| guard sampledRows < limit else { break } | ||
| let fields = parser.parseRow(buffer, range: range) | ||
| guard !isBlank(fields) else { continue } | ||
| for column in samples.indices { | ||
| let raw = fields.indices.contains(column) ? fields[column] : "" | ||
| guard let value = sampleText(from: raw, options: options) else { continue } | ||
| samples[column].append(value) | ||
| firstValues[column] = firstValues[column] ?? value | ||
| } | ||
| sampledRows += 1 | ||
| } | ||
|
|
||
| return names.enumerated().map { column, name in | ||
| PluginImportField( | ||
| name: name, | ||
| sampleValue: firstValues[column].map { String($0.prefix(80)) }, | ||
| inferredType: importFieldType(for: CSVTypeInferrer.infer(column: samples[column])) | ||
| ) | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For a BOM-marked UTF-16 CSV,
`CSVDialect.detect(from:)` sets `.utf16LittleEndian`/`.utf16BigEndian`, but `CSVStreamingParser` still scans delimiters and newlines as single bytes and advances only one byte past them. A UTF-16LE file such as `a,b\n1,2` therefore leaves the delimiter's trailing NUL at the start of the next field, producing corrupted headers/values during detection and import; either transcode/reject UTF-16 before parsing or keep auto-detection to encodings the byte parser can tokenize correctly. Useful? React with 👍 / 👎.