Skip to content

Commit

Permalink
Ignore empty lines for CSVs with multiple fields
Browse files Browse the repository at this point in the history
  • Loading branch information
dehesa committed Mar 14, 2021
1 parent 2fb1c65 commit 82ee81e
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 24 deletions.
52 changes: 30 additions & 22 deletions sources/imperative/reader/Reader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -107,31 +107,39 @@ extension CSVReader {
case .failed(let e): throw e
}

let result: [String]?
do {
result = try self._parseLine(rowIndex: self.count.rows)
} catch let error {
self.status = .failed(error as! CSVError<CSVReader>)
throw error
}

guard let numFields = result?.count else {
self.status = .finished
return nil
}

if self.count.rows > 0 {
guard self.count.fields == numFields else {
let error = Error._invalidFieldCount(rowIndex: self.count.rows+1, parsed: numFields, expected: self.count.fields)
self.status = .failed(error)
loop: while true {
let result: [String]?
do {
result = try self._parseLine(rowIndex: self.count.rows)
} catch let error {
self.status = .failed(error as! CSVError<CSVReader>)
throw error
}
} else {
self.count.fields = numFields
// If no fields were parsed, the EOF has been reached.
guard let fields = result else {
self.status = .finished
return nil
}

let numFields = fields.count
// If a single empty field is received, a blank line has been parsed. Ignore empty lines for CSV files where several fields are expected.
if numFields == 1, fields.first!.isEmpty, self.count.rows != 1 {
continue loop
}

if self.count.rows > 0 {
guard self.count.fields == numFields else {
let error = Error._invalidFieldCount(rowIndex: self.count.rows+1, parsed: numFields, expected: self.count.fields)
self.status = .failed(error)
throw error
}
} else {
self.count.fields = numFields
}

self.count.rows += 1
return result
}

self.count.rows += 1
return result
}
}

Expand Down
16 changes: 14 additions & 2 deletions tests/declarative/DecodingBadInputTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ extension DecodingBadInputTests {
}

extension DecodingBadInputTests {
/// Tests bad quoting resulting in too many fields in a particular row
func testBadQuoting() {
/// Tests bad input in which a row is not escaped, resulting in too many fields in that particular row.
func testBadEscaping() {
let input = """
x,y
A,A A
Expand All @@ -39,4 +39,16 @@ extension DecodingBadInputTests {
let decoder = CSVDecoder { $0.headerStrategy = .firstLine }
XCTAssertThrowsError(try decoder.decode([_Row].self, from: input))
}

/// Tests a valid CSV file with an extra newline delimiter at the end of the file.
func testExtraNewLine() throws {
let input = """
x,y
A,AA
B,BB
\n
"""
let decoder = CSVDecoder { $0.headerStrategy = .firstLine }
XCTAssertNoThrow(try decoder.decode([_Row].self, from: input))
}
}
54 changes: 54 additions & 0 deletions tests/imperative/ReaderBadInputTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import XCTest
import CodableCSV

/// Check support for handling bad input
final class ReaderBadInputTests: XCTestCase {
  /// Configures every test in this case to stop at its first failed assertion.
  override func setUp() {
    continueAfterFailure = false
  }
}

extension ReaderBadInputTests {
  /// A CSV row made of two string fields, `x` and `y`.
  ///
  /// NOTE(review): no test visible in this file references this type — confirm whether it is still needed.
  private struct _Row: Codable, Equatable {
    var x, y: String
  }
}

extension ReaderBadInputTests {
  /// Tests bad input in which a row is not escaped, resulting in too many fields in that particular row.
  ///
  /// The first line declares two fields (`x,y`), while the row `C,C, C` is written without any escaping.
  func testBadEscaping() throws {
    let input = """
      x,y
      A,A A
      C,C, C
      D,D D
      """
    XCTAssertThrowsError(try CSVReader.decode(input: input) { $0.headerStrategy = .firstLine })
  }

  /// Tests a CSV with a header with three fields (one of them being empty) and subsequent rows with two fields.
  ///
  /// The trailing comma in `x,y,` is what introduces the third (empty) header field.
  func testIllFormedHeader() {
    let input = """
      x,y,
      A,A A
      B,"B, B"
      """
    XCTAssertThrowsError(try CSVReader.decode(input: input) { $0.headerStrategy = .firstLine })
  }

  /// Tests a valid CSV file with an extra newline delimiter at the end of the file.
  ///
  /// The trailing blank line must be skipped rather than surfaced as a row, so the reader is expected to
  /// hand back exactly the two data rows and then signal the end of the input with `nil`.
  func testExtraNewLine() throws {
    let input = """
      x,y
      A,AA
      B,BB
      \n
      """
    let reader = try CSVReader(input: input) { $0.headerStrategy = .firstLine }
    // Check the row contents (not just non-nil) so that a header line or a blank-line row
    // returned in place of real data is caught.
    XCTAssertEqual(try reader.readRow(), ["A", "AA"])
    XCTAssertEqual(try reader.readRow(), ["B", "BB"])
    XCTAssertNil(try reader.readRow())
  }
}

0 comments on commit 82ee81e

Please sign in to comment.