Skip to content

Commit

Permalink
Merge pull request #79 from ckrahlisch/master
Browse files Browse the repository at this point in the history
Adds strict delimitation option
  • Loading branch information
bytefish committed Jun 17, 2021
2 parents 508872d + ff5c098 commit 199b5b2
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 18 deletions.
Binary file not shown.
Binary file not shown.
40 changes: 40 additions & 0 deletions TinyCsvParser/TinyCsvParser.Test/Tokenizer/Rfc4180TokenizerTest.cs
Expand Up @@ -96,6 +96,46 @@ public void Rfc4180_QuotedString_Double_Quoted_Data_Test()
Assert.AreEqual("Also goes by \"Mike\", among friends that is", tokens[2]);
}

[Test]
public void RFC4180_CsvParser_StrictDelimitation_Test()
{
    // Tokenizer setup: '"' is the quote character, '\\' the escape character and
    // ';' the delimiter. The final argument switches strict delimitation on, so
    // unescaped quotes inside a field (see "Mike" below) are expected to survive.
    var tokenizerOptions = new Options('"', '\\', ';', true);
    var strictTokenizer = new RFC4180Tokenizer(tokenizerOptions);

    // Parser setup: the first line of the input is treated as a header row.
    var parserOptions = new CsvParserOptions(true, strictTokenizer);
    var entityMapping = new SampleEntityMapping();
    var parser = new CsvParser<SampleEntity>(parserOptions, entityMapping);

    // Input: one header line followed by two records. The second record carries
    // a blank Age column.
    string[] csvLines =
    {
        "Name; Age; Description",
        "\"Michael, Chester\";24;\"Also goes by \"Mike\", among friends that is\"",
        "\"Robert, Willliamson\"; ;\"All-around nice guy who always says hi\""
    };

    var csvData = new StringBuilder();

    foreach (var csvLine in csvLines)
    {
        csvData.AppendLine(csvLine);
    }

    // Records are split at the platform line terminator, which is exactly what
    // StringBuilder.AppendLine emitted above.
    var readerOptions = new CsvReaderOptions(new[] { Environment.NewLine });

    var parsedRows = csvParser_ReadAll();

    // Both data rows must be present and must have been mapped successfully.
    Assert.AreEqual(2, parsedRows.Count);
    Assert.AreEqual(true, parsedRows.All(row => row.IsValid));

    // First record: the comma stays inside the name, the inner quotes stay inside the description.
    var first = parsedRows[0].Result;

    Assert.AreEqual("Michael, Chester", first.Name);
    Assert.AreEqual(24, first.Age);
    Assert.AreEqual("Also goes by \"Mike\", among friends that is", first.Description);

    // Second record: the blank Age column maps to "no value".
    var second = parsedRows[1].Result;

    Assert.AreEqual("Robert, Willliamson", second.Name);
    Assert.AreEqual(false, second.Age.HasValue);
    Assert.AreEqual("All-around nice guy who always says hi", second.Description);

    // Runs the parser over the assembled CSV text and materializes every row.
    System.Collections.Generic.List<TinyCsvParser.Mapping.CsvMappingResult<SampleEntity>> csvParser_ReadAll()
    {
        return parser
            .ReadFromString(readerOptions, csvData.ToString())
            .ToList();
    }
}

[Test]
public void Rfc4180_Issue3_Empty_Column_Test()
{
Expand Down
6 changes: 3 additions & 3 deletions TinyCsvParser/TinyCsvParser/TinyCsvParser.csproj
Expand Up @@ -12,10 +12,10 @@
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Copyright>Copyright © 2019 Philipp Wagner</Copyright>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<AssemblyVersion>2.6.0.0</AssemblyVersion>
<FileVersion>2.6.0.0</FileVersion>
<AssemblyVersion>2.6.1.0</AssemblyVersion>
<FileVersion>2.6.1.0</FileVersion>
<RepositoryType>git</RepositoryType>
<Version>2.6.0</Version>
<Version>2.6.1</Version>
<IncludeSource>True</IncludeSource>
<IncludeSymbols>True</IncludeSymbols>
<SignAssembly>true</SignAssembly>
Expand Down
6 changes: 4 additions & 2 deletions TinyCsvParser/TinyCsvParser/Tokenizer/RFC4180/Options.cs
Expand Up @@ -7,17 +7,19 @@ public class Options
public readonly char QuoteCharacter;
public readonly char EscapeCharacter;
public readonly char DelimiterCharacter;
public readonly bool StrictDelimitation;

public Options(char quoteCharacter, char escapeCharacter, char delimiterCharacter)
public Options(char quoteCharacter, char escapeCharacter, char delimiterCharacter, bool strictDelimitation = false)
{
QuoteCharacter = quoteCharacter;
EscapeCharacter = escapeCharacter;
DelimiterCharacter = delimiterCharacter;
StrictDelimitation = strictDelimitation;
}

public override string ToString()
{
return $"Options (QuoteCharacter = {QuoteCharacter}, EscapeCharacter = {EscapeCharacter}, DelimiterCharacter = {DelimiterCharacter})";
return $"Options (QuoteCharacter = {QuoteCharacter}, EscapeCharacter = {EscapeCharacter}, DelimiterCharacter = {DelimiterCharacter}, StrictDelimitation = {StrictDelimitation})";
}
}
}
34 changes: 21 additions & 13 deletions TinyCsvParser/TinyCsvParser/Tokenizer/RFC4180/Reader.cs
Expand Up @@ -72,25 +72,28 @@ private Token NextToken(StringReader reader)
}
else
{
if (IsQuoteCharacter(c))
if (!options.StrictDelimitation)
{
result = ReadQuoted(reader);
if (IsQuoteCharacter(c))
{
result = ReadQuoted(reader);

Skip(reader);
Skip(reader);

if (IsEndOfStream(reader.Peek()))
{
return new Token(TokenType.EndOfRecord, result);
}
if (IsEndOfStream(reader.Peek()))
{
return new Token(TokenType.EndOfRecord, result);
}

if (IsDelimiter(reader.Peek()))
{
reader.Read();
}
if (IsDelimiter(reader.Peek()))
{
reader.Read();
}

return new Token(TokenType.Token, result);
return new Token(TokenType.Token, result);
}
}

if (IsEndOfStream(c))
{
return new Token(TokenType.EndOfRecord);
Expand All @@ -99,6 +102,11 @@ private Token NextToken(StringReader reader)
{
result = reader.ReadTo(options.DelimiterCharacter).Trim();

if (options.StrictDelimitation)
{
result = result.TrimStart(options.QuoteCharacter).TrimEnd(options.QuoteCharacter);
}

Skip(reader);

if (IsEndOfStream(reader.Peek()))
Expand Down

0 comments on commit 199b5b2

Please sign in to comment.