CsvFieldIndexer: Fix handling of escaped characters

This commit is contained in:
2024-02-19 02:47:29 +01:00
parent 52cb729c0e
commit 13d9218944
3 changed files with 107 additions and 1 deletions

View File

@@ -140,6 +140,66 @@ public class CsvFieldIndexerTests
Assert.Equal(29, indexer.FieldIndex[1][3]);
}
/// <summary>
/// Indexes two identical lines, each consisting of one quoted field that
/// contains backslash-escaped quotes, and checks the recorded offsets.
/// </summary>
[Fact]
public void GenerateIndex__TwoLinesWithOneQuotedColumnsWithEscapedQuotes__TwoRows()
{
    // --- Arrange
    // Raw string literal: the backslashes are literal input characters,
    // so every line is 17 characters long and the second one starts at 18.
    StringReader input = new(
        """
        "Hello \"World\""
        "Hello \"World\""
        """);
    CsvFieldIndexer sut = new();

    // --- Act
    sut.GenerateIndex(input);

    // --- Assert
    var rowOffsets = sut.Index;
    Assert.Equal(3, rowOffsets.Count);
    Assert.Equal(0, rowOffsets[0]);
    Assert.Equal(18, rowOffsets[1]);
    Assert.Equal(36, rowOffsets[2]);

    var fieldOffsets = sut.FieldIndex;
    Assert.Equal(2, fieldOffsets.Count);

    // First row: two positions, 1 and 15 (presumably field start and end,
    // i.e. the content between the enclosing quotes).
    var firstRow = fieldOffsets[0];
    Assert.Equal(2, firstRow.Count);
    Assert.Equal(1, firstRow[0]);
    Assert.Equal(15, firstRow[1]);

    // Second row: same positions shifted by the 18-character row offset.
    var secondRow = fieldOffsets[1];
    Assert.Equal(2, secondRow.Count);
    Assert.Equal(19, secondRow[0]);
    Assert.Equal(33, secondRow[1]);
}
/// <summary>
/// Indexes two quoted single-field lines where the second line ends in
/// several consecutive backslash-escaped quotes, and checks the recorded
/// offsets.
/// </summary>
[Fact]
public void GenerateIndex__TwoLinesWithOneQuotedColumnsWithManyEscapedQuotes__TwoRows()
{
    // --- Arrange
    // Raw string literal: the backslashes are literal input characters.
    // The first line is 17 characters long, the second one 21.
    StringReader input = new(
        """
        "Hello \"World\""
        "Hello \"World\"\"\""
        """);
    CsvFieldIndexer sut = new();

    // --- Act
    sut.GenerateIndex(input);

    // --- Assert
    var rowOffsets = sut.Index;
    Assert.Equal(3, rowOffsets.Count);
    Assert.Equal(0, rowOffsets[0]);
    Assert.Equal(18, rowOffsets[1]);
    Assert.Equal(40, rowOffsets[2]);

    var fieldOffsets = sut.FieldIndex;
    Assert.Equal(2, fieldOffsets.Count);

    // First row: two positions, 1 and 15 (presumably field start and end).
    var firstRow = fieldOffsets[0];
    Assert.Equal(2, firstRow.Count);
    Assert.Equal(1, firstRow[0]);
    Assert.Equal(15, firstRow[1]);

    // Second row: the last expected position (37) is the final escaped
    // quote, directly before the closing quote of the field.
    var secondRow = fieldOffsets[1];
    Assert.Equal(2, secondRow.Count);
    Assert.Equal(19, secondRow[0]);
    Assert.Equal(37, secondRow[1]);
}
[Fact]
public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields()
{

View File

@@ -36,7 +36,6 @@ public class CsvParserTest
Assert.Equal("Hello World", parser.Data[0][0]);
}
[Fact]
public void Parse__TwoLinesOfPainText__TwoRows()
{
@@ -105,6 +104,50 @@ public class CsvParserTest
Assert.Equal("World", parser.Data[1][1]);
}
/// <summary>
/// Parses two identical lines, each consisting of one quoted field with
/// backslash-escaped quotes, and expects one value per row in which the
/// escaped quotes appear as plain quote characters.
/// </summary>
[Fact]
public void Parse__TwoLinesWithOneQuotedColumnsWithEscapedQuotes__TwoRows()
{
    // --- Arrange
    // Raw string literal: the backslashes are literal input characters.
    StringReader input = new(
        """
        "Hello \"World\""
        "Hello \"World\""
        """);
    CsvParser sut = new();

    // --- Act
    sut.Parse(input);

    // --- Assert
    var rows = sut.Data;
    Assert.Equal(2, rows.Count);

    // Expected values are regular C# strings, i.e. Hello "World" without
    // the enclosing quotes and without the backslashes.
    Assert.Single(rows[0]);
    Assert.Equal("Hello \"World\"", rows[0][0]);

    Assert.Single(rows[1]);
    Assert.Equal("Hello \"World\"", rows[1][0]);
}
/// <summary>
/// Parses two quoted single-field lines where the second line ends in
/// several consecutive backslash-escaped quotes, and expects every escaped
/// quote to appear as a plain quote character in the parsed value.
/// </summary>
[Fact]
public void Parse__TwoLinesWithOneQuotedColumnsWithManyEscapedQuotes__TwoRows()
{
    // --- Arrange
    // Raw string literal: the backslashes are literal input characters.
    StringReader input = new(
        """
        "Hello \"World\""
        "Hello \"World\"\"\""
        """);
    CsvParser sut = new();

    // --- Act
    sut.Parse(input);

    // --- Assert
    var rows = sut.Data;
    Assert.Equal(2, rows.Count);

    // Expected values are regular C# strings: the first row is
    // Hello "World", the second one carries two additional trailing quotes.
    Assert.Single(rows[0]);
    Assert.Equal("Hello \"World\"", rows[0][0]);

    Assert.Single(rows[1]);
    Assert.Equal("Hello \"World\"\"\"", rows[1][0]);
}
[Fact]
public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields()
{

View File

@@ -102,6 +102,9 @@ public class CsvFieldIndexer
else if (c == _escapeChar && _insideString)
{
i++;
long absolutePosition = lineOffset + i + unicodeDelta;
fieldStartPosition ??= absolutePosition;
fieldEndPosition = absolutePosition;
}
else if ((c == '\n' || c == '\r') && _insideString == false)
{