CsvFieldIndexer: Fix calculation of offsets for lines containing Unicode (non-ASCII) characters.
Fixes #4
This commit is contained in:
@@ -137,6 +137,39 @@ public class CsvFieldIndexerTests
|
||||
Assert.Equal(29, indexer.FieldIndex[1][3]);
|
||||
}
|
||||
|
||||
// Checks GenerateIndex on two identical rows, each with two quoted fields that
// contain accented (non-ASCII) characters. The expected offsets below are
// larger than plain character counts: they are consistent with every accented
// character occupying two positions (presumably UTF-8 byte offsets - confirm
// against the encoding the indexer falls back to for a StringReader).
[Fact]
|
||||
public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields()
|
||||
{
|
||||
// --- Arrange
|
||||
StringReader sr = new("""
|
||||
"Hélló","Wórld"
|
||||
"Hélló","Wórld"
|
||||
""");
|
||||
|
||||
// --- Act
|
||||
CsvFieldIndexer indexer = new();
|
||||
indexer.GenerateIndex(sr);
|
||||
|
||||
// --- Assert
|
||||
|
||||
// Index is expected to hold 0, 19 and 38: each 15-character row plus its line
// break advances the offset by 19, i.e. 3 more than its 16 characters - one
// extra per accented character (assumes a single-character line break in the
// raw string literal - TODO confirm for CRLF checkouts).
Assert.Equal(3, indexer.Index.Count);
|
||||
Assert.Equal(0, indexer.Index[0]);
|
||||
Assert.Equal(19, indexer.Index[1]);
|
||||
Assert.Equal(38, indexer.Index[2]);
|
||||
|
||||
// FieldIndex is expected to have one entry per row. The four values per row
// look like start/end offsets of the two fields with the surrounding quotes
// excluded (e.g. 1..7 for the 5-character text of the first field).
Assert.Equal(2, indexer.FieldIndex.Count);
|
||||
Assert.Equal(4, indexer.FieldIndex[0].Count);
|
||||
Assert.Equal(1, indexer.FieldIndex[0][0]);
|
||||
Assert.Equal(7, indexer.FieldIndex[0][1]);
|
||||
Assert.Equal(11, indexer.FieldIndex[0][2]);
|
||||
Assert.Equal(16, indexer.FieldIndex[0][3]);
|
||||
// Second row: same field layout, shifted by the first row's length of 19.
Assert.Equal(4, indexer.FieldIndex[1].Count);
|
||||
Assert.Equal(20, indexer.FieldIndex[1][0]);
|
||||
Assert.Equal(26, indexer.FieldIndex[1][1]);
|
||||
Assert.Equal(30, indexer.FieldIndex[1][2]);
|
||||
Assert.Equal(35, indexer.FieldIndex[1][3]);
|
||||
}
|
||||
|
||||
#endregion GenerateIndex
|
||||
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
@@ -10,15 +9,29 @@ namespace CsvLib
|
||||
private int _position;
|
||||
private readonly StringBuilder _sbBuffer = new StringBuilder();
|
||||
|
||||
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
/// <summary>
/// Wraps <paramref name="baseReader"/> and picks the encoding used to measure
/// characters: the reader's CurrentEncoding when it is a StreamReader,
/// otherwise the field keeps its Encoding.Default initializer.
/// </summary>
/// <param name="baseReader">The reader to wrap.</param>
public BufferedTextReader(TextReader baseReader)
|
||||
{
|
||||
_baseReader = baseReader;
|
||||
// NOTE(review): CurrentEncoding is sampled here, before this wrapper has read
// anything. The StreamReader documentation says the value can change after
// the first read when encoding detection is active - confirm this is
// acceptable for callers that pass a freshly opened StreamReader.
if (baseReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
public override int Read()
|
||||
{
|
||||
_position++;
|
||||
int read = _baseReader.Read();
|
||||
if (read > 127)
|
||||
{
|
||||
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||
_position += count;
|
||||
}
|
||||
else
|
||||
{
|
||||
_position++;
|
||||
}
|
||||
if (read != -1)
|
||||
{
|
||||
_sbBuffer.Append((char)read);
|
||||
@@ -26,11 +39,6 @@ namespace CsvLib
|
||||
return read;
|
||||
}
|
||||
|
||||
public override int Read(char[] buffer, int index, int count)
|
||||
{
|
||||
throw new NotImplementedException("Read buffered method on BufferedTextReader");
|
||||
}
|
||||
|
||||
public override int Peek()
|
||||
{
|
||||
return _baseReader.Peek();
|
||||
|
||||
@@ -9,6 +9,8 @@ namespace CsvLib
|
||||
{
|
||||
private bool _insideString;
|
||||
|
||||
private Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
private readonly char _separator;
|
||||
private readonly char _quoteChar;
|
||||
private readonly char _escapeChar;
|
||||
@@ -59,6 +61,7 @@ namespace CsvLib
|
||||
List<long> fieldPositions = new List<long>();
|
||||
long? fieldStartPosition = null;
|
||||
long? fieldEndPosition = null;
|
||||
int unicodeDelta = 0;
|
||||
for (int i = 0; i < line.Length; i++)
|
||||
{
|
||||
char c = line[i];
|
||||
@@ -90,7 +93,12 @@ namespace CsvLib
|
||||
}
|
||||
else
|
||||
{
|
||||
long absolutePosition = lineOffset + i;
|
||||
if (c > 127)
|
||||
{
|
||||
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
|
||||
}
|
||||
|
||||
long absolutePosition = lineOffset + i + unicodeDelta;
|
||||
if (fieldStartPosition == null) { fieldStartPosition = absolutePosition; }
|
||||
fieldEndPosition = absolutePosition;
|
||||
}
|
||||
@@ -121,6 +129,10 @@ namespace CsvLib
|
||||
_index.Clear();
|
||||
_index.Add(0);
|
||||
int idxRow = 0;
|
||||
if (textReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
using (BufferedTextReader reader = new BufferedTextReader(textReader))
|
||||
{
|
||||
string currentLine;
|
||||
@@ -180,7 +192,7 @@ namespace CsvLib
|
||||
public void LoadIndexOfFile(string file)
|
||||
{
|
||||
DateTime dtFile = File.GetCreationTime(file);
|
||||
string indexFile = $"{file}.idx";
|
||||
string indexFile = $"{file}.idx2";
|
||||
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
||||
{
|
||||
_index = Index_LoadFile(indexFile);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace CsvLib
|
||||
{
|
||||
@@ -7,21 +7,30 @@ namespace CsvLib
|
||||
{
|
||||
private readonly TextReader _baseReader;
|
||||
private int _position;
|
||||
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
/// <summary>
/// Wraps <paramref name="baseReader"/> and picks the encoding used to measure
/// characters: the reader's CurrentEncoding when it is a StreamReader,
/// otherwise the field keeps its Encoding.Default initializer.
/// </summary>
/// <param name="baseReader">The reader to wrap.</param>
public TrackingTextReader(TextReader baseReader)
|
||||
{
|
||||
_baseReader = baseReader;
|
||||
// NOTE(review): CurrentEncoding is sampled here, before this wrapper has read
// anything. The StreamReader documentation says the value can change after
// the first read when encoding detection is active - confirm this is
// acceptable for callers that pass a freshly opened StreamReader.
if (baseReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
public override int Read()
|
||||
{
|
||||
_position++;
|
||||
return _baseReader.Read();
|
||||
}
|
||||
|
||||
public override int Read(char[] buffer, int index, int count)
|
||||
{
|
||||
throw new NotImplementedException("Read buffered method on TrackingTextReader");
|
||||
int read = _baseReader.Read();
|
||||
if (read > 127)
|
||||
{
|
||||
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||
_position += count;
|
||||
}
|
||||
else
|
||||
{
|
||||
_position++;
|
||||
}
|
||||
return read;
|
||||
}
|
||||
|
||||
public override int Peek()
|
||||
|
||||
Reference in New Issue
Block a user