CsvFieldIndexer: Fix calculation of offsets with Unicode characters.
Fixes #4
This commit is contained in:
@@ -137,6 +137,39 @@ public class CsvFieldIndexerTests
|
|||||||
Assert.Equal(29, indexer.FieldIndex[1][3]);
|
Assert.Equal(29, indexer.FieldIndex[1][3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that GenerateIndex reports row and field offsets in encoded bytes
// rather than in chars when quoted fields contain non-ASCII characters (é, ó).
// NOTE(review): the expected values assume each accented character encodes to
// 2 bytes (as in UTF-8) and that the raw string literal's line break is a
// single character — confirm against the default encoding in use and the
// line endings of this source file.
[Fact]
|
||||||
|
public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields()
|
||||||
|
{
|
||||||
|
// --- Arrange
|
||||||
|
StringReader sr = new("""
|
||||||
|
"Hélló","Wórld"
|
||||||
|
"Hélló","Wórld"
|
||||||
|
""");
|
||||||
|
|
||||||
|
// --- Act
|
||||||
|
CsvFieldIndexer indexer = new();
|
||||||
|
indexer.GenerateIndex(sr);
|
||||||
|
|
||||||
|
// --- Assert
|
||||||
|
|
||||||
|
// Row offsets: each row is 15 chars, so 19 = 15 + 3 extra bytes (é, ó, ó)
|
||||||
|
// + 1 for the line break.
|
||||||
|
// NOTE(review): the literal has no trailing line break, so the two rows
|
||||||
|
// total 37 bytes under these assumptions; the final entry of 38 presumably
|
||||||
|
// counts one position past the last byte — confirm this is intended.
Assert.Equal(3, indexer.Index.Count);
|
||||||
|
Assert.Equal(0, indexer.Index[0]);
|
||||||
|
Assert.Equal(19, indexer.Index[1]);
|
||||||
|
Assert.Equal(38, indexer.Index[2]);
|
||||||
|
|
||||||
|
// Field offsets: four entries per row. The values are consistent with
|
||||||
|
// (start, end) byte offsets of each field's content without its quotes,
|
||||||
|
// e.g. 1..7 for Hélló and 11..16 for Wórld; the second row repeats them
|
||||||
|
// shifted by the row offset 19.
Assert.Equal(2, indexer.FieldIndex.Count);
|
||||||
|
Assert.Equal(4, indexer.FieldIndex[0].Count);
|
||||||
|
Assert.Equal(1, indexer.FieldIndex[0][0]);
|
||||||
|
Assert.Equal(7, indexer.FieldIndex[0][1]);
|
||||||
|
Assert.Equal(11, indexer.FieldIndex[0][2]);
|
||||||
|
Assert.Equal(16, indexer.FieldIndex[0][3]);
|
||||||
|
Assert.Equal(4, indexer.FieldIndex[1].Count);
|
||||||
|
Assert.Equal(20, indexer.FieldIndex[1][0]);
|
||||||
|
Assert.Equal(26, indexer.FieldIndex[1][1]);
|
||||||
|
Assert.Equal(30, indexer.FieldIndex[1][2]);
|
||||||
|
Assert.Equal(35, indexer.FieldIndex[1][3]);
|
||||||
|
}
|
||||||
|
|
||||||
#endregion GenerateIndex
|
#endregion GenerateIndex
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
using System;
|
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
|
||||||
@@ -10,15 +9,29 @@ namespace CsvLib
|
|||||||
private int _position;
|
private int _position;
|
||||||
private readonly StringBuilder _sbBuffer = new StringBuilder();
|
private readonly StringBuilder _sbBuffer = new StringBuilder();
|
||||||
|
|
||||||
|
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||||
|
|
||||||
public BufferedTextReader(TextReader baseReader)
|
public BufferedTextReader(TextReader baseReader)
|
||||||
{
|
{
|
||||||
_baseReader = baseReader;
|
_baseReader = baseReader;
|
||||||
|
if (baseReader is StreamReader streamReader)
|
||||||
|
{
|
||||||
|
_currentEncoding = streamReader.CurrentEncoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public override int Read()
|
public override int Read()
|
||||||
{
|
{
|
||||||
_position++;
|
|
||||||
int read = _baseReader.Read();
|
int read = _baseReader.Read();
|
||||||
|
if (read > 127)
|
||||||
|
{
|
||||||
|
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||||
|
_position += count;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_position++;
|
||||||
|
}
|
||||||
if (read != -1)
|
if (read != -1)
|
||||||
{
|
{
|
||||||
_sbBuffer.Append((char)read);
|
_sbBuffer.Append((char)read);
|
||||||
@@ -26,11 +39,6 @@ namespace CsvLib
|
|||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override int Read(char[] buffer, int index, int count)
|
|
||||||
{
|
|
||||||
throw new NotImplementedException("Read buffered method on BufferedTextReader");
|
|
||||||
}
|
|
||||||
|
|
||||||
public override int Peek()
|
public override int Peek()
|
||||||
{
|
{
|
||||||
return _baseReader.Peek();
|
return _baseReader.Peek();
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ namespace CsvLib
|
|||||||
{
|
{
|
||||||
private bool _insideString;
|
private bool _insideString;
|
||||||
|
|
||||||
|
private Encoding _currentEncoding = Encoding.Default;
|
||||||
|
|
||||||
private readonly char _separator;
|
private readonly char _separator;
|
||||||
private readonly char _quoteChar;
|
private readonly char _quoteChar;
|
||||||
private readonly char _escapeChar;
|
private readonly char _escapeChar;
|
||||||
@@ -59,6 +61,7 @@ namespace CsvLib
|
|||||||
List<long> fieldPositions = new List<long>();
|
List<long> fieldPositions = new List<long>();
|
||||||
long? fieldStartPosition = null;
|
long? fieldStartPosition = null;
|
||||||
long? fieldEndPosition = null;
|
long? fieldEndPosition = null;
|
||||||
|
int unicodeDelta = 0;
|
||||||
for (int i = 0; i < line.Length; i++)
|
for (int i = 0; i < line.Length; i++)
|
||||||
{
|
{
|
||||||
char c = line[i];
|
char c = line[i];
|
||||||
@@ -90,7 +93,12 @@ namespace CsvLib
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
long absolutePosition = lineOffset + i;
|
if (c > 127)
|
||||||
|
{
|
||||||
|
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
long absolutePosition = lineOffset + i + unicodeDelta;
|
||||||
if (fieldStartPosition == null) { fieldStartPosition = absolutePosition; }
|
if (fieldStartPosition == null) { fieldStartPosition = absolutePosition; }
|
||||||
fieldEndPosition = absolutePosition;
|
fieldEndPosition = absolutePosition;
|
||||||
}
|
}
|
||||||
@@ -121,6 +129,10 @@ namespace CsvLib
|
|||||||
_index.Clear();
|
_index.Clear();
|
||||||
_index.Add(0);
|
_index.Add(0);
|
||||||
int idxRow = 0;
|
int idxRow = 0;
|
||||||
|
if (textReader is StreamReader streamReader)
|
||||||
|
{
|
||||||
|
_currentEncoding = streamReader.CurrentEncoding;
|
||||||
|
}
|
||||||
using (BufferedTextReader reader = new BufferedTextReader(textReader))
|
using (BufferedTextReader reader = new BufferedTextReader(textReader))
|
||||||
{
|
{
|
||||||
string currentLine;
|
string currentLine;
|
||||||
@@ -180,7 +192,7 @@ namespace CsvLib
|
|||||||
public void LoadIndexOfFile(string file)
|
public void LoadIndexOfFile(string file)
|
||||||
{
|
{
|
||||||
DateTime dtFile = File.GetCreationTime(file);
|
DateTime dtFile = File.GetCreationTime(file);
|
||||||
string indexFile = $"{file}.idx";
|
string indexFile = $"{file}.idx2";
|
||||||
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
||||||
{
|
{
|
||||||
_index = Index_LoadFile(indexFile);
|
_index = Index_LoadFile(indexFile);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
using System;
|
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
namespace CsvLib
|
namespace CsvLib
|
||||||
{
|
{
|
||||||
@@ -7,21 +7,30 @@ namespace CsvLib
|
|||||||
{
|
{
|
||||||
private readonly TextReader _baseReader;
|
private readonly TextReader _baseReader;
|
||||||
private int _position;
|
private int _position;
|
||||||
|
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||||
|
|
||||||
public TrackingTextReader(TextReader baseReader)
|
public TrackingTextReader(TextReader baseReader)
|
||||||
{
|
{
|
||||||
_baseReader = baseReader;
|
_baseReader = baseReader;
|
||||||
|
if (baseReader is StreamReader streamReader)
|
||||||
|
{
|
||||||
|
_currentEncoding = streamReader.CurrentEncoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public override int Read()
|
public override int Read()
|
||||||
{
|
{
|
||||||
_position++;
|
int read = _baseReader.Read();
|
||||||
return _baseReader.Read();
|
if (read > 127)
|
||||||
}
|
|
||||||
|
|
||||||
public override int Read(char[] buffer, int index, int count)
|
|
||||||
{
|
{
|
||||||
throw new NotImplementedException("Read buffered method on TrackingTextReader");
|
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||||
|
_position += count;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_position++;
|
||||||
|
}
|
||||||
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override int Peek()
|
public override int Peek()
|
||||||
|
|||||||
Reference in New Issue
Block a user