CsvFieldIndexer: Fix calculation of offsets with Unicode characters.
Fixes #4
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
@@ -10,15 +9,29 @@ namespace CsvLib
|
||||
private int _position;
|
||||
private readonly StringBuilder _sbBuffer = new StringBuilder();
|
||||
|
||||
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
/// <summary>
/// Creates a reader that wraps <paramref name="baseReader"/>.
/// When the wrapped reader is a <see cref="StreamReader"/>, its
/// <c>CurrentEncoding</c> replaces the <c>Encoding.Default</c> initial value
/// of <c>_currentEncoding</c>; <c>Read()</c> uses that encoding to obtain the
/// byte count of characters above 127 when advancing <c>_position</c>.
/// </summary>
/// <param name="baseReader">The underlying reader that all reads are delegated to.</param>
public BufferedTextReader(TextReader baseReader)
|
||||
{
|
||||
_baseReader = baseReader;
|
||||
// Any other TextReader keeps the Encoding.Default field initializer.
// NOTE(review): StreamReader.CurrentEncoding can change after the first read
// when BOM detection is enabled, and it is sampled here before any read - confirm.
if (baseReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
public override int Read()
|
||||
{
|
||||
_position++;
|
||||
int read = _baseReader.Read();
|
||||
if (read > 127)
|
||||
{
|
||||
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||
_position += count;
|
||||
}
|
||||
else
|
||||
{
|
||||
_position++;
|
||||
}
|
||||
if (read != -1)
|
||||
{
|
||||
_sbBuffer.Append((char)read);
|
||||
@@ -26,11 +39,6 @@ namespace CsvLib
|
||||
return read;
|
||||
}
|
||||
|
||||
public override int Read(char[] buffer, int index, int count)
|
||||
{
|
||||
throw new NotImplementedException("Read buffered method on BufferedTextReader");
|
||||
}
|
||||
|
||||
public override int Peek()
|
||||
{
|
||||
return _baseReader.Peek();
|
||||
|
||||
@@ -9,6 +9,8 @@ namespace CsvLib
|
||||
{
|
||||
private bool _insideString;
|
||||
|
||||
private Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
private readonly char _separator;
|
||||
private readonly char _quoteChar;
|
||||
private readonly char _escapeChar;
|
||||
@@ -59,6 +61,7 @@ namespace CsvLib
|
||||
List<long> fieldPositions = new List<long>();
|
||||
long? fieldStartPosition = null;
|
||||
long? fieldEndPosition = null;
|
||||
int unicodeDelta = 0;
|
||||
for (int i = 0; i < line.Length; i++)
|
||||
{
|
||||
char c = line[i];
|
||||
@@ -90,7 +93,12 @@ namespace CsvLib
|
||||
}
|
||||
else
|
||||
{
|
||||
long absolutePosition = lineOffset + i;
|
||||
if (c > 127)
|
||||
{
|
||||
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
|
||||
}
|
||||
|
||||
long absolutePosition = lineOffset + i + unicodeDelta;
|
||||
if (fieldStartPosition == null) { fieldStartPosition = absolutePosition; }
|
||||
fieldEndPosition = absolutePosition;
|
||||
}
|
||||
@@ -121,6 +129,10 @@ namespace CsvLib
|
||||
_index.Clear();
|
||||
_index.Add(0);
|
||||
int idxRow = 0;
|
||||
if (textReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
using (BufferedTextReader reader = new BufferedTextReader(textReader))
|
||||
{
|
||||
string currentLine;
|
||||
@@ -180,7 +192,7 @@ namespace CsvLib
|
||||
public void LoadIndexOfFile(string file)
|
||||
{
|
||||
DateTime dtFile = File.GetCreationTime(file);
|
||||
string indexFile = $"{file}.idx";
|
||||
string indexFile = $"{file}.idx2";
|
||||
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
||||
{
|
||||
_index = Index_LoadFile(indexFile);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace CsvLib
|
||||
{
|
||||
@@ -7,21 +7,30 @@ namespace CsvLib
|
||||
{
|
||||
private readonly TextReader _baseReader;
|
||||
private int _position;
|
||||
private readonly Encoding _currentEncoding = Encoding.Default;
|
||||
|
||||
/// <summary>
/// Creates a reader that wraps <paramref name="baseReader"/>.
/// When the wrapped reader is a <see cref="StreamReader"/>, its
/// <c>CurrentEncoding</c> replaces the <c>Encoding.Default</c> initial value
/// of <c>_currentEncoding</c>, which is used to obtain the byte count of
/// characters above 127 when <c>_position</c> is advanced.
/// </summary>
/// <param name="baseReader">The underlying reader that reads are delegated to.</param>
public TrackingTextReader(TextReader baseReader)
|
||||
{
|
||||
_baseReader = baseReader;
|
||||
// Any other TextReader keeps the Encoding.Default field initializer.
// NOTE(review): StreamReader.CurrentEncoding can change after the first read
// when BOM detection is enabled, and it is sampled here before any read - confirm.
if (baseReader is StreamReader streamReader)
|
||||
{
|
||||
_currentEncoding = streamReader.CurrentEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
public override int Read()
|
||||
{
|
||||
_position++;
|
||||
return _baseReader.Read();
|
||||
}
|
||||
|
||||
public override int Read(char[] buffer, int index, int count)
|
||||
{
|
||||
throw new NotImplementedException("Read buffered method on TrackingTextReader");
|
||||
int read = _baseReader.Read();
|
||||
if (read > 127)
|
||||
{
|
||||
int count = _currentEncoding.GetByteCount(((char)read).ToString());
|
||||
_position += count;
|
||||
}
|
||||
else
|
||||
{
|
||||
_position++;
|
||||
}
|
||||
return read;
|
||||
}
|
||||
|
||||
public override int Peek()
|
||||
|
||||
Reference in New Issue
Block a user