Migrate to dotnet8 and fix warnings

This commit is contained in:
2024-02-18 00:46:45 +01:00
parent a688f2a692
commit 66536657e2
15 changed files with 492 additions and 470 deletions

View File

@@ -1,57 +1,52 @@
using System.IO;
using System.Text;
namespace CsvLib
namespace CsvLib;
public class BufferedTextReader : TextReader
{
public class BufferedTextReader : TextReader
private readonly TextReader _baseReader;
private readonly StringBuilder _sbBuffer = new();
private readonly Encoding _currentEncoding = Encoding.Default;
public BufferedTextReader(TextReader baseReader)
{
private readonly TextReader _baseReader;
private int _position;
private readonly StringBuilder _sbBuffer = new();
private readonly Encoding _currentEncoding = Encoding.Default;
public BufferedTextReader(TextReader baseReader)
_baseReader = baseReader;
if (baseReader is StreamReader streamReader)
{
_baseReader = baseReader;
if (baseReader is StreamReader streamReader)
{
_currentEncoding = streamReader.CurrentEncoding;
}
}
public override int Read()
{
int read = _baseReader.Read();
if (read > 127)
{
int count = _currentEncoding.GetByteCount(((char)read).ToString());
_position += count;
}
else
{
_position++;
}
if (read != -1)
{
_sbBuffer.Append((char)read);
}
return read;
}
public int Position
{
get { return _position; }
}
public string GetBuffer()
{
return _sbBuffer.ToString();
}
public void CleanBuffer()
{
_sbBuffer.Clear();
_currentEncoding = streamReader.CurrentEncoding;
}
}
}
/// <summary>
/// Reads one character from the wrapped reader, appends it to the internal buffer and
/// advances <see cref="Position"/> by the number of bytes attributed to that character.
/// </summary>
/// <returns>
/// The character that was read, or -1 when the wrapped reader has no more characters.
/// </returns>
public override int Read()
{
    int read = _baseReader.Read();
    if (read == -1)
    {
        // End of input: no character was consumed, so neither Position nor the buffer
        // may change. Counting the sentinel would push Position one past the real offset.
        return read;
    }

    char current = (char)read;

    // Characters up to 127 are counted as a single byte; anything above is measured
    // with _currentEncoding.
    // NOTE(review): each UTF-16 code unit is measured on its own, so the two halves of a
    // surrogate pair may not add up to the encoded size of the pair - confirm if
    // characters outside the BMP matter here.
    Position += read > 127 ? _currentEncoding.GetByteCount(current.ToString()) : 1;

    _sbBuffer.Append(current);
    return read;
}
/// <summary>
/// Offset accumulated while reading; it can be read by anyone but is only modified
/// from inside this class.
/// </summary>
public int Position
{
    get;
    private set;
}
/// <summary>
/// Returns, as a string, every character currently held in the internal buffer.
/// </summary>
/// <returns>The buffered text; the buffer itself is left unchanged.</returns>
public string GetBuffer() => _sbBuffer.ToString();
/// <summary>
/// Empties the internal buffer so that the next call to GetBuffer only returns
/// characters read after this point.
/// </summary>
public void CleanBuffer() => _sbBuffer.Clear();
}

View File

@@ -1,5 +1,3 @@
#nullable enable
namespace CsvLib;
public class ByteArraySearcher

View File

@@ -3,322 +3,319 @@ using System.Collections.Generic;
using System.IO;
using System.Text;
namespace CsvLib
namespace CsvLib;
public class CsvFieldIndexer
{
public class CsvFieldIndexer
private bool _insideString;
private Encoding _currentEncoding = Encoding.Default;
private readonly char _separator;
private readonly char _quoteChar;
private readonly char _escapeChar;
public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
{
private bool _insideString;
_separator = separator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
}
private Encoding _currentEncoding = Encoding.Default;
private List<long> _index = new();
private readonly char _separator;
private readonly char _quoteChar;
private readonly char _escapeChar;
public List<long> Index { get { return _index; } }
public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
private List<List<long>> _fieldIndex = new();
public List<List<long>> FieldIndex { get { return _fieldIndex; } }
private void DummyParser(string line)
{
for (int i = 0; i < line.Length; i++)
{
_separator = separator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
}
private List<long> _index = new();
public List<long> Index { get { return _index; } }
private List<List<long>> _fieldIndex = new();
public List<List<long>> FieldIndex { get { return _fieldIndex; } }
private void DummyParser(string line)
{
for (int i = 0; i < line.Length; i++)
char c = line[i];
if (c == _separator && _insideString == false)
{
char c = line[i];
if (c == _separator && _insideString == false)
{
continue;
}
if (c == _quoteChar && _insideString == false)
{
_insideString = true;
continue;
}
if (c == _quoteChar && _insideString)
{
_insideString = false;
continue;
}
if (c == _escapeChar && _insideString)
{
i++;
}
continue;
}
if (c == _quoteChar && _insideString == false)
{
_insideString = true;
continue;
}
if (c == _quoteChar && _insideString)
{
_insideString = false;
continue;
}
if (c == _escapeChar && _insideString)
{
i++;
}
}
}
private List<long> ParseLineIndex(string line, long lineOffset)
private List<long> ParseLineIndex(string line, long lineOffset)
{
List<long> fieldPositions = new();
long? fieldStartPosition = null;
long? fieldEndPosition = null;
int unicodeDelta = 0;
for (int i = 0; i < line.Length; i++)
{
List<long> fieldPositions = new();
long? fieldStartPosition = null;
long? fieldEndPosition = null;
int unicodeDelta = 0;
for (int i = 0; i < line.Length; i++)
char c = line[i];
if (c == _separator && _insideString == false)
{
char c = line[i];
if (c == _separator && _insideString == false)
{
if (fieldStartPosition != null)
{
fieldPositions.Add((long)fieldStartPosition);
fieldPositions.Add((long)fieldEndPosition);
}
fieldStartPosition = null;
fieldEndPosition = null;
}
else if (c == _quoteChar && _insideString == false)
{
_insideString = true;
}
else if (c == _quoteChar && _insideString)
{
_insideString = false;
}
else if (c == _escapeChar && _insideString)
{
i++;
}
else if ((c == '\n' || c == '\r') && _insideString == false)
{
break;
}
else
{
if (c > 127)
{
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
}
long absolutePosition = lineOffset + i + unicodeDelta;
fieldStartPosition ??= absolutePosition;
fieldEndPosition = absolutePosition;
}
}
if (_insideString == false)
{
if (fieldStartPosition != null)
if (fieldStartPosition != null && fieldEndPosition != null)
{
fieldPositions.Add((long)fieldStartPosition);
fieldPositions.Add((long)fieldEndPosition);
}
fieldStartPosition = null;
fieldEndPosition = null;
}
return fieldPositions;
}
private void GenerateIndex(string file)
{
using FileStream stream = new(file, FileMode.Open);
using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
GenerateIndex(streamReader);
stream.Close();
}
public void GenerateIndex(TextReader textReader)
{
_insideString = false;
_index.Clear();
_index.Add(0);
int idxRow = 0;
if (textReader is StreamReader streamReader)
else if (c == _quoteChar && _insideString == false)
{
_currentEncoding = streamReader.CurrentEncoding;
_insideString = true;
}
using BufferedTextReader reader = new(textReader);
string currentLine;
while ((currentLine = reader.ReadLine()) != null)
else if (c == _quoteChar && _insideString)
{
DummyParser(currentLine);
if (_insideString) { continue; }
string fullLine = reader.GetBuffer();
reader.CleanBuffer();
List<long> fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]);
_fieldIndex.Add(fieldIndexes);
_index.Add(reader.Position);
idxRow++;
_insideString = false;
}
}
private const byte FileFormatVersion = 1;
private void SaveFile(string indexFile)
{
if (indexFile == null) { return; }
if (File.Exists(indexFile))
else if (c == _escapeChar && _insideString)
{
File.Delete(indexFile);
i++;
}
Stream streamOut = File.Open(indexFile, FileMode.Create);
using (BinaryWriter binWriter = new(streamOut))
else if ((c == '\n' || c == '\r') && _insideString == false)
{
binWriter.Write((byte)'C');
binWriter.Write((byte)'S');
binWriter.Write((byte)'V');
binWriter.Write(FileFormatVersion);
binWriter.Write(_index.Count);
foreach (long currentIndex in _index)
break;
}
else
{
if (c > 127)
{
binWriter.Write(currentIndex);
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
}
binWriter.Write(_fieldIndex.Count);
foreach (List<long> currentFieldIndex in _fieldIndex)
{
binWriter.Write(currentFieldIndex.Count);
for (int i = 0; i < currentFieldIndex.Count; i++)
{
binWriter.Write(currentFieldIndex[i]);
}
}
}
streamOut.Close();
}
private bool LoadFile(string indexFile)
{
if (File.Exists(indexFile) == false)
{
return false;
}
List<long> tempIndex;
List<List<long>> tempFieldIndex;
Stream streamIn = File.Open(indexFile, FileMode.Open);
try
{
using BinaryReader binReader = new(streamIn);
byte magik0 = binReader.ReadByte();
byte magik1 = binReader.ReadByte();
byte magik2 = binReader.ReadByte();
if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; }
byte fileVersion = binReader.ReadByte();
if (fileVersion != FileFormatVersion) { return false; }
int numIndexes = binReader.ReadInt32();
tempIndex = new List<long>(numIndexes);
for (int i = 0; i < numIndexes; i++)
{
long value = binReader.ReadInt64();
tempIndex.Add(value);
}
int numFieldIndexes = binReader.ReadInt32();
tempFieldIndex = new List<List<long>>(numFieldIndexes);
for (int j = 0; j < numFieldIndexes; j++)
{
int numCurrentFieldIndexes = binReader.ReadInt32();
List<long> currentFieldIndex = new(numCurrentFieldIndexes);
for (int i = 0; i < numCurrentFieldIndexes; i++)
{
long value = binReader.ReadInt64();
currentFieldIndex.Add(value);
}
tempFieldIndex.Add(currentFieldIndex);
}
}
catch (Exception)
{
// NON NON NOM
return false;
}
finally
{
streamIn.Close();
}
_index = tempIndex;
_fieldIndex = tempFieldIndex;
return true;
}
public void LoadIndexOfFile(string file)
{
DateTime dtFile = File.GetCreationTime(file);
string indexFile = $"{file}.idx";
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
{
if (LoadFile(indexFile)) { return; }
}
// Generate index
DateTime dtNow = DateTime.UtcNow;
GenerateIndex(file);
TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
// Save Index if expensive generation
if (tsGenIndex.TotalSeconds > 2)
{
SaveFile(indexFile);
long absolutePosition = lineOffset + i + unicodeDelta;
fieldStartPosition ??= absolutePosition;
fieldEndPosition = absolutePosition;
}
}
public List<long> Search(string fileName, string textToSearch, Action<float> notifyProgress = null)
if (_insideString == false)
{
List<long> index;
using FileStream streamIn = new(fileName, FileMode.Open);
try
if (fieldStartPosition != null && fieldEndPosition != null)
{
index = Search(streamIn, textToSearch, notifyProgress);
fieldPositions.Add((long)fieldStartPosition);
fieldPositions.Add((long)fieldEndPosition);
}
finally
{
streamIn.Close();
}
return index ?? new List<long>();
}
public List<long> Search(Stream streamIn, string textToSearch, Action<float> notifyProgress = null)
return fieldPositions;
}
/// <summary>
/// Opens <paramref name="file"/> and builds the index from its contents by delegating
/// to the <see cref="TextReader"/> overload.
/// </summary>
/// <param name="file">Path of the CSV file to index.</param>
private void GenerateIndex(string file)
{
    // Indexing only reads, so request read access explicitly: FileStream(path, mode)
    // asks for read/write access, which fails on read-only files.
    using FileStream stream = new(file, FileMode.Open, FileAccess.Read, FileShare.Read);
    using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
    GenerateIndex(streamReader);
    // Reader and stream are released by the using declarations; no explicit Close needed.
}
public void GenerateIndex(TextReader textReader)
{
_insideString = false;
_index.Clear();
_index.Add(0);
int idxRow = 0;
if (textReader is StreamReader streamReader)
{
// TODO: Use MemoryMappedFile for better IO performance
DateTime datePrevious = DateTime.UtcNow;
List<long> newIndexes = new();
byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
ByteArraySearcher searcher = new(bText);
byte[] buffer = new byte[1024];
for (int j = 0; j < _fieldIndex.Count; j++)
{
for (int i = 0; i < _fieldIndex[j].Count; i += 2)
{
TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
if (tsElapsed.TotalMilliseconds > 200)
{
datePrevious = DateTime.UtcNow;
notifyProgress?.Invoke(j/(float)_fieldIndex.Count);
}
long offset = _fieldIndex[j][i];
int length = (int)(_fieldIndex[j][i + 1] - offset) + 1;
if (buffer.Length < length)
{
buffer = new byte[length];
}
streamIn.Seek(offset, SeekOrigin.Begin);
int read = streamIn.Read(buffer, 0, length);
if (read != length) { throw new Exception($"Search: Expected {length} bytes, but read {read}"); }
_currentEncoding = streamReader.CurrentEncoding;
}
using BufferedTextReader reader = new(textReader);
while (reader.ReadLine() is { } currentLine)
{
DummyParser(currentLine);
if (_insideString) { continue; }
bool matches = searcher.Contains(buffer, length);
if (matches == false) { continue; }
newIndexes.Add(_index[j]);
break;
}
}
string fullLine = reader.GetBuffer();
reader.CleanBuffer();
List<long> fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]);
_fieldIndex.Add(fieldIndexes);
return newIndexes;
_index.Add(reader.Position);
idxRow++;
}
}
}
private const byte FileFormatVersion = 1;
/// <summary>
/// Writes the index to <paramref name="indexFile"/> in binary form: the bytes 'C', 'S',
/// 'V', the format version, the number of entries in <c>_index</c> followed by each
/// entry, then the number of lists in <c>_fieldIndex</c>, each stored as its own count
/// followed by its values.
/// </summary>
/// <param name="indexFile">Path of the index file to (re)create.</param>
private void SaveFile(string indexFile)
{
    // NOTE(review): FileMode.Create would already overwrite an existing file. The explicit
    // delete is kept as-is; LoadIndexOfFile compares creation times, so it may be there to
    // obtain a fresh creation timestamp - confirm before removing.
    if (File.Exists(indexFile))
    {
        File.Delete(indexFile);
    }

    Stream output = File.Open(indexFile, FileMode.Create);
    using (BinaryWriter writer = new(output))
    {
        // Signature and format version.
        foreach (char magic in "CSV")
        {
            writer.Write((byte)magic);
        }
        writer.Write(FileFormatVersion);

        // Entries of _index, prefixed by their count.
        writer.Write(_index.Count);
        for (int row = 0; row < _index.Count; row++)
        {
            writer.Write(_index[row]);
        }

        // Lists of _fieldIndex, each prefixed by its own count.
        writer.Write(_fieldIndex.Count);
        for (int row = 0; row < _fieldIndex.Count; row++)
        {
            List<long> positions = _fieldIndex[row];
            writer.Write(positions.Count);
            for (int k = 0; k < positions.Count; k++)
            {
                writer.Write(positions[k]);
            }
        }
    }
    output.Close();
}
/// <summary>
/// Tries to replace the in-memory index with the contents of <paramref name="indexFile"/>.
/// </summary>
/// <param name="indexFile">Path of a file previously written by SaveFile.</param>
/// <returns>
/// <c>true</c> when the file was read completely and the index was replaced;
/// <c>false</c> when the file is missing, has a different signature or version, or could
/// not be opened or read. On <c>false</c> the current index is left untouched.
/// </returns>
private bool LoadFile(string indexFile)
{
    if (File.Exists(indexFile) == false)
    {
        return false;
    }

    List<long> tempIndex;
    List<List<long>> tempFieldIndex;
    try
    {
        // Opened inside the try and for reading only: a locked, read-only or vanished
        // index file must lead to "return false" (so the caller regenerates the index)
        // instead of an exception escaping from here.
        using FileStream streamIn = new(indexFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        using BinaryReader binReader = new(streamIn);

        // Signature: the three bytes 'C', 'S', 'V'.
        byte magik0 = binReader.ReadByte();
        byte magik1 = binReader.ReadByte();
        byte magik2 = binReader.ReadByte();
        if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; }

        // Only the format version this class writes is accepted.
        byte fileVersion = binReader.ReadByte();
        if (fileVersion != FileFormatVersion) { return false; }

        // Count-prefixed list of 64-bit values for _index.
        int numIndexes = binReader.ReadInt32();
        tempIndex = new List<long>(numIndexes);
        for (int i = 0; i < numIndexes; i++)
        {
            tempIndex.Add(binReader.ReadInt64());
        }

        // Count-prefixed list of count-prefixed lists for _fieldIndex.
        int numFieldIndexes = binReader.ReadInt32();
        tempFieldIndex = new List<List<long>>(numFieldIndexes);
        for (int j = 0; j < numFieldIndexes; j++)
        {
            int numCurrentFieldIndexes = binReader.ReadInt32();
            List<long> currentFieldIndex = new(numCurrentFieldIndexes);
            for (int i = 0; i < numCurrentFieldIndexes; i++)
            {
                currentFieldIndex.Add(binReader.ReadInt64());
            }
            tempFieldIndex.Add(currentFieldIndex);
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: a truncated, corrupt or inaccessible index file is
        // treated the same as a missing one.
        return false;
    }

    // Publish only after the whole file was read, so a failure never leaves a
    // half-loaded index behind.
    _index = tempIndex;
    _fieldIndex = tempFieldIndex;
    return true;
}
/// <summary>
/// Makes the index for <paramref name="file"/> available: loads the companion
/// "<c>{file}.idx</c>" file when it is newer than the CSV, otherwise generates the index
/// from the CSV and saves it when generation took more than two seconds.
/// </summary>
/// <param name="file">Path of the CSV file.</param>
public void LoadIndexOfFile(string file)
{
    string indexFile = $"{file}.idx";

    // The saved index is only trusted when it was created after the CSV was both created
    // and last written. Comparing against the creation time alone would keep using an
    // index after the CSV has been edited in place. UTC avoids local-time ambiguities.
    DateTime csvCreated = File.GetCreationTimeUtc(file);
    DateTime csvWritten = File.GetLastWriteTimeUtc(file);
    DateTime csvStamp = csvWritten > csvCreated ? csvWritten : csvCreated;
    if (File.Exists(indexFile) && File.GetCreationTimeUtc(indexFile) > csvStamp)
    {
        if (LoadFile(indexFile)) { return; }
    }

    // Generate index
    DateTime dtNow = DateTime.UtcNow;
    GenerateIndex(file);
    TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;

    // Save Index if expensive generation
    if (tsGenIndex.TotalSeconds > 2)
    {
        SaveFile(indexFile);
    }
}
/// <summary>
/// Opens <paramref name="fileName"/> and searches it through the <see cref="Stream"/>
/// overload.
/// </summary>
/// <param name="fileName">Path of the file to search.</param>
/// <param name="textToSearch">Text to look for.</param>
/// <param name="notifyProgress">Optional progress callback, passed through unchanged.</param>
/// <returns>The list produced by the <see cref="Stream"/> overload.</returns>
public List<long> Search(string fileName, string textToSearch, Action<float>? notifyProgress = null)
{
    // Searching only reads, so request read access explicitly: FileStream(path, mode)
    // asks for read/write access, which fails on read-only files. The using declaration
    // closes the stream on every exit path, so no try/finally is required.
    using FileStream streamIn = new(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    return Search(streamIn, textToSearch, notifyProgress);
}
/// <summary>
/// Scans the byte ranges recorded in <c>_fieldIndex</c> for the UTF-8 bytes of
/// <paramref name="textToSearch"/> and collects the <c>_index</c> entry of every row
/// that has at least one matching range.
/// </summary>
/// <param name="streamIn">Seekable stream positioned over the indexed data.</param>
/// <param name="textToSearch">Text to look for.</param>
/// <param name="notifyProgress">
/// Optional callback receiving <c>row / rowCount</c>; invoked when more than 200 ms have
/// passed since the previous notification.
/// </param>
/// <returns>The <c>_index</c> entries of the matching rows, in row order.</returns>
/// <exception cref="EndOfStreamException">
/// The stream ended before a recorded range could be read completely.
/// </exception>
public List<long> Search(Stream streamIn, string textToSearch, Action<float>? notifyProgress = null)
{
    // TODO: Use MemoryMappedFile for better IO performance
    DateTime datePrevious = DateTime.UtcNow;
    List<long> newIndexes = new();
    byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
    ByteArraySearcher searcher = new(bText);
    byte[] buffer = new byte[1024];
    for (int j = 0; j < _fieldIndex.Count; j++)
    {
        // Entries come in pairs: [i] is the first byte of a range, [i + 1] the last one.
        List<long> rowFields = _fieldIndex[j];
        for (int i = 0; i < rowFields.Count; i += 2)
        {
            TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
            if (tsElapsed.TotalMilliseconds > 200)
            {
                datePrevious = DateTime.UtcNow;
                notifyProgress?.Invoke(j / (float)_fieldIndex.Count);
            }

            long offset = rowFields[i];
            int length = (int)(rowFields[i + 1] - offset) + 1;
            if (buffer.Length < length)
            {
                // Grow the scratch buffer; it is reused for all following ranges.
                buffer = new byte[length];
            }

            streamIn.Seek(offset, SeekOrigin.Begin);

            // A single Stream.Read call may return fewer bytes than requested even when
            // more data is available, so keep reading until the range is complete or the
            // stream really ends.
            int read = streamIn.ReadAtLeast(buffer.AsSpan(0, length), length, throwOnEndOfStream: false);
            if (read != length) { throw new EndOfStreamException($"Search: Expected {length} bytes, but read {read}"); }

            bool matches = searcher.Contains(buffer, length);
            if (matches == false) { continue; }

            // One hit is enough for this row: record it and move on to the next row.
            newIndexes.Add(_index[j]);
            break;
        }
    }

    return newIndexes;
}
}

View File

@@ -1,8 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<LangVersion>11</LangVersion>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@@ -2,102 +2,100 @@
using System.IO;
using System.Text;
namespace CsvLib
namespace CsvLib;
public class CsvParser
{
public class CsvParser
private bool _insideString;
private readonly char _separator;
private readonly char _quoteChar;
private readonly char _escapeChar;
public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
{
private bool _insideString;
_separator = separator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
}
private readonly char _separator;
private readonly char _quoteChar;
private readonly char _escapeChar;
private List<List<string>> _data = new();
public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
private List<string>? _currentReg;
private StringBuilder? _currentCell;
public List<List<string>> Data
{
get { return _data; }
}
private void ParseLine(string line)
{
_currentReg ??= new List<string>();
_currentCell ??= new StringBuilder();
for (int i = 0; i < line.Length; i++)
{
_separator = separator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
}
private List<List<string>> _data = new();
private List<string> _currentReg;
StringBuilder _currentCell;
public List<List<string>> Data
{
get { return _data; }
}
private void ParseLine(string line)
{
_currentReg ??= new List<string>();
_currentCell ??= new StringBuilder();
for (int i = 0; i < line.Length; i++)
{
char c = line[i];
if (c == _separator && _insideString == false)
{
_currentReg.Add(_currentCell.ToString());
_currentCell.Clear();
continue;
}
if (c == _quoteChar && _insideString == false)
{
_insideString = true;
continue;
}
if (c == _quoteChar && _insideString)
{
_insideString = false;
continue;
}
if (c == _escapeChar && _insideString)
{
i++;
if (i == line.Length) { break; }
c = line[i];
}
_currentCell.Append(c);
}
if (_insideString)
{
_currentCell.Append('\n');
}
else
char c = line[i];
if (c == _separator && _insideString == false)
{
_currentReg.Add(_currentCell.ToString());
_currentCell.Clear();
_data.Add(_currentReg);
_currentReg = null;
continue;
}
if (c == _quoteChar && _insideString == false)
{
_insideString = true;
continue;
}
if (c == _quoteChar && _insideString)
{
_insideString = false;
continue;
}
if (c == _escapeChar && _insideString)
{
i++;
if (i == line.Length) { break; }
c = line[i];
}
_currentCell.Append(c);
}
public void ParseFile(string file, long offset = 0, int count = 0)
if (_insideString)
{
_insideString = false;
_data = new List<List<string>>();
_currentCell.Append('\n');
}
else
{
_currentReg.Add(_currentCell.ToString());
_currentCell.Clear();
_data.Add(_currentReg);
_currentReg = null;
FileStream stream = new(file, FileMode.Open);
stream.Seek(offset, SeekOrigin.Begin);
using (StreamReader reader = new(stream, Encoding.Default, true, 4096))
}
}
public void ParseFile(string file, long offset = 0, int count = 0)
{
_insideString = false;
_data = new List<List<string>>();
_currentReg = null;
FileStream stream = new(file, FileMode.Open);
stream.Seek(offset, SeekOrigin.Begin);
using (StreamReader reader = new(stream, Encoding.Default, true, 4096))
{
while (reader.ReadLine() is { } currentLine)
{
string currentLine;
while ((currentLine = reader.ReadLine()) != null)
ParseLine(currentLine);
if (count > 0 && Data.Count == count)
{
ParseLine(currentLine);
if (count > 0 && Data.Count == count)
{
break;
}
break;
}
}
stream.Close();
}
stream.Close();
}
}
}