CsvFieldIndexer: Implement saving and loading of field indexes.
This commit is contained in:
@@ -10,7 +10,7 @@ namespace CsvLib
|
|||||||
private bool _insideString;
|
private bool _insideString;
|
||||||
|
|
||||||
private Encoding _currentEncoding = Encoding.Default;
|
private Encoding _currentEncoding = Encoding.Default;
|
||||||
|
|
||||||
private readonly char _separator;
|
private readonly char _separator;
|
||||||
private readonly char _quoteChar;
|
private readonly char _quoteChar;
|
||||||
private readonly char _escapeChar;
|
private readonly char _escapeChar;
|
||||||
@@ -97,7 +97,7 @@ namespace CsvLib
|
|||||||
{
|
{
|
||||||
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
|
unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
long absolutePosition = lineOffset + i + unicodeDelta;
|
long absolutePosition = lineOffset + i + unicodeDelta;
|
||||||
fieldStartPosition ??= absolutePosition;
|
fieldStartPosition ??= absolutePosition;
|
||||||
fieldEndPosition = absolutePosition;
|
fieldEndPosition = absolutePosition;
|
||||||
@@ -114,7 +114,7 @@ namespace CsvLib
|
|||||||
return fieldPositions;
|
return fieldPositions;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void GenerateIndex(string file)
|
private void GenerateIndex(string file)
|
||||||
{
|
{
|
||||||
using FileStream stream = new(file, FileMode.Open);
|
using FileStream stream = new(file, FileMode.Open);
|
||||||
using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
|
using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
|
||||||
@@ -149,7 +149,9 @@ namespace CsvLib
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void Index_SaveFile(string indexFile)
|
private const byte FileFormatVersion = 1;
|
||||||
|
|
||||||
|
private void SaveFile(string indexFile)
|
||||||
{
|
{
|
||||||
if (indexFile == null) { return; }
|
if (indexFile == null) { return; }
|
||||||
if (File.Exists(indexFile))
|
if (File.Exists(indexFile))
|
||||||
@@ -159,57 +161,106 @@ namespace CsvLib
|
|||||||
Stream streamOut = File.Open(indexFile, FileMode.Create);
|
Stream streamOut = File.Open(indexFile, FileMode.Create);
|
||||||
using (BinaryWriter binWriter = new(streamOut))
|
using (BinaryWriter binWriter = new(streamOut))
|
||||||
{
|
{
|
||||||
|
binWriter.Write((byte)'C');
|
||||||
|
binWriter.Write((byte)'S');
|
||||||
|
binWriter.Write((byte)'V');
|
||||||
|
|
||||||
|
binWriter.Write(FileFormatVersion);
|
||||||
|
|
||||||
binWriter.Write(_index.Count);
|
binWriter.Write(_index.Count);
|
||||||
for (int i = 0; i < _index.Count; i++)
|
foreach (long currentIndex in _index)
|
||||||
{
|
{
|
||||||
binWriter.Write(_index[i]);
|
binWriter.Write(currentIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
binWriter.Write(_fieldIndex.Count);
|
||||||
|
foreach (List<long> currentFieldIndex in _fieldIndex)
|
||||||
|
{
|
||||||
|
binWriter.Write(currentFieldIndex.Count);
|
||||||
|
for (int i = 0; i < currentFieldIndex.Count; i++)
|
||||||
|
{
|
||||||
|
// Write the i-th offset of the field list being serialized (not the line index), so LoadFile reads back the same values.
binWriter.Write(currentFieldIndex[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
binWriter.Write("test");
|
|
||||||
}
|
}
|
||||||
streamOut.Close();
|
streamOut.Close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void Index_LoadFile(string indexFile)
|
private bool LoadFile(string indexFile)
|
||||||
{
|
{
|
||||||
if (File.Exists(indexFile) == false)
|
if (File.Exists(indexFile) == false)
|
||||||
{
|
{
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
List<long> tempIndex = new();
|
List<long> tempIndex;
|
||||||
|
List<List<long>> tempFieldIndex;
|
||||||
Stream streamIn = File.Open(indexFile, FileMode.Open);
|
Stream streamIn = File.Open(indexFile, FileMode.Open);
|
||||||
using (BinaryReader binReader = new(streamIn))
|
try
|
||||||
{
|
{
|
||||||
int numRegs = binReader.ReadInt32();
|
using BinaryReader binReader = new(streamIn);
|
||||||
for (int i = 0; i < numRegs; i++)
|
|
||||||
|
byte magik0 = binReader.ReadByte();
|
||||||
|
byte magik1 = binReader.ReadByte();
|
||||||
|
byte magik2 = binReader.ReadByte();
|
||||||
|
if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; }
|
||||||
|
|
||||||
|
byte fileVersion = binReader.ReadByte();
|
||||||
|
if (fileVersion != FileFormatVersion) { return false; }
|
||||||
|
|
||||||
|
int numIndexes = binReader.ReadInt32();
|
||||||
|
tempIndex = new List<long>(numIndexes);
|
||||||
|
for (int i = 0; i < numIndexes; i++)
|
||||||
{
|
{
|
||||||
long value = binReader.ReadInt64();
|
long value = binReader.ReadInt64();
|
||||||
tempIndex.Add(value);
|
tempIndex.Add(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int numFieldIndexes = binReader.ReadInt32();
|
||||||
|
tempFieldIndex = new List<List<long>>(numFieldIndexes);
|
||||||
|
for (int j = 0; j < numFieldIndexes; j++)
|
||||||
|
{
|
||||||
|
int numCurrentFieldIndexes = binReader.ReadInt32();
|
||||||
|
List<long> currentFieldIndex = new(numCurrentFieldIndexes);
|
||||||
|
for (int i = 0; i < numCurrentFieldIndexes; i++)
|
||||||
|
{
|
||||||
|
long value = binReader.ReadInt64();
|
||||||
|
currentFieldIndex.Add(value);
|
||||||
|
}
|
||||||
|
tempFieldIndex.Add(currentFieldIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
|
{
|
||||||
|
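// Deliberately swallowed: any read or format error means the index file is unusable, so report failure to the caller.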
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
streamIn.Close();
|
||||||
}
|
}
|
||||||
streamIn.Close();
|
|
||||||
_index = tempIndex;
|
_index = tempIndex;
|
||||||
|
_fieldIndex = tempFieldIndex;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void LoadIndexOfFile(string file)
|
public void LoadIndexOfFile(string file)
|
||||||
{
|
{
|
||||||
DateTime dtFile = File.GetCreationTime(file);
|
DateTime dtFile = File.GetCreationTime(file);
|
||||||
string indexFile = $"{file}.idx2";
|
string indexFile = $"{file}.idx";
|
||||||
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
|
||||||
{
|
{
|
||||||
Index_LoadFile(indexFile);
|
if (LoadFile(indexFile)) { return; }
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
// Generate index
|
|
||||||
DateTime dtNow = DateTime.UtcNow;
|
|
||||||
GenerateIndex(file);
|
|
||||||
TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
|
|
||||||
|
|
||||||
// Save Index if expensive generation
|
// Generate index
|
||||||
if (tsGenIndex.TotalSeconds > 2)
|
DateTime dtNow = DateTime.UtcNow;
|
||||||
{
|
GenerateIndex(file);
|
||||||
Index_SaveFile(indexFile);
|
TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
|
||||||
}
|
|
||||||
|
// Save Index if expensive generation
|
||||||
|
if (tsGenIndex.TotalSeconds > 2)
|
||||||
|
{
|
||||||
|
SaveFile(indexFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user