CsvFieldIndexer.Search: Naive search implementation.

This commit is contained in:
2023-08-21 04:31:21 +02:00
parent 0c05215b94
commit a0010593f6
4 changed files with 135 additions and 10 deletions

View File

@@ -39,11 +39,6 @@ namespace CsvLib
return read;
}
/// <summary>
/// Returns whatever the wrapped reader's <c>Peek()</c> returns.
/// </summary>
public override int Peek() => _baseReader.Peek();
public int Position
{
get { return _position; }

View File

@@ -32,8 +32,7 @@ public class ByteArraySearcher
}
}
if (found)
return true;
if (found) { return true; }
}
return false;

View File

@@ -119,6 +119,7 @@ namespace CsvLib
using FileStream stream = new(file, FileMode.Open);
using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
GenerateIndex(streamReader);
stream.Close();
}
public void GenerateIndex(TextReader textReader)
@@ -172,7 +173,7 @@ namespace CsvLib
{
binWriter.Write(currentIndex);
}
binWriter.Write(_fieldIndex.Count);
foreach (List<long> currentFieldIndex in _fieldIndex)
{
@@ -198,7 +199,7 @@ namespace CsvLib
try
{
using BinaryReader binReader = new(streamIn);
byte magik0 = binReader.ReadByte();
byte magik1 = binReader.ReadByte();
byte magik2 = binReader.ReadByte();
@@ -214,7 +215,7 @@ namespace CsvLib
long value = binReader.ReadInt64();
tempIndex.Add(value);
}
int numFieldIndexes = binReader.ReadInt32();
tempFieldIndex = new List<List<long>>(numFieldIndexes);
for (int j = 0; j < numFieldIndexes; j++)
@@ -263,5 +264,61 @@ namespace CsvLib
SaveFile(indexFile);
}
}
/// <summary>
/// Opens <paramref name="fileName"/> for reading and searches it for
/// <paramref name="textToSearch"/> by delegating to the stream overload of <c>Search</c>.
/// </summary>
/// <param name="fileName">Path of the file to search.</param>
/// <param name="textToSearch">Text to look for; forwarded unchanged to the stream overload.</param>
/// <param name="notifyProgress">Optional progress callback; forwarded unchanged to the stream overload.</param>
/// <returns>The list produced by the stream overload, or an empty list if that result is <c>null</c>.</returns>
public List<long> Search(string fileName, string textToSearch, Action<float> notifyProgress = null)
{
    // Searching only reads the file, so ask for read-only access explicitly: the
    // two-argument FileStream constructor requests read/write access, which fails
    // on read-only files. The using declaration disposes (and therefore closes)
    // the stream on every exit path, so no explicit Close() is needed.
    using FileStream streamIn = new(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    return Search(streamIn, textToSearch, notifyProgress) ?? new List<long>();
}
/// <summary>
/// Scans the byte ranges recorded in <c>_fieldIndex</c> for the UTF-8 encoding of
/// <paramref name="textToSearch"/> and collects <c>_index[j]</c> for every entry
/// <c>j</c> that has at least one matching range.
/// </summary>
/// <param name="streamIn">Stream to search; it is repositioned with <c>Seek</c> and read from.</param>
/// <param name="textToSearch">Text to look for; it is converted to UTF-8 bytes before matching.</param>
/// <param name="notifyProgress">
/// Optional callback invoked with <c>j / _fieldIndex.Count</c>, at most once per
/// roughly 200 ms of elapsed time.
/// </param>
/// <returns>The collected <c>_index</c> values, in ascending order of <c>j</c>.</returns>
/// <exception cref="EndOfStreamException">
/// The stream ends before a recorded byte range has been read completely.
/// </exception>
public List<long> Search(Stream streamIn, string textToSearch, Action<float> notifyProgress = null)
{
    // TODO: Use MemoryMappedFile for better IO performance
    DateTime datePrevious = DateTime.UtcNow;
    List<long> newIndexes = new();
    byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
    ByteArraySearcher searcher = new(bText);
    byte[] buffer = new byte[1024];
    for (int j = 0; j < _fieldIndex.Count; j++)
    {
        // Each list holds pairs: [i] is the start offset, [i + 1] the inclusive end offset.
        List<long> fieldBounds = _fieldIndex[j];
        for (int i = 0; i < fieldBounds.Count; i += 2)
        {
            // Throttle progress notifications so the callback is not invoked per range.
            TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
            if (tsElapsed.TotalMilliseconds > 200)
            {
                datePrevious = DateTime.UtcNow;
                notifyProgress?.Invoke(j / (float)_fieldIndex.Count);
            }

            long offset = fieldBounds[i];
            int length = (int)(fieldBounds[i + 1] - offset) + 1;
            if (buffer.Length < length)
            {
                // Grow the shared buffer so the whole range fits.
                buffer = new byte[length];
            }

            streamIn.Seek(offset, SeekOrigin.Begin);

            // Stream.Read may return fewer bytes than requested without being at the
            // end of the stream, so keep reading until the range is complete or the
            // stream reports that no more data is available.
            int read = 0;
            while (read < length)
            {
                int chunk = streamIn.Read(buffer, read, length - read);
                if (chunk <= 0) { break; }
                read += chunk;
            }
            if (read != length) { throw new EndOfStreamException($"Search: Expected {length} bytes, but read {read}"); }

            if (searcher.Contains(buffer, length))
            {
                // One matching range is enough for this entry; move on to the next one.
                newIndexes.Add(_index[j]);
                break;
            }
        }
    }
    return newIndexes;
}
}
}