CsvFieldIndexer.Search: Naive search implementation.

This commit is contained in:
2023-08-21 04:31:21 +02:00
parent 0c05215b94
commit a0010593f6
4 changed files with 135 additions and 10 deletions

View File

@@ -1,3 +1,4 @@
using System.Text;
using CsvLib;
namespace CvsLib;
@@ -172,4 +173,77 @@ public class CsvFieldIndexerTests
#endregion GenerateIndex
#region Search
/// <summary>
/// Indexes a two-row CSV text where only the first row contains "test" and checks that
/// Search reports exactly one hit whose value is 0.
/// </summary>
[Fact]
public void Search__TwoLinesWithTwoQuotedColumns__OneIndexFirstRow()
{
    // --- Arrange
    string csvText = """
        "Hello","test"
        "Hello","World"
        """;
    CsvFieldIndexer fieldIndexer = new();
    fieldIndexer.GenerateIndex(new StringReader(csvText));

    // The search runs over the UTF-8 bytes of the same text that was indexed.
    MemoryStream csvStream = new(Encoding.UTF8.GetBytes(csvText));

    // --- Act
    List<long> matches = fieldIndexer.Search(csvStream, "test");

    // --- Assert
    long onlyMatch = Assert.Single(matches);
    Assert.Equal(0, onlyMatch);
}
/// <summary>
/// Indexes a two-row CSV text where only the second row contains "test" and checks that
/// Search reports exactly one hit whose value is 16 (the first row is 15 characters plus
/// one line-feed).
/// </summary>
[Fact]
public void Search__TwoLinesWithTwoQuotedColumns__OneIndexSecondRow()
{
    // --- Arrange
    // The line break is spelled out as "\n" on purpose: a raw string literal takes its
    // newlines from the source file, so a CRLF checkout would change the text under test
    // and invalidate the expected offset below.
    const string strText = "\"Hello\",\"World\"\n\"Hello\",\"test\"";
    using StringReader sr = new(strText);
    CsvFieldIndexer indexer = new();
    indexer.GenerateIndex(sr);

    // --- Act
    byte[] bText = Encoding.UTF8.GetBytes(strText);
    using MemoryStream ms = new(bText);
    List<long> indexes = indexer.Search(ms, "test");

    // --- Assert
    Assert.Single(indexes);
    Assert.Equal(16, indexes[0]);
}
/// <summary>
/// Indexes a two-row CSV text whose second row contains "test" in both columns and checks
/// that Search still reports that row only once, with value 16 (the first row is
/// 15 characters plus one line-feed).
/// </summary>
[Fact]
public void Search__TwoLinesWithTwoQuotedColumnsTwoMatches__OneIndexSecondRow()
{
    // --- Arrange
    // The line break is spelled out as "\n" on purpose: a raw string literal takes its
    // newlines from the source file, so a CRLF checkout would change the text under test
    // and invalidate the expected offset below.
    const string strText = "\"Hello\",\"World\"\n\"test\",\"test\"";
    using StringReader sr = new(strText);
    CsvFieldIndexer indexer = new();
    indexer.GenerateIndex(sr);

    // --- Act
    byte[] bText = Encoding.UTF8.GetBytes(strText);
    using MemoryStream ms = new(bText);
    List<long> indexes = indexer.Search(ms, "test");

    // --- Assert
    Assert.Single(indexes);
    Assert.Equal(16, indexes[0]);
}
#endregion Search
}

View File

@@ -39,11 +39,6 @@ namespace CsvLib
return read;
}
/// <summary>
/// Forwards the call to the wrapped reader's <c>Peek</c> and returns its result unchanged.
/// </summary>
public override int Peek() => _baseReader.Peek();
public int Position
{
get { return _position; }

View File

@@ -32,8 +32,7 @@ public class ByteArraySearcher
}
}
if (found)
return true;
if (found) { return true; }
}
return false;

View File

@@ -119,6 +119,7 @@ namespace CsvLib
using FileStream stream = new(file, FileMode.Open);
using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
GenerateIndex(streamReader);
stream.Close();
}
public void GenerateIndex(TextReader textReader)
@@ -263,5 +264,61 @@ namespace CsvLib
SaveFile(indexFile);
}
}
/// <summary>
/// Opens <paramref name="fileName"/> and runs the stream-based search over its contents.
/// </summary>
/// <param name="fileName">Path of the file to search.</param>
/// <param name="textToSearch">Text to look for; passed through to the stream overload.</param>
/// <param name="notifyProgress">Optional progress callback; passed through to the stream overload.</param>
/// <returns>The list produced by the stream overload (never null).</returns>
public List<long> Search(string fileName, string textToSearch, Action<float> notifyProgress = null)
{
    // Searching only reads. The two-argument FileStream constructor asks for read/write
    // access, which fails on read-only files, so request read access explicitly.
    // The using declaration disposes (and thereby closes) the stream on every exit path.
    using FileStream streamIn = new(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    return Search(streamIn, textToSearch, notifyProgress);
}
/// <summary>
/// Naive search: for every entry of the field index, reads each indexed field from
/// <paramref name="streamIn"/> and checks whether it contains the UTF-8 bytes of
/// <paramref name="textToSearch"/>. On the first matching field of an entry, the
/// corresponding value of <c>_index</c> is added to the result and the remaining fields
/// of that entry are skipped.
/// </summary>
/// <param name="streamIn">Seekable stream holding the indexed data.</param>
/// <param name="textToSearch">Text to look for; it is encoded as UTF-8 before matching.</param>
/// <param name="notifyProgress">
/// Optional callback receiving <c>j / entryCount</c> for the entry currently being processed.
/// It is invoked only when more than 200 ms have passed since the previous notification.
/// </param>
/// <returns>The <c>_index</c> values of all entries with at least one matching field.</returns>
/// <exception cref="EndOfStreamException">
/// The stream ended before all bytes of an indexed field could be read.
/// </exception>
public List<long> Search(Stream streamIn, string textToSearch, Action<float> notifyProgress = null)
{
    // TODO: Use MemoryMappedFile for better IO performance
    DateTime datePrevious = DateTime.UtcNow;
    List<long> newIndexes = new();
    byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
    ByteArraySearcher searcher = new(bText);
    byte[] buffer = new byte[1024];
    for (int j = 0; j < _fieldIndex.Count; j++)
    {
        var fieldOffsets = _fieldIndex[j];

        // Offsets are stored as consecutive (start, end) pairs, hence the step of two.
        for (int i = 0; i < fieldOffsets.Count; i += 2)
        {
            TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
            if (tsElapsed.TotalMilliseconds > 200)
            {
                datePrevious = DateTime.UtcNow;
                notifyProgress?.Invoke(j / (float)_fieldIndex.Count);
            }

            long offset = fieldOffsets[i];

            // The second value of each pair is treated as an inclusive end, hence the + 1.
            int length = (int)(fieldOffsets[i + 1] - offset) + 1;
            if (buffer.Length < length)
            {
                buffer = new byte[length];
            }

            streamIn.Seek(offset, SeekOrigin.Begin);
            int read = ReadUpTo(streamIn, buffer, length);
            if (read != length)
            {
                throw new EndOfStreamException($"Search: Expected {length} bytes, but read {read}");
            }

            if (searcher.Contains(buffer, length))
            {
                // One hit is enough for this entry; continue with the next one.
                newIndexes.Add(_index[j]);
                break;
            }
        }
    }
    return newIndexes;
}

/// <summary>
/// Reads from <paramref name="stream"/> into the start of <paramref name="buffer"/> until
/// <paramref name="count"/> bytes have been read or the stream reports its end.
/// A single <c>Stream.Read</c> call may return fewer bytes than requested even when more
/// data is available, so the call is repeated.
/// </summary>
/// <returns>The number of bytes actually read (less than <paramref name="count"/> only at end of stream).</returns>
private static int ReadUpTo(Stream stream, byte[] buffer, int count)
{
    int total = 0;
    while (total < count)
    {
        int chunk = stream.Read(buffer, total, count - total);
        if (chunk <= 0)
        {
            break;
        }
        total += chunk;
    }
    return total;
}
}
}