From 52cb729c0e5afbe1f0c9df981d7f935ec6a190ec Mon Sep 17 00:00:00 2001 From: "Valeriano A.R." Date: Sun, 18 Feb 2024 16:30:45 +0100 Subject: [PATCH] CsvFieldIndexer: Add tests to Save and Load --- CsvLib.Tests/CsvFieldIndexerTests.cs | 30 ++++ CsvLib/CsvFieldIndexer.cs | 201 ++++++++++++++++----------- CsvView.sln.DotSettings | 1 + CsvView/MainWindow.axaml.cs | 2 +- 4 files changed, 153 insertions(+), 81 deletions(-) diff --git a/CsvLib.Tests/CsvFieldIndexerTests.cs b/CsvLib.Tests/CsvFieldIndexerTests.cs index cf88d36..0a9b4ef 100644 --- a/CsvLib.Tests/CsvFieldIndexerTests.cs +++ b/CsvLib.Tests/CsvFieldIndexerTests.cs @@ -252,4 +252,34 @@ public class CsvFieldIndexerTests #endregion Search + #region Save & Load + + [Fact] + public void Save__TwoLinesWithTwoQuotedColumnsTwoMatchesSave__LoadsCorrectly() + { + // --- Arrange + StringReader sr = new( + """ + "Hélló","Wórld" + "Hélló","Wórld" + """); + + // --- Act + CsvFieldIndexer indexer = new(); + indexer.GenerateIndex(sr); + MemoryStream stream = new(); + indexer.Save(stream); + byte[] savedData = stream.ToArray(); + CsvFieldIndexer indexer2 = new(); + MemoryStream stream2 = new(savedData); + bool loadResult = indexer2.Load(stream2); + + // --- Assert + Assert.True(loadResult); + Assert.Equal(indexer.Index, indexer2.Index); + Assert.Equal(indexer.FieldIndex, indexer2.FieldIndex); + } + + #endregion Save & Load + } diff --git a/CsvLib/CsvFieldIndexer.cs b/CsvLib/CsvFieldIndexer.cs index 1eef8e3..30362c7 100644 --- a/CsvLib/CsvFieldIndexer.cs +++ b/CsvLib/CsvFieldIndexer.cs @@ -7,6 +7,10 @@ namespace CsvLib; public class CsvFieldIndexer { + #region Declarations + + private const byte FileFormatVersion = 1; + private bool _insideString; private Encoding _currentEncoding = Encoding.Default; @@ -15,13 +19,21 @@ public class CsvFieldIndexer private readonly char _quoteChar; private readonly char _escapeChar; + #endregion Declarations + + #region Life cycle + public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\') { _separator = separator; _quoteChar = quoteChar; _escapeChar = escapeChar; } + + #endregion Life cycle + #region Properties + private List _index = new(); public List Index { get { return _index; } } @@ -30,6 +42,10 @@ public class CsvFieldIndexer public List> FieldIndex { get { return _fieldIndex; } } + #endregion Properties + + #region Parsing + private void DummyParser(string line) { for (int i = 0; i < line.Length; i++) @@ -114,6 +130,10 @@ public class CsvFieldIndexer return fieldPositions; } + #endregion Parsing + + #region GenerateIndex + private void GenerateIndex(string file) { using FileStream stream = new(file, FileMode.Open); @@ -149,7 +169,36 @@ public class CsvFieldIndexer } } - private const byte FileFormatVersion = 1; + #endregion GenerateIndex + + #region Save + + public void Save(Stream streamOut) + { + using BinaryWriter binWriter = new(streamOut); + + binWriter.Write((byte)'C'); + binWriter.Write((byte)'S'); + binWriter.Write((byte)'V'); + + binWriter.Write(FileFormatVersion); + + binWriter.Write(_index.Count); + foreach (long currentIndex in _index) + { + binWriter.Write(currentIndex); + } + + binWriter.Write(_fieldIndex.Count); + foreach (List currentFieldIndex in _fieldIndex) + { + binWriter.Write(currentFieldIndex.Count); + foreach (long fieldIndex in currentFieldIndex) + { + binWriter.Write(fieldIndex); + } + } + } private void SaveFile(string indexFile) { @@ -158,75 +207,63 @@ public class CsvFieldIndexer File.Delete(indexFile); } Stream streamOut = File.Open(indexFile, FileMode.Create); - using (BinaryWriter binWriter = new(streamOut)) - { - binWriter.Write((byte)'C'); - binWriter.Write((byte)'S'); - binWriter.Write((byte)'V'); - - binWriter.Write(FileFormatVersion); - - binWriter.Write(_index.Count); - foreach (long currentIndex in _index) - { - binWriter.Write(currentIndex); - } - - binWriter.Write(_fieldIndex.Count); - foreach (List currentFieldIndex in _fieldIndex) - { - binWriter.Write(currentFieldIndex.Count); - foreach (long fieldIndex in currentFieldIndex) - { - binWriter.Write(fieldIndex); - } - } - } + Save(streamOut); streamOut.Close(); } + #endregion Save + + #region Load + + public bool Load(Stream streamIn) + { + using BinaryReader binReader = new(streamIn); + + byte magik0 = binReader.ReadByte(); + byte magik1 = binReader.ReadByte(); + byte magik2 = binReader.ReadByte(); + if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; } + + byte fileVersion = binReader.ReadByte(); + if (fileVersion != FileFormatVersion) { return false; } + + int numIndexes = binReader.ReadInt32(); + List tempIndex = new(numIndexes); + for (int i = 0; i < numIndexes; i++) + { + long value = binReader.ReadInt64(); + tempIndex.Add(value); + } + + int numFieldIndexes = binReader.ReadInt32(); + List> tempFieldIndex = new(numFieldIndexes); + for (int j = 0; j < numFieldIndexes; j++) + { + int numCurrentFieldIndexes = binReader.ReadInt32(); + List currentFieldIndex = new(numCurrentFieldIndexes); + for (int i = 0; i < numCurrentFieldIndexes; i++) + { + long value = binReader.ReadInt64(); + currentFieldIndex.Add(value); + } + tempFieldIndex.Add(currentFieldIndex); + } + + _index = tempIndex; + _fieldIndex = tempFieldIndex; + return true; + } + private bool LoadFile(string indexFile) { if (File.Exists(indexFile) == false) { return false; } - List tempIndex; - List> tempFieldIndex; Stream streamIn = File.Open(indexFile, FileMode.Open); try { - using BinaryReader binReader = new(streamIn); - - byte magik0 = binReader.ReadByte(); - byte magik1 = binReader.ReadByte(); - byte magik2 = binReader.ReadByte(); - if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; } - - byte fileVersion = binReader.ReadByte(); - if (fileVersion != FileFormatVersion) { return false; } - - int numIndexes = binReader.ReadInt32(); - tempIndex = new List(numIndexes); - for (int i = 0; i < numIndexes; i++) - { - long value = binReader.ReadInt64(); - tempIndex.Add(value); - } - - int numFieldIndexes = binReader.ReadInt32(); - tempFieldIndex = new List>(numFieldIndexes); - for (int j = 0; j < numFieldIndexes; j++) - { - int numCurrentFieldIndexes = binReader.ReadInt32(); - List currentFieldIndex = new(numCurrentFieldIndexes); - for (int i = 0; i < numCurrentFieldIndexes; i++) - { - long value = binReader.ReadInt64(); - currentFieldIndex.Add(value); - } - tempFieldIndex.Add(currentFieldIndex); - } + if (Load(streamIn) == false) return false; } catch (Exception) { @@ -237,8 +274,6 @@ public class CsvFieldIndexer { streamIn.Close(); } - _index = tempIndex; - _fieldIndex = tempFieldIndex; return true; } @@ -263,21 +298,10 @@ public class CsvFieldIndexer } } - public List Search(string fileName, string textToSearch, Action? notifyProgress = null) - { - List index; - using FileStream streamIn = new(fileName, FileMode.Open); - try - { - index = Search(streamIn, textToSearch, notifyProgress); - } - finally - { - streamIn.Close(); - } - return index; - } - + #endregion Load + + #region Search + public List Search(Stream streamIn, string textToSearch, Action? notifyProgress = null) { // TODO: Use MemoryMappedFile for better IO performance @@ -294,12 +318,12 @@ public class CsvFieldIndexer if (tsElapsed.TotalMilliseconds > 200) { datePrevious = DateTime.UtcNow; - notifyProgress?.Invoke(j/(float)_fieldIndex.Count); + notifyProgress?.Invoke(j / (float)_fieldIndex.Count); } - + long offset = _fieldIndex[j][i]; int length = (int)(_fieldIndex[j][i + 1] - offset) + 1; - + if (buffer.Length < length) { buffer = new byte[length]; @@ -310,7 +334,7 @@ public class CsvFieldIndexer bool matches = searcher.Contains(buffer, length); if (matches == false) { continue; } - + newIndexes.Add(_index[j]); break; } @@ -318,4 +342,21 @@ public class CsvFieldIndexer return newIndexes; } -} \ No newline at end of file + + public List SearchFile(string fileName, string textToSearch, Action? notifyProgress = null) + { + List index; + using FileStream streamIn = new(fileName, FileMode.Open); + try + { + index = Search(streamIn, textToSearch, notifyProgress); + } + finally + { + streamIn.Close(); + } + return index; + } + + #endregion Search +} diff --git a/CsvView.sln.DotSettings b/CsvView.sln.DotSettings index ec9561f..0033532 100644 --- a/CsvView.sln.DotSettings +++ b/CsvView.sln.DotSettings @@ -1,4 +1,5 @@  + True /usr/share/dotnet/sdk/7.0.107/MSBuild.dll 4294967293 True \ No newline at end of file diff --git a/CsvView/MainWindow.axaml.cs b/CsvView/MainWindow.axaml.cs index 9469217..0d181d7 100644 --- a/CsvView/MainWindow.axaml.cs +++ b/CsvView/MainWindow.axaml.cs @@ -95,7 +95,7 @@ public partial class MainWindow : Window CsvFieldIndexer csvIndexer = new(); csvIndexer.LoadIndexOfFile(_loadedFile); - List newIndexes = csvIndexer.Search(_loadedFile, textToSearch); + List newIndexes = csvIndexer.SearchFile(_loadedFile, textToSearch); _index = newIndexes; _totalRegs = _index.Count - 1;