From 66536657e2c23e0e9912e06649474ff073b49581 Mon Sep 17 00:00:00 2001 From: "Valeriano A.R." Date: Sun, 18 Feb 2024 00:46:45 +0100 Subject: [PATCH] Migrate to dotnet8 and fix warnings --- .idea/.idea.CsvView/.idea/avalonia.xml | 11 + CsvLib.Tests/ByteArraySearcherTests.cs | 33 +- CsvLib.Tests/CsvFieldIndexerTests.cs | 70 ++-- CsvLib.Tests/CsvLib.Tests.csproj | 2 +- CsvLib/BufferedTextReader.cs | 93 ++-- CsvLib/ByteArraySearcher.cs | 2 - CsvLib/CsvFieldIndexer.cs | 559 ++++++++++++------------- CsvLib/CsvLib.csproj | 3 +- CsvLib/CsvParser.cs | 158 ++++--- CsvView.sln.DotSettings | 3 +- CsvView/App.axaml | 2 +- CsvView/CsvView.csproj | 8 +- CsvView/MainWindow.axaml | 8 +- CsvView/MainWindow.axaml.cs | 6 +- CsvView/Program.cs | 4 +- 15 files changed, 492 insertions(+), 470 deletions(-) create mode 100644 .idea/.idea.CsvView/.idea/avalonia.xml diff --git a/.idea/.idea.CsvView/.idea/avalonia.xml b/.idea/.idea.CsvView/.idea/avalonia.xml new file mode 100644 index 0000000..07fc4bd --- /dev/null +++ b/.idea/.idea.CsvView/.idea/avalonia.xml @@ -0,0 +1,11 @@ + + + + + + \ No newline at end of file diff --git a/CsvLib.Tests/ByteArraySearcherTests.cs b/CsvLib.Tests/ByteArraySearcherTests.cs index b7c5c5d..e8e2418 100644 --- a/CsvLib.Tests/ByteArraySearcherTests.cs +++ b/CsvLib.Tests/ByteArraySearcherTests.cs @@ -7,60 +7,75 @@ public class ByteArraySearcherTests [Fact] public void Contains__EmptyNeedle_ReturnsTrue() { - byte[] haystack = { 1, 2, 3, 4, 5 }; + // --- Arrange + byte[] haystack = [1, 2, 3, 4, 5,]; byte[] needle = Array.Empty(); ByteArraySearcher searcher = new(needle); + // --- Act bool result = searcher.Contains(haystack); + // --- Assert Assert.True(result); } [Fact] public void Contains__NeedleAtBeginning_ReturnsTrue() { - byte[] haystack = { 1, 2, 3, 4, 5 }; - byte[] needle = { 1, 2, 3 }; + // --- Arrange + byte[] haystack = [1, 2, 3, 4, 5,]; + byte[] needle = [1, 2, 3,]; ByteArraySearcher searcher = new(needle); + // --- Act bool result = searcher.Contains(haystack); + // --- Assert Assert.True(result); } [Fact] public void Contains__NeedleInMiddle_ReturnsTrue() { - byte[] haystack = { 1, 2, 3, 4, 5 }; - byte[] needle = { 3, 4 }; + // --- Arrange + byte[] haystack = [1, 2, 3, 4, 5,]; + byte[] needle = [3, 4,]; ByteArraySearcher searcher = new(needle); + // --- Act bool result = searcher.Contains(haystack); + // --- Assert Assert.True(result); } [Fact] public void Contains__NeedleAtEnd_ReturnsTrue() { - byte[] haystack = { 1, 2, 3, 4, 5 }; - byte[] needle = { 4, 5 }; + // --- Arrange + byte[] haystack = [1, 2, 3, 4, 5,]; + byte[] needle = [4, 5,]; ByteArraySearcher searcher = new(needle); + // --- Act bool result = searcher.Contains(haystack); + // --- Assert Assert.True(result); } [Fact] public void Contains__NeedleNotPresent_ReturnsFalse() { - byte[] haystack = { 1, 2, 3, 4, 5 }; - byte[] needle = { 5, 6, 7 }; + // --- Arrange + byte[] haystack = [1, 2, 3, 4, 5,]; + byte[] needle = [5, 6, 7,]; ByteArraySearcher searcher = new(needle); + // --- Act bool result = searcher.Contains(haystack); + // --- Assert Assert.False(result); } } diff --git a/CsvLib.Tests/CsvFieldIndexerTests.cs b/CsvLib.Tests/CsvFieldIndexerTests.cs index 478a786..016258b 100644 --- a/CsvLib.Tests/CsvFieldIndexerTests.cs +++ b/CsvLib.Tests/CsvFieldIndexerTests.cs @@ -5,7 +5,6 @@ namespace CvsLib; public class CsvFieldIndexerTests { - #region GenerateIndex [Fact] @@ -51,10 +50,11 @@ public class CsvFieldIndexerTests public void GenerateIndex__TwoLinesOfPainText__TwoRows() { // --- Arrange - StringReader sr = new(""" - Hello World - Hello World - """); + StringReader sr = new( + """ + Hello World + Hello World + """); // --- Act CsvFieldIndexer indexer = new(); @@ -80,10 +80,11 @@ public class CsvFieldIndexerTests public void GenerateIndex__TwoLinesOfQuotedText__TwoRows() { // --- Arrange - StringReader sr = new(""" - "Hello World" - "Hello World" - """); + StringReader sr = new( + """ + "Hello World" + "Hello World" + """); // --- Act CsvFieldIndexer indexer = new(); @@ -109,10 +110,11 @@ public class CsvFieldIndexerTests public void GenerateIndex__TwoLinesWithTwoQuotedColumns__TwoRowsTwoFields() { // --- Arrange - StringReader sr = new(""" - "Hello","World" - "Hello","World" - """); + StringReader sr = new( + """ + "Hello","World" + "Hello","World" + """); // --- Act CsvFieldIndexer indexer = new(); @@ -142,10 +144,11 @@ public class CsvFieldIndexerTests public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields() { // --- Arrange - StringReader sr = new(""" - "Hélló","Wórld" - "Hélló","Wórld" - """); + StringReader sr = new( + """ + "Hélló","Wórld" + "Hélló","Wórld" + """); // --- Act CsvFieldIndexer indexer = new(); @@ -174,15 +177,16 @@ public class CsvFieldIndexerTests #endregion GenerateIndex #region Search - + [Fact] public void Search__TwoLinesWithTwoQuotedColumns__OneIndexFirstRow() { // --- Arrange - string strText = """ - "Hello","test" - "Hello","World" - """; + const string strText = + """ + "Hello","test" + "Hello","World" + """; StringReader sr = new(strText); CsvFieldIndexer indexer = new(); indexer.GenerateIndex(sr); @@ -202,10 +206,11 @@ public class CsvFieldIndexerTests public void Search__TwoLinesWithTwoQuotedColumns__OneIndexSecondRow() { // --- Arrange - string strText = """ - "Hello","World" - "Hello","test" - """; + const string strText = + """ + "Hello","World" + "Hello","test" + """; StringReader sr = new(strText); CsvFieldIndexer indexer = new(); indexer.GenerateIndex(sr); @@ -225,10 +230,11 @@ public class CsvFieldIndexerTests public void Search__TwoLinesWithTwoQuotedColumnsTwoMatches__OneIndexSecondRow() { // --- Arrange - string strText = """ - "Hello","World" - "test","test" - """; + const string strText = + """ + "Hello","World" + "test","test" + """; StringReader sr = new(strText); CsvFieldIndexer indexer = new(); indexer.GenerateIndex(sr); @@ -243,7 +249,7 @@ public class CsvFieldIndexerTests Assert.Single(indexes); Assert.Equal(16, indexes[0]); } - + #endregion Search - + } diff --git a/CsvLib.Tests/CsvLib.Tests.csproj b/CsvLib.Tests/CsvLib.Tests.csproj index 59b30ab..089b48f 100644 --- a/CsvLib.Tests/CsvLib.Tests.csproj +++ b/CsvLib.Tests/CsvLib.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 enable enable CvsLib diff --git a/CsvLib/BufferedTextReader.cs b/CsvLib/BufferedTextReader.cs index ec942d6..f7f1e5a 100644 --- a/CsvLib/BufferedTextReader.cs +++ b/CsvLib/BufferedTextReader.cs @@ -1,57 +1,52 @@ using System.IO; using System.Text; -namespace CsvLib +namespace CsvLib; + +public class BufferedTextReader : TextReader { - public class BufferedTextReader : TextReader + private readonly TextReader _baseReader; + private readonly StringBuilder _sbBuffer = new(); + + private readonly Encoding _currentEncoding = Encoding.Default; + + public BufferedTextReader(TextReader baseReader) { - private readonly TextReader _baseReader; - private int _position; - private readonly StringBuilder _sbBuffer = new(); - - private readonly Encoding _currentEncoding = Encoding.Default; - - public BufferedTextReader(TextReader baseReader) + _baseReader = baseReader; + if (baseReader is StreamReader streamReader) { - _baseReader = baseReader; - if (baseReader is StreamReader streamReader) - { - _currentEncoding = streamReader.CurrentEncoding; - } - } - - public override int Read() - { - int read = _baseReader.Read(); - if (read > 127) - { - int count = _currentEncoding.GetByteCount(((char)read).ToString()); - _position += count; - } - else - { - _position++; - } - if (read != -1) - { - _sbBuffer.Append((char)read); - } - return read; - } - - public int Position - { - get { return _position; } - } - - public string GetBuffer() - { - return _sbBuffer.ToString(); - } - - public void CleanBuffer() - { - _sbBuffer.Clear(); + _currentEncoding = streamReader.CurrentEncoding; } } -} + + public override int Read() + { + int read = _baseReader.Read(); + if (read > 127) + { + int count = _currentEncoding.GetByteCount(((char)read).ToString()); + Position += count; + } + else + { + Position++; + } + if (read != -1) + { + _sbBuffer.Append((char)read); + } + return read; + } + + public int Position { get; private set; } + + public string GetBuffer() + { + return _sbBuffer.ToString(); + } + + public void CleanBuffer() + { + _sbBuffer.Clear(); + } +} \ No newline at end of file diff --git a/CsvLib/ByteArraySearcher.cs b/CsvLib/ByteArraySearcher.cs index cbe0b64..5b12aaa 100644 --- a/CsvLib/ByteArraySearcher.cs +++ b/CsvLib/ByteArraySearcher.cs @@ -1,5 +1,3 @@ -#nullable enable - namespace CsvLib; public class ByteArraySearcher diff --git a/CsvLib/CsvFieldIndexer.cs b/CsvLib/CsvFieldIndexer.cs index 1a62582..1eef8e3 100644 --- a/CsvLib/CsvFieldIndexer.cs +++ b/CsvLib/CsvFieldIndexer.cs @@ -3,322 +3,319 @@ using System.Collections.Generic; using System.IO; using System.Text; -namespace CsvLib +namespace CsvLib; + +public class CsvFieldIndexer { - public class CsvFieldIndexer + private bool _insideString; + + private Encoding _currentEncoding = Encoding.Default; + + private readonly char _separator; + private readonly char _quoteChar; + private readonly char _escapeChar; + + public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\') { - private bool _insideString; + _separator = separator; + _quoteChar = quoteChar; + _escapeChar = escapeChar; + } - private Encoding _currentEncoding = Encoding.Default; + private List _index = new(); - private readonly char _separator; - private readonly char _quoteChar; - private readonly char _escapeChar; + public List Index { get { return _index; } } - public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\') + private List> _fieldIndex = new(); + + public List> FieldIndex { get { return _fieldIndex; } } + + private void DummyParser(string line) + { + for (int i = 0; i < line.Length; i++) { - _separator = separator; - _quoteChar = quoteChar; - _escapeChar = escapeChar; - } - - private List _index = new(); - - public List Index { get { return _index; } } - - private List> _fieldIndex = new(); - - public List> FieldIndex { get { return _fieldIndex; } } - - private void DummyParser(string line) - { - for (int i = 0; i < line.Length; i++) + char c = line[i]; + if (c == _separator && _insideString == false) { - char c = line[i]; - if (c == _separator && _insideString == false) - { - continue; - } - if (c == _quoteChar && _insideString == false) - { - _insideString = true; - continue; - } - if (c == _quoteChar && _insideString) - { - _insideString = false; - continue; - } - if (c == _escapeChar && _insideString) - { - i++; - } + continue; + } + if (c == _quoteChar && _insideString == false) + { + _insideString = true; + continue; + } + if (c == _quoteChar && _insideString) + { + _insideString = false; + continue; + } + if (c == _escapeChar && _insideString) + { + i++; } } + } - private List ParseLineIndex(string line, long lineOffset) + private List ParseLineIndex(string line, long lineOffset) + { + List fieldPositions = new(); + long? fieldStartPosition = null; + long? fieldEndPosition = null; + int unicodeDelta = 0; + for (int i = 0; i < line.Length; i++) { - List fieldPositions = new(); - long? fieldStartPosition = null; - long? fieldEndPosition = null; - int unicodeDelta = 0; - for (int i = 0; i < line.Length; i++) + char c = line[i]; + if (c == _separator && _insideString == false) { - char c = line[i]; - if (c == _separator && _insideString == false) - { - if (fieldStartPosition != null) - { - fieldPositions.Add((long)fieldStartPosition); - fieldPositions.Add((long)fieldEndPosition); - } - fieldStartPosition = null; - fieldEndPosition = null; - } - else if (c == _quoteChar && _insideString == false) - { - _insideString = true; - } - else if (c == _quoteChar && _insideString) - { - _insideString = false; - } - else if (c == _escapeChar && _insideString) - { - i++; - } - else if ((c == '\n' || c == '\r') && _insideString == false) - { - break; - } - else - { - if (c > 127) - { - unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1; - } - - long absolutePosition = lineOffset + i + unicodeDelta; - fieldStartPosition ??= absolutePosition; - fieldEndPosition = absolutePosition; - } - } - if (_insideString == false) - { - if (fieldStartPosition != null) + if (fieldStartPosition != null && fieldEndPosition != null) { fieldPositions.Add((long)fieldStartPosition); fieldPositions.Add((long)fieldEndPosition); } + fieldStartPosition = null; + fieldEndPosition = null; } - return fieldPositions; - } - - private void GenerateIndex(string file) - { - using FileStream stream = new(file, FileMode.Open); - using StreamReader streamReader = new(stream, Encoding.Default, true, 4096); - GenerateIndex(streamReader); - stream.Close(); - } - - public void GenerateIndex(TextReader textReader) - { - _insideString = false; - _index.Clear(); - _index.Add(0); - int idxRow = 0; - if (textReader is StreamReader streamReader) + else if (c == _quoteChar && _insideString == false) { - _currentEncoding = streamReader.CurrentEncoding; + _insideString = true; } - using BufferedTextReader reader = new(textReader); - string currentLine; - while ((currentLine = reader.ReadLine()) != null) + else if (c == _quoteChar && _insideString) { - DummyParser(currentLine); - if (_insideString) { continue; } - - string fullLine = reader.GetBuffer(); - reader.CleanBuffer(); - List fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]); - _fieldIndex.Add(fieldIndexes); - - _index.Add(reader.Position); - - idxRow++; + _insideString = false; } - } - - private const byte FileFormatVersion = 1; - - private void SaveFile(string indexFile) - { - if (indexFile == null) { return; } - if (File.Exists(indexFile)) + else if (c == _escapeChar && _insideString) { - File.Delete(indexFile); + i++; } - Stream streamOut = File.Open(indexFile, FileMode.Create); - using (BinaryWriter binWriter = new(streamOut)) + else if ((c == '\n' || c == '\r') && _insideString == false) { - binWriter.Write((byte)'C'); - binWriter.Write((byte)'S'); - binWriter.Write((byte)'V'); - - binWriter.Write(FileFormatVersion); - - binWriter.Write(_index.Count); - foreach (long currentIndex in _index) + break; + } + else + { + if (c > 127) { - binWriter.Write(currentIndex); + unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1; } - binWriter.Write(_fieldIndex.Count); - foreach (List currentFieldIndex in _fieldIndex) - { - binWriter.Write(currentFieldIndex.Count); - for (int i = 0; i < currentFieldIndex.Count; i++) - { - binWriter.Write(currentFieldIndex[i]); - } - } - } - streamOut.Close(); - } - - private bool LoadFile(string indexFile) - { - if (File.Exists(indexFile) == false) - { - return false; - } - List tempIndex; - List> tempFieldIndex; - Stream streamIn = File.Open(indexFile, FileMode.Open); - try - { - using BinaryReader binReader = new(streamIn); - - byte magik0 = binReader.ReadByte(); - byte magik1 = binReader.ReadByte(); - byte magik2 = binReader.ReadByte(); - if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; } - - byte fileVersion = binReader.ReadByte(); - if (fileVersion != FileFormatVersion) { return false; } - - int numIndexes = binReader.ReadInt32(); - tempIndex = new List(numIndexes); - for (int i = 0; i < numIndexes; i++) - { - long value = binReader.ReadInt64(); - tempIndex.Add(value); - } - - int numFieldIndexes = binReader.ReadInt32(); - tempFieldIndex = new List>(numFieldIndexes); - for (int j = 0; j < numFieldIndexes; j++) - { - int numCurrentFieldIndexes = binReader.ReadInt32(); - List currentFieldIndex = new(numCurrentFieldIndexes); - for (int i = 0; i < numCurrentFieldIndexes; i++) - { - long value = binReader.ReadInt64(); - currentFieldIndex.Add(value); - } - tempFieldIndex.Add(currentFieldIndex); - } - } - catch (Exception) - { - // NON NON NOM - return false; - } - finally - { - streamIn.Close(); - } - _index = tempIndex; - _fieldIndex = tempFieldIndex; - return true; - } - - public void LoadIndexOfFile(string file) - { - DateTime dtFile = File.GetCreationTime(file); - string indexFile = $"{file}.idx"; - if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile) - { - if (LoadFile(indexFile)) { return; } - } - - // Generate index - DateTime dtNow = DateTime.UtcNow; - GenerateIndex(file); - TimeSpan tsGenIndex = DateTime.UtcNow - dtNow; - - // Save Index if expensive generation - if (tsGenIndex.TotalSeconds > 2) - { - SaveFile(indexFile); + long absolutePosition = lineOffset + i + unicodeDelta; + fieldStartPosition ??= absolutePosition; + fieldEndPosition = absolutePosition; } } - - public List Search(string fileName, string textToSearch, Action notifyProgress = null) + if (_insideString == false) { - List index; - using FileStream streamIn = new(fileName, FileMode.Open); - try + if (fieldStartPosition != null && fieldEndPosition != null) { - index = Search(streamIn, textToSearch, notifyProgress); + fieldPositions.Add((long)fieldStartPosition); + fieldPositions.Add((long)fieldEndPosition); } - finally - { - streamIn.Close(); - } - return index ?? new List(); } - - public List Search(Stream streamIn, string textToSearch, Action notifyProgress = null) + return fieldPositions; + } + + private void GenerateIndex(string file) + { + using FileStream stream = new(file, FileMode.Open); + using StreamReader streamReader = new(stream, Encoding.Default, true, 4096); + GenerateIndex(streamReader); + stream.Close(); + } + + public void GenerateIndex(TextReader textReader) + { + _insideString = false; + _index.Clear(); + _index.Add(0); + int idxRow = 0; + if (textReader is StreamReader streamReader) { - // TODO: Use MemoryMappedFile for better IO performance - DateTime datePrevious = DateTime.UtcNow; - List newIndexes = new(); - byte[] bText = Encoding.UTF8.GetBytes(textToSearch); - ByteArraySearcher searcher = new(bText); - byte[] buffer = new byte[1024]; - for (int j = 0; j < _fieldIndex.Count; j++) - { - for (int i = 0; i < _fieldIndex[j].Count; i += 2) - { - TimeSpan tsElapsed = DateTime.UtcNow - datePrevious; - if (tsElapsed.TotalMilliseconds > 200) - { - datePrevious = DateTime.UtcNow; - notifyProgress?.Invoke(j/(float)_fieldIndex.Count); - } - - long offset = _fieldIndex[j][i]; - int length = (int)(_fieldIndex[j][i + 1] - offset) + 1; - - if (buffer.Length < length) - { - buffer = new byte[length]; - } - streamIn.Seek(offset, SeekOrigin.Begin); - int read = streamIn.Read(buffer, 0, length); - if (read != length) { throw new Exception($"Search: Expected {length} bytes, but read {read}"); } + _currentEncoding = streamReader.CurrentEncoding; + } + using BufferedTextReader reader = new(textReader); + while (reader.ReadLine() is { } currentLine) + { + DummyParser(currentLine); + if (_insideString) { continue; } - bool matches = searcher.Contains(buffer, length); - if (matches == false) { continue; } - - newIndexes.Add(_index[j]); - break; - } - } + string fullLine = reader.GetBuffer(); + reader.CleanBuffer(); + List fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]); + _fieldIndex.Add(fieldIndexes); - return newIndexes; + _index.Add(reader.Position); + + idxRow++; } } -} + + private const byte FileFormatVersion = 1; + + private void SaveFile(string indexFile) + { + if (File.Exists(indexFile)) + { + File.Delete(indexFile); + } + Stream streamOut = File.Open(indexFile, FileMode.Create); + using (BinaryWriter binWriter = new(streamOut)) + { + binWriter.Write((byte)'C'); + binWriter.Write((byte)'S'); + binWriter.Write((byte)'V'); + + binWriter.Write(FileFormatVersion); + + binWriter.Write(_index.Count); + foreach (long currentIndex in _index) + { + binWriter.Write(currentIndex); + } + + binWriter.Write(_fieldIndex.Count); + foreach (List currentFieldIndex in _fieldIndex) + { + binWriter.Write(currentFieldIndex.Count); + foreach (long fieldIndex in currentFieldIndex) + { + binWriter.Write(fieldIndex); + } + } + } + streamOut.Close(); + } + + private bool LoadFile(string indexFile) + { + if (File.Exists(indexFile) == false) + { + return false; + } + List tempIndex; + List> tempFieldIndex; + Stream streamIn = File.Open(indexFile, FileMode.Open); + try + { + using BinaryReader binReader = new(streamIn); + + byte magik0 = binReader.ReadByte(); + byte magik1 = binReader.ReadByte(); + byte magik2 = binReader.ReadByte(); + if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; } + + byte fileVersion = binReader.ReadByte(); + if (fileVersion != FileFormatVersion) { return false; } + + int numIndexes = binReader.ReadInt32(); + tempIndex = new List(numIndexes); + for (int i = 0; i < numIndexes; i++) + { + long value = binReader.ReadInt64(); + tempIndex.Add(value); + } + + int numFieldIndexes = binReader.ReadInt32(); + tempFieldIndex = new List>(numFieldIndexes); + for (int j = 0; j < numFieldIndexes; j++) + { + int numCurrentFieldIndexes = binReader.ReadInt32(); + List currentFieldIndex = new(numCurrentFieldIndexes); + for (int i = 0; i < numCurrentFieldIndexes; i++) + { + long value = binReader.ReadInt64(); + currentFieldIndex.Add(value); + } + tempFieldIndex.Add(currentFieldIndex); + } + } + catch (Exception) + { + // NON NON NOM + return false; + } + finally + { + streamIn.Close(); + } + _index = tempIndex; + _fieldIndex = tempFieldIndex; + return true; + } + + public void LoadIndexOfFile(string file) + { + DateTime dtFile = File.GetCreationTime(file); + string indexFile = $"{file}.idx"; + if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile) + { + if (LoadFile(indexFile)) { return; } + } + + // Generate index + DateTime dtNow = DateTime.UtcNow; + GenerateIndex(file); + TimeSpan tsGenIndex = DateTime.UtcNow - dtNow; + + // Save Index if expensive generation + if (tsGenIndex.TotalSeconds > 2) + { + SaveFile(indexFile); + } + } + + public List Search(string fileName, string textToSearch, Action? notifyProgress = null) + { + List index; + using FileStream streamIn = new(fileName, FileMode.Open); + try + { + index = Search(streamIn, textToSearch, notifyProgress); + } + finally + { + streamIn.Close(); + } + return index; + } + + public List Search(Stream streamIn, string textToSearch, Action? notifyProgress = null) + { + // TODO: Use MemoryMappedFile for better IO performance + DateTime datePrevious = DateTime.UtcNow; + List newIndexes = new(); + byte[] bText = Encoding.UTF8.GetBytes(textToSearch); + ByteArraySearcher searcher = new(bText); + byte[] buffer = new byte[1024]; + for (int j = 0; j < _fieldIndex.Count; j++) + { + for (int i = 0; i < _fieldIndex[j].Count; i += 2) + { + TimeSpan tsElapsed = DateTime.UtcNow - datePrevious; + if (tsElapsed.TotalMilliseconds > 200) + { + datePrevious = DateTime.UtcNow; + notifyProgress?.Invoke(j/(float)_fieldIndex.Count); + } + + long offset = _fieldIndex[j][i]; + int length = (int)(_fieldIndex[j][i + 1] - offset) + 1; + + if (buffer.Length < length) + { + buffer = new byte[length]; + } + streamIn.Seek(offset, SeekOrigin.Begin); + int read = streamIn.Read(buffer, 0, length); + if (read != length) { throw new Exception($"Search: Expected {length} bytes, but read {read}"); } + + bool matches = searcher.Contains(buffer, length); + if (matches == false) { continue; } + + newIndexes.Add(_index[j]); + break; + } + } + + return newIndexes; + } +} \ No newline at end of file diff --git a/CsvLib/CsvLib.csproj b/CsvLib/CsvLib.csproj index 96b54da..53bbed1 100644 --- a/CsvLib/CsvLib.csproj +++ b/CsvLib/CsvLib.csproj @@ -1,8 +1,9 @@ - net7.0 + net8.0 11 + enable diff --git a/CsvLib/CsvParser.cs b/CsvLib/CsvParser.cs index 69f8d30..b3c9aed 100644 --- a/CsvLib/CsvParser.cs +++ b/CsvLib/CsvParser.cs @@ -2,102 +2,100 @@ using System.IO; using System.Text; -namespace CsvLib +namespace CsvLib; + +public class CsvParser { - public class CsvParser + private bool _insideString; + + private readonly char _separator; + private readonly char _quoteChar; + private readonly char _escapeChar; + + public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\') { - private bool _insideString; + _separator = separator; + _quoteChar = quoteChar; + _escapeChar = escapeChar; + } - private readonly char _separator; - private readonly char _quoteChar; - private readonly char _escapeChar; + private List> _data = new(); - public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\') + private List? _currentReg; + private StringBuilder? _currentCell; + + public List> Data + { + get { return _data; } + } + + private void ParseLine(string line) + { + _currentReg ??= new List(); + _currentCell ??= new StringBuilder(); + + for (int i = 0; i < line.Length; i++) { - _separator = separator; - _quoteChar = quoteChar; - _escapeChar = escapeChar; - } - - private List> _data = new(); - - private List _currentReg; - StringBuilder _currentCell; - - public List> Data - { - get { return _data; } - } - - private void ParseLine(string line) - { - _currentReg ??= new List(); - _currentCell ??= new StringBuilder(); - - for (int i = 0; i < line.Length; i++) - { - char c = line[i]; - if (c == _separator && _insideString == false) - { - _currentReg.Add(_currentCell.ToString()); - _currentCell.Clear(); - continue; - } - if (c == _quoteChar && _insideString == false) - { - _insideString = true; - continue; - } - if (c == _quoteChar && _insideString) - { - _insideString = false; - continue; - } - if (c == _escapeChar && _insideString) - { - i++; - if (i == line.Length) { break; } - c = line[i]; - } - - _currentCell.Append(c); - } - - - if (_insideString) - { - _currentCell.Append('\n'); - } - else + char c = line[i]; + if (c == _separator && _insideString == false) { _currentReg.Add(_currentCell.ToString()); _currentCell.Clear(); - _data.Add(_currentReg); - _currentReg = null; + continue; } + if (c == _quoteChar && _insideString == false) + { + _insideString = true; + continue; + } + if (c == _quoteChar && _insideString) + { + _insideString = false; + continue; + } + if (c == _escapeChar && _insideString) + { + i++; + if (i == line.Length) { break; } + c = line[i]; + } + + _currentCell.Append(c); } - public void ParseFile(string file, long offset = 0, int count = 0) + + if (_insideString) { - _insideString = false; - _data = new List>(); + _currentCell.Append('\n'); + } + else + { + _currentReg.Add(_currentCell.ToString()); + _currentCell.Clear(); + _data.Add(_currentReg); _currentReg = null; - FileStream stream = new(file, FileMode.Open); - stream.Seek(offset, SeekOrigin.Begin); - using (StreamReader reader = new(stream, Encoding.Default, true, 4096)) + } + } + + public void ParseFile(string file, long offset = 0, int count = 0) + { + _insideString = false; + _data = new List>(); + _currentReg = null; + FileStream stream = new(file, FileMode.Open); + stream.Seek(offset, SeekOrigin.Begin); + using (StreamReader reader = new(stream, Encoding.Default, true, 4096)) + { + while (reader.ReadLine() is { } currentLine) { - string currentLine; - while ((currentLine = reader.ReadLine()) != null) + ParseLine(currentLine); + if (count > 0 && Data.Count == count) { - ParseLine(currentLine); - if (count > 0 && Data.Count == count) - { - break; - } + break; } } - stream.Close(); } - + stream.Close(); } -} + +} \ No newline at end of file diff --git a/CsvView.sln.DotSettings b/CsvView.sln.DotSettings index f9ba49e..ec9561f 100644 --- a/CsvView.sln.DotSettings +++ b/CsvView.sln.DotSettings @@ -1,3 +1,4 @@  /usr/share/dotnet/sdk/7.0.107/MSBuild.dll - 4294967293 \ No newline at end of file + 4294967293 + True \ No newline at end of file diff --git a/CsvView/App.axaml b/CsvView/App.axaml index 1df1472..8cce478 100644 --- a/CsvView/App.axaml +++ b/CsvView/App.axaml @@ -1,7 +1,7 @@ + RequestedThemeVariant="Dark"> diff --git a/CsvView/CsvView.csproj b/CsvView/CsvView.csproj index 8de198e..4890001 100644 --- a/CsvView/CsvView.csproj +++ b/CsvView/CsvView.csproj @@ -1,14 +1,13 @@  WinExe - net7.0 + net8.0 enable true app.manifest true - - + @@ -17,8 +16,7 @@ - - + diff --git a/CsvView/MainWindow.axaml b/CsvView/MainWindow.axaml index 3def5a3..9f43c5b 100644 --- a/CsvView/MainWindow.axaml +++ b/CsvView/MainWindow.axaml @@ -9,12 +9,12 @@ Width="800" Height="600" Title="CsvView"> - - + + - + @@ -22,7 +22,7 @@ - / + / diff --git a/CsvView/MainWindow.axaml.cs b/CsvView/MainWindow.axaml.cs index aa1077e..9469217 100644 --- a/CsvView/MainWindow.axaml.cs +++ b/CsvView/MainWindow.axaml.cs @@ -28,8 +28,8 @@ public partial class MainWindow : Window AllowMultiple = false, FileTypeFilter = new List { - new("CSV Files") { Patterns = new[] { "*.csv" } }, - new("Any File") { Patterns = new[] { "*" } }, + new("CSV Files") { Patterns = new[] { "*.csv", }, }, + new("Any File") { Patterns = new[] { "*", }, }, }, }); @@ -89,6 +89,8 @@ public partial class MainWindow : Window private void Search(string? textToSearch) { + if (textToSearch == null) { return; } + // TODO: Loading animation CsvFieldIndexer csvIndexer = new(); csvIndexer.LoadIndexOfFile(_loadedFile); diff --git a/CsvView/Program.cs b/CsvView/Program.cs index 977975b..b4b1f5d 100644 --- a/CsvView/Program.cs +++ b/CsvView/Program.cs @@ -3,7 +3,7 @@ using System; namespace CsvView; -class Program +static class Program { // Initialization code. Don't use any Avalonia, third-party APIs or any // SynchronizationContext-reliant code before AppMain is called: things aren't initialized @@ -13,7 +13,7 @@ class Program .StartWithClassicDesktopLifetime(args); // Avalonia configuration, don't remove; also used by visual designer. - public static AppBuilder BuildAvaloniaApp() + private static AppBuilder BuildAvaloniaApp() => AppBuilder.Configure() .UsePlatformDetect() .WithInterFont()