diff --git a/.idea/.idea.CsvView/.idea/avalonia.xml b/.idea/.idea.CsvView/.idea/avalonia.xml
new file mode 100644
index 0000000..07fc4bd
--- /dev/null
+++ b/.idea/.idea.CsvView/.idea/avalonia.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CsvLib.Tests/ByteArraySearcherTests.cs b/CsvLib.Tests/ByteArraySearcherTests.cs
index b7c5c5d..e8e2418 100644
--- a/CsvLib.Tests/ByteArraySearcherTests.cs
+++ b/CsvLib.Tests/ByteArraySearcherTests.cs
@@ -7,60 +7,75 @@ public class ByteArraySearcherTests
[Fact]
public void Contains__EmptyNeedle_ReturnsTrue()
{
- byte[] haystack = { 1, 2, 3, 4, 5 };
+ // --- Arrange
+ byte[] haystack = [1, 2, 3, 4, 5,];
byte[] needle = Array.Empty();
ByteArraySearcher searcher = new(needle);
+ // --- Act
bool result = searcher.Contains(haystack);
+ // --- Assert
Assert.True(result);
}
[Fact]
public void Contains__NeedleAtBeginning_ReturnsTrue()
{
- byte[] haystack = { 1, 2, 3, 4, 5 };
- byte[] needle = { 1, 2, 3 };
+ // --- Arrange
+ byte[] haystack = [1, 2, 3, 4, 5,];
+ byte[] needle = [1, 2, 3,];
ByteArraySearcher searcher = new(needle);
+ // --- Act
bool result = searcher.Contains(haystack);
+ // --- Assert
Assert.True(result);
}
[Fact]
public void Contains__NeedleInMiddle_ReturnsTrue()
{
- byte[] haystack = { 1, 2, 3, 4, 5 };
- byte[] needle = { 3, 4 };
+ // --- Arrange
+ byte[] haystack = [1, 2, 3, 4, 5,];
+ byte[] needle = [3, 4,];
ByteArraySearcher searcher = new(needle);
+ // --- Act
bool result = searcher.Contains(haystack);
+ // --- Assert
Assert.True(result);
}
[Fact]
public void Contains__NeedleAtEnd_ReturnsTrue()
{
- byte[] haystack = { 1, 2, 3, 4, 5 };
- byte[] needle = { 4, 5 };
+ // --- Arrange
+ byte[] haystack = [1, 2, 3, 4, 5,];
+ byte[] needle = [4, 5,];
ByteArraySearcher searcher = new(needle);
+ // --- Act
bool result = searcher.Contains(haystack);
+ // --- Assert
Assert.True(result);
}
[Fact]
public void Contains__NeedleNotPresent_ReturnsFalse()
{
- byte[] haystack = { 1, 2, 3, 4, 5 };
- byte[] needle = { 5, 6, 7 };
+ // --- Arrange
+ byte[] haystack = [1, 2, 3, 4, 5,];
+ byte[] needle = [5, 6, 7,];
ByteArraySearcher searcher = new(needle);
+ // --- Act
bool result = searcher.Contains(haystack);
+ // --- Assert
Assert.False(result);
}
}
diff --git a/CsvLib.Tests/CsvFieldIndexerTests.cs b/CsvLib.Tests/CsvFieldIndexerTests.cs
index 478a786..016258b 100644
--- a/CsvLib.Tests/CsvFieldIndexerTests.cs
+++ b/CsvLib.Tests/CsvFieldIndexerTests.cs
@@ -5,7 +5,6 @@ namespace CvsLib;
public class CsvFieldIndexerTests
{
-
#region GenerateIndex
[Fact]
@@ -51,10 +50,11 @@ public class CsvFieldIndexerTests
public void GenerateIndex__TwoLinesOfPainText__TwoRows()
{
// --- Arrange
- StringReader sr = new("""
- Hello World
- Hello World
- """);
+ StringReader sr = new(
+ """
+ Hello World
+ Hello World
+ """);
// --- Act
CsvFieldIndexer indexer = new();
@@ -80,10 +80,11 @@ public class CsvFieldIndexerTests
public void GenerateIndex__TwoLinesOfQuotedText__TwoRows()
{
// --- Arrange
- StringReader sr = new("""
- "Hello World"
- "Hello World"
- """);
+ StringReader sr = new(
+ """
+ "Hello World"
+ "Hello World"
+ """);
// --- Act
CsvFieldIndexer indexer = new();
@@ -109,10 +110,11 @@ public class CsvFieldIndexerTests
public void GenerateIndex__TwoLinesWithTwoQuotedColumns__TwoRowsTwoFields()
{
// --- Arrange
- StringReader sr = new("""
- "Hello","World"
- "Hello","World"
- """);
+ StringReader sr = new(
+ """
+ "Hello","World"
+ "Hello","World"
+ """);
// --- Act
CsvFieldIndexer indexer = new();
@@ -142,10 +144,11 @@ public class CsvFieldIndexerTests
public void GenerateIndex__TwoLinesWithTwoQuotedColumnsWithUnicode__TwoRowsTwoFields()
{
// --- Arrange
- StringReader sr = new("""
- "Hélló","Wórld"
- "Hélló","Wórld"
- """);
+ StringReader sr = new(
+ """
+ "Hélló","Wórld"
+ "Hélló","Wórld"
+ """);
// --- Act
CsvFieldIndexer indexer = new();
@@ -174,15 +177,16 @@ public class CsvFieldIndexerTests
#endregion GenerateIndex
#region Search
-
+
[Fact]
public void Search__TwoLinesWithTwoQuotedColumns__OneIndexFirstRow()
{
// --- Arrange
- string strText = """
- "Hello","test"
- "Hello","World"
- """;
+ const string strText =
+ """
+ "Hello","test"
+ "Hello","World"
+ """;
StringReader sr = new(strText);
CsvFieldIndexer indexer = new();
indexer.GenerateIndex(sr);
@@ -202,10 +206,11 @@ public class CsvFieldIndexerTests
public void Search__TwoLinesWithTwoQuotedColumns__OneIndexSecondRow()
{
// --- Arrange
- string strText = """
- "Hello","World"
- "Hello","test"
- """;
+ const string strText =
+ """
+ "Hello","World"
+ "Hello","test"
+ """;
StringReader sr = new(strText);
CsvFieldIndexer indexer = new();
indexer.GenerateIndex(sr);
@@ -225,10 +230,11 @@ public class CsvFieldIndexerTests
public void Search__TwoLinesWithTwoQuotedColumnsTwoMatches__OneIndexSecondRow()
{
// --- Arrange
- string strText = """
- "Hello","World"
- "test","test"
- """;
+ const string strText =
+ """
+ "Hello","World"
+ "test","test"
+ """;
StringReader sr = new(strText);
CsvFieldIndexer indexer = new();
indexer.GenerateIndex(sr);
@@ -243,7 +249,7 @@ public class CsvFieldIndexerTests
Assert.Single(indexes);
Assert.Equal(16, indexes[0]);
}
-
+
#endregion Search
-
+
}
diff --git a/CsvLib.Tests/CsvLib.Tests.csproj b/CsvLib.Tests/CsvLib.Tests.csproj
index 59b30ab..089b48f 100644
--- a/CsvLib.Tests/CsvLib.Tests.csproj
+++ b/CsvLib.Tests/CsvLib.Tests.csproj
@@ -1,7 +1,7 @@
- net7.0
+ net8.0
enable
enable
CvsLib
diff --git a/CsvLib/BufferedTextReader.cs b/CsvLib/BufferedTextReader.cs
index ec942d6..f7f1e5a 100644
--- a/CsvLib/BufferedTextReader.cs
+++ b/CsvLib/BufferedTextReader.cs
@@ -1,57 +1,52 @@
using System.IO;
using System.Text;
-namespace CsvLib
+namespace CsvLib;
+
+public class BufferedTextReader : TextReader
{
- public class BufferedTextReader : TextReader
+ private readonly TextReader _baseReader;
+ private readonly StringBuilder _sbBuffer = new();
+
+ private readonly Encoding _currentEncoding = Encoding.Default;
+
+ public BufferedTextReader(TextReader baseReader)
{
- private readonly TextReader _baseReader;
- private int _position;
- private readonly StringBuilder _sbBuffer = new();
-
- private readonly Encoding _currentEncoding = Encoding.Default;
-
- public BufferedTextReader(TextReader baseReader)
+ _baseReader = baseReader;
+ if (baseReader is StreamReader streamReader)
{
- _baseReader = baseReader;
- if (baseReader is StreamReader streamReader)
- {
- _currentEncoding = streamReader.CurrentEncoding;
- }
- }
-
- public override int Read()
- {
- int read = _baseReader.Read();
- if (read > 127)
- {
- int count = _currentEncoding.GetByteCount(((char)read).ToString());
- _position += count;
- }
- else
- {
- _position++;
- }
- if (read != -1)
- {
- _sbBuffer.Append((char)read);
- }
- return read;
- }
-
- public int Position
- {
- get { return _position; }
- }
-
- public string GetBuffer()
- {
- return _sbBuffer.ToString();
- }
-
- public void CleanBuffer()
- {
- _sbBuffer.Clear();
+ _currentEncoding = streamReader.CurrentEncoding;
}
}
-}
+
+ public override int Read()
+ {
+ int read = _baseReader.Read();
+ if (read > 127)
+ {
+ int count = _currentEncoding.GetByteCount(((char)read).ToString());
+ Position += count;
+ }
+ else
+ {
+ Position++;
+ }
+ if (read != -1)
+ {
+ _sbBuffer.Append((char)read);
+ }
+ return read;
+ }
+
+ public int Position { get; private set; }
+
+ public string GetBuffer()
+ {
+ return _sbBuffer.ToString();
+ }
+
+ public void CleanBuffer()
+ {
+ _sbBuffer.Clear();
+ }
+}
\ No newline at end of file
diff --git a/CsvLib/ByteArraySearcher.cs b/CsvLib/ByteArraySearcher.cs
index cbe0b64..5b12aaa 100644
--- a/CsvLib/ByteArraySearcher.cs
+++ b/CsvLib/ByteArraySearcher.cs
@@ -1,5 +1,3 @@
-#nullable enable
-
namespace CsvLib;
public class ByteArraySearcher
diff --git a/CsvLib/CsvFieldIndexer.cs b/CsvLib/CsvFieldIndexer.cs
index 1a62582..1eef8e3 100644
--- a/CsvLib/CsvFieldIndexer.cs
+++ b/CsvLib/CsvFieldIndexer.cs
@@ -3,322 +3,319 @@ using System.Collections.Generic;
using System.IO;
using System.Text;
-namespace CsvLib
+namespace CsvLib;
+
+public class CsvFieldIndexer
{
- public class CsvFieldIndexer
+ private bool _insideString;
+
+ private Encoding _currentEncoding = Encoding.Default;
+
+ private readonly char _separator;
+ private readonly char _quoteChar;
+ private readonly char _escapeChar;
+
+ public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
{
- private bool _insideString;
+ _separator = separator;
+ _quoteChar = quoteChar;
+ _escapeChar = escapeChar;
+ }
- private Encoding _currentEncoding = Encoding.Default;
+ private List _index = new();
- private readonly char _separator;
- private readonly char _quoteChar;
- private readonly char _escapeChar;
+ public List Index { get { return _index; } }
- public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
+ private List> _fieldIndex = new();
+
+ public List> FieldIndex { get { return _fieldIndex; } }
+
+ private void DummyParser(string line)
+ {
+ for (int i = 0; i < line.Length; i++)
{
- _separator = separator;
- _quoteChar = quoteChar;
- _escapeChar = escapeChar;
- }
-
- private List _index = new();
-
- public List Index { get { return _index; } }
-
- private List> _fieldIndex = new();
-
- public List> FieldIndex { get { return _fieldIndex; } }
-
- private void DummyParser(string line)
- {
- for (int i = 0; i < line.Length; i++)
+ char c = line[i];
+ if (c == _separator && _insideString == false)
{
- char c = line[i];
- if (c == _separator && _insideString == false)
- {
- continue;
- }
- if (c == _quoteChar && _insideString == false)
- {
- _insideString = true;
- continue;
- }
- if (c == _quoteChar && _insideString)
- {
- _insideString = false;
- continue;
- }
- if (c == _escapeChar && _insideString)
- {
- i++;
- }
+ continue;
+ }
+ if (c == _quoteChar && _insideString == false)
+ {
+ _insideString = true;
+ continue;
+ }
+ if (c == _quoteChar && _insideString)
+ {
+ _insideString = false;
+ continue;
+ }
+ if (c == _escapeChar && _insideString)
+ {
+ i++;
}
}
+ }
- private List ParseLineIndex(string line, long lineOffset)
+ private List ParseLineIndex(string line, long lineOffset)
+ {
+ List fieldPositions = new();
+ long? fieldStartPosition = null;
+ long? fieldEndPosition = null;
+ int unicodeDelta = 0;
+ for (int i = 0; i < line.Length; i++)
{
- List fieldPositions = new();
- long? fieldStartPosition = null;
- long? fieldEndPosition = null;
- int unicodeDelta = 0;
- for (int i = 0; i < line.Length; i++)
+ char c = line[i];
+ if (c == _separator && _insideString == false)
{
- char c = line[i];
- if (c == _separator && _insideString == false)
- {
- if (fieldStartPosition != null)
- {
- fieldPositions.Add((long)fieldStartPosition);
- fieldPositions.Add((long)fieldEndPosition);
- }
- fieldStartPosition = null;
- fieldEndPosition = null;
- }
- else if (c == _quoteChar && _insideString == false)
- {
- _insideString = true;
- }
- else if (c == _quoteChar && _insideString)
- {
- _insideString = false;
- }
- else if (c == _escapeChar && _insideString)
- {
- i++;
- }
- else if ((c == '\n' || c == '\r') && _insideString == false)
- {
- break;
- }
- else
- {
- if (c > 127)
- {
- unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
- }
-
- long absolutePosition = lineOffset + i + unicodeDelta;
- fieldStartPosition ??= absolutePosition;
- fieldEndPosition = absolutePosition;
- }
- }
- if (_insideString == false)
- {
- if (fieldStartPosition != null)
+ if (fieldStartPosition != null && fieldEndPosition != null)
{
fieldPositions.Add((long)fieldStartPosition);
fieldPositions.Add((long)fieldEndPosition);
}
+ fieldStartPosition = null;
+ fieldEndPosition = null;
}
- return fieldPositions;
- }
-
- private void GenerateIndex(string file)
- {
- using FileStream stream = new(file, FileMode.Open);
- using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
- GenerateIndex(streamReader);
- stream.Close();
- }
-
- public void GenerateIndex(TextReader textReader)
- {
- _insideString = false;
- _index.Clear();
- _index.Add(0);
- int idxRow = 0;
- if (textReader is StreamReader streamReader)
+ else if (c == _quoteChar && _insideString == false)
{
- _currentEncoding = streamReader.CurrentEncoding;
+ _insideString = true;
}
- using BufferedTextReader reader = new(textReader);
- string currentLine;
- while ((currentLine = reader.ReadLine()) != null)
+ else if (c == _quoteChar && _insideString)
{
- DummyParser(currentLine);
- if (_insideString) { continue; }
-
- string fullLine = reader.GetBuffer();
- reader.CleanBuffer();
- List fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]);
- _fieldIndex.Add(fieldIndexes);
-
- _index.Add(reader.Position);
-
- idxRow++;
+ _insideString = false;
}
- }
-
- private const byte FileFormatVersion = 1;
-
- private void SaveFile(string indexFile)
- {
- if (indexFile == null) { return; }
- if (File.Exists(indexFile))
+ else if (c == _escapeChar && _insideString)
{
- File.Delete(indexFile);
+ i++;
}
- Stream streamOut = File.Open(indexFile, FileMode.Create);
- using (BinaryWriter binWriter = new(streamOut))
+ else if ((c == '\n' || c == '\r') && _insideString == false)
{
- binWriter.Write((byte)'C');
- binWriter.Write((byte)'S');
- binWriter.Write((byte)'V');
-
- binWriter.Write(FileFormatVersion);
-
- binWriter.Write(_index.Count);
- foreach (long currentIndex in _index)
+ break;
+ }
+ else
+ {
+ if (c > 127)
{
- binWriter.Write(currentIndex);
+ unicodeDelta += _currentEncoding.GetByteCount(c.ToString()) - 1;
}
- binWriter.Write(_fieldIndex.Count);
- foreach (List currentFieldIndex in _fieldIndex)
- {
- binWriter.Write(currentFieldIndex.Count);
- for (int i = 0; i < currentFieldIndex.Count; i++)
- {
- binWriter.Write(currentFieldIndex[i]);
- }
- }
- }
- streamOut.Close();
- }
-
- private bool LoadFile(string indexFile)
- {
- if (File.Exists(indexFile) == false)
- {
- return false;
- }
- List tempIndex;
- List> tempFieldIndex;
- Stream streamIn = File.Open(indexFile, FileMode.Open);
- try
- {
- using BinaryReader binReader = new(streamIn);
-
- byte magik0 = binReader.ReadByte();
- byte magik1 = binReader.ReadByte();
- byte magik2 = binReader.ReadByte();
- if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; }
-
- byte fileVersion = binReader.ReadByte();
- if (fileVersion != FileFormatVersion) { return false; }
-
- int numIndexes = binReader.ReadInt32();
- tempIndex = new List(numIndexes);
- for (int i = 0; i < numIndexes; i++)
- {
- long value = binReader.ReadInt64();
- tempIndex.Add(value);
- }
-
- int numFieldIndexes = binReader.ReadInt32();
- tempFieldIndex = new List>(numFieldIndexes);
- for (int j = 0; j < numFieldIndexes; j++)
- {
- int numCurrentFieldIndexes = binReader.ReadInt32();
- List currentFieldIndex = new(numCurrentFieldIndexes);
- for (int i = 0; i < numCurrentFieldIndexes; i++)
- {
- long value = binReader.ReadInt64();
- currentFieldIndex.Add(value);
- }
- tempFieldIndex.Add(currentFieldIndex);
- }
- }
- catch (Exception)
- {
- // NON NON NOM
- return false;
- }
- finally
- {
- streamIn.Close();
- }
- _index = tempIndex;
- _fieldIndex = tempFieldIndex;
- return true;
- }
-
- public void LoadIndexOfFile(string file)
- {
- DateTime dtFile = File.GetCreationTime(file);
- string indexFile = $"{file}.idx";
- if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
- {
- if (LoadFile(indexFile)) { return; }
- }
-
- // Generate index
- DateTime dtNow = DateTime.UtcNow;
- GenerateIndex(file);
- TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
-
- // Save Index if expensive generation
- if (tsGenIndex.TotalSeconds > 2)
- {
- SaveFile(indexFile);
+ long absolutePosition = lineOffset + i + unicodeDelta;
+ fieldStartPosition ??= absolutePosition;
+ fieldEndPosition = absolutePosition;
}
}
-
- public List Search(string fileName, string textToSearch, Action notifyProgress = null)
+ if (_insideString == false)
{
- List index;
- using FileStream streamIn = new(fileName, FileMode.Open);
- try
+ if (fieldStartPosition != null && fieldEndPosition != null)
{
- index = Search(streamIn, textToSearch, notifyProgress);
+ fieldPositions.Add((long)fieldStartPosition);
+ fieldPositions.Add((long)fieldEndPosition);
}
- finally
- {
- streamIn.Close();
- }
- return index ?? new List();
}
-
- public List Search(Stream streamIn, string textToSearch, Action notifyProgress = null)
+ return fieldPositions;
+ }
+
+ private void GenerateIndex(string file)
+ {
+ using FileStream stream = new(file, FileMode.Open);
+ using StreamReader streamReader = new(stream, Encoding.Default, true, 4096);
+ GenerateIndex(streamReader);
+ stream.Close();
+ }
+
+ public void GenerateIndex(TextReader textReader)
+ {
+ _insideString = false;
+ _index.Clear();
+ _index.Add(0);
+ int idxRow = 0;
+ if (textReader is StreamReader streamReader)
{
- // TODO: Use MemoryMappedFile for better IO performance
- DateTime datePrevious = DateTime.UtcNow;
- List newIndexes = new();
- byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
- ByteArraySearcher searcher = new(bText);
- byte[] buffer = new byte[1024];
- for (int j = 0; j < _fieldIndex.Count; j++)
- {
- for (int i = 0; i < _fieldIndex[j].Count; i += 2)
- {
- TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
- if (tsElapsed.TotalMilliseconds > 200)
- {
- datePrevious = DateTime.UtcNow;
- notifyProgress?.Invoke(j/(float)_fieldIndex.Count);
- }
-
- long offset = _fieldIndex[j][i];
- int length = (int)(_fieldIndex[j][i + 1] - offset) + 1;
-
- if (buffer.Length < length)
- {
- buffer = new byte[length];
- }
- streamIn.Seek(offset, SeekOrigin.Begin);
- int read = streamIn.Read(buffer, 0, length);
- if (read != length) { throw new Exception($"Search: Expected {length} bytes, but read {read}"); }
+ _currentEncoding = streamReader.CurrentEncoding;
+ }
+ using BufferedTextReader reader = new(textReader);
+ while (reader.ReadLine() is { } currentLine)
+ {
+ DummyParser(currentLine);
+ if (_insideString) { continue; }
- bool matches = searcher.Contains(buffer, length);
- if (matches == false) { continue; }
-
- newIndexes.Add(_index[j]);
- break;
- }
- }
+ string fullLine = reader.GetBuffer();
+ reader.CleanBuffer();
+ List fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]);
+ _fieldIndex.Add(fieldIndexes);
- return newIndexes;
+ _index.Add(reader.Position);
+
+ idxRow++;
}
}
-}
+
+ private const byte FileFormatVersion = 1;
+
+ private void SaveFile(string indexFile)
+ {
+ if (File.Exists(indexFile))
+ {
+ File.Delete(indexFile);
+ }
+ Stream streamOut = File.Open(indexFile, FileMode.Create);
+ using (BinaryWriter binWriter = new(streamOut))
+ {
+ binWriter.Write((byte)'C');
+ binWriter.Write((byte)'S');
+ binWriter.Write((byte)'V');
+
+ binWriter.Write(FileFormatVersion);
+
+ binWriter.Write(_index.Count);
+ foreach (long currentIndex in _index)
+ {
+ binWriter.Write(currentIndex);
+ }
+
+ binWriter.Write(_fieldIndex.Count);
+ foreach (List currentFieldIndex in _fieldIndex)
+ {
+ binWriter.Write(currentFieldIndex.Count);
+ foreach (long fieldIndex in currentFieldIndex)
+ {
+ binWriter.Write(fieldIndex);
+ }
+ }
+ }
+ streamOut.Close();
+ }
+
+ private bool LoadFile(string indexFile)
+ {
+ if (File.Exists(indexFile) == false)
+ {
+ return false;
+ }
+ List tempIndex;
+ List> tempFieldIndex;
+ Stream streamIn = File.Open(indexFile, FileMode.Open);
+ try
+ {
+ using BinaryReader binReader = new(streamIn);
+
+ byte magik0 = binReader.ReadByte();
+ byte magik1 = binReader.ReadByte();
+ byte magik2 = binReader.ReadByte();
+ if (magik0 != (byte)'C' || magik1 != (byte)'S' || magik2 != (byte)'V') { return false; }
+
+ byte fileVersion = binReader.ReadByte();
+ if (fileVersion != FileFormatVersion) { return false; }
+
+ int numIndexes = binReader.ReadInt32();
+ tempIndex = new List(numIndexes);
+ for (int i = 0; i < numIndexes; i++)
+ {
+ long value = binReader.ReadInt64();
+ tempIndex.Add(value);
+ }
+
+ int numFieldIndexes = binReader.ReadInt32();
+ tempFieldIndex = new List>(numFieldIndexes);
+ for (int j = 0; j < numFieldIndexes; j++)
+ {
+ int numCurrentFieldIndexes = binReader.ReadInt32();
+ List currentFieldIndex = new(numCurrentFieldIndexes);
+ for (int i = 0; i < numCurrentFieldIndexes; i++)
+ {
+ long value = binReader.ReadInt64();
+ currentFieldIndex.Add(value);
+ }
+ tempFieldIndex.Add(currentFieldIndex);
+ }
+ }
+ catch (Exception)
+ {
+ // NON NON NOM
+ return false;
+ }
+ finally
+ {
+ streamIn.Close();
+ }
+ _index = tempIndex;
+ _fieldIndex = tempFieldIndex;
+ return true;
+ }
+
+ public void LoadIndexOfFile(string file)
+ {
+ DateTime dtFile = File.GetCreationTime(file);
+ string indexFile = $"{file}.idx";
+ if (File.Exists(indexFile) && File.GetCreationTime(indexFile) > dtFile)
+ {
+ if (LoadFile(indexFile)) { return; }
+ }
+
+ // Generate index
+ DateTime dtNow = DateTime.UtcNow;
+ GenerateIndex(file);
+ TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
+
+ // Save Index if expensive generation
+ if (tsGenIndex.TotalSeconds > 2)
+ {
+ SaveFile(indexFile);
+ }
+ }
+
+ public List Search(string fileName, string textToSearch, Action? notifyProgress = null)
+ {
+ List index;
+ using FileStream streamIn = new(fileName, FileMode.Open);
+ try
+ {
+ index = Search(streamIn, textToSearch, notifyProgress);
+ }
+ finally
+ {
+ streamIn.Close();
+ }
+ return index;
+ }
+
+ public List Search(Stream streamIn, string textToSearch, Action? notifyProgress = null)
+ {
+ // TODO: Use MemoryMappedFile for better IO performance
+ DateTime datePrevious = DateTime.UtcNow;
+ List newIndexes = new();
+ byte[] bText = Encoding.UTF8.GetBytes(textToSearch);
+ ByteArraySearcher searcher = new(bText);
+ byte[] buffer = new byte[1024];
+ for (int j = 0; j < _fieldIndex.Count; j++)
+ {
+ for (int i = 0; i < _fieldIndex[j].Count; i += 2)
+ {
+ TimeSpan tsElapsed = DateTime.UtcNow - datePrevious;
+ if (tsElapsed.TotalMilliseconds > 200)
+ {
+ datePrevious = DateTime.UtcNow;
+ notifyProgress?.Invoke(j/(float)_fieldIndex.Count);
+ }
+
+ long offset = _fieldIndex[j][i];
+ int length = (int)(_fieldIndex[j][i + 1] - offset) + 1;
+
+ if (buffer.Length < length)
+ {
+ buffer = new byte[length];
+ }
+ streamIn.Seek(offset, SeekOrigin.Begin);
+ int read = streamIn.Read(buffer, 0, length);
+ if (read != length) { throw new Exception($"Search: Expected {length} bytes, but read {read}"); }
+
+ bool matches = searcher.Contains(buffer, length);
+ if (matches == false) { continue; }
+
+ newIndexes.Add(_index[j]);
+ break;
+ }
+ }
+
+ return newIndexes;
+ }
+}
\ No newline at end of file
diff --git a/CsvLib/CsvLib.csproj b/CsvLib/CsvLib.csproj
index 96b54da..53bbed1 100644
--- a/CsvLib/CsvLib.csproj
+++ b/CsvLib/CsvLib.csproj
@@ -1,8 +1,9 @@
- net7.0
+ net8.0
11
+ enable
diff --git a/CsvLib/CsvParser.cs b/CsvLib/CsvParser.cs
index 69f8d30..b3c9aed 100644
--- a/CsvLib/CsvParser.cs
+++ b/CsvLib/CsvParser.cs
@@ -2,102 +2,100 @@
using System.IO;
using System.Text;
-namespace CsvLib
+namespace CsvLib;
+
+public class CsvParser
{
- public class CsvParser
+ private bool _insideString;
+
+ private readonly char _separator;
+ private readonly char _quoteChar;
+ private readonly char _escapeChar;
+
+ public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
{
- private bool _insideString;
+ _separator = separator;
+ _quoteChar = quoteChar;
+ _escapeChar = escapeChar;
+ }
- private readonly char _separator;
- private readonly char _quoteChar;
- private readonly char _escapeChar;
+ private List> _data = new();
- public CsvParser(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
+ private List? _currentReg;
+ private StringBuilder? _currentCell;
+
+ public List> Data
+ {
+ get { return _data; }
+ }
+
+ private void ParseLine(string line)
+ {
+ _currentReg ??= new List();
+ _currentCell ??= new StringBuilder();
+
+ for (int i = 0; i < line.Length; i++)
{
- _separator = separator;
- _quoteChar = quoteChar;
- _escapeChar = escapeChar;
- }
-
- private List> _data = new();
-
- private List _currentReg;
- StringBuilder _currentCell;
-
- public List> Data
- {
- get { return _data; }
- }
-
- private void ParseLine(string line)
- {
- _currentReg ??= new List();
- _currentCell ??= new StringBuilder();
-
- for (int i = 0; i < line.Length; i++)
- {
- char c = line[i];
- if (c == _separator && _insideString == false)
- {
- _currentReg.Add(_currentCell.ToString());
- _currentCell.Clear();
- continue;
- }
- if (c == _quoteChar && _insideString == false)
- {
- _insideString = true;
- continue;
- }
- if (c == _quoteChar && _insideString)
- {
- _insideString = false;
- continue;
- }
- if (c == _escapeChar && _insideString)
- {
- i++;
- if (i == line.Length) { break; }
- c = line[i];
- }
-
- _currentCell.Append(c);
- }
-
-
- if (_insideString)
- {
- _currentCell.Append('\n');
- }
- else
+ char c = line[i];
+ if (c == _separator && _insideString == false)
{
_currentReg.Add(_currentCell.ToString());
_currentCell.Clear();
- _data.Add(_currentReg);
- _currentReg = null;
+ continue;
}
+ if (c == _quoteChar && _insideString == false)
+ {
+ _insideString = true;
+ continue;
+ }
+ if (c == _quoteChar && _insideString)
+ {
+ _insideString = false;
+ continue;
+ }
+ if (c == _escapeChar && _insideString)
+ {
+ i++;
+ if (i == line.Length) { break; }
+ c = line[i];
+ }
+
+ _currentCell.Append(c);
}
- public void ParseFile(string file, long offset = 0, int count = 0)
+
+ if (_insideString)
{
- _insideString = false;
- _data = new List>();
+ _currentCell.Append('\n');
+ }
+ else
+ {
+ _currentReg.Add(_currentCell.ToString());
+ _currentCell.Clear();
+ _data.Add(_currentReg);
_currentReg = null;
- FileStream stream = new(file, FileMode.Open);
- stream.Seek(offset, SeekOrigin.Begin);
- using (StreamReader reader = new(stream, Encoding.Default, true, 4096))
+ }
+ }
+
+ public void ParseFile(string file, long offset = 0, int count = 0)
+ {
+ _insideString = false;
+ _data = new List>();
+ _currentReg = null;
+ FileStream stream = new(file, FileMode.Open);
+ stream.Seek(offset, SeekOrigin.Begin);
+ using (StreamReader reader = new(stream, Encoding.Default, true, 4096))
+ {
+ while (reader.ReadLine() is { } currentLine)
{
- string currentLine;
- while ((currentLine = reader.ReadLine()) != null)
+ ParseLine(currentLine);
+ if (count > 0 && Data.Count == count)
{
- ParseLine(currentLine);
- if (count > 0 && Data.Count == count)
- {
- break;
- }
+ break;
}
}
- stream.Close();
}
-
+ stream.Close();
}
-}
+
+}
\ No newline at end of file
diff --git a/CsvView.sln.DotSettings b/CsvView.sln.DotSettings
index f9ba49e..ec9561f 100644
--- a/CsvView.sln.DotSettings
+++ b/CsvView.sln.DotSettings
@@ -1,3 +1,4 @@
/usr/share/dotnet/sdk/7.0.107/MSBuild.dll
- 4294967293
\ No newline at end of file
+ 4294967293
+ True
\ No newline at end of file
diff --git a/CsvView/App.axaml b/CsvView/App.axaml
index 1df1472..8cce478 100644
--- a/CsvView/App.axaml
+++ b/CsvView/App.axaml
@@ -1,7 +1,7 @@
+ RequestedThemeVariant="Dark">
diff --git a/CsvView/CsvView.csproj b/CsvView/CsvView.csproj
index 8de198e..4890001 100644
--- a/CsvView/CsvView.csproj
+++ b/CsvView/CsvView.csproj
@@ -1,14 +1,13 @@
WinExe
- net7.0
+ net8.0
enable
true
app.manifest
true
-
-
+
@@ -17,8 +16,7 @@
-
-
+
diff --git a/CsvView/MainWindow.axaml b/CsvView/MainWindow.axaml
index 3def5a3..9f43c5b 100644
--- a/CsvView/MainWindow.axaml
+++ b/CsvView/MainWindow.axaml
@@ -9,12 +9,12 @@
Width="800"
Height="600"
Title="CsvView">
-
-
+
+
-
+
@@ -22,7 +22,7 @@
- /
+ /
diff --git a/CsvView/MainWindow.axaml.cs b/CsvView/MainWindow.axaml.cs
index aa1077e..9469217 100644
--- a/CsvView/MainWindow.axaml.cs
+++ b/CsvView/MainWindow.axaml.cs
@@ -28,8 +28,8 @@ public partial class MainWindow : Window
AllowMultiple = false,
FileTypeFilter = new List
{
- new("CSV Files") { Patterns = new[] { "*.csv" } },
- new("Any File") { Patterns = new[] { "*" } },
+ new("CSV Files") { Patterns = new[] { "*.csv", }, },
+ new("Any File") { Patterns = new[] { "*", }, },
},
});
@@ -89,6 +89,8 @@ public partial class MainWindow : Window
private void Search(string? textToSearch)
{
+ if (textToSearch == null) { return; }
+
// TODO: Loading animation
CsvFieldIndexer csvIndexer = new();
csvIndexer.LoadIndexOfFile(_loadedFile);
diff --git a/CsvView/Program.cs b/CsvView/Program.cs
index 977975b..b4b1f5d 100644
--- a/CsvView/Program.cs
+++ b/CsvView/Program.cs
@@ -3,7 +3,7 @@ using System;
namespace CsvView;
-class Program
+static class Program
{
// Initialization code. Don't use any Avalonia, third-party APIs or any
// SynchronizationContext-reliant code before AppMain is called: things aren't initialized
@@ -13,7 +13,7 @@ class Program
.StartWithClassicDesktopLifetime(args);
// Avalonia configuration, don't remove; also used by visual designer.
- public static AppBuilder BuildAvaloniaApp()
+ private static AppBuilder BuildAvaloniaApp()
=> AppBuilder.Configure()
.UsePlatformDetect()
.WithInterFont()