diff --git a/CsvLib.Tests/CsvFieldIndexerTests.cs b/CsvLib.Tests/CsvFieldIndexerTests.cs
new file mode 100644
index 0000000..3a8a48e
--- /dev/null
+++ b/CsvLib.Tests/CsvFieldIndexerTests.cs
@@ -0,0 +1,142 @@
+using CsvLib;
+
+namespace CvsLib;
+
+/// <summary>
+/// Tests for <c>CsvFieldIndexer.GenerateIndex(TextReader)</c>.
+/// </summary>
+/// <remarks>
+/// All multi-line inputs use an explicit <c>\n</c> terminator. A raw string literal
+/// would embed whatever line break the source file was saved with, so the expected
+/// offsets below would change on a CRLF checkout.
+/// Row offsets (<c>Index</c>) count every character read plus the final read that
+/// detects end of input; field offsets (<c>FieldIndex</c>) are stored as
+/// start/end pairs, both inclusive, excluding the surrounding quotes.
+/// </remarks>
+public class CsvFieldIndexerTests
+{
+    #region GenerateIndex
+
+    /// <summary>Empty input yields only the initial row offset and no field rows.</summary>
+    [Fact]
+    public void GenerateIndex__Empty()
+    {
+        // --- Arrange
+        StringReader sr = new(string.Empty);
+
+        // --- Act
+        CsvFieldIndexer indexer = new();
+        indexer.GenerateIndex(sr);
+
+        // --- Assert
+        Assert.Single(indexer.Index);
+        Assert.Equal(0, indexer.Index[0]);
+
+        Assert.Empty(indexer.FieldIndex);
+    }
+
+    /// <summary>A single unquoted line is one row holding one field.</summary>
+    [Fact]
+    public void GenerateIndex__PlainText__OneRow()
+    {
+        // --- Arrange
+        StringReader sr = new("Hello World");
+
+        // --- Act
+        CsvFieldIndexer indexer = new();
+        indexer.GenerateIndex(sr);
+
+        // --- Assert
+        Assert.Equal(2, indexer.Index.Count);
+        Assert.Equal(0, indexer.Index[0]);
+        // 11 characters + the read that hit end of input.
+        Assert.Equal(12, indexer.Index[1]);
+
+        Assert.Single(indexer.FieldIndex);
+        Assert.Equal(0, indexer.FieldIndex[0][0]);
+        Assert.Equal(10, indexer.FieldIndex[0][1]);
+    }
+
+    /// <summary>Two unquoted lines are two rows, each holding one field.</summary>
+    [Fact]
+    public void GenerateIndex__TwoLinesOfPlainText__TwoRows()
+    {
+        // --- Arrange
+        StringReader sr = new("Hello World\nHello World");
+
+        // --- Act
+        CsvFieldIndexer indexer = new();
+        indexer.GenerateIndex(sr);
+
+        // --- Assert
+        Assert.Equal(3, indexer.Index.Count);
+        Assert.Equal(0, indexer.Index[0]);
+        Assert.Equal(12, indexer.Index[1]);
+        Assert.Equal(24, indexer.Index[2]);
+
+        Assert.Equal(2, indexer.FieldIndex.Count);
+        Assert.Equal(2, indexer.FieldIndex[0].Count);
+        Assert.Equal(0, indexer.FieldIndex[0][0]);
+        Assert.Equal(10, indexer.FieldIndex[0][1]);
+        Assert.Equal(2, indexer.FieldIndex[1].Count);
+        Assert.Equal(12, indexer.FieldIndex[1][0]);
+        Assert.Equal(22, indexer.FieldIndex[1][1]);
+    }
+
+    /// <summary>Quoted lines report field offsets that exclude the quote characters.</summary>
+    [Fact]
+    public void GenerateIndex__TwoLinesOfQuotedText__TwoRows()
+    {
+        // --- Arrange
+        StringReader sr = new("\"Hello World\"\n\"Hello World\"");
+
+        // --- Act
+        CsvFieldIndexer indexer = new();
+        indexer.GenerateIndex(sr);
+
+        // --- Assert
+        Assert.Equal(3, indexer.Index.Count);
+        Assert.Equal(0, indexer.Index[0]);
+        Assert.Equal(14, indexer.Index[1]);
+        Assert.Equal(28, indexer.Index[2]);
+
+        Assert.Equal(2, indexer.FieldIndex.Count);
+        Assert.Equal(2, indexer.FieldIndex[0].Count);
+        Assert.Equal(1, indexer.FieldIndex[0][0]);
+        Assert.Equal(11, indexer.FieldIndex[0][1]);
+        Assert.Equal(2, indexer.FieldIndex[1].Count);
+        Assert.Equal(15, indexer.FieldIndex[1][0]);
+        Assert.Equal(25, indexer.FieldIndex[1][1]);
+    }
+
+    /// <summary>Two quoted columns per line produce two start/end pairs per row.</summary>
+    [Fact]
+    public void GenerateIndex__TwoLinesWithTwoQuotedColumns__TwoRowsTwoFields()
+    {
+        // --- Arrange
+        StringReader sr = new("\"Hello\",\"World\"\n\"Hello\",\"World\"");
+
+        // --- Act
+        CsvFieldIndexer indexer = new();
+        indexer.GenerateIndex(sr);
+
+        // --- Assert
+        Assert.Equal(3, indexer.Index.Count);
+        Assert.Equal(0, indexer.Index[0]);
+        Assert.Equal(16, indexer.Index[1]);
+        Assert.Equal(32, indexer.Index[2]);
+
+        Assert.Equal(2, indexer.FieldIndex.Count);
+        Assert.Equal(4, indexer.FieldIndex[0].Count);
+        Assert.Equal(1, indexer.FieldIndex[0][0]);
+        Assert.Equal(5, indexer.FieldIndex[0][1]);
+        Assert.Equal(9, indexer.FieldIndex[0][2]);
+        Assert.Equal(13, indexer.FieldIndex[0][3]);
+        Assert.Equal(4, indexer.FieldIndex[1].Count);
+        Assert.Equal(17, indexer.FieldIndex[1][0]);
+        Assert.Equal(21, indexer.FieldIndex[1][1]);
+        Assert.Equal(25, indexer.FieldIndex[1][2]);
+        Assert.Equal(29, indexer.FieldIndex[1][3]);
+    }
+
+    #endregion GenerateIndex
+}
\ No newline at end of file
diff --git a/CsvLib.Tests/CsvLib.Tests.csproj b/CsvLib.Tests/CsvLib.Tests.csproj
new file mode 100644
index 0000000..59b30ab
--- /dev/null
+++ b/CsvLib.Tests/CsvLib.Tests.csproj
@@ -0,0 +1,29 @@
+
+
+
+ net7.0
+ enable
+ enable
+ CvsLib
+
+ false
+
+
+
+
+
+
+ runtime; build; native; contentfiles; analyzers; buildtransitive
+ all
+
+
+ runtime; build; native; contentfiles; analyzers; buildtransitive
+ all
+
+
+
+
+
+
+
+
diff --git a/CsvLib.Tests/Usings.cs b/CsvLib.Tests/Usings.cs
new file mode 100644
index 0000000..c802f44
--- /dev/null
+++ b/CsvLib.Tests/Usings.cs
@@ -0,0 +1 @@
+global using Xunit;
diff --git a/CsvLib/BufferedTextReader.cs b/CsvLib/BufferedTextReader.cs
new file mode 100644
index 0000000..9f349a0
--- /dev/null
+++ b/CsvLib/BufferedTextReader.cs
@@ -0,0 +1,54 @@
+using System;
+using System.IO;
+using System.Text;
+
+namespace CsvLib
+{
+    /// <summary>
+    /// A <see cref="TextReader"/> wrapper that counts read calls and records every
+    /// character it hands out, so the caller can recover the exact text consumed
+    /// (line terminators included) since the last <see cref="CleanBuffer"/>.
+    /// </summary>
+    public class BufferedTextReader : TextReader
+    {
+        private readonly TextReader _baseReader;
+        private readonly StringBuilder _sbBuffer = new StringBuilder();
+        private int _position;
+
+        /// <summary>Creates a reader that pulls characters from <paramref name="baseReader"/>.</summary>
+        /// <param name="baseReader">The underlying reader. It is not disposed by this class.</param>
+        public BufferedTextReader(TextReader baseReader)
+        {
+            _baseReader = baseReader;
+        }
+
+        /// <summary>
+        /// Reads one character from the underlying reader and appends it to the capture buffer.
+        /// </summary>
+        /// <returns>The character read, or -1 when the underlying reader is exhausted.</returns>
+        public override int Read()
+        {
+            // The counter advances on every call, including the call that returns -1.
+            _position++;
+            int read = _baseReader.Read();
+            if (read != -1)
+            {
+                _sbBuffer.Append((char)read);
+            }
+            return read;
+        }
+
+        /// <summary>
+        /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/>,
+        /// going through <see cref="Read()"/> so that <see cref="Position"/> and the
+        /// capture buffer stay consistent with single-character reads.
+        /// </summary>
+        /// <param name="buffer">Destination array.</param>
+        /// <param name="index">First position in <paramref name="buffer"/> to write to.</param>
+        /// <param name="count">Maximum number of characters to read.</param>
+        /// <returns>The number of characters stored; 0 when no more input is available.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
+        /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
+        public override int Read(char[] buffer, int index, int count)
+        {
+            if (buffer == null)
+            {
+                throw new ArgumentNullException(nameof(buffer));
+            }
+            if (index < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+            if (count < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+            if (buffer.Length - index < count)
+            {
+                throw new ArgumentException("The index/count range exceeds the buffer length.");
+            }
+
+            int stored = 0;
+            while (stored < count)
+            {
+                int read = Read();
+                if (read == -1)
+                {
+                    break;
+                }
+                buffer[index + stored] = (char)read;
+                stored++;
+            }
+            return stored;
+        }
+
+        /// <summary>Returns the next character of the underlying reader without consuming it.</summary>
+        public override int Peek()
+        {
+            return _baseReader.Peek();
+        }
+
+        /// <summary>
+        /// Number of single-character reads performed so far. A read that reports
+        /// end of input is counted as well.
+        /// </summary>
+        public int Position
+        {
+            get { return _position; }
+        }
+
+        /// <summary>Returns the characters read since construction or the last <see cref="CleanBuffer"/>.</summary>
+        public string GetBuffer()
+        {
+            return _sbBuffer.ToString();
+        }
+
+        /// <summary>Discards the captured characters; <see cref="Position"/> is left untouched.</summary>
+        public void CleanBuffer()
+        {
+            _sbBuffer.Clear();
+        }
+    }
+}
diff --git a/CsvLib/CsvFieldIndexer.cs b/CsvLib/CsvFieldIndexer.cs
new file mode 100644
index 0000000..dbd1f8b
--- /dev/null
+++ b/CsvLib/CsvFieldIndexer.cs
@@ -0,0 +1,205 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace CsvLib
+{
+    /// <summary>
+    /// Scans CSV text and records two sets of offsets: where each row starts
+    /// (<see cref="Index"/>) and, for each row, the start and end offset of every
+    /// field that has content (<see cref="FieldIndex"/>).
+    /// </summary>
+    /// <remarks>
+    /// Offsets are counted in characters read from the <see cref="TextReader"/>.
+    /// NOTE(review): for multi-byte encodings these are presumably not byte offsets
+    /// in the file; confirm against how callers seek.
+    /// </remarks>
+    public class CsvFieldIndexer
+    {
+        // Quote state shared by DummyParser and ParseLineIndex; it survives across
+        // physical lines so that a quoted field may contain line breaks.
+        private bool _insideString;
+
+        private readonly char _separator;
+        private readonly char _quoteChar;
+        private readonly char _escapeChar;
+
+        private readonly List<long> _index = new List<long>();
+        private readonly List<List<long>> _fieldIndex = new List<List<long>>();
+
+        /// <summary>Creates an indexer for the given CSV dialect.</summary>
+        /// <param name="separator">Field separator.</param>
+        /// <param name="quoteChar">Character that opens and closes a quoted field.</param>
+        /// <param name="escapeChar">Inside a quoted field, makes the next character be skipped.</param>
+        public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
+        {
+            _separator = separator;
+            _quoteChar = quoteChar;
+            _escapeChar = escapeChar;
+        }
+
+        /// <summary>
+        /// Row offsets. Entry 0 is always 0; each completed row appends the reader
+        /// position reached after that row.
+        /// </summary>
+        public List<long> Index { get { return _index; } }
+
+        /// <summary>
+        /// One list per row, holding start/end offset pairs (both inclusive) of the
+        /// row's fields. Fields without content characters contribute no pair.
+        /// </summary>
+        public List<List<long>> FieldIndex { get { return _fieldIndex; } }
+
+        /// <summary>
+        /// Walks one physical line only to update the quote state, so the caller can
+        /// tell whether the logical row continues on the next line.
+        /// </summary>
+        private void DummyParser(string line)
+        {
+            for (int i = 0; i < line.Length; i++)
+            {
+                char c = line[i];
+                if (c == _separator && _insideString == false)
+                {
+                    continue;
+                }
+                if (c == _quoteChar)
+                {
+                    _insideString = !_insideString;
+                    continue;
+                }
+                if (c == _escapeChar && _insideString)
+                {
+                    // Skip the escaped character. No lookup of line[i] here: the escape
+                    // may be the last character of the line, and indexing past the end
+                    // would throw.
+                    i++;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Computes the start/end offsets of the fields of one logical row.
+        /// </summary>
+        /// <param name="line">Raw text of the row, line terminator included.</param>
+        /// <param name="lineOffset">Offset of <paramref name="line"/>[0] within the whole input.</param>
+        /// <returns>Flat list of start/end pairs, one pair per field that has content.</returns>
+        private List<long> ParseLineIndex(string line, long lineOffset)
+        {
+            List<long> fieldPositions = new List<long>();
+            long? fieldStartPosition = null;
+            long? fieldEndPosition = null;
+            for (int i = 0; i < line.Length; i++)
+            {
+                char c = line[i];
+                if (c == _separator && _insideString == false)
+                {
+                    // Separator outside quotes closes the current field, if any.
+                    if (fieldStartPosition != null)
+                    {
+                        fieldPositions.Add(fieldStartPosition.Value);
+                        fieldPositions.Add(fieldEndPosition.Value);
+                    }
+                    fieldStartPosition = null;
+                    fieldEndPosition = null;
+                }
+                else if (c == _quoteChar && _insideString == false)
+                {
+                    _insideString = true;
+                }
+                else if (c == _quoteChar && _insideString)
+                {
+                    _insideString = false;
+                }
+                else if (c == _escapeChar && _insideString)
+                {
+                    // Skip the escaped character; neither it nor the escape itself
+                    // extends the field range. No line[i] lookup, so a trailing
+                    // escape cannot index past the end.
+                    i++;
+                }
+                else if ((c == '\n' || c == '\r') && _insideString == false)
+                {
+                    // Line terminator outside quotes ends the row.
+                    break;
+                }
+                else
+                {
+                    long absolutePosition = lineOffset + i;
+                    if (fieldStartPosition == null) { fieldStartPosition = absolutePosition; }
+                    fieldEndPosition = absolutePosition;
+                }
+            }
+            if (_insideString == false && fieldStartPosition != null)
+            {
+                fieldPositions.Add(fieldStartPosition.Value);
+                fieldPositions.Add(fieldEndPosition.Value);
+            }
+            return fieldPositions;
+        }
+
+        /// <summary>Builds the index for the CSV file at <paramref name="file"/>.</summary>
+        /// <param name="file">Path of the CSV file.</param>
+        public void GenerateIndex(string file)
+        {
+            // Read-only access: the default for this FileMode is read/write, which
+            // fails on read-only files even though nothing is written here.
+            using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
+            using (StreamReader streamReader = new StreamReader(stream, Encoding.Default, true, 4096))
+            {
+                GenerateIndex(streamReader);
+            }
+        }
+
+        /// <summary>
+        /// Builds <see cref="Index"/> and <see cref="FieldIndex"/> from scratch by
+        /// reading <paramref name="textReader"/> to the end.
+        /// </summary>
+        /// <param name="textReader">Source of CSV text. It is read but not disposed.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="textReader"/> is null.</exception>
+        public void GenerateIndex(TextReader textReader)
+        {
+            if (textReader == null)
+            {
+                throw new ArgumentNullException(nameof(textReader));
+            }
+
+            _insideString = false;
+            _index.Clear();
+            // Reset the field index too, otherwise a second run appends to stale rows.
+            _fieldIndex.Clear();
+            _index.Add(0);
+            int idxRow = 0;
+            using (BufferedTextReader reader = new BufferedTextReader(textReader))
+            {
+                string currentLine;
+                while ((currentLine = reader.ReadLine()) != null)
+                {
+                    DummyParser(currentLine);
+                    // Still inside a quoted field: the row continues on the next line.
+                    if (_insideString) { continue; }
+
+                    // The capture buffer holds the whole logical row, terminators included.
+                    string fullLine = reader.GetBuffer();
+                    reader.CleanBuffer();
+                    List<long> fieldIndexes = ParseLineIndex(fullLine, _index[idxRow]);
+                    _fieldIndex.Add(fieldIndexes);
+
+                    _index.Add(reader.Position);
+
+                    idxRow++;
+                }
+            }
+        }
+
+        /// <summary>Writes the row offsets to <paramref name="indexFile"/>: an Int32 count followed by Int64 values.</summary>
+        private void Index_SaveFile(string indexFile)
+        {
+            // FileMode.Create overwrites an existing file, so no prior delete is needed.
+            using (Stream streamOut = File.Open(indexFile, FileMode.Create, FileAccess.Write, FileShare.None))
+            using (BinaryWriter binWriter = new BinaryWriter(streamOut))
+            {
+                binWriter.Write(_index.Count);
+                for (int i = 0; i < _index.Count; i++)
+                {
+                    binWriter.Write(_index[i]);
+                }
+            }
+        }
+
+        /// <summary>Reads row offsets previously written by <see cref="Index_SaveFile"/>.</summary>
+        private static List<long> Index_LoadFile(string indexFile)
+        {
+            List<long> tempIndex = new List<long>();
+
+            using (Stream streamIn = File.Open(indexFile, FileMode.Open, FileAccess.Read, FileShare.Read))
+            using (BinaryReader binReader = new BinaryReader(streamIn))
+            {
+                int numRegs = binReader.ReadInt32();
+                for (int i = 0; i < numRegs; i++)
+                {
+                    tempIndex.Add(binReader.ReadInt64());
+                }
+            }
+            return tempIndex;
+        }
+
+        /// <summary>
+        /// Loads the row offsets for <paramref name="file"/> from its <c>.idx</c>
+        /// companion when that file is newer than the CSV; otherwise regenerates the
+        /// index and, if generation took more than two seconds, saves it.
+        /// </summary>
+        /// <param name="file">Path of the CSV file.</param>
+        public void LoadIndexOfFile(string file)
+        {
+            string indexFile = $"{file}.idx";
+
+            // Compare last-write times: the creation time of the CSV does not change
+            // when it is edited in place, so it cannot reveal a stale index.
+            if (File.Exists(indexFile) && File.GetLastWriteTimeUtc(indexFile) > File.GetLastWriteTimeUtc(file))
+            {
+                // The cached file holds row offsets only; FieldIndex is not restored here.
+                List<long> cached = Index_LoadFile(indexFile);
+                _index.Clear();
+                _index.AddRange(cached);
+                return;
+            }
+
+            // Generate index
+            DateTime dtNow = DateTime.UtcNow;
+            GenerateIndex(file);
+            TimeSpan tsGenIndex = DateTime.UtcNow - dtNow;
+
+            // Save Index if expensive generation
+            if (tsGenIndex.TotalSeconds > 2)
+            {
+                Index_SaveFile(indexFile);
+            }
+        }
+    }
+}
diff --git a/CsvView.sln b/CsvView.sln
index 0edf147..1ccb1fb 100644
--- a/CsvView.sln
+++ b/CsvView.sln
@@ -7,6 +7,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvView", "CsvView.csproj",
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvLib", "CsvLib\CsvLib.csproj", "{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvLib.Tests", "CsvLib.Tests\CsvLib.Tests.csproj", "{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -21,6 +23,10 @@ Global
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Release|Any CPU.Build.0 = Release|Any CPU
+ {EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE