142
CsvLib.Tests/CsvFieldIndexerTests.cs
Normal file
142
CsvLib.Tests/CsvFieldIndexerTests.cs
Normal file
@@ -0,0 +1,142 @@
|
||||
using CsvLib;
|
||||
|
||||
namespace CvsLib;
|
||||
|
||||
/// <summary>
/// Tests for <c>CsvFieldIndexer.GenerateIndex(TextReader)</c>.
/// </summary>
/// <remarks>
/// Multi-line inputs are spelled with an explicit <c>\n</c> instead of raw string literals:
/// the asserted offsets count every character, so they must not depend on whether this
/// source file was checked out with LF or CRLF line endings.
/// The row offset recorded after the last row is one past the input length because the
/// underlying reader also counts the read that reaches end of input.
/// </remarks>
public class CsvFieldIndexerTests
{
    #region GenerateIndex

    /// <summary>Empty input yields only the initial row offset and no field rows.</summary>
    [Fact]
    public void GenerateIndex__Empty()
    {
        // --- Arrange
        StringReader sr = new(string.Empty);

        // --- Act
        CsvFieldIndexer indexer = new();
        indexer.GenerateIndex(sr);

        // --- Assert
        Assert.Single(indexer.Index);
        Assert.Equal(0, indexer.Index[0]);

        Assert.Empty(indexer.FieldIndex);
    }

    /// <summary>A single unquoted line is one row holding one field.</summary>
    [Fact]
    public void GenerateIndex__PlainText__OneRow()
    {
        // --- Arrange
        StringReader sr = new("Hello World");

        // --- Act
        CsvFieldIndexer indexer = new();
        indexer.GenerateIndex(sr);

        // --- Assert
        Assert.Equal(2, indexer.Index.Count);
        Assert.Equal(0, indexer.Index[0]);
        Assert.Equal(12, indexer.Index[1]);

        Assert.Single(indexer.FieldIndex);
        Assert.Equal(2, indexer.FieldIndex[0].Count);
        Assert.Equal(0, indexer.FieldIndex[0][0]);
        Assert.Equal(10, indexer.FieldIndex[0][1]);
    }

    /// <summary>Two unquoted lines are two rows, each holding one field.</summary>
    [Fact]
    public void GenerateIndex__TwoLinesOfPlainText__TwoRows()
    {
        // --- Arrange
        StringReader sr = new("Hello World\nHello World");

        // --- Act
        CsvFieldIndexer indexer = new();
        indexer.GenerateIndex(sr);

        // --- Assert
        Assert.Equal(3, indexer.Index.Count);
        Assert.Equal(0, indexer.Index[0]);
        Assert.Equal(12, indexer.Index[1]);
        Assert.Equal(24, indexer.Index[2]);

        Assert.Equal(2, indexer.FieldIndex.Count);
        Assert.Equal(2, indexer.FieldIndex[0].Count);
        Assert.Equal(0, indexer.FieldIndex[0][0]);
        Assert.Equal(10, indexer.FieldIndex[0][1]);
        Assert.Equal(2, indexer.FieldIndex[1].Count);
        Assert.Equal(12, indexer.FieldIndex[1][0]);
        Assert.Equal(22, indexer.FieldIndex[1][1]);
    }

    /// <summary>Quoted lines: the field offsets exclude the surrounding quotes.</summary>
    [Fact]
    public void GenerateIndex__TwoLinesOfQuotedText__TwoRows()
    {
        // --- Arrange
        StringReader sr = new("\"Hello World\"\n\"Hello World\"");

        // --- Act
        CsvFieldIndexer indexer = new();
        indexer.GenerateIndex(sr);

        // --- Assert
        Assert.Equal(3, indexer.Index.Count);
        Assert.Equal(0, indexer.Index[0]);
        Assert.Equal(14, indexer.Index[1]);
        Assert.Equal(28, indexer.Index[2]);

        Assert.Equal(2, indexer.FieldIndex.Count);
        Assert.Equal(2, indexer.FieldIndex[0].Count);
        Assert.Equal(1, indexer.FieldIndex[0][0]);
        Assert.Equal(11, indexer.FieldIndex[0][1]);
        Assert.Equal(2, indexer.FieldIndex[1].Count);
        Assert.Equal(15, indexer.FieldIndex[1][0]);
        Assert.Equal(25, indexer.FieldIndex[1][1]);
    }

    /// <summary>Two quoted columns per line produce two start/end pairs per row.</summary>
    [Fact]
    public void GenerateIndex__TwoLinesWithTwoQuotedColumns__TwoRowsTwoFields()
    {
        // --- Arrange
        StringReader sr = new("\"Hello\",\"World\"\n\"Hello\",\"World\"");

        // --- Act
        CsvFieldIndexer indexer = new();
        indexer.GenerateIndex(sr);

        // --- Assert
        Assert.Equal(3, indexer.Index.Count);
        Assert.Equal(0, indexer.Index[0]);
        Assert.Equal(16, indexer.Index[1]);
        Assert.Equal(32, indexer.Index[2]);

        Assert.Equal(2, indexer.FieldIndex.Count);
        Assert.Equal(4, indexer.FieldIndex[0].Count);
        Assert.Equal(1, indexer.FieldIndex[0][0]);
        Assert.Equal(5, indexer.FieldIndex[0][1]);
        Assert.Equal(9, indexer.FieldIndex[0][2]);
        Assert.Equal(13, indexer.FieldIndex[0][3]);
        Assert.Equal(4, indexer.FieldIndex[1].Count);
        Assert.Equal(17, indexer.FieldIndex[1][0]);
        Assert.Equal(21, indexer.FieldIndex[1][1]);
        Assert.Equal(25, indexer.FieldIndex[1][2]);
        Assert.Equal(29, indexer.FieldIndex[1][3]);
    }

    #endregion GenerateIndex
}
|
||||
29
CsvLib.Tests/CsvLib.Tests.csproj
Normal file
29
CsvLib.Tests/CsvLib.Tests.csproj
Normal file
@@ -0,0 +1,29 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net7.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<RootNamespace>CvsLib</RootNamespace>
|
||||
|
||||
<IsPackable>false</IsPackable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
|
||||
<PackageReference Include="xunit" Version="2.4.2" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="coverlet.collector" Version="3.1.2">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\CsvLib\CsvLib.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
1
CsvLib.Tests/Usings.cs
Normal file
1
CsvLib.Tests/Usings.cs
Normal file
@@ -0,0 +1 @@
|
||||
global using Xunit;
|
||||
54
CsvLib/BufferedTextReader.cs
Normal file
54
CsvLib/BufferedTextReader.cs
Normal file
@@ -0,0 +1,54 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace CsvLib
|
||||
{
|
||||
/// <summary>
/// A <see cref="TextReader"/> wrapper that remembers every character read through it
/// and counts how many single-character reads have been issued.
/// </summary>
/// <remarks>
/// The wrapped reader is not disposed by this class; it stays owned by the caller.
/// </remarks>
public class BufferedTextReader : TextReader
{
    private readonly TextReader _baseReader;
    private int _position;
    private readonly StringBuilder _sbBuffer = new StringBuilder();

    /// <summary>Wraps <paramref name="baseReader"/>.</summary>
    /// <param name="baseReader">Reader that supplies the characters.</param>
    /// <exception cref="ArgumentNullException"><paramref name="baseReader"/> is null.</exception>
    public BufferedTextReader(TextReader baseReader)
    {
        if (baseReader == null)
        {
            throw new ArgumentNullException(nameof(baseReader));
        }

        _baseReader = baseReader;
    }

    /// <summary>
    /// Reads one character from the wrapped reader and appends it to the internal buffer.
    /// </summary>
    /// <returns>The character read, or -1 at end of input.</returns>
    public override int Read()
    {
        // The counter advances on every call, including the call that reaches end of
        // input; existing consumers of Position depend on that.
        _position++;

        int read = _baseReader.Read();
        if (read != -1)
        {
            _sbBuffer.Append((char)read);
        }

        return read;
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/>.
    /// </summary>
    /// <param name="buffer">Destination array.</param>
    /// <param name="index">Offset in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of characters to read.</param>
    /// <returns>The number of characters read; 0 at end of input.</returns>
    public override int Read(char[] buffer, int index, int count)
    {
        // The base implementation fills the array by calling Read() once per character,
        // so buffering and position tracking stay identical to the single-character path.
        // This also makes inherited members that rely on the block overload usable.
        return base.Read(buffer, index, count);
    }

    /// <summary>Returns the next character of the wrapped reader without consuming it.</summary>
    /// <returns>The next character, or -1 when none is available.</returns>
    public override int Peek()
    {
        return _baseReader.Peek();
    }

    /// <summary>Number of single-character reads issued so far (see <see cref="Read()"/>).</summary>
    public int Position
    {
        get { return _position; }
    }

    /// <summary>Returns the characters read since construction or the last <see cref="CleanBuffer"/>.</summary>
    public string GetBuffer()
    {
        return _sbBuffer.ToString();
    }

    /// <summary>Discards the remembered characters; <see cref="Position"/> is not reset.</summary>
    public void CleanBuffer()
    {
        _sbBuffer.Clear();
    }
}
|
||||
}
|
||||
205
CsvLib/CsvFieldIndexer.cs
Normal file
205
CsvLib/CsvFieldIndexer.cs
Normal file
@@ -0,0 +1,205 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace CsvLib
|
||||
{
|
||||
/// <summary>
/// Scans delimited text and records where each row starts (<see cref="Index"/>) and,
/// for every row, where the content of each non-empty field starts and ends
/// (<see cref="FieldIndex"/>).
/// </summary>
/// <remarks>
/// Offsets are reader positions as reported by <see cref="BufferedTextReader.Position"/>,
/// i.e. they count characters read, not bytes.
/// </remarks>
public class CsvFieldIndexer
{
    // Index generation slower than this many seconds is worth caching on disk.
    private const double SaveThresholdSeconds = 2;

    // True while scanning between an opening and a closing quote. The flag survives
    // across physical lines so that a quoted field containing line breaks stays in one row.
    private bool _insideString;

    private readonly char _separator;
    private readonly char _quoteChar;
    private readonly char _escapeChar;

    /// <summary>Creates an indexer for the given dialect.</summary>
    /// <param name="separator">Character that separates fields outside quotes.</param>
    /// <param name="quoteChar">Character that opens and closes a quoted section.</param>
    /// <param name="escapeChar">Character that, inside quotes, makes the next character literal.</param>
    public CsvFieldIndexer(char separator = ',', char quoteChar = '"', char escapeChar = '\\')
    {
        _separator = separator;
        _quoteChar = quoteChar;
        _escapeChar = escapeChar;
    }

    private List<long> _index = new List<long>();

    /// <summary>
    /// Row start offsets. The first entry is always 0; one entry is appended after each
    /// completed row, holding the reader position at that point.
    /// </summary>
    public List<long> Index { get { return _index; } }

    private List<List<long>> _fieldIndex = new List<List<long>>();

    /// <summary>
    /// One list per row, holding flat start/end pairs (both inclusive) for the content of
    /// each field. Quote characters are not part of the content, and fields without any
    /// content character contribute no pair.
    /// </summary>
    public List<List<long>> FieldIndex { get { return _fieldIndex; } }

    /// <summary>
    /// Advances the quote state over one physical line without recording anything, so the
    /// caller can tell whether the row continues on the next line.
    /// </summary>
    /// <param name="line">Line content without its line terminator.</param>
    private void DummyParser(string line)
    {
        for (int i = 0; i < line.Length; i++)
        {
            char c = line[i];
            if (c == _separator && !_insideString)
            {
                // Separators outside quotes do not affect the quote state.
            }
            else if (c == _quoteChar)
            {
                _insideString = !_insideString;
            }
            else if (c == _escapeChar && _insideString && i + 1 < line.Length)
            {
                // Skip the escaped character. When the escape is the last character of
                // the line there is nothing to skip (it escapes the line break).
                i++;
            }
        }
    }

    /// <summary>
    /// Computes the start/end offsets of the fields of one complete row.
    /// </summary>
    /// <param name="line">Raw row text, possibly including its line terminator.</param>
    /// <param name="lineOffset">Offset of the first character of <paramref name="line"/>.</param>
    /// <returns>Flat list of start/end pairs, one pair per field that has content.</returns>
    private List<long> ParseLineIndex(string line, long lineOffset)
    {
        List<long> fieldPositions = new List<long>();
        long? fieldStartPosition = null;
        long? fieldEndPosition = null;

        for (int i = 0; i < line.Length; i++)
        {
            char c = line[i];
            if (c == _separator && !_insideString)
            {
                AppendField(fieldPositions, fieldStartPosition, fieldEndPosition);
                fieldStartPosition = null;
                fieldEndPosition = null;
            }
            else if (c == _quoteChar)
            {
                _insideString = !_insideString;
            }
            else if (c == _escapeChar && _insideString)
            {
                // The escape character and the character it escapes are skipped; they do
                // not move the field boundaries. Guard against an escape at the very end.
                if (i + 1 < line.Length)
                {
                    i++;
                }
            }
            else if ((c == '\n' || c == '\r') && !_insideString)
            {
                // An unquoted line terminator ends the row.
                break;
            }
            else
            {
                long absolutePosition = lineOffset + i;
                if (fieldStartPosition == null)
                {
                    fieldStartPosition = absolutePosition;
                }

                fieldEndPosition = absolutePosition;
            }
        }

        // A row that ends inside an unterminated quote does not emit its trailing field.
        if (!_insideString)
        {
            AppendField(fieldPositions, fieldStartPosition, fieldEndPosition);
        }

        return fieldPositions;
    }

    /// <summary>Appends a start/end pair when the field has at least one content character.</summary>
    private static void AppendField(List<long> fieldPositions, long? start, long? end)
    {
        if (start != null && end != null)
        {
            fieldPositions.Add(start.Value);
            fieldPositions.Add(end.Value);
        }
    }

    /// <summary>Generates the indexes for the file at <paramref name="file"/>.</summary>
    /// <param name="file">Path of the file to scan.</param>
    public void GenerateIndex(string file)
    {
        // Read-only access is enough for scanning and also works on read-only files;
        // the two-argument FileStream constructor would request read/write access.
        using (FileStream stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader streamReader = new StreamReader(stream, Encoding.Default, true, 4096))
        {
            GenerateIndex(streamReader);
        }
    }

    /// <summary>
    /// Generates the indexes for the text supplied by <paramref name="textReader"/>,
    /// replacing any previously generated content of <see cref="Index"/> and
    /// <see cref="FieldIndex"/>.
    /// </summary>
    /// <param name="textReader">Source text; it is read to the end but not disposed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="textReader"/> is null.</exception>
    public void GenerateIndex(TextReader textReader)
    {
        if (textReader == null)
        {
            throw new ArgumentNullException(nameof(textReader));
        }

        _insideString = false;
        _index.Clear();
        _index.Add(0);
        // Reset the per-row field data as well, otherwise a second run would append to the
        // rows of the first one and FieldIndex would no longer line up with Index.
        _fieldIndex.Clear();

        using (BufferedTextReader reader = new BufferedTextReader(textReader))
        {
            string currentLine;
            while ((currentLine = reader.ReadLine()) != null)
            {
                DummyParser(currentLine);
                if (_insideString)
                {
                    // Still inside a quoted field: the row continues on the next line.
                    continue;
                }

                // The reader's buffer holds the raw text of the whole row, including any
                // embedded line breaks consumed while the quote was open.
                string fullLine = reader.GetBuffer();
                reader.CleanBuffer();

                long rowStart = _index[_index.Count - 1];
                _fieldIndex.Add(ParseLineIndex(fullLine, rowStart));

                _index.Add(reader.Position);
            }
        }
    }

    /// <summary>Writes the row offsets to <paramref name="indexFile"/>: a count followed by the values.</summary>
    private void Index_SaveFile(string indexFile)
    {
        // FileMode.Create replaces an existing file, so no separate delete is needed.
        using (Stream streamOut = new FileStream(indexFile, FileMode.Create, FileAccess.Write))
        using (BinaryWriter binWriter = new BinaryWriter(streamOut))
        {
            binWriter.Write(_index.Count);
            foreach (long offset in _index)
            {
                binWriter.Write(offset);
            }
        }
    }

    /// <summary>Reads row offsets previously written by <see cref="Index_SaveFile"/>.</summary>
    private static List<long> Index_LoadFile(string indexFile)
    {
        List<long> tempIndex = new List<long>();

        using (Stream streamIn = new FileStream(indexFile, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (BinaryReader binReader = new BinaryReader(streamIn))
        {
            int numRegs = binReader.ReadInt32();
            for (int i = 0; i < numRegs; i++)
            {
                tempIndex.Add(binReader.ReadInt64());
            }
        }

        return tempIndex;
    }

    /// <summary>
    /// Loads the row index of <paramref name="file"/> from its <c>.idx</c> companion when
    /// that companion is newer than the file; otherwise generates the index and, when
    /// generation was slow, saves it for next time.
    /// </summary>
    /// <remarks>
    /// The companion file stores row offsets only, so <see cref="FieldIndex"/> is empty
    /// after a load from it.
    /// </remarks>
    /// <param name="file">Path of the file to index.</param>
    public void LoadIndexOfFile(string file)
    {
        string indexFile = $"{file}.idx";

        // Compare modification times: the creation time does not change when the data
        // file is edited, which would let an outdated index be reused.
        if (File.Exists(indexFile) && File.GetLastWriteTimeUtc(indexFile) > File.GetLastWriteTimeUtc(file))
        {
            _index = Index_LoadFile(indexFile);
            // Field offsets from an earlier run would not belong to the loaded rows.
            _fieldIndex.Clear();
            return;
        }

        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        GenerateIndex(file);
        timer.Stop();

        // Save the index only when generating it was expensive.
        if (timer.Elapsed.TotalSeconds > SaveThresholdSeconds)
        {
            Index_SaveFile(indexFile);
        }
    }
}
|
||||
}
|
||||
@@ -7,6 +7,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvView", "CsvView.csproj",
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvLib", "CsvLib\CsvLib.csproj", "{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsvLib.Tests", "CsvLib.Tests\CsvLib.Tests.csproj", "{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -21,6 +23,10 @@ Global
|
||||
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{EB0FDB60-8B9D-401C-85A8-4CF4105D5063}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{EC5C84D8-1CDE-4AED-9C16-6C4086A20893}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
Reference in New Issue
Block a user