8 Commits
1_0 ... 1_1

8 changed files with 318 additions and 71 deletions

View File

@@ -1,5 +1,13 @@
# .Net library for compressing URLs
## Installation
You can install the NuGet package with the following command in the Package Manager console:
Install-Package VAR.UrlCompressor
Alternatively, you can copy and reference the assembly produced by building the VAR.UrlCompressor project.
## Usage
### VAR.UrlCompressor
@@ -10,16 +18,28 @@ Add the resulting assembly as a reference in your projects, and this line in your code:
Compress a URL with:
string compressedUrl = UrlCompressor.Compress("https://google.com");
// compressedUrl = "Hk30TGDxt8jOOW6"
Decompress a URL with:
string decompressedUrl = UrlCompressor.Decompress("Hk30TGDxt8jOOW6");
// decompressedUrl = "https://google.com"
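Both Compress and Decompress throw a plain Exception on bad input; in particular, Decompress validates an embedded checksum and the protocol marker. A defensive sketch (incomingToken is a hypothetical variable holding a compressed URL):

    try
    {
        string url = UrlCompressor.Decompress(incomingToken);
    }
    catch (Exception ex)
    {
        // e.g. "Checksum mismatch." for a corrupted or truncated token
        Console.WriteLine("Could not decompress: " + ex.Message);
    }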
For extra compression, use host conversions. For example:
Dictionary<string, string> hostConversions = new Dictionary<string, string> {
{ "google", "G" },
{ "com", "C" },
};
string compressedUrl = UrlCompressor.Compress("https://google.com", hostConversions);
// compressedUrl = "oMyuFVR41"
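To restore the original URL, Decompress must receive the same dictionary, since it maps the abbreviated values back to the original host parts:

    string decompressedUrl = UrlCompressor.Decompress(compressedUrl, hostConversions);
    // decompressedUrl = "https://google.com"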
### UrlCompressor.Tests
A simple console application that tests the basic functionality of the library.
## Building
A Visual Studio 2017 solution is provided. Simply click Build in the IDE.
A NuGet package can be built using:
VAR.UrlCompressor\Build.NuGet.cmd

View File

@@ -1,4 +1,5 @@
using System; using System;
using System.Collections.Generic;
namespace UrlCompressor.Tests namespace UrlCompressor.Tests
{ {
@@ -6,25 +7,31 @@ namespace UrlCompressor.Tests
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
TestUrl("http://google.com"); Dictionary<string, string> _hostConversions = new Dictionary<string, string> {
TestUrl("https://google.com"); { "com", "C" },
TestUrl("http://facebook.com"); { "net", "N" },
TestUrl("https://facebook.com"); { "org", "O" },
TestUrl("https://twitter.com"); };
TestUrl("https://twitter.com/Kableado");
TestUrl("https://github.com/Kableado"); TestUrl("http://google.com", _hostConversions);
TestUrl("https://varstudio.net"); TestUrl("https://google.com", _hostConversions);
TestUrl("http://facebook.com", _hostConversions);
TestUrl("https://facebook.com", _hostConversions);
TestUrl("https://twitter.com", _hostConversions);
TestUrl("https://twitter.com/Kableado", _hostConversions);
TestUrl("https://github.com/Kableado", _hostConversions);
TestUrl("https://varstudio.net", _hostConversions);
Console.Read(); Console.Read();
} }
private static bool TestUrl(string url) private static bool TestUrl(string url, Dictionary<string, string> _hostConversions)
{ {
Console.WriteLine("---------------------------------------------"); Console.WriteLine("---------------------------------------------");
Console.WriteLine(" Url: {0}", url); Console.WriteLine(" Url: {0}", url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url); string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url, _hostConversions);
Console.WriteLine(" CompressedUrl: {0}", compressedUrl); Console.WriteLine(" CompressedUrl: {0}", compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl); string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl, _hostConversions);
Console.WriteLine("DecompressedUrl: {0}", decompressedUrl); Console.WriteLine("DecompressedUrl: {0}", decompressedUrl);
if(url!= decompressedUrl) if(url!= decompressedUrl)
{ {

View File

@@ -53,6 +53,7 @@ namespace VAR.UrlCompressor
for (int i = 0; i < base62.Length; i++) for (int i = 0; i < base62.Length; i++)
{ {
int charIdx = Base62CodingSpace.IndexOf(base62[i]); int charIdx = Base62CodingSpace.IndexOf(base62[i]);
if (charIdx == -1) { continue; }
if ((i + 1) == base62.Length) if ((i + 1) == base62.Length)
{ {
// Last symbol // Last symbol
@@ -66,7 +67,7 @@ namespace VAR.UrlCompressor
bytes.WriteBit(bitPosition, 3 - pad, (charIdx & 0x04) > 0); bytes.WriteBit(bitPosition, 3 - pad, (charIdx & 0x04) > 0);
bytes.WriteBit(bitPosition, 4 - pad, (charIdx & 0x02) > 0); bytes.WriteBit(bitPosition, 4 - pad, (charIdx & 0x02) > 0);
bytes.WriteBit(bitPosition, 5 - pad, (charIdx & 0x01) > 0); bytes.WriteBit(bitPosition, 5 - pad, (charIdx & 0x01) > 0);
bitPosition += (6 - pad);
break; break;
} }
@@ -100,6 +101,8 @@ namespace VAR.UrlCompressor
} }
} }
Array.Resize(ref bytes, (int)Math.Ceiling((double)bitPosition / 8));
return bytes; return bytes;
} }
} }
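Two small robustness fixes here: characters that are not in the Base62 alphabet are skipped instead of being looked up as index -1, and the decoded buffer is trimmed to the bytes that were actually written. A self-contained sketch of that trimming arithmetic (the values are arbitrary):

    // If the decode loop wrote 42 bits, only ceil(42 / 8) = 6 bytes of the
    // oversized scratch buffer are meaningful; the rest are dropped.
    int bitPosition = 42;
    int byteCount = (int)Math.Ceiling((double)bitPosition / 8);   // 6
    byte[] bytes = new byte[16];                                   // oversized buffer
    Array.Resize(ref bytes, byteCount);                            // bytes.Length == 6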

View File

@@ -65,32 +65,36 @@ namespace VAR.UrlCompressor
} }
} }
private int _bitPosition = 0;
private List<bool> _encodedSymbol = new List<bool>();
private byte[] _scratch = null;
private void EncodeChar(char data)
{
_encodedSymbol.Clear();
_encodedSymbol = Root.Traverse(data, _encodedSymbol);
foreach (bool v in _encodedSymbol)
{
_scratch.WriteBit(_bitPosition, 0, v);
_bitPosition++;
}
}
public byte[] Encode(byte[] data) public byte[] Encode(byte[] data)
{ {
byte[] scratch = new byte[data.Length * 2]; _scratch = new byte[data.Length * 2];
int bitPosition = 0; _bitPosition = 0;
var encodedSymbol = new List<bool>();
for (int i = 0; i < data.Length; i++) for (int i = 0; i < data.Length; i++)
{ {
encodedSymbol.Clear(); EncodeChar((char)data[i]);
encodedSymbol = Root.Traverse((char)data[i], encodedSymbol);
foreach(bool v in encodedSymbol)
{
scratch.WriteBit(bitPosition, 0, v);
bitPosition++;
} }
} EncodeChar(EOD);
encodedSymbol.Clear();
encodedSymbol = Root.Traverse(EOD, encodedSymbol); int byteLenght = (int)Math.Ceiling((double)_bitPosition / 8);
foreach (bool v in encodedSymbol)
{
scratch.WriteBit(bitPosition, 0, v);
bitPosition++;
}
int byteLenght = (int)Math.Ceiling((double)bitPosition / 8);
byte[] compressedData = new byte[byteLenght]; byte[] compressedData = new byte[byteLenght];
Array.Copy(scratch, compressedData, byteLenght); Array.Copy(_scratch, compressedData, byteLenght);
_scratch = null;
return compressedData; return compressedData;
} }
@@ -98,15 +102,15 @@ namespace VAR.UrlCompressor
public byte[] Decode(byte[] data) public byte[] Decode(byte[] data)
{ {
HuffmanTreeNode current = Root; HuffmanTreeNode current = Root;
byte[] scratch = new byte[data.Length]; _scratch = new byte[data.Length];
int bitPosition = 0; _bitPosition = 0;
int bytePosition = 0; int bytePosition = 0;
int lenght = data.Length * 8; int lenght = data.Length * 8;
while (bitPosition < lenght) while (_bitPosition < lenght)
{ {
bool bit = data.ReadBit(bitPosition, 0); bool bit = data.ReadBit(_bitPosition, 0);
bitPosition++; _bitPosition++;
if (bit) if (bit)
{ {
if (current.Right != null) if (current.Right != null)
@@ -125,14 +129,15 @@ namespace VAR.UrlCompressor
if (current.IsLeaf()) if (current.IsLeaf())
{ {
if (current.Symbol == EOD) { break; } if (current.Symbol == EOD) { break; }
scratch = scratch.WriteByte(bytePosition, (byte)current.Symbol); _scratch = _scratch.WriteByte(bytePosition, (byte)current.Symbol);
bytePosition++; bytePosition++;
current = Root; current = Root;
} }
} }
byte[] decompressedData = new byte[bytePosition]; byte[] decompressedData = new byte[bytePosition];
Array.Copy(scratch, decompressedData, bytePosition); Array.Copy(_scratch, decompressedData, bytePosition);
_scratch = null;
return decompressedData; return decompressedData;
} }

View File

@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
[assembly: AssemblyCulture("")] [assembly: AssemblyCulture("")]
[assembly: ComVisible(false)] [assembly: ComVisible(false)]
[assembly: Guid("016ae05d-12af-40c6-8d0c-064970004f0b")] [assembly: Guid("016ae05d-12af-40c6-8d0c-064970004f0b")]
[assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.1.*")]

VAR.UrlCompressor/Url.cs (new file)
View File

@@ -0,0 +1,136 @@
using System;
namespace VAR.UrlCompressor
{
class Url
{
public string Protocol { get; set; }
public string Host { get; set; }
public string Port { get; set; }
public string Path { get; set; }
private Url() { }
private enum ParseStatus
{
ParsingProtocol,
ParsingHost,
ParsingPort,
ParsingPath,
};
private ParseStatus _status = ParseStatus.ParsingProtocol;
private int _i0 = 0;
private int _i = 0;
private void ResetParser()
{
_status = ParseStatus.ParsingProtocol;
_i0 = 0;
_i = 0;
}
private void ParseUrl(string url)
{
while (_i < url.Length)
{
switch (_status)
{
case ParseStatus.ParsingProtocol:
if (url[_i] == ':')
{
Protocol = url.Substring(_i0, _i - _i0);
if (_i + 2 >= url.Length)
{
throw new Exception(string.Format("Unexpected end of URL, while parsing protocol. \"{0}\"", url));
}
if (url[_i + 1] != '/' || url[_i + 2] != '/')
{
throw new Exception(string.Format("Expected \"://\" after the protocol. \"{0}\"", url));
}
_status = ParseStatus.ParsingHost;
_i0 = _i + 3;
_i = _i0;
}
break;
case ParseStatus.ParsingHost:
if (char.IsLetterOrDigit(url[_i]) == false && url[_i] != '.')
{
Host = url.Substring(_i0, _i - _i0);
_i0 = _i;
if (url[_i] == ':')
{
_status = ParseStatus.ParsingPort;
}
else
{
_status = ParseStatus.ParsingPath;
}
}
break;
case ParseStatus.ParsingPort:
if (char.IsDigit(url[_i]) == false)
{
Port = url.Substring(_i0, _i - _i0);
_i0 = _i;
_status = ParseStatus.ParsingPath;
}
break;
case ParseStatus.ParsingPath:
_i = url.Length - 1;
break;
}
_i++;
}
switch (_status)
{
case ParseStatus.ParsingProtocol:
Protocol = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingHost:
Host = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingPort:
Port = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingPath:
Path = url.Substring(_i0, _i - _i0);
break;
}
}
public static Url CreateFromString(string url)
{
var newUrl = new Url();
newUrl.ParseUrl(url);
return newUrl;
}
public static Url CreateFromShortString(string url)
{
var newUrl = new Url()
{
Protocol = url[0].ToString(),
_status = ParseStatus.ParsingHost,
_i = 1,
_i0 = 1,
};
newUrl.ParseUrl(url);
return newUrl;
}
public override string ToString()
{
return string.Format("{0}://{1}{2}{3}", Protocol, Host, Port, Path);
}
public string ToShortString()
{
return string.Format("{0}{1}{2}{3}", Protocol[0], Host, Port, Path);
}
}
}
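A quick sketch of what the parser produces for a typical URL (Url is a non-public class, so this only compiles inside the VAR.UrlCompressor assembly; the example URL is arbitrary):

    Url url = Url.CreateFromString("https://example.com:8080/docs/index.html");
    // url.Protocol == "https"
    // url.Host     == "example.com"
    // url.Port     == ":8080"            (the leading ':' stays part of the port segment)
    // url.Path     == "/docs/index.html"
    // url.ToString()      -> "https://example.com:8080/docs/index.html"
    // url.ToShortString() -> "hexample.com:8080/docs/index.html"

CreateFromShortString reverses the short form by seeding the parser in the host state, which is how Decompress rebuilds the full URL after Huffman decoding.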

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic; using System;
using System.Collections.Generic;
using System.Text; using System.Text;
namespace VAR.UrlCompressor namespace VAR.UrlCompressor
@@ -81,21 +82,21 @@ namespace VAR.UrlCompressor
{ '8', 1000}, { '8', 1000},
{ '9', 1000}, { '9', 1000},
// Common simbols // Common symbols
{ ' ', 100}, { ' ', 100},
{ '!', 100}, { '!', 100},
{ '"', 100}, { '"', 100},
{ '#', 50000}, // NOTE: Exaggerate to minimize bitstream of this symbol '#' { '#', 20000}, // NOTE: Exaggerate to minimize bitstream of this symbol '#'
{ '$', 50000}, // NOTE: Exaggerate to minimize bitstream of this symbol '$' { '$', 20000}, // NOTE: Exaggerate to minimize bitstream of this symbol '$'
{ '%', 100}, { '%', 100},
{ '&', 100}, { '&', 100},
{ '\'', 100}, { '\'', 20000}, // NOTE: Exaggerate to minimize bitstream of this symbol '/'
{ '(', 100}, { '(', 100},
{ '*', 100}, { '*', 100},
{ '+', 100}, { '+', 100},
{ ',', 100}, { ',', 100},
{ '-', 100}, { '-', 100},
{ '.', 100}, { '.', 20000}, // NOTE: Exaggerate to minimize bitstream of this symbol '.'
{ '/', 100}, { '/', 100},
{ ':', 100}, { ':', 100},
{ ';', 100}, { ';', 100},
@@ -118,46 +119,120 @@ namespace VAR.UrlCompressor
_huffmanTree = new HuffmanTree(frequencies); _huffmanTree = new HuffmanTree(frequencies);
} }
public static string Compress(string url) private static void XorData(byte[] data, byte xorKey)
{
for (int i = 0; i < data.Length; i++)
{
data[i] = (byte)(data[i] ^ xorKey);
}
}
private static byte ChecksumCalculate(byte[] data)
{
byte checksum = 0;
foreach(byte b in data)
{
checksum = (byte)(checksum ^ b);
}
return checksum;
}
private static byte[] ChecksumAdd(byte[] data)
{
byte[] newData = new byte[data.Length + 1];
byte checksum = ChecksumCalculate(data);
XorData(data, checksum);
Array.Copy(data, 0, newData, 1, data.Length);
newData[0] = checksum;
return newData;
}
private static byte[] ChecksumCheck(byte[] data)
{
byte[] newData = new byte[data.Length - 1];
Array.Copy(data, 1, newData, 0, data.Length - 1);
byte oldChecksum = data[0];
XorData(newData, oldChecksum);
byte checksum = ChecksumCalculate(newData);
if (checksum != oldChecksum) { throw new Exception("Checksum mismatch."); }
return newData;
}
public static string Compress(string url, Dictionary<string, string> hostConversions = null)
{ {
InitHuffmanTree(); InitHuffmanTree();
// Replace protocol indicator Url oUrl = Url.CreateFromString(url);
if (url.StartsWith("https://") || url.StartsWith("HTTPS://"))
// "Compress" protocol
if (oUrl.Protocol == "http" || oUrl.Protocol == null) { oUrl.Protocol = "#"; }
else if (oUrl.Protocol == "https") { oUrl.Protocol = "$"; }
else if (oUrl.Protocol == "ftp") { oUrl.Protocol = "F"; }
else { throw new Exception(string.Format("Unknown protocol \"{0}\"", oUrl.Protocol)); }
if (hostConversions != null)
{ {
url = string.Format("${0}", url.Substring("https://".Length)); // "Compress" hosts
string[] urlHostParts = oUrl.Host.Split('.');
for (int i = 0; i < urlHostParts.Length; i++)
{
foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Key)
{
urlHostParts[i] = hostConversion.Value;
break;
} }
if (url.StartsWith("http://") || url.StartsWith("HTTP://")) }
{ }
url = string.Format("#{0}", url.Substring("http://".Length)); oUrl.Host = string.Join(".", urlHostParts);
} }
url = oUrl.ToShortString();
// Reduce entropy
byte[] urlBytes = Encoding.ASCII.GetBytes(url); byte[] urlBytes = Encoding.ASCII.GetBytes(url);
urlBytes = _huffmanTree.Encode(urlBytes);
byte[] compressedUrlBytes = _huffmanTree.Encode(urlBytes); urlBytes = ChecksumAdd(urlBytes);
return Base62.Encode(urlBytes);
return Base62.Encode(compressedUrlBytes);
} }
public static string Decompress(string compressedUrl) public static string Decompress(string compressedUrl, Dictionary<string, string> hostConversions = null)
{ {
InitHuffmanTree(); InitHuffmanTree();
byte[] urlBytes = Base62.Decode(compressedUrl); byte[] urlBytes = Base62.Decode(compressedUrl);
urlBytes = ChecksumCheck(urlBytes);
urlBytes = _huffmanTree.Decode(urlBytes);
string url = Encoding.ASCII.GetString(urlBytes);
byte[] decompressedUrlBytes = _huffmanTree.Decode(urlBytes); Url oUrl = Url.CreateFromShortString(url);
string url = Encoding.ASCII.GetString(decompressedUrlBytes); // "Decompress" protocol
if (oUrl.Protocol == "#") { oUrl.Protocol = "http"; }
else if (oUrl.Protocol == "$") { oUrl.Protocol = "https"; }
else if (oUrl.Protocol == "F") { oUrl.Protocol = "ftp"; }
else { throw new Exception(string.Format("Unknown protocol \"{0}\"", oUrl.Protocol)); }
// Restore protocol indicator if (hostConversions != null)
if (url.StartsWith("#"))
{ {
url = string.Format("http://{0}", url.Substring(1)); // "Decompress" hosts
} string[] urlHostParts = oUrl.Host.Split('.');
if (url.StartsWith("$")) for (int i = 0; i < urlHostParts.Length; i++)
{ {
url = string.Format("https://{0}", url.Substring(1)); foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Value)
{
urlHostParts[i] = hostConversion.Key;
break;
} }
}
}
oUrl.Host = string.Join(".", urlHostParts);
}
url = oUrl.ToString();
return url; return url;
} }
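The checksum added above is a single XOR-of-all-bytes value: ChecksumAdd masks every payload byte with it and prepends it, and ChecksumCheck reverses the mask and re-verifies the XOR before Huffman decoding. A small worked example with arbitrary bytes:

    byte[] payload = { 0x12, 0x34, 0x5A };
    byte checksum = (byte)(0x12 ^ 0x34 ^ 0x5A);        // 0x7C
    // ChecksumAdd stores { checksum, payload[i] ^ checksum, ... }
    //   = { 0x7C, 0x6E, 0x48, 0x26 }
    // ChecksumCheck strips the first byte, XORs the rest with it to restore
    // { 0x12, 0x34, 0x5A }, recomputes their XOR (0x7C), and throws
    // "Checksum mismatch." if it does not match the stored byte.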

View File

@@ -63,6 +63,7 @@
<Compile Include="Huffman.cs" /> <Compile Include="Huffman.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="UrlCompressor.cs" /> <Compile Include="UrlCompressor.cs" />
<Compile Include="Url.cs" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Build.NuGet.cmd" /> <None Include="Build.NuGet.cmd" />