Compare commits

..

6 Commits

7 changed files with 281 additions and 39 deletions

View File

@@ -1,5 +1,13 @@
# .Net library for compressing URLs
## Installation
You can install the NuGet package using this command on the package manager:
Install-Package VAR.UrlCompressor
Alternatively, you can copy and reference the assembly produced by the VAR.UrlCompressor project.
## Usage
### VAR.UrlCompressor
@@ -10,16 +18,28 @@ Add the resulting assembly as reference in your projects, and this line on code:
Compress an URL with:
string compressedUrl = UrlCompressor.Compress("https://google.com");
// compressedUrl = "Hk30TGDxt8jOOW6"
Decompress an URL with:
string decompressedUrl = UrlCompressor.Compress("xGncYbYfopHYpG0");
string decompressedUrl = UrlCompressor.Decompress("Hk30TGDxt8jOOW6");
// decompressedUrl = "https://google.com"
For extra compression use host conversions. For example:
Dictionary<string, string> hostConversions = new Dictionary<string, string> {
{ "google", "G" },
{ "com", "C" }
};
string compressedUrl = UrlCompressor.Compress("https://google.com", hostConversions);
// compressedUrl = "oMyuFVR41"
### UrlCompressor.Tests
It is a simple console application that tests the basic functionality of the library.
## Building
A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE.
A Visual Studio 2017 solution is provided. Simply, click build on the IDE.
A NuGet package can be built using:
VAR.UrlCompressor\Build.NuGet.cmd

View File

@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
namespace UrlCompressor.Tests
{
@@ -6,25 +7,31 @@ namespace UrlCompressor.Tests
{
static void Main(string[] args)
{
TestUrl("http://google.com");
TestUrl("https://google.com");
TestUrl("http://facebook.com");
TestUrl("https://facebook.com");
TestUrl("https://twitter.com");
TestUrl("https://twitter.com/Kableado");
TestUrl("https://github.com/Kableado");
TestUrl("https://varstudio.net");
Dictionary<string, string> _hostConversions = new Dictionary<string, string> {
{ "com", "C" },
{ "net", "N" },
{ "org", "O" },
};
TestUrl("http://google.com", _hostConversions);
TestUrl("https://google.com", _hostConversions);
TestUrl("http://facebook.com", _hostConversions);
TestUrl("https://facebook.com", _hostConversions);
TestUrl("https://twitter.com", _hostConversions);
TestUrl("https://twitter.com/Kableado", _hostConversions);
TestUrl("https://github.com/Kableado", _hostConversions);
TestUrl("https://varstudio.net", _hostConversions);
Console.Read();
}
private static bool TestUrl(string url)
private static bool TestUrl(string url, Dictionary<string, string> _hostConversions)
{
Console.WriteLine("---------------------------------------------");
Console.WriteLine(" Url: {0}", url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url, _hostConversions);
Console.WriteLine(" CompressedUrl: {0}", compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl, _hostConversions);
Console.WriteLine("DecompressedUrl: {0}", decompressedUrl);
if(url!= decompressedUrl)
{

View File

@@ -53,6 +53,7 @@ namespace VAR.UrlCompressor
for (int i = 0; i < base62.Length; i++)
{
int charIdx = Base62CodingSpace.IndexOf(base62[i]);
if (charIdx == -1) { continue; }
if ((i + 1) == base62.Length)
{
// Last symbol
@@ -66,7 +67,7 @@ namespace VAR.UrlCompressor
bytes.WriteBit(bitPosition, 3 - pad, (charIdx & 0x04) > 0);
bytes.WriteBit(bitPosition, 4 - pad, (charIdx & 0x02) > 0);
bytes.WriteBit(bitPosition, 5 - pad, (charIdx & 0x01) > 0);
bitPosition += (6 - pad);
break;
}
@@ -100,6 +101,8 @@ namespace VAR.UrlCompressor
}
}
Array.Resize(ref bytes, (int)Math.Ceiling((double)bitPosition / 8));
return bytes;
}
}

View File

@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
[assembly: AssemblyCulture("")]
[assembly: ComVisible(false)]
[assembly: Guid("016ae05d-12af-40c6-8d0c-064970004f0b")]
[assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.1.*")]

136
VAR.UrlCompressor/Url.cs Normal file
View File

@@ -0,0 +1,136 @@
using System;
namespace VAR.UrlCompressor
{
class Url
{
public string Protocol { get; set; }
public string Host { get; set; }
public string Port { get; set; }
public string Path { get; set; }
private Url() { }
private enum ParseStatus
{
ParsingProtocol,
ParsingHost,
ParsingPort,
ParsingPath,
};
private ParseStatus _status = ParseStatus.ParsingProtocol;
private int _i0 = 0;
private int _i = 0;
private void ResetParser()
{
_status = ParseStatus.ParsingProtocol;
_i0 = 0;
_i = 0;
}
private void ParseUrl(string url)
{
while (_i < url.Length)
{
switch (_status)
{
case ParseStatus.ParsingProtocol:
if (url[_i] == ':')
{
Protocol = url.Substring(_i0, _i - _i0);
if (_i + 2 >= url.Length)
{
throw new Exception(string.Format("Unexpected end of URL, while parsing protocol. \"{0}\"", url));
}
if (url[_i + 1] != '/' || url[_i + 2] != '/')
{
throw new Exception(string.Format("Unexpected end of URL, while parsing protocol. \"{0}\"", url));
}
_status = ParseStatus.ParsingHost;
_i0 = _i + 3;
_i = _i0;
}
break;
case ParseStatus.ParsingHost:
if (char.IsLetterOrDigit(url[_i]) == false && url[_i] != '.')
{
Host = url.Substring(_i0, _i - _i0);
_i0 = _i;
if (url[_i] == ':')
{
_status = ParseStatus.ParsingPort;
}
else
{
_status = ParseStatus.ParsingPath;
}
}
break;
case ParseStatus.ParsingPort:
if (char.IsDigit(url[_i]) == false)
{
Port = url.Substring(_i0, _i - _i0);
_i0 = _i;
_status = ParseStatus.ParsingPath;
}
break;
case ParseStatus.ParsingPath:
_i = url.Length - 1;
break;
}
_i++;
}
switch (_status)
{
case ParseStatus.ParsingProtocol:
Protocol = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingHost:
Host = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingPort:
Port = url.Substring(_i0, _i - _i0);
break;
case ParseStatus.ParsingPath:
Path = url.Substring(_i0, _i - _i0);
break;
}
}
public static Url CreateFromString(string url)
{
var newUrl = new Url();
newUrl.ParseUrl(url);
return newUrl;
}
public static Url CreateFromShortString(string url)
{
var newUrl = new Url()
{
Protocol = url[0].ToString(),
_status = ParseStatus.ParsingHost,
_i = 1,
_i0 = 1,
};
newUrl.ParseUrl(url);
return newUrl;
}
public override string ToString()
{
return string.Format("{0}://{1}{2}{3}", Protocol, Host, Port, Path);
}
public string ToShortString()
{
return string.Format("{0}{1}{2}{3}", Protocol[0], Host, Port, Path);
}
}
}

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Text;
namespace VAR.UrlCompressor
@@ -118,46 +119,120 @@ namespace VAR.UrlCompressor
_huffmanTree = new HuffmanTree(frequencies);
}
public static string Compress(string url)
private static void XorData(byte[] data, byte xorKey)
{
for (int i = 0; i < data.Length; i++)
{
data[i] = (byte)(data[i] ^ xorKey);
}
}
private static byte ChecksumCalculate(byte[] data)
{
byte checksum = 0;
foreach(byte b in data)
{
checksum = (byte)(checksum ^ b);
}
return checksum;
}
private static byte[] ChecksumAdd(byte[] data)
{
byte[] newData = new byte[data.Length + 1];
byte checksum = ChecksumCalculate(data);
XorData(data, checksum);
Array.Copy(data, 0, newData, 1, data.Length);
newData[0] = checksum;
return newData;
}
private static byte[] ChecksumCheck(byte[] data)
{
byte[] newData = new byte[data.Length - 1];
Array.Copy(data, 1, newData, 0, data.Length - 1);
byte oldChecksum = data[0];
XorData(newData, oldChecksum);
byte checksum = ChecksumCalculate(newData);
if (checksum != oldChecksum) { throw new Exception("Checksum mismatch."); }
return newData;
}
public static string Compress(string url, Dictionary<string, string> hostConversions = null)
{
InitHuffmanTree();
// Replace protocol indicator
if (url.StartsWith("https://") || url.StartsWith("HTTPS://"))
Url oUrl = Url.CreateFromString(url);
// "Compress" protocol
if (oUrl.Protocol == "http" || oUrl.Protocol == null) { oUrl.Protocol = "#"; }
else if (oUrl.Protocol == "https") { oUrl.Protocol = "$"; }
else if (oUrl.Protocol == "ftp") { oUrl.Protocol = "F"; }
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
if (hostConversions != null)
{
url = string.Format("${0}", url.Substring("https://".Length));
}
if (url.StartsWith("http://") || url.StartsWith("HTTP://"))
{
url = string.Format("#{0}", url.Substring("http://".Length));
// "Compress" hosts
string[] urlHostParts = oUrl.Host.Split('.');
for (int i = 0; i < urlHostParts.Length; i++)
{
foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Key)
{
urlHostParts[i] = hostConversion.Value;
break;
}
}
}
oUrl.Host = string.Join(".", urlHostParts);
}
url = oUrl.ToShortString();
// Reduce entropy
byte[] urlBytes = Encoding.ASCII.GetBytes(url);
byte[] compressedUrlBytes = _huffmanTree.Encode(urlBytes);
return Base62.Encode(compressedUrlBytes);
urlBytes = _huffmanTree.Encode(urlBytes);
urlBytes = ChecksumAdd(urlBytes);
return Base62.Encode(urlBytes);
}
public static string Decompress(string compressedUrl)
public static string Decompress(string compressedUrl, Dictionary<string, string> hostConversions = null)
{
InitHuffmanTree();
byte[] urlBytes = Base62.Decode(compressedUrl);
urlBytes = ChecksumCheck(urlBytes);
urlBytes = _huffmanTree.Decode(urlBytes);
string url = Encoding.ASCII.GetString(urlBytes);
byte[] decompressedUrlBytes = _huffmanTree.Decode(urlBytes);
Url oUrl = Url.CreateFromShortString(url);
// "Decompress" protocol
if (oUrl.Protocol == "#") { oUrl.Protocol = "http"; }
else if (oUrl.Protocol == "$") { oUrl.Protocol = "https"; }
else if (oUrl.Protocol == "F") { oUrl.Protocol = "ftp"; }
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
string url = Encoding.ASCII.GetString(decompressedUrlBytes);
// Restore protocol indicator
if (url.StartsWith("#"))
if (hostConversions != null)
{
url = string.Format("http://{0}", url.Substring(1));
}
if (url.StartsWith("$"))
{
url = string.Format("https://{0}", url.Substring(1));
// "Decompress" hosts
string[] urlHostParts = oUrl.Host.Split('.');
for (int i = 0; i < urlHostParts.Length; i++)
{
foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Value)
{
urlHostParts[i] = hostConversion.Key;
break;
}
}
}
oUrl.Host = string.Join(".", urlHostParts);
}
url = oUrl.ToString();
return url;
}

View File

@@ -63,6 +63,7 @@
<Compile Include="Huffman.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="UrlCompressor.cs" />
<Compile Include="Url.cs" />
</ItemGroup>
<ItemGroup>
<None Include="Build.NuGet.cmd" />