UrlCompressor: Use typical host names for extra reduction. Can be user defined.

This commit is contained in:
2017-07-13 00:01:05 +02:00
parent 675b87e6ee
commit 9fe35362fc
2 changed files with 76 additions and 35 deletions

View File

@@ -1,4 +1,5 @@
using System; using System;
using System.Collections.Generic;
namespace UrlCompressor.Tests namespace UrlCompressor.Tests
{ {
@@ -6,25 +7,31 @@ namespace UrlCompressor.Tests
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
TestUrl("http://google.com"); Dictionary<string, string> _hostConversions = new Dictionary<string, string> {
TestUrl("https://google.com"); { "com", "C" },
TestUrl("http://facebook.com"); { "net", "N" },
TestUrl("https://facebook.com"); { "org", "O" },
TestUrl("https://twitter.com"); };
TestUrl("https://twitter.com/Kableado");
TestUrl("https://github.com/Kableado"); TestUrl("http://google.com", _hostConversions);
TestUrl("https://varstudio.net"); TestUrl("https://google.com", _hostConversions);
TestUrl("http://facebook.com", _hostConversions);
TestUrl("https://facebook.com", _hostConversions);
TestUrl("https://twitter.com", _hostConversions);
TestUrl("https://twitter.com/Kableado", _hostConversions);
TestUrl("https://github.com/Kableado", _hostConversions);
TestUrl("https://varstudio.net", _hostConversions);
Console.Read(); Console.Read();
} }
private static bool TestUrl(string url) private static bool TestUrl(string url, Dictionary<string, string> _hostConversions)
{ {
Console.WriteLine("---------------------------------------------"); Console.WriteLine("---------------------------------------------");
Console.WriteLine(" Url: {0}", url); Console.WriteLine(" Url: {0}", url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url); string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url, _hostConversions);
Console.WriteLine(" CompressedUrl: {0}", compressedUrl); Console.WriteLine(" CompressedUrl: {0}", compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl); string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl, _hostConversions);
Console.WriteLine("DecompressedUrl: {0}", decompressedUrl); Console.WriteLine("DecompressedUrl: {0}", decompressedUrl);
if(url!= decompressedUrl) if(url!= decompressedUrl)
{ {

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic; using System;
using System.Collections.Generic;
using System.Text; using System.Text;
namespace VAR.UrlCompressor namespace VAR.UrlCompressor
@@ -118,46 +119,79 @@ namespace VAR.UrlCompressor
_huffmanTree = new HuffmanTree(frequencies); _huffmanTree = new HuffmanTree(frequencies);
} }
public static string Compress(string url) public static string Compress(string url, Dictionary<string, string> hostConversions = null)
{ {
InitHuffmanTree(); InitHuffmanTree();
// Replace protocol indicator Url oUrl = Url.CreateFromString(url);
if (url.StartsWith("https://") || url.StartsWith("HTTPS://"))
// "Compress" protocol
if (oUrl.Protocol == "http" || oUrl.Protocol == null) { oUrl.Protocol = "#"; }
else if (oUrl.Protocol == "https") { oUrl.Protocol = "$"; }
else if (oUrl.Protocol == "ftp") { oUrl.Protocol = "F"; }
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
if (hostConversions != null)
{ {
url = string.Format("${0}", url.Substring("https://".Length)); // "Compress" hosts
} string[] urlHostParts = oUrl.Host.Split('.');
if (url.StartsWith("http://") || url.StartsWith("HTTP://")) for (int i = 0; i < urlHostParts.Length; i++)
{ {
url = string.Format("#{0}", url.Substring("http://".Length)); foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Key)
{
urlHostParts[i] = hostConversion.Value;
break;
}
}
}
oUrl.Host = string.Join(".", urlHostParts);
} }
url = oUrl.ToShortString();
// Reduce entropy
byte[] urlBytes = Encoding.ASCII.GetBytes(url); byte[] urlBytes = Encoding.ASCII.GetBytes(url);
urlBytes = _huffmanTree.Encode(urlBytes);
byte[] compressedUrlBytes = _huffmanTree.Encode(urlBytes); return Base62.Encode(urlBytes);
return Base62.Encode(compressedUrlBytes);
} }
public static string Decompress(string compressedUrl) public static string Decompress(string compressedUrl, Dictionary<string, string> hostConversions = null)
{ {
InitHuffmanTree(); InitHuffmanTree();
byte[] urlBytes = Base62.Decode(compressedUrl); byte[] urlBytes = Base62.Decode(compressedUrl);
urlBytes = _huffmanTree.Decode(urlBytes);
string url = Encoding.ASCII.GetString(urlBytes);
byte[] decompressedUrlBytes = _huffmanTree.Decode(urlBytes); Url oUrl = Url.CreateFromShortString(url);
// "Decompress" protocol
if (oUrl.Protocol == "#") { oUrl.Protocol = "http"; }
else if (oUrl.Protocol == "$") { oUrl.Protocol = "https"; }
else if (oUrl.Protocol == "F") { oUrl.Protocol = "ftp"; }
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
string url = Encoding.ASCII.GetString(decompressedUrlBytes); if (hostConversions != null)
// Restore protocol indicator
if (url.StartsWith("#"))
{ {
url = string.Format("http://{0}", url.Substring(1)); // "Decompress" hosts
} string[] urlHostParts = oUrl.Host.Split('.');
if (url.StartsWith("$")) for (int i = 0; i < urlHostParts.Length; i++)
{ {
url = string.Format("https://{0}", url.Substring(1)); foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Value)
{
urlHostParts[i] = hostConversion.Key;
break;
}
}
}
oUrl.Host = string.Join(".", urlHostParts);
} }
url = oUrl.ToString();
return url; return url;
} }