UrlCompressor: Replace typical host name parts with short codes for extra reduction. The conversion table can be user-defined.

This commit is contained in:
2017-07-13 00:01:05 +02:00
parent 675b87e6ee
commit 9fe35362fc
2 changed files with 76 additions and 35 deletions

View File

@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
namespace UrlCompressor.Tests
{
@@ -6,25 +7,31 @@ namespace UrlCompressor.Tests
{
// Entry point of the console test program: calls TestUrl on a fixed list of
// sample URLs and then waits on Console.Read().
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// stripped, so pre-commit and post-commit lines appear side by side — confirm
// against the repository before treating the body as compilable code.
static void Main(string[] args)
{
// Single-argument TestUrl calls — presumably the pre-commit lines; verify.
TestUrl("http://google.com");
TestUrl("https://google.com");
TestUrl("http://facebook.com");
TestUrl("https://facebook.com");
TestUrl("https://twitter.com");
TestUrl("https://twitter.com/Kableado");
TestUrl("https://github.com/Kableado");
TestUrl("https://varstudio.net");
// Conversion table handed to every TestUrl call below: each key is a host-name
// part and each value is the short code paired with it.
Dictionary<string, string> _hostConversions = new Dictionary<string, string> {
{ "com", "C" },
{ "net", "N" },
{ "org", "O" },
};
// The same sample URLs as above, now passing the conversion table along.
TestUrl("http://google.com", _hostConversions);
TestUrl("https://google.com", _hostConversions);
TestUrl("http://facebook.com", _hostConversions);
TestUrl("https://facebook.com", _hostConversions);
TestUrl("https://twitter.com", _hostConversions);
TestUrl("https://twitter.com/Kableado", _hostConversions);
TestUrl("https://github.com/Kableado", _hostConversions);
TestUrl("https://varstudio.net", _hostConversions);
// Wait for console input before returning — presumably to keep the window open.
Console.Read();
}
private static bool TestUrl(string url)
private static bool TestUrl(string url, Dictionary<string, string> _hostConversions)
{
Console.WriteLine("---------------------------------------------");
Console.WriteLine(" Url: {0}", url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url);
string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url, _hostConversions);
Console.WriteLine(" CompressedUrl: {0}", compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl);
string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl, _hostConversions);
Console.WriteLine("DecompressedUrl: {0}", decompressedUrl);
if(url!= decompressedUrl)
{

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Text;
namespace VAR.UrlCompressor
@@ -118,46 +119,79 @@ namespace VAR.UrlCompressor
_huffmanTree = new HuffmanTree(frequencies);
}
// Compresses a URL into a Base62 string. The two-argument form parses the URL
// with Url.CreateFromString, replaces the protocol with a one-character marker,
// optionally replaces host-name parts using hostConversions, serializes with
// ToShortString, encodes the ASCII bytes with _huffmanTree and returns the
// result of Base62.Encode.
// Throws Exception when the protocol is not http, https, ftp or null.
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// stripped; two signatures and two variants of several statements appear
// interleaved — confirm against the repository which lines are current.
public static string Compress(string url)
public static string Compress(string url, Dictionary<string, string> hostConversions = null)
{
// Presumably prepares the shared _huffmanTree used further down — verify.
InitHuffmanTree();
// Replace protocol indicator
// Prefix-based handling working directly on the string — presumably the
// pre-commit variant; verify.
if (url.StartsWith("https://") || url.StartsWith("HTTPS://"))
Url oUrl = Url.CreateFromString(url);
// "Compress" protocol
// http (or no protocol at all) -> "#", https -> "$", ftp -> "F".
if (oUrl.Protocol == "http" || oUrl.Protocol == null) { oUrl.Protocol = "#"; }
else if (oUrl.Protocol == "https") { oUrl.Protocol = "$"; }
else if (oUrl.Protocol == "ftp") { oUrl.Protocol = "F"; }
// NOTE(review): "Unkown" in the message is a typo for "Unknown"; a more
// specific exception type than Exception would be easier for callers to catch.
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
// Host conversion is optional: it is skipped when no table is supplied.
if (hostConversions != null)
{
url = string.Format("${0}", url.Substring("https://".Length));
// "Compress" hosts
// Split the host on '.' and swap every part that exactly equals a table key
// for that key's value; the first matching entry wins (break).
string[] urlHostParts = oUrl.Host.Split('.');
for (int i = 0; i < urlHostParts.Length; i++)
{
foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Key)
{
urlHostParts[i] = hostConversion.Value;
break;
}
// The "http://" prefix replacement below appears to be the remainder of the
// prefix-based variant interleaved into this loop — verify.
if (url.StartsWith("http://") || url.StartsWith("HTTP://"))
{
url = string.Format("#{0}", url.Substring("http://".Length));
}
}
oUrl.Host = string.Join(".", urlHostParts);
}
// Serialize the modified Url back into the string that gets encoded.
url = oUrl.ToShortString();
// Reduce entropy
// NOTE(review): ASCII encoding cannot represent non-ASCII characters —
// confirm that only ASCII URLs are expected here.
byte[] urlBytes = Encoding.ASCII.GetBytes(url);
// Two variants of the encode-and-return step follow (separate
// compressedUrlBytes buffer vs. reusing urlBytes) — presumably old and new.
byte[] compressedUrlBytes = _huffmanTree.Encode(urlBytes);
return Base62.Encode(compressedUrlBytes);
urlBytes = _huffmanTree.Encode(urlBytes);
return Base62.Encode(urlBytes);
}
// Reverses Compress. The two-argument form Base62-decodes the input, decodes
// the bytes with _huffmanTree, parses the ASCII text with
// Url.CreateFromShortString, maps the one-character protocol marker back to its
// name, optionally maps host-name parts back using hostConversions, and returns
// oUrl.ToString().
// Throws Exception when the protocol marker is not "#", "$" or "F".
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// stripped; two signatures and duplicate declarations of `url` appear
// interleaved — confirm against the repository which lines are current.
public static string Decompress(string compressedUrl)
public static string Decompress(string compressedUrl, Dictionary<string, string> hostConversions = null)
{
// Presumably prepares the shared _huffmanTree used further down — verify.
InitHuffmanTree();
byte[] urlBytes = Base62.Decode(compressedUrl);
urlBytes = _huffmanTree.Decode(urlBytes);
string url = Encoding.ASCII.GetString(urlBytes);
// The separate decompressedUrlBytes buffer and the second `string url`
// declaration below look like the other revision of the three lines above.
byte[] decompressedUrlBytes = _huffmanTree.Decode(urlBytes);
Url oUrl = Url.CreateFromShortString(url);
string url = Encoding.ASCII.GetString(decompressedUrlBytes);
// "Decompress" protocol
// "#" -> http, "$" -> https, "F" -> ftp: the inverse of the markers written by
// Compress.
if (oUrl.Protocol == "#") { oUrl.Protocol = "http"; }
else if (oUrl.Protocol == "$") { oUrl.Protocol = "https"; }
else if (oUrl.Protocol == "F") { oUrl.Protocol = "ftp"; }
// NOTE(review): "Unkown" in the message is a typo for "Unknown"; a more
// specific exception type than Exception would be easier for callers to catch.
else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); }
// Restore protocol indicator
// Prefix-based restoration working directly on the string — presumably the
// pre-commit variant; verify.
if (url.StartsWith("#"))
// Host conversion is optional: it is skipped when no table is supplied.
if (hostConversions != null)
{
url = string.Format("http://{0}", url.Substring(1));
}
if (url.StartsWith("$"))
// "Decompress" hosts
// Split the host on '.' and swap every part that exactly equals a table value
// for the matching key; the first matching entry wins (break).
// NOTE(review): a part that was already equal to a table value before
// compression is also rewritten to the key here, so such a URL would not
// round-trip; the table must also match the one given to Compress.
string[] urlHostParts = oUrl.Host.Split('.');
for (int i = 0; i < urlHostParts.Length; i++)
{
url = string.Format("https://{0}", url.Substring(1));
foreach (KeyValuePair<string, string> hostConversion in hostConversions)
{
if (urlHostParts[i] == hostConversion.Value)
{
urlHostParts[i] = hostConversion.Key;
break;
}
}
}
oUrl.Host = string.Join(".", urlHostParts);
}
// Serialize the restored Url and return it.
url = oUrl.ToString();
return url;
}