From 9fe35362fc0113dc037b9713a9c30123c060f16b Mon Sep 17 00:00:00 2001 From: "Valeriano A.R" Date: Thu, 13 Jul 2017 00:01:05 +0200 Subject: [PATCH] UrlCompressor: Use typical host names for extra reduction. Can be user defined. --- UrlCompressor.Tests/Program.cs | 29 +++++++---- VAR.UrlCompressor/UrlCompressor.cs | 82 +++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 35 deletions(-) diff --git a/UrlCompressor.Tests/Program.cs b/UrlCompressor.Tests/Program.cs index 03143aa..63a398e 100644 --- a/UrlCompressor.Tests/Program.cs +++ b/UrlCompressor.Tests/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; namespace UrlCompressor.Tests { @@ -6,25 +7,31 @@ namespace UrlCompressor.Tests { static void Main(string[] args) { - TestUrl("http://google.com"); - TestUrl("https://google.com"); - TestUrl("http://facebook.com"); - TestUrl("https://facebook.com"); - TestUrl("https://twitter.com"); - TestUrl("https://twitter.com/Kableado"); - TestUrl("https://github.com/Kableado"); - TestUrl("https://varstudio.net"); + Dictionary _hostConversions = new Dictionary { + { "com", "C" }, + { "net", "N" }, + { "org", "O" }, + }; + + TestUrl("http://google.com", _hostConversions); + TestUrl("https://google.com", _hostConversions); + TestUrl("http://facebook.com", _hostConversions); + TestUrl("https://facebook.com", _hostConversions); + TestUrl("https://twitter.com", _hostConversions); + TestUrl("https://twitter.com/Kableado", _hostConversions); + TestUrl("https://github.com/Kableado", _hostConversions); + TestUrl("https://varstudio.net", _hostConversions); Console.Read(); } - private static bool TestUrl(string url) + private static bool TestUrl(string url, Dictionary _hostConversions) { Console.WriteLine("---------------------------------------------"); Console.WriteLine(" Url: {0}", url); - string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url); + string compressedUrl = VAR.UrlCompressor.UrlCompressor.Compress(url, _hostConversions); Console.WriteLine(" CompressedUrl: {0}", compressedUrl); - string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl); + string decompressedUrl = VAR.UrlCompressor.UrlCompressor.Decompress(compressedUrl, _hostConversions); Console.WriteLine("DecompressedUrl: {0}", decompressedUrl); if(url!= decompressedUrl) { diff --git a/VAR.UrlCompressor/UrlCompressor.cs b/VAR.UrlCompressor/UrlCompressor.cs index 51637f3..8b10093 100644 --- a/VAR.UrlCompressor/UrlCompressor.cs +++ b/VAR.UrlCompressor/UrlCompressor.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Text; namespace VAR.UrlCompressor @@ -118,46 +119,79 @@ namespace VAR.UrlCompressor _huffmanTree = new HuffmanTree(frequencies); } - public static string Compress(string url) + public static string Compress(string url, Dictionary hostConversions = null) { InitHuffmanTree(); - // Replace protocol indicator - if (url.StartsWith("https://") || url.StartsWith("HTTPS://")) + Url oUrl = Url.CreateFromString(url); + + // "Compress" protocol + if (oUrl.Protocol == "http" || oUrl.Protocol == null) { oUrl.Protocol = "#"; } + else if (oUrl.Protocol == "https") { oUrl.Protocol = "$"; } + else if (oUrl.Protocol == "ftp") { oUrl.Protocol = "F"; } + else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); } + + if (hostConversions != null) { - url = string.Format("${0}", url.Substring("https://".Length)); - } - if (url.StartsWith("http://") || url.StartsWith("HTTP://")) - { - url = string.Format("#{0}", url.Substring("http://".Length)); + // "Compress" hosts + string[] urlHostParts = oUrl.Host.Split('.'); + for (int i = 0; i < urlHostParts.Length; i++) + { + foreach (KeyValuePair hostConversion in hostConversions) + { + if (urlHostParts[i] == hostConversion.Key) + { + urlHostParts[i] = hostConversion.Value; + break; + } + } + } + oUrl.Host = string.Join(".", urlHostParts); } + url = oUrl.ToShortString(); + + // Reduce entropy byte[] urlBytes = Encoding.ASCII.GetBytes(url); - - byte[] compressedUrlBytes = _huffmanTree.Encode(urlBytes); - - return Base62.Encode(compressedUrlBytes); + urlBytes = _huffmanTree.Encode(urlBytes); + return Base62.Encode(urlBytes); } - public static string Decompress(string compressedUrl) + public static string Decompress(string compressedUrl, Dictionary hostConversions = null) { InitHuffmanTree(); byte[] urlBytes = Base62.Decode(compressedUrl); + urlBytes = _huffmanTree.Decode(urlBytes); + string url = Encoding.ASCII.GetString(urlBytes); - byte[] decompressedUrlBytes = _huffmanTree.Decode(urlBytes); + Url oUrl = Url.CreateFromShortString(url); + + // "Decompress" protocol + if (oUrl.Protocol == "#") { oUrl.Protocol = "http"; } + else if (oUrl.Protocol == "$") { oUrl.Protocol = "https"; } + else if (oUrl.Protocol == "F") { oUrl.Protocol = "ftp"; } + else { throw new Exception(string.Format("Unkown protocol \"{0}\"", oUrl.Protocol)); } - string url = Encoding.ASCII.GetString(decompressedUrlBytes); - - // Restore protocol indicator - if (url.StartsWith("#")) + if (hostConversions != null) { - url = string.Format("http://{0}", url.Substring(1)); - } - if (url.StartsWith("$")) - { - url = string.Format("https://{0}", url.Substring(1)); + // "Decompress" hosts + string[] urlHostParts = oUrl.Host.Split('.'); + for (int i = 0; i < urlHostParts.Length; i++) + { + foreach (KeyValuePair hostConversion in hostConversions) + { + if (urlHostParts[i] == hostConversion.Value) + { + urlHostParts[i] = hostConversion.Key; + break; + } + } + } + oUrl.Host = string.Join(".", urlHostParts); } + + url = oUrl.ToString(); return url; }