While .NET has a Uri class, it's not great. It doesn't expose all of the various parts as properties, and you can't manipulate them either. Many projects I've seen (Subtext included) just try to manipulate URLs as strings, with varying degrees of success.
Here's a C# .NET URL decoder that uses a regular expression I developed for performance based on a VBScript class I developed a while back. Comments and white space have been removed to keep it short.
using System;
using System.Collections.Specialized;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// A mutable URL whose parts (scheme, credentials, host, port, path, query and
/// fragment) are exposed as individual read/write properties. Assigning
/// <see cref="FullURL"/> parses a string into the parts; reading it recomposes them.
/// </summary>
/// <remarks>
/// Only absolute <c>scheme://...</c> URLs and <c>mailto:</c> addresses are parsed.
/// Any other input leaves the instance in its reset (empty) state. No percent
/// encoding or decoding is performed on any part.
/// </remarks>
public class URL : ICloneable, IComparable
{
    private const string MailtoScheme = "mailto";

    // The patterns are compiled once per AppDomain instead of on every parse.
    private static readonly Regex schemeDecodeRegex = new Regex(@"([^:]+):");

    // Groups: 1 = scheme (without the colon), 2 = full address, 3 = user, 4 = host.
    private static readonly Regex mailtoDecodeRegex =
        new Regex(@"(mailto):(([^@]+)@(.+))", RegexOptions.IgnoreCase);

    // Groups: 1 = scheme, 3 = user, 5 = password, 6 = host, 8 = port,
    // 9 = path, 11 = query (without '?'), 13 = fragment (without '#').
    // User and password exclude '/', '?' and '#' so that an '@' appearing later in
    // the path or query is not mistaken for the end of the credentials.
    // The port uses [0-9] rather than \d because \d also matches non-ASCII digits
    // that long.Parse would reject.
    private static readonly Regex urlDecodeRegex = new Regex(
        @"([^:]+)://(([^:@/?#]+)(:([^@/?#]+))?@)?([^:/?#]+)(:([0-9]+))?([^?#]+)?(\?([^#]+))?(#(.*))?");

    private URL baseUrl;
    private string scheme;
    private long port;
    private bool useDefaultPort;
    private string hostName;
    private string user;
    private string password;
    private string path;
    private NameValueCollection query;
    private string fragment;
    private bool relative;

    /// <summary>Creates an empty URL with every part reset.</summary>
    public URL()
    {
        Reset();
    }

    /// <summary>Creates a URL by parsing <paramref name="url"/>.</summary>
    /// <param name="url">The text to parse; null is treated as empty.</param>
    public URL(string url)
    {
        Reset();
        FullURL = url;
    }

    /// <summary>Creates a URL that is a copy of <paramref name="copyUrl"/>.</summary>
    /// <param name="copyUrl">The URL to copy from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="copyUrl"/> is null.</exception>
    public URL(URL copyUrl)
    {
        Reset();
        CopyFrom(copyUrl);
    }

    /// <summary>The scheme, e.g. "http". Stored trimmed; null is stored as empty.</summary>
    public string Scheme
    {
        get { return scheme; }
        set { scheme = EmptyIfNull(value).Trim(); }
    }

    /// <summary>
    /// The port number. Assigning it also clears <see cref="UseDefaultPort"/> so
    /// the port is written out by <see cref="FullURL"/>.
    /// </summary>
    public long Port
    {
        get { return port; }
        set
        {
            port = value;
            useDefaultPort = false;
        }
    }

    /// <summary>When true, <see cref="FullURL"/> omits the port.</summary>
    public bool UseDefaultPort
    {
        get { return useDefaultPort; }
        set { useDefaultPort = value; }
    }

    /// <summary>The user name; null is stored as empty.</summary>
    public string User
    {
        get { return user; }
        set { user = EmptyIfNull(value); }
    }

    /// <summary>The password; null is stored as empty.</summary>
    public string Password
    {
        get { return password; }
        set { password = EmptyIfNull(value); }
    }

    /// <summary>The host name; null is stored as empty.</summary>
    public string HostName
    {
        get { return hostName; }
        set { hostName = EmptyIfNull(value); }
    }

    /// <summary>The path, including its leading slash; null is stored as empty.</summary>
    public string Path
    {
        get { return path; }
        set { path = EmptyIfNull(value); }
    }

    /// <summary>
    /// The query parameters. Assigning null installs a new empty collection so
    /// that <see cref="QueryString"/> and <see cref="FullURL"/> never fail.
    /// </summary>
    public NameValueCollection Query
    {
        get { return query; }
        set { query = (value == null) ? new NameValueCollection() : value; }
    }

    /// <summary>The fragment without its leading '#'; null is stored as empty.</summary>
    public string Fragment
    {
        get { return fragment; }
        set { fragment = EmptyIfNull(value); }
    }

    /// <summary>
    /// Gets the URL recomposed from its parts, or resets this instance and parses
    /// the assigned text into its parts.
    /// </summary>
    /// <remarks>
    /// Text that has no scheme, or that is neither a <c>mailto:</c> address nor a
    /// <c>scheme://</c> URL, leaves the instance in its reset state.
    /// </remarks>
    public string FullURL
    {
        get
        {
            if (string.Equals(Scheme, MailtoScheme, StringComparison.OrdinalIgnoreCase))
            {
                return string.Format("{0}:{1}@{2}", Scheme, User, HostName);
            }

            StringBuilder url = new StringBuilder();
            if (!Relative)
            {
                url.Append(Scheme).Append("://");
                if (User.Length > 0)
                {
                    url.Append(User);
                    if (Password.Length > 0)
                    {
                        url.Append(':').Append(Password);
                    }
                    url.Append('@');
                }
                url.Append(HostName);
                if (!UseDefaultPort)
                {
                    url.Append(':').Append(Port.ToString(CultureInfo.InvariantCulture));
                }
            }

            url.Append(Path);

            string queryString = QueryString;
            if (queryString.Length > 0)
            {
                url.Append('?').Append(queryString);
            }
            if (Fragment.Length > 0)
            {
                url.Append('#').Append(Fragment);
            }
            return url.ToString();
        }
        set
        {
            Reset();
            if (value == null)
            {
                return;
            }

            Match m = schemeDecodeRegex.Match(value);
            if (!m.Success)
            {
                return;
            }

            if (string.Equals(m.Groups[1].Value, MailtoScheme, StringComparison.OrdinalIgnoreCase))
            {
                DecodeMailTo(value);
            }
            else
            {
                DecodeURL(value);
            }
        }
    }

    /// <summary>
    /// When true, <see cref="FullURL"/> omits the scheme, credentials, host and port.
    /// </summary>
    public bool Relative
    {
        get { return relative; }
        set { relative = value; }
    }

    /// <summary>
    /// Gets the query parameters joined as <c>key=value&amp;key2=value2</c> (a key
    /// with an empty value is written without '='), or replaces all parameters
    /// by parsing the assigned text.
    /// </summary>
    public string QueryString
    {
        get
        {
            StringBuilder builder = new StringBuilder();
            for (int queryIdx = 0; queryIdx < Query.Count; queryIdx++)
            {
                if (queryIdx > 0)
                {
                    builder.Append('&');
                }
                builder.Append(Query.GetKey(queryIdx));

                // The value may be null if the caller added the key without a value.
                string value = Query[queryIdx];
                if (value != null && value.Length > 0)
                {
                    builder.Append('=').Append(value);
                }
            }
            return builder.ToString();
        }
        set
        {
            Query.Clear();
            AppendQueryString(value);
        }
    }

    /// <summary>An optional base URL. It is stored and copied but not otherwise used by this class.</summary>
    public URL BaseUrl
    {
        get { return baseUrl; }
        set { baseUrl = value; }
    }

    /// <summary>
    /// Parses <paramref name="newQueryString"/> (<c>a=1&amp;b=2</c>, without a leading '?')
    /// and sets each key in <see cref="Query"/>, replacing any existing value for that key.
    /// </summary>
    /// <param name="newQueryString">The text to parse; null or empty adds nothing.</param>
    public void AppendQueryString(string newQueryString)
    {
        if (newQueryString == null || newQueryString.Length == 0)
        {
            return;
        }

        foreach (string pair in newQueryString.Split('&'))
        {
            // Skip empty segments ("a=1&&b=2", trailing '&') rather than storing an empty key.
            if (pair.Length == 0)
            {
                continue;
            }

            int keyPos = pair.IndexOf('=');
            if (keyPos > 0)
            {
                Query[pair.Substring(0, keyPos)] = pair.Substring(keyPos + 1);
            }
            else
            {
                Query[pair] = string.Empty;
            }
        }
    }

    /// <summary>
    /// Clears every part to its empty default and installs a new empty query
    /// collection. <see cref="BaseUrl"/> is left untouched.
    /// </summary>
    public void Reset()
    {
        Scheme = string.Empty;
        Port = 0;
        UseDefaultPort = true; // must follow Port, whose setter clears this flag
        HostName = string.Empty;
        User = string.Empty;
        Password = string.Empty;
        Path = string.Empty;
        Query = new NameValueCollection();
        Fragment = string.Empty;
        Relative = false;
    }

    /// <summary>
    /// Copies every part of <paramref name="copyUrl"/> into this instance. The
    /// query collection is duplicated so the two instances do not share it.
    /// </summary>
    /// <param name="copyUrl">The URL to copy from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="copyUrl"/> is null.</exception>
    public void CopyFrom(URL copyUrl)
    {
        if (copyUrl == null)
        {
            throw new ArgumentNullException("copyUrl");
        }

        Scheme = copyUrl.Scheme;
        User = copyUrl.User;
        Password = copyUrl.Password;
        HostName = copyUrl.HostName;
        Port = copyUrl.Port;
        UseDefaultPort = copyUrl.UseDefaultPort; // must follow Port, whose setter clears this flag
        Path = copyUrl.Path;
        Query = new NameValueCollection(copyUrl.Query);
        Fragment = copyUrl.Fragment;
        Relative = copyUrl.Relative;
        BaseUrl = copyUrl.BaseUrl;
    }

    /// <summary>Two URLs are equal when they have the same runtime type and the same <see cref="FullURL"/>.</summary>
    public override bool Equals(object obj)
    {
        if (obj == null)
        {
            return false;
        }
        if (obj.GetType() != this.GetType())
        {
            return false;
        }
        return FullURL == ((URL) obj).FullURL;
    }

    /// <summary>Returns the hash code of <see cref="FullURL"/>.</summary>
    public override int GetHashCode()
    {
        return FullURL.GetHashCode();
    }

    /// <summary>Returns <see cref="FullURL"/>.</summary>
    public override string ToString()
    {
        return FullURL;
    }

    /// <summary>Parses a <c>scheme://[user[:password]@]host[:port][path][?query][#fragment]</c> URL.</summary>
    private void DecodeURL(string value)
    {
        Match m = urlDecodeRegex.Match(value);
        if (!m.Success)
        {
            return;
        }

        if (m.Groups[1].Success)
        {
            Scheme = m.Groups[1].Value;
        }
        if (m.Groups[3].Success)
        {
            User = m.Groups[3].Value;
        }
        if (m.Groups[5].Success)
        {
            Password = m.Groups[5].Value;
        }
        if (m.Groups[6].Success)
        {
            HostName = m.Groups[6].Value;
        }
        if (m.Groups[8].Success)
        {
            // Port is a long, so parse as one; the pattern guarantees ASCII digits only.
            Port = long.Parse(m.Groups[8].Value, NumberStyles.None, CultureInfo.InvariantCulture);
        }
        if (m.Groups[9].Success)
        {
            Path = m.Groups[9].Value;
        }
        if (m.Groups[11].Success)
        {
            QueryString = m.Groups[11].Value;
        }
        if (m.Groups[13].Success)
        {
            Fragment = m.Groups[13].Value;
        }
    }

    /// <summary>Parses a <c>mailto:user@host</c> address into scheme, user and host.</summary>
    private void DecodeMailTo(string value)
    {
        Match m = mailtoDecodeRegex.Match(value);
        if (!m.Success)
        {
            return;
        }

        Scheme = m.Groups[1].Value;
        User = m.Groups[3].Value;
        HostName = m.Groups[4].Value;
    }

    /// <summary>Returns a copy of this URL with its own query collection.</summary>
    public object Clone()
    {
        URL newClone = (URL) this.MemberwiseClone();
        newClone.Query = new NameValueCollection(Query);
        return newClone;
    }

    /// <summary>
    /// Orders URLs by an ordinal comparison of <see cref="FullURL"/>, so a result
    /// of zero agrees with <see cref="Equals(object)"/> for two URL instances.
    /// </summary>
    /// <param name="obj">The object to compare with.</param>
    /// <returns>
    /// A negative value when this URL sorts before <paramref name="obj"/>, zero when
    /// they are the same, a positive value when it sorts after. A null argument
    /// yields 1; an object that is not a URL yields -1.
    /// </returns>
    public int CompareTo(object obj)
    {
        if (obj == null)
        {
            return 1;
        }
        if (object.ReferenceEquals(obj, this))
        {
            return 0;
        }

        URL other = obj as URL;
        if (other == null)
        {
            // Kept from the original implementation: foreign types compare as
            // "after this" instead of throwing.
            return -1;
        }
        return string.CompareOrdinal(FullURL, other.FullURL);
    }

    /// <summary>Maps null to the empty string so the getters never see null.</summary>
    private static string EmptyIfNull(string value)
    {
        return (value == null) ? string.Empty : value;
    }
}
[)amien












I linked from DotNetKicks. This is cool. How would I use this to grab the URL without the QueryString?
Yeah sorry there are no usage samples - I did write a few but my blog software started activating the anti-spam on me :D
To get the current URL:
You could remove all the query string with:
Alternatively if you just wanted to set one item - say a customerID you would normally have to hope it didn't exist and add it - taking note to append "&" or "?" depending on if there are other query values. With this class you can just;
Oh, and to write out your hyperlink you'd do something like;
[)amien
Excellent. Thank you!
You should consider making it Serializable. That's one issue I've had with the existing Uri class.
I just checked and Uri in .NET 2.0 is serializable.
The Uri class in .NET is pretty useless. It doesn't include username/password properties, the query part is a string instead of a collection of name/value pairs etc.
[)amien
Great job, thanks!
If I had this: http://damieng.com/blog/2006/07/07/URL_parsing_and_manipulation_in_.NET
How could I get just this part?
http://damieng.com/blog/2006/07/07
@Mike: Well you could, but if that's all you want you may as well just grab the string up to the final / with urlString.Substring(0, urlString.LastIndexOf('/'))
[)amien
This works well, but any attempts that I make to use an imported query string don't seem to work. I can build a query string and export OK, but importing one fails.
Is there a download of this as a .cs class file?
I found the problem, it's in the regular expression.. Here's the new line.
private const string urlDecodeRegex = @"([^:]+)://(([^:@]+)(:([^@]+))?@)?([^:/?#]+)(:([\d]+))?([^?#]+)?(\?([^#]+))?(#(.*))?";
Thanks for spotting and figuring that out Ross - I must have broken it during the reformatting exercise when I switched to this theme.
[)amien
It's also remarkably intolerant of syntax problems. There's also a problem with the Uri class (in .NET 2.0 Compact Framework at least) where creating it with a string that has leading spaces causes memory corruption. Lovely. We have our own Uri class to fix this (and other) problems. Backward slashes resulting in FormatExceptions is another one that comes to mind. Quite possibly all this is fixed in new shiny .NET versions.
So, how long is it till V-day? :)