diff --git a/Test/Flurl.Test/UrlBuilderTests.cs b/Test/Flurl.Test/UrlBuilderTests.cs index e0535121..19525f69 100644 --- a/Test/Flurl.Test/UrlBuilderTests.cs +++ b/Test/Flurl.Test/UrlBuilderTests.cs @@ -322,18 +322,19 @@ public void interprets_plus_as_space() { Assert.AreEqual("1 2", url.QueryParams["x"]); } - [Test] - public void can_encode_space_as_plus() { - var url = new Url("http://www.mysite.com/foo+bar?x=1+2"); - Assert.AreEqual("http://www.mysite.com/foo+bar?x=1+2", url.ToString(true)); - } - [Test] public void encodes_plus() { var url = new Url("http://www.mysite.com").SetQueryParam("x", "1+2"); Assert.AreEqual("http://www.mysite.com?x=1%2B2", url.ToString()); } + [Test] + public void can_encode_space_as_plus() { + var url = new Url("http://www.mysite.com").AppendPathSegment("a b").SetQueryParam("c d", "1 2"); + Assert.AreEqual("http://www.mysite.com/a%20b?c%20d=1%202", url.ToString()); // but not by default + Assert.AreEqual("http://www.mysite.com/a+b?c+d=1+2", url.ToString(true)); + } + [TestCase("http://www.mysite.com/more", true)] [TestCase("http://www.mysite.com/more?x=1&y=2", true)] [TestCase("http://www.mysite.com/more?x=1&y=2#frag", true)] @@ -393,14 +394,39 @@ public void constructor_parses_url_correctly(string full, string path, string qu Assert.AreEqual(full, url.ToString()); } -#if !NET40 // https://github.com/tmenier/Flurl/issues/185 [Test] - public void can_encode_very_long_value() { - // 65520 chars was the tipping point https://github.com/dotnet/corefx/issues/1936 - var s = new String('x', 1000000); - Url.EncodeQueryParamValue(s, false); + public void can_encode_and_decode_very_long_value() { + // 65,520 chars is the tipping point for Uri.EscapeDataString https://github.com/dotnet/corefx/issues/1936 + var len = 500000; + + // every 10th char needs to be encoded + var s = string.Concat(Enumerable.Repeat("xxxxxxxxx ", len / 10)); + Assert.AreEqual(len, s.Length); // just a sanity check + + // encode space as %20 + var encoded = Url.Encode(s, false); + // hex encoding will add 2 addtional chars for every char that needs to be encoded + Assert.AreEqual(len + (2 * len / 10), encoded.Length); + var expected = string.Concat(Enumerable.Repeat("xxxxxxxxx%20", len / 10)); + Assert.AreEqual(expected, encoded); + + var decoded = Url.Decode(encoded, false); + Assert.AreEqual(s, decoded); + + // encode space as + + encoded = Url.Encode(s, true); + Assert.AreEqual(len, encoded.Length); + expected = string.Concat(Enumerable.Repeat("xxxxxxxxx+", len / 10)); + Assert.AreEqual(expected, encoded); + + // interpret + as space + decoded = Url.Decode(encoded, true); + Assert.AreEqual(s, decoded); + + // don't interpret + as space, encoded and decoded should be the same + decoded = Url.Decode(encoded, false); + Assert.AreEqual(encoded, decoded); } -#endif } } diff --git a/src/Flurl/Flurl.csproj b/src/Flurl/Flurl.csproj index f9871dcd..dc91544a 100644 --- a/src/Flurl/Flurl.csproj +++ b/src/Flurl/Flurl.csproj @@ -4,7 +4,7 @@ net40;net45;netstandard1.3;netstandard1.0; True Flurl - 2.5.2 + 2.6.0 Todd Menier A fluent, portable URL builder. To make HTTP calls off the fluent chain, check out Flurl.Http. http://tmenier.github.io/Flurl diff --git a/src/Flurl/QueryParameter.cs b/src/Flurl/QueryParameter.cs index 73c89330..2ddad43a 100644 --- a/src/Flurl/QueryParameter.cs +++ b/src/Flurl/QueryParameter.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using Flurl.Util; namespace Flurl { @@ -22,7 +23,7 @@ public QueryParameter(string name, object value, bool isEncoded = false) { Name = name; if (isEncoded && value != null) { _encodedValue = value as string; - _value = Url.DecodeQueryParamValue(_encodedValue); + _value = Url.Decode(_encodedValue, true); } else { Value = value; @@ -51,21 +52,21 @@ public object Value { /// Indicates whether to encode space characters with "+" instead of "%20". /// public string ToString(bool encodeSpaceAsPlus) { - if (_encodedValue != null) { - return $"{Name}={_encodedValue}"; - } if (_value is IEnumerable && !(_value is string)) { return string.Join("&", from v in (_value as IEnumerable).Cast() - select BuildPair(Name, v, encodeSpaceAsPlus)); - } - else { - return BuildPair(Name, Value, encodeSpaceAsPlus); + select BuildPair(Name, v, false, encodeSpaceAsPlus)); } + return BuildPair(Name, _encodedValue ?? Value, _encodedValue != null, encodeSpaceAsPlus); } - private static string BuildPair(string name, object value, bool encodeSpaceAsPlus) { - return (value == null) ? name : $"{name}={Url.EncodeQueryParamValue(value, encodeSpaceAsPlus)}"; + private static string BuildPair(string name, object value, bool valueIsEncoded, bool encodeSpaceAsPlus) { + name = Url.Encode(name, encodeSpaceAsPlus); + if (value == null) + return name; + + value = valueIsEncoded ? value : Url.Encode(value.ToInvariantString(), encodeSpaceAsPlus); + return $"{name}={value}"; } } } diff --git a/src/Flurl/Url.cs b/src/Flurl/Url.cs index c3e1fb19..1743900d 100644 --- a/src/Flurl/Url.cs +++ b/src/Flurl/Url.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Net; using System.Text.RegularExpressions; namespace Flurl @@ -120,60 +119,68 @@ public static string GetRoot(string url) { } /// - /// Decodes a URL-encoded query parameter value. + /// Decodes a URL-encoded string. /// - /// The encoded query parameter value. + /// The URL-encoded string. + /// If true, any '+' character will be decoded to a space. /// - public static string DecodeQueryParamValue(string value) { - // Uri.UnescapeDataString comes closest to doing it right, but famously stumbles on the + sign - // http://weblog.west-wind.com/posts/2009/Feb/05/Html-and-Uri-String-Encoding-without-SystemWeb - return Uri.UnescapeDataString((value ?? "").Replace("+", " ")); + public static string Decode(string s, bool interpretPlusAsSpace) { + if (string.IsNullOrEmpty(s)) + return s; + + s = Uri.UnescapeDataString(s); + return interpretPlusAsSpace ? s.Replace("+", " ") : s; } + private const int MAX_URL_LENGTH = 65519; + /// - /// URL-encodes a query parameter value. + /// URL-encodes a string, including reserved characters such as '/' and '?'. /// - /// The query parameter value to encode. + /// The string to encode. /// If true, spaces will be encoded as + signs. Otherwise, they'll be encoded as %20. /// - public static string EncodeQueryParamValue(object value, bool encodeSpaceAsPlus) { - var result = (value ?? "").ToInvariantString(); -#if NET40 - result = Uri.EscapeDataString(result); -#else - result = WebUtility.UrlEncode(result); -#endif - return encodeSpaceAsPlus ? result.Replace("%20", "+") : result; + public static string Encode(string s, bool encodeSpaceAsPlus) { + if (string.IsNullOrEmpty(s)) + return s; + + if (s.Length > MAX_URL_LENGTH) { + // Uri.EscapeDataString is going to throw because the string is "too long", so break it into pieces and concat them + var parts = new string[(int)Math.Ceiling((double)s.Length / MAX_URL_LENGTH)]; + for (var i = 0; i < parts.Length; i++) { + var start = i * MAX_URL_LENGTH; + var len = Math.Min(MAX_URL_LENGTH, s.Length - start); + parts[i] = Uri.EscapeDataString(s.Substring(start, len)); + } + s = string.Concat(parts); + } + else { + s = Uri.EscapeDataString(s); + } + return encodeSpaceAsPlus ? s.Replace("%20", "+") : s; } /// - /// Encodes characters that are illegal in a URL. Does not encode reserved characters, i.e. '/', '+', etc. + /// URL-encodes characters in a string that are neither reserved nor unreserved. Avoids encoding reserved characters such as '/' and '?'. Avoids encoding '%' if it begins a %-hex-hex sequence (i.e. avoids double-encoding). /// - /// The URL or URL part. - public static string EncodeIllegalCharacters(string urlPart) { - if (string.IsNullOrEmpty(urlPart)) - return urlPart; + /// The string to encode. + public static string EncodeIllegalCharacters(string s) { + if (string.IsNullOrEmpty(s)) + return s; - // EscapeUriString works perfectly if there are no % characters (and this avoids regex overhead of SplitAndEscapeParts) - if (!urlPart.Contains("%")) - return Uri.EscapeUriString(urlPart); + // Uri.EscapeUriString mostly does what we want - encodes illegal characters only - but it has a quirk + // in that % isn't illegal if it's the start of a %-encoded sequence https://stackoverflow.com/a/47636037/62600 - // String.Concat should be marginally faster than StringBuilder with relatively few/small strings (like most URLs) - return string.Concat(SplitAndEscapeParts(urlPart)); - } + // no % characters, so avoid the regex overhead + if (!s.Contains("%")) + return Uri.EscapeUriString(s); - private static IEnumerable SplitAndEscapeParts(string s) { - // EscapeUriString encodes illegal characters only, but doesn't recognize %-encoded character sequences as legal: https://stackoverflow.com/a/47636037/62600 - // So pick out all %-hex-hex matches and avoid double-encoding - const string pattern = "(.*?)((%[0-9A-Fa-f]{2})|$)"; - foreach (Match match in Regex.Matches(s, pattern)) { - var a = match.Groups[1].Value; - var b = match.Groups[2].Value; - if (a.Length > 0) - yield return Uri.EscapeUriString(a); // sequence with no %-encoding - encode illegal characters - if (b.Length > 0) - yield return b; // 3-character %-encoded sequence - leave it alone - } + // pick out all %-hex-hex matches and avoid double-encoding + return Regex.Replace(s, "(.*?)((%[0-9A-Fa-f]{2})|$)", c => { + var a = c.Groups[1].Value; // group 1 is a sequence with no %-encoding - encode illegal characters + var b = c.Groups[2].Value; // group 2 is a valid 3-character %-encoded sequence - leave it alone! + return Uri.EscapeUriString(a) + b; + }); } /// @@ -395,7 +402,7 @@ public override string ToString() { /// Indicates whether to encode spaces with the "+" character instead of "%20" /// public string ToString(bool encodeSpaceAsPlus) { - var sb = new System.Text.StringBuilder(Path); + var sb = new System.Text.StringBuilder(encodeSpaceAsPlus ? Path.Replace("%20", "+") : Path); if (Query.Length > 0) sb.Append("?").Append(QueryParams.ToString(encodeSpaceAsPlus)); if (Fragment.Length > 0)