From f1a9718061f233942e77b467604b1b5c357127e8 Mon Sep 17 00:00:00 2001 From: Todd Menier Date: Mon, 18 Dec 2017 22:27:03 -0600 Subject: [PATCH 1/3] #258 (and a redo of #185) --- Test/Flurl.Test/UrlBuilderTests.cs | 33 ++++++++----- src/Flurl/QueryParameter.cs | 19 ++++---- src/Flurl/Url.cs | 74 ++++++++++++++++-------------- 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/Test/Flurl.Test/UrlBuilderTests.cs b/Test/Flurl.Test/UrlBuilderTests.cs index e0535121..00e3db23 100644 --- a/Test/Flurl.Test/UrlBuilderTests.cs +++ b/Test/Flurl.Test/UrlBuilderTests.cs @@ -322,18 +322,19 @@ public void interprets_plus_as_space() { Assert.AreEqual("1 2", url.QueryParams["x"]); } - [Test] - public void can_encode_space_as_plus() { - var url = new Url("http://www.mysite.com/foo+bar?x=1+2"); - Assert.AreEqual("http://www.mysite.com/foo+bar?x=1+2", url.ToString(true)); - } - [Test] public void encodes_plus() { var url = new Url("http://www.mysite.com").SetQueryParam("x", "1+2"); Assert.AreEqual("http://www.mysite.com?x=1%2B2", url.ToString()); } + [Test] + public void can_encode_space_as_plus() { + var url = new Url("http://www.mysite.com").AppendPathSegment("a b").SetQueryParam("c d", "1 2"); + Assert.AreEqual("http://www.mysite.com/a%20b?c%20d=1%202", url.ToString()); // but not by default + Assert.AreEqual("http://www.mysite.com/a+b?c+d=1+2", url.ToString(true)); + } + [TestCase("http://www.mysite.com/more", true)] [TestCase("http://www.mysite.com/more?x=1&y=2", true)] [TestCase("http://www.mysite.com/more?x=1&y=2#frag", true)] @@ -393,14 +394,24 @@ public void constructor_parses_url_correctly(string full, string path, string qu Assert.AreEqual(full, url.ToString()); } -#if !NET40 // https://github.com/tmenier/Flurl/issues/185 [Test] public void can_encode_very_long_value() { - // 65520 chars was the tipping point https://github.com/dotnet/corefx/issues/1936 - var s = new String('x', 1000000); - Url.EncodeQueryParamValue(s, false); + // 65520 chars is the tipping point https://github.com/dotnet/corefx/issues/1936 + var len = 300000; + + var s = new string('x', len); + var encoded = Url.Encode(s, false); + Assert.AreEqual(s, encoded); + + // same length string, but every 10th char needs to be hex encoded + s = string.Concat(Enumerable.Repeat("xxxxxxxxx?", len / 10)); + Assert.AreEqual(len, s.Length); // just a sanity check + encoded = Url.Encode(s, false); + // hex encoding will add 2 addtional chars for every char that needs to be encoded + Assert.AreEqual(len + (2 * len / 10), encoded.Length); + var expected = string.Concat(Enumerable.Repeat("xxxxxxxxx%3F", len / 10)); + Assert.AreEqual(expected, encoded); } -#endif } } diff --git a/src/Flurl/QueryParameter.cs b/src/Flurl/QueryParameter.cs index 73c89330..1aba9244 100644 --- a/src/Flurl/QueryParameter.cs +++ b/src/Flurl/QueryParameter.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using Flurl.Util; namespace Flurl { @@ -51,21 +52,21 @@ public object Value { /// Indicates whether to encode space characters with "+" instead of "%20". /// public string ToString(bool encodeSpaceAsPlus) { - if (_encodedValue != null) { - return $"{Name}={_encodedValue}"; - } if (_value is IEnumerable && !(_value is string)) { return string.Join("&", from v in (_value as IEnumerable).Cast() - select BuildPair(Name, v, encodeSpaceAsPlus)); - } - else { - return BuildPair(Name, Value, encodeSpaceAsPlus); + select BuildPair(Name, v, false, encodeSpaceAsPlus)); } + return BuildPair(Name, _encodedValue ?? Value, _encodedValue != null, encodeSpaceAsPlus); } - private static string BuildPair(string name, object value, bool encodeSpaceAsPlus) { - return (value == null) ? name : $"{name}={Url.EncodeQueryParamValue(value, encodeSpaceAsPlus)}"; + private static string BuildPair(string name, object value, bool valueIsEncoded, bool encodeSpaceAsPlus) { + name = Url.Encode(name, encodeSpaceAsPlus); + if (value == null) + return name; + + value = valueIsEncoded ? value : Url.Encode(value.ToInvariantString(), encodeSpaceAsPlus); + return $"{name}={value}"; } } } diff --git a/src/Flurl/Url.cs b/src/Flurl/Url.cs index c3e1fb19..f565cbda 100644 --- a/src/Flurl/Url.cs +++ b/src/Flurl/Url.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Net; using System.Text.RegularExpressions; namespace Flurl @@ -130,50 +129,55 @@ public static string DecodeQueryParamValue(string value) { return Uri.UnescapeDataString((value ?? "").Replace("+", " ")); } + private const int MAX_URL_LENGTH = 65519; + /// - /// URL-encodes a query parameter value. + /// URL-encodes a string, including reserved characters such as '/' and '?'. /// - /// The query parameter value to encode. + /// The string to encode. /// If true, spaces will be encoded as + signs. Otherwise, they'll be encoded as %20. /// - public static string EncodeQueryParamValue(object value, bool encodeSpaceAsPlus) { - var result = (value ?? "").ToInvariantString(); -#if NET40 - result = Uri.EscapeDataString(result); -#else - result = WebUtility.UrlEncode(result); -#endif - return encodeSpaceAsPlus ? result.Replace("%20", "+") : result; + public static string Encode(string s, bool encodeSpaceAsPlus) { + if (string.IsNullOrEmpty(s)) + return s; + + if (s.Length > MAX_URL_LENGTH) { + // Uri.EscapeDataString is going to throw because the string is "too long", so break it into pieces and concat them + var parts = new string[(int)Math.Ceiling((double)s.Length / MAX_URL_LENGTH)]; + for (var i = 0; i < parts.Length; i++) { + var start = i * MAX_URL_LENGTH; + var len = Math.Min(MAX_URL_LENGTH, s.Length - start); + parts[i] = Uri.EscapeDataString(s.Substring(start, len)); + } + s = string.Concat(parts); + } + else { + s = Uri.EscapeDataString(s); + } + return encodeSpaceAsPlus ? s.Replace("%20", "+") : s; } /// - /// Encodes characters that are illegal in a URL. Does not encode reserved characters, i.e. '/', '+', etc. + /// URL-encodes characters in a string that are neither reserved nor unreserved. Avoids encoding reserved characters such as '/' and '?'. Avoids encoding '%' if it begins a %-hex-hex sequence (i.e. avoids double-encoding). /// - /// The URL or URL part. - public static string EncodeIllegalCharacters(string urlPart) { - if (string.IsNullOrEmpty(urlPart)) - return urlPart; + /// The string to encode. + public static string EncodeIllegalCharacters(string s) { + if (string.IsNullOrEmpty(s)) + return s; - // EscapeUriString works perfectly if there are no % characters (and this avoids regex overhead of SplitAndEscapeParts) - if (!urlPart.Contains("%")) - return Uri.EscapeUriString(urlPart); + // Uri.EscapeUriString generally does what we want - encode illegal characters only - but it has a quirk + // in that % isn't illegal if it's the start of a %-encoded sequence https://stackoverflow.com/a/47636037/62600 - // String.Concat should be marginally faster than StringBuilder with relatively few/small strings (like most URLs) - return string.Concat(SplitAndEscapeParts(urlPart)); - } + // no % characters, so avoid the regex overhead + if (!s.Contains("%")) + return Uri.EscapeUriString(s); - private static IEnumerable SplitAndEscapeParts(string s) { - // EscapeUriString encodes illegal characters only, but doesn't recognize %-encoded character sequences as legal: https://stackoverflow.com/a/47636037/62600 - // So pick out all %-hex-hex matches and avoid double-encoding - const string pattern = "(.*?)((%[0-9A-Fa-f]{2})|$)"; - foreach (Match match in Regex.Matches(s, pattern)) { - var a = match.Groups[1].Value; - var b = match.Groups[2].Value; - if (a.Length > 0) - yield return Uri.EscapeUriString(a); // sequence with no %-encoding - encode illegal characters - if (b.Length > 0) - yield return b; // 3-character %-encoded sequence - leave it alone - } + // pick out all %-hex-hex matches and avoid double-encoding + return Regex.Replace(s, "(.*?)((%[0-9A-Fa-f]{2})|$)", c => { + var a = c.Groups[1].Value; // group 1 is a sequence with no %-encoding - encode illegal characters + var b = c.Groups[2].Value; // group 2 is a 3-character %-encoded sequence - leave it alone + return Uri.EscapeUriString(a) + b; + }); } /// @@ -395,7 +399,7 @@ public override string ToString() { /// Indicates whether to encode spaces with the "+" character instead of "%20" /// public string ToString(bool encodeSpaceAsPlus) { - var sb = new System.Text.StringBuilder(Path); + var sb = new System.Text.StringBuilder(encodeSpaceAsPlus ? Path.Replace("%20", "+") : Path); if (Query.Length > 0) sb.Append("?").Append(QueryParams.ToString(encodeSpaceAsPlus)); if (Fragment.Length > 0) From 350173a18c8cff786a030d451290048b9289bd97 Mon Sep 17 00:00:00 2001 From: Todd Menier Date: Tue, 19 Dec 2017 11:18:29 -0600 Subject: [PATCH 2/3] #262 better Url.Decode --- Test/Flurl.Test/UrlBuilderTests.cs | 37 +++++++++++++++++++++--------- src/Flurl/QueryParameter.cs | 2 +- src/Flurl/Url.cs | 19 ++++++++------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/Test/Flurl.Test/UrlBuilderTests.cs b/Test/Flurl.Test/UrlBuilderTests.cs index 00e3db23..19525f69 100644 --- a/Test/Flurl.Test/UrlBuilderTests.cs +++ b/Test/Flurl.Test/UrlBuilderTests.cs @@ -396,22 +396,37 @@ public void constructor_parses_url_correctly(string full, string path, string qu // https://github.com/tmenier/Flurl/issues/185 [Test] - public void can_encode_very_long_value() { - // 65520 chars is the tipping point https://github.com/dotnet/corefx/issues/1936 - var len = 300000; + public void can_encode_and_decode_very_long_value() { + // 65,520 chars is the tipping point for Uri.EscapeDataString https://github.com/dotnet/corefx/issues/1936 + var len = 500000; - var s = new string('x', len); - var encoded = Url.Encode(s, false); - Assert.AreEqual(s, encoded); - - // same length string, but every 10th char needs to be hex encoded - s = string.Concat(Enumerable.Repeat("xxxxxxxxx?", len / 10)); + // every 10th char needs to be encoded + var s = string.Concat(Enumerable.Repeat("xxxxxxxxx ", len / 10)); Assert.AreEqual(len, s.Length); // just a sanity check - encoded = Url.Encode(s, false); + + // encode space as %20 + var encoded = Url.Encode(s, false); // hex encoding will add 2 addtional chars for every char that needs to be encoded Assert.AreEqual(len + (2 * len / 10), encoded.Length); - var expected = string.Concat(Enumerable.Repeat("xxxxxxxxx%3F", len / 10)); + var expected = string.Concat(Enumerable.Repeat("xxxxxxxxx%20", len / 10)); Assert.AreEqual(expected, encoded); + + var decoded = Url.Decode(encoded, false); + Assert.AreEqual(s, decoded); + + // encode space as + + encoded = Url.Encode(s, true); + Assert.AreEqual(len, encoded.Length); + expected = string.Concat(Enumerable.Repeat("xxxxxxxxx+", len / 10)); + Assert.AreEqual(expected, encoded); + + // interpret + as space + decoded = Url.Decode(encoded, true); + Assert.AreEqual(s, decoded); + + // don't interpret + as space, encoded and decoded should be the same + decoded = Url.Decode(encoded, false); + Assert.AreEqual(encoded, decoded); } } } diff --git a/src/Flurl/QueryParameter.cs b/src/Flurl/QueryParameter.cs index 1aba9244..2ddad43a 100644 --- a/src/Flurl/QueryParameter.cs +++ b/src/Flurl/QueryParameter.cs @@ -23,7 +23,7 @@ public QueryParameter(string name, object value, bool isEncoded = false) { Name = name; if (isEncoded && value != null) { _encodedValue = value as string; - _value = Url.DecodeQueryParamValue(_encodedValue); + _value = Url.Decode(_encodedValue, true); } else { Value = value; diff --git a/src/Flurl/Url.cs b/src/Flurl/Url.cs index f565cbda..1743900d 100644 --- a/src/Flurl/Url.cs +++ b/src/Flurl/Url.cs @@ -119,14 +119,17 @@ public static string GetRoot(string url) { } /// - /// Decodes a URL-encoded query parameter value. + /// Decodes a URL-encoded string. /// - /// The encoded query parameter value. + /// The URL-encoded string. + /// If true, any '+' character will be decoded to a space. /// - public static string DecodeQueryParamValue(string value) { - // Uri.UnescapeDataString comes closest to doing it right, but famously stumbles on the + sign - // http://weblog.west-wind.com/posts/2009/Feb/05/Html-and-Uri-String-Encoding-without-SystemWeb - return Uri.UnescapeDataString((value ?? "").Replace("+", " ")); + public static string Decode(string s, bool interpretPlusAsSpace) { + if (string.IsNullOrEmpty(s)) + return s; + + s = Uri.UnescapeDataString(s); + return interpretPlusAsSpace ? s.Replace("+", " ") : s; } private const int MAX_URL_LENGTH = 65519; @@ -165,7 +168,7 @@ public static string EncodeIllegalCharacters(string s) { if (string.IsNullOrEmpty(s)) return s; - // Uri.EscapeUriString generally does what we want - encode illegal characters only - but it has a quirk + // Uri.EscapeUriString mostly does what we want - encodes illegal characters only - but it has a quirk // in that % isn't illegal if it's the start of a %-encoded sequence https://stackoverflow.com/a/47636037/62600 // no % characters, so avoid the regex overhead @@ -175,7 +178,7 @@ public static string EncodeIllegalCharacters(string s) { // pick out all %-hex-hex matches and avoid double-encoding return Regex.Replace(s, "(.*?)((%[0-9A-Fa-f]{2})|$)", c => { var a = c.Groups[1].Value; // group 1 is a sequence with no %-encoding - encode illegal characters - var b = c.Groups[2].Value; // group 2 is a 3-character %-encoded sequence - leave it alone + var b = c.Groups[2].Value; // group 2 is a valid 3-character %-encoded sequence - leave it alone! return Uri.EscapeUriString(a) + b; }); } From 10ff1656eff6318f59435411862fdd63bc407130 Mon Sep 17 00:00:00 2001 From: Todd Menier Date: Tue, 19 Dec 2017 15:33:14 -0600 Subject: [PATCH 3/3] bump version --- src/Flurl/Flurl.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Flurl/Flurl.csproj b/src/Flurl/Flurl.csproj index f9871dcd..dc91544a 100644 --- a/src/Flurl/Flurl.csproj +++ b/src/Flurl/Flurl.csproj @@ -4,7 +4,7 @@ net40;net45;netstandard1.3;netstandard1.0; True Flurl - 2.5.2 + 2.6.0 Todd Menier A fluent, portable URL builder. To make HTTP calls off the fluent chain, check out Flurl.Http. http://tmenier.github.io/Flurl