feat(uri): add NormalizeEscapedPath helper

ogen-go · Dec 7, 2022 · b42c914 · b42c914
1 parent cd50284
commit b42c914
Show file tree

Hide file tree

Showing 2 changed files with 166 additions and 0 deletions.
diff --git a/uri/normalize.go b/uri/normalize.go
@@ -0,0 +1,113 @@
+package uri
+
+import (
+	"strings"
+)
+
// ishex reports whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
func ishex(c byte) bool {
	isDigit := c >= '0' && c <= '9'
	isLowerHex := c >= 'a' && c <= 'f'
	isUpperHex := c >= 'A' && c <= 'F'
	return isDigit || isLowerHex || isUpperHex
}
+
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
//
// Bytes that are not hexadecimal digits yield 0, so callers are expected to
// validate the input with ishex first.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a')
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
+
// asciiToUpper upper-cases a lowercase hexadecimal letter.
//
// Only the letters a-f are converted; every other byte, including g-z, is
// returned unchanged.
func asciiToUpper(c byte) byte {
	if c < 'a' || c > 'f' {
		return c
	}
	const caseOffset = 'a' - 'A'
	return c - caseOffset
}
+
// asciiIsLowercase reports whether c is an ASCII lowercase letter (a-z).
func asciiIsLowercase(c byte) bool {
	if c < 'a' {
		return false
	}
	return c <= 'z'
}
+
// shouldEscapePath reports whether the byte c has to be percent-encoded when
// it appears in a URL path string.
//
// Only the RFC 3986 §2.3 unreserved characters (ASCII letters, decimal digits
// and '-', '_', '.', '~') are exempt; every other byte, including '/', must be
// escaped.
func shouldEscapePath(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		// Unreserved: ALPHA.
		return false
	case '0' <= c && c <= '9':
		// Unreserved: DIGIT.
		return false
	case c == '-', c == '_', c == '.', c == '~':
		// Unreserved: marks.
		return false
	}
	return true
}
+
+// NormalizeEscapedPath normalizes escaped path.
+//
+// All percent-encoded characters are upper-cased.
+//
+// If the escaping
+func NormalizeEscapedPath(s string) (string, bool) {
+	// Search % with lower case octets.
+	iter := s
+	for {
+		idx := strings.IndexByte(iter, '%')
+		if idx < 0 {
+			return s, true
+		}
+		if idx+2 >= len(iter) || !ishex(iter[idx+1]) || !ishex(iter[idx+2]) {
+			// Invalid escape sequence.
+			return "", false
+		}
+		a, b := iter[idx+1], iter[idx+2]
+		if asciiIsLowercase(a) || asciiIsLowercase(b) {
+			goto slow
+		}
+		// Unescape character.
+		ch := unhex(a)<<4 | unhex(b)
+		if !shouldEscapePath(ch) {
+			// Unescape character.
+			goto slow
+		}
+		iter = iter[idx+3:]
+	}
+
+slow:
+	var t strings.Builder
+	t.Grow(len(s))
+	for i := 0; i < len(s); {
+		switch s[i] {
+		case '%':
+			// Unescape character.
+			a, b := s[i+1], s[i+2]
+			ch := unhex(a)<<4 | unhex(b)
+			if shouldEscapePath(ch) {
+				t.WriteByte('%')
+				t.WriteByte(asciiToUpper(a))
+				t.WriteByte(asciiToUpper(b))
+			} else {
+				t.WriteByte(ch)
+			}
+			i += 3
+		default:
+			t.WriteByte(s[i])
+			i++
+		}
+	}
+	return t.String(), true
+}
diff --git a/uri/normalize_test.go b/uri/normalize_test.go
@@ -0,0 +1,53 @@
+package uri
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestNormalizeEscapedPath(t *testing.T) {
+	tests := []struct {
+		s    string
+		want string
+		ok   bool
+	}{
+		// Fast path.
+		{"", "", true},
+		{"/foo", "/foo", true},
+		{"/foo/bar", "/foo/bar", true},
+		{"/foo%00bar", "/foo%00bar", true},
+		{"/foo%0Abar", "/foo%0Abar", true},
+		{"/foo%20bar", "/foo%20bar", true},
+		{"/foo%3Fbar", "/foo%3Fbar", true},
+		{"/foo%25bar", "/foo%25bar", true},
+
+		// Slow path.
+		// Unnecessary escapes.
+		{"/user/ern%61do", "/user/ernado", true},
+		{"/user/ern%41do", "/user/ernAdo", true},
+		// Lowercase hex digits.
+		{"/foo%3fbar", "/foo%3Fbar", true},
+		{"/foo%3fbar", "/foo%3Fbar", true},
+
+		// Invalid.
+		{"/foo%", "", false},
+		{"/foo%3", "", false},
+		{"/foo%zz", "", false},
+	}
+	for i, tt := range tests {
+		tt := tt
+		t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) {
+			a := require.New(t)
+
+			got, ok := NormalizeEscapedPath(tt.s)
+			if !tt.ok {
+				a.False(ok)
+				return
+			}
+			a.True(ok)
+			a.Equal(tt.want, got)
+		})
+	}
+}