Skip to content

Commit

Permalink
feat(uri): add NormalizeEscapedPath helper
Browse files Browse the repository at this point in the history
  • Loading branch information
tdakkota committed Dec 7, 2022
1 parent cd50284 commit b42c914
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 0 deletions.
113 changes: 113 additions & 0 deletions uri/normalize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package uri

import (
"strings"
)

// ishex reports whether c is an ASCII hexadecimal digit,
// i.e. one of 0-9, a-f or A-F.
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}

// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
//
// Bytes that are not hexadecimal digits yield 0, so callers that need to
// tell '0' apart from garbage must check ishex first.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}

// asciiToUpper upper-cases a lowercase hexadecimal letter (a-f).
//
// Every other byte, including the letters g-z, is returned unchanged.
func asciiToUpper(c byte) byte {
	if c < 'a' || c > 'f' {
		return c
	}
	return c - 'a' + 'A'
}

// asciiIsLowercase reports whether c is an ASCII lowercase letter (a-z).
func asciiIsLowercase(c byte) bool {
	// Unsigned subtraction wraps for bytes below 'a', pushing them far
	// above the span, so a single comparison covers both bounds.
	const span = 'z' - 'a'
	return c-'a' <= span
}

// shouldEscapePath reports whether the byte c has to stay percent-encoded
// when it appears in a URL path.
//
// Only the RFC 3986 §2.3 unreserved characters (ASCII letters, decimal
// digits and the marks '-', '_', '.', '~') are allowed unescaped; every
// other byte yields true.
func shouldEscapePath(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		// §2.3 Unreserved characters (alphanumeric).
		return false
	case c == '-', c == '_', c == '.', c == '~':
		// §2.3 Unreserved characters (mark).
		return false
	}
	// Everything else must be escaped.
	return true
}

// NormalizeEscapedPath normalizes a percent-encoded URL path.
//
// The hex digits of every percent-encoded octet are upper-cased, and
// escapes of RFC 3986 §2.3 unreserved characters (letters, digits, '-',
// '_', '.', '~') are replaced by the character itself.
//
// If s is already normalized, it is returned as is without allocating.
//
// The boolean result is false, and the returned string is empty, if s
// contains an invalid escape sequence: a '%' that is not followed by two
// hexadecimal digits.
func NormalizeEscapedPath(s string) (string, bool) {
	// Fast path: look for the first escape sequence that needs rewriting.
	// pos always points at the start of the not-yet-inspected part of s.
	pos := 0
	for {
		idx := strings.IndexByte(s[pos:], '%')
		if idx < 0 {
			// Nothing (more) to rewrite, s is already normalized.
			return s, true
		}
		pos += idx
		if pos+2 >= len(s) || !ishex(s[pos+1]) || !ishex(s[pos+2]) {
			// Invalid escape sequence.
			return "", false
		}
		a, b := s[pos+1], s[pos+2]
		if asciiIsLowercase(a) || asciiIsLowercase(b) {
			// Hex digits have to be upper-cased.
			break
		}
		if !shouldEscapePath(unhex(a)<<4 | unhex(b)) {
			// Unnecessary escape, has to be decoded.
			break
		}
		pos += 3
	}

	// Slow path: s[:pos] is known to be normalized, so copy it verbatim and
	// rewrite the rest. The result is never longer than the input.
	var t strings.Builder
	t.Grow(len(s))
	t.WriteString(s[:pos])
	for i := pos; i < len(s); {
		c := s[i]
		if c != '%' {
			t.WriteByte(c)
			i++
			continue
		}
		// The fast path stopped at the first escape that needs rewriting,
		// so every escape from here on must still be validated before its
		// digits are read.
		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
			// Invalid escape sequence.
			return "", false
		}
		a, b := s[i+1], s[i+2]
		if ch := unhex(a)<<4 | unhex(b); shouldEscapePath(ch) {
			t.WriteByte('%')
			t.WriteByte(asciiToUpper(a))
			t.WriteByte(asciiToUpper(b))
		} else {
			t.WriteByte(ch)
		}
		i += 3
	}
	return t.String(), true
}
53 changes: 53 additions & 0 deletions uri/normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package uri

import (
"fmt"
"testing"

"github.com/stretchr/testify/require"
)

// TestNormalizeEscapedPath checks NormalizeEscapedPath against a table of
// already-normalized paths (fast path), paths that need rewriting (slow
// path) and paths with invalid escape sequences.
func TestNormalizeEscapedPath(t *testing.T) {
	tests := []struct {
		s    string
		want string
		ok   bool
	}{
		// Fast path.
		{"", "", true},
		{"/foo", "/foo", true},
		{"/foo/bar", "/foo/bar", true},
		{"/foo%00bar", "/foo%00bar", true},
		{"/foo%0Abar", "/foo%0Abar", true},
		{"/foo%20bar", "/foo%20bar", true},
		{"/foo%3Fbar", "/foo%3Fbar", true},
		{"/foo%25bar", "/foo%25bar", true},

		// Slow path.
		// Unnecessary escapes.
		{"/user/ern%61do", "/user/ernado", true},
		{"/user/ern%41do", "/user/ernAdo", true},
		// Lowercase hex digits.
		{"/foo%3fbar", "/foo%3Fbar", true},
		// Lowercase escape followed by an already-normalized one.
		{"/foo%2fbar%3F", "/foo%2Fbar%3F", true},

		// Invalid.
		{"/foo%", "", false},
		{"/foo%3", "", false},
		{"/foo%zz", "", false},
	}
	for i, tt := range tests {
		tt := tt
		t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) {
			a := require.New(t)

			got, ok := NormalizeEscapedPath(tt.s)
			if !tt.ok {
				a.False(ok)
				// On failure the returned string must be empty.
				a.Empty(got)
				return
			}
			a.True(ok)
			a.Equal(tt.want, got)
		})
	}
}

0 comments on commit b42c914

Please sign in to comment.