Skip to content

Commit

Permalink
Fix #23235: Allow unicode characters in URL paths
Browse files Browse the repository at this point in the history
git-svn-id: https://josm.openstreetmap.de/svn/trunk@18869 0c6e7542-c601-0410-84e7-c038aed88b3b
  • Loading branch information
taylor.smock committed Oct 13, 2023
1 parent d5b6008 commit a7958a1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,13 @@ public class UrlValidator extends AbstractValidator {
/**
* Protocol scheme (e.g. http, ftp, https).
*/
private static final String SCHEME_REGEX = "^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*";
private static final String SCHEME_REGEX = "^\\p{Alpha}[\\p{Alnum}+\\-.]*";
private static final Pattern SCHEME_PATTERN = Pattern.compile(SCHEME_REGEX);

// Drop numeric, and "+-." for now
// TODO does not allow for optional userinfo.
// Validation of character set is done by isValidAuthority
private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\."; // allows for IPV4 but not IPV6
private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}-\\."; // allows for IPV4 but not IPV6
private static final String IPV6_REGEX = "[0-9a-fA-F:]+"; // do this as separate match because : could cause ambiguity with port prefix

// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
Expand All @@ -150,7 +150,7 @@ public class UrlValidator extends AbstractValidator {
USERINFO_CHARS_REGEX + "+:" + // At least one character for the name
USERINFO_CHARS_REGEX + "*@"; // password may be absent
private static final String AUTHORITY_REGEX =
"(?:\\[("+IPV6_REGEX+")\\]|(?:(?:"+USERINFO_FIELD_REGEX+")?([" + AUTHORITY_CHARS_REGEX + "]*)))(:\\d*)?(.*)?";
"(?:\\[("+IPV6_REGEX+")]|(?:(?:"+USERINFO_FIELD_REGEX+")?([" + AUTHORITY_CHARS_REGEX + "]*)))(:\\d*)?(.*)?";
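// Capture groups: 1 = IPv6 address (inside [...]); optional userinfo (e.g. user:pass@) is not captured; 2 = host name / IPv4; 3 = port (including the leading ':'); 4 = trailing extra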
private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX);

Expand All @@ -163,8 +163,8 @@ public class UrlValidator extends AbstractValidator {
*/
private static final int PARSE_AUTHORITY_EXTRA = 4;

private static final String PATH_REGEX = "^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$";
private static final Pattern PATH_PATTERN = Pattern.compile(PATH_REGEX);
private static final String PATH_REGEX = "^(/[-\\w:@&?=+,.!/~*'%$_;()]*)?$";
private static final Pattern PATH_PATTERN = Pattern.compile(PATH_REGEX, Pattern.UNICODE_CHARACTER_CLASS);

private static final String QUERY_REGEX = "^(.*)$";
private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,8 @@ void testValidatorName() {
new ResultPair("", true),
new ResultPair("/test1/file", true),
new ResultPair("/..//file", false),
new ResultPair("/test1//file", false)
new ResultPair("/test1//file", false),
new ResultPair("/projekte-und-themen/eulen-falken-und-deren-nistkästen/trafotürme", true)
};

//Test allow2slash, noFragment
Expand Down

0 comments on commit a7958a1

Please sign in to comment.