@@ -15,10 +15,11 @@ class Url {
15
15
private $ relativePath ; // no trailing "/"
16
16
private $ assumedHost ;
17
17
private $ assumedPath ;
18
+ private $ hostUnmodified ;
18
19
19
20
public function __construct ($ url ) {
20
21
$ this ->url = $ url ;
21
- $ parts = parse_url ($ url );
22
+ $ parts = parse_url ($ url ?: '' );
22
23
$ this ->scheme = isset ($ parts ["scheme " ]) ? strtolower ($ parts ["scheme " ]) : NULL ;
23
24
$ this ->port = isset ($ parts ["port " ]) ? $ parts ["port " ] : NULL ;
24
25
$ this ->host = isset ($ parts ["host " ]) ? strtolower ($ parts ["host " ]) : NULL ;
@@ -32,12 +33,14 @@ public function __construct($url) {
32
33
33
34
if (!isset ($ parts ["host " ]) && isset ($ parts ["path " ])) { // This is probably a host written like this: nitropack.io
34
35
if (preg_match ("/^[^\s\/]+?\.[^\s\/]+$/ " , $ parts ["path " ])) {
35
- $ this ->host = $ this ->path ;
36
+ $ this ->host = strtolower ($ this ->path );
37
+ $ this ->hostUnmodified = $ this ->path ;
36
38
$ this ->path = "/ " ;
37
39
$ this ->assumedHost = true ;
38
40
$ this ->assumedPath = true ;
39
41
} else if (preg_match ("/^([^\s\/]+?\.[^\s\/]+)(\/.*?)$/ " , $ parts ["path " ], $ matches )) {
40
- $ this ->host = $ matches [1 ];
42
+ $ this ->host = strtolower ($ matches [1 ]);
43
+ $ this ->hostUnmodified = $ matches [1 ];
41
44
$ this ->path = $ matches [2 ];
42
45
$ this ->assumedHost = true ;
43
46
}
@@ -51,6 +54,25 @@ public function __toString() {
51
54
}
52
55
53
56
/**
 * Recomputes the values derived from the parsed URL parts.
 *
 * The root URL is refreshed first, the relative path second.
 */
private function buildParts()
{
    $this->updateRootUrl();
    $this->updateRelativePath();
}
60
+
61
/**
 * Picks the scheme to use for this URL.
 *
 * Order of preference: this URL's own scheme, then the scheme reported by
 * the base URL (when a base is set), and finally "http".
 *
 * @return string the chosen scheme
 */
private function suggestScheme()
{
    // An explicit scheme on this URL always wins.
    if ($this->scheme) {
        return $this->scheme;
    }

    // No base to borrow from: use the default.
    if (!$this->base) {
        return "http";
    }

    $baseScheme = $this->base->getScheme();

    return $baseScheme ? $baseScheme : "http";
}
74
+
75
+ private function updateRootUrl () {
54
76
if ($ this ->host ) {
55
77
$ scheme = $ this ->suggestScheme ();
56
78
$ this ->rootUrl = $ scheme . ":// " . $ this ->host ;
@@ -62,7 +84,9 @@ private function buildParts() {
62
84
} else if ($ this ->base ) {
63
85
$ this ->rootUrl = $ this ->base ->getRootUrl ();
64
86
}
87
+ }
65
88
89
+ private function updateRelativePath () {
66
90
if (substr ($ this ->path , -1 ) != '/ ' && $ this ->path != '/ ' ) {
67
91
$ this ->relativePath = dirname ($ this ->path );
68
92
} else {
@@ -78,20 +102,6 @@ private function buildParts() {
78
102
}
79
103
}
80
104
81
- private function suggestScheme () {
82
- if (!$ this ->scheme ) {
83
- if ($ this ->base ) {
84
- $ scheme = $ this ->base ->getScheme () ? $ this ->base ->getScheme () : "http " ;
85
- } else {
86
- $ scheme = "http " ;
87
- }
88
- } else {
89
- $ scheme = $ this ->scheme ;
90
- }
91
-
92
- return $ scheme ;
93
- }
94
-
95
105
/** @return mixed the URL exactly as it was passed to the constructor */
public function getUrl()
{
    return $this->url;
}

/** @return string own scheme, else the base URL's scheme, else "http" (see suggestScheme()) */
public function getScheme()
{
    return $this->suggestScheme();
}

/** @return mixed the stored port, or NULL when none is set */
public function getPort()
{
    return $this->port;
}
@@ -104,13 +114,29 @@ public function getBaseUrl() { return $this->base ? $this->base->getNormalized()
104
114
/** @return mixed the cached root URL as last computed by updateRootUrl() */
public function getRootUrl()
{
    return $this->rootUrl;
}

/** @return mixed the cached relative path (stored without a trailing "/") */
public function getRelativePath()
{
    return $this->relativePath;
}
106
116
107
- public function setScheme ($ scheme ) { $ this ->scheme = $ scheme ; }
108
- public function setPort ($ port ) { $ this ->port = $ port ; }
109
- public function setHost ($ host ) { $ this ->host = $ host ; }
110
- public function setPath ($ path ) { $ this ->path = $ path ; }
111
117
/**
 * Stores the query part; no derived value is recomputed here.
 *
 * @param mixed $query
 */
public function setQuery($query)
{
    $this->query = $query;
}

/**
 * Stores the hash (fragment) part; no derived value is recomputed here.
 *
 * @param mixed $hash
 */
public function setHash($hash)
{
    $this->hash = $hash;
}
113
119
120
/**
 * Replaces the path and refreshes the relative path derived from it.
 *
 * @param mixed $path
 */
public function setPath($path)
{
    $this->path = $path;

    // relativePath is computed from path, so it must follow every change.
    $this->updateRelativePath();
}
124
+
125
/**
 * Replaces the port and then recomputes the cached root URL.
 *
 * @param mixed $port
 */
public function setPort($port)
{
    $this->port = $port;

    $this->updateRootUrl();
}
129
+
130
/**
 * Replaces the scheme and then recomputes the cached root URL.
 *
 * The value is stored as given.
 *
 * @param mixed $scheme
 */
public function setScheme($scheme)
{
    $this->scheme = $scheme;

    $this->updateRootUrl();
}
134
+
135
/**
 * Replaces the host and then recomputes the cached root URL.
 *
 * The value is stored as given.
 *
 * @param mixed $host
 */
public function setHost($host)
{
    $this->host = $host;

    $this->updateRootUrl();
}
139
+
114
140
public function setBaseUrl ($ url ) {
115
141
if ($ url instanceof Url) {
116
142
$ this ->base = $ url ;
@@ -119,7 +145,7 @@ public function setBaseUrl($url) {
119
145
}
120
146
121
147
if ($ this ->assumedHost ) {
122
- $ this ->path = $ this ->assumedPath ? $ this ->host : $ this ->host .$ this ->path ;
148
+ $ this ->path = $ this ->assumedPath ? $ this ->hostUnmodified : $ this ->host .$ this ->path ;
123
149
$ this ->host = NULL ;
124
150
$ this ->assumedHost = false ;
125
151
$ this ->assumedPath = false ;
@@ -130,8 +156,6 @@ public function setBaseUrl($url) {
130
156
131
157
public function getNormalized ($ resolvePathNavigation = true , $ includeHash = true ) {
132
158
$ path = $ this ->path ;
133
- $ query = $ this ->query ;
134
- $ hash = $ this ->hash ;
135
159
136
160
$ url = "" ;
137
161
if (strlen ($ path ) > 0 && $ path [0 ] == "/ " ) { // absolute path - use rootUrl
@@ -145,6 +169,15 @@ public function getNormalized($resolvePathNavigation = true, $includeHash = true
145
169
$ path = $ this ->resolvePathNavigation ($ path , $ resolvePathNavigation );
146
170
}
147
171
172
+ if (strpos ($ path ,'% ' ) !== false ) {
173
+ // Based on RFC3986 (https://www.ietf.org/rfc/rfc3986.txt):
174
+ // For consistency, URI producers and normalizers should use uppercase hexadecimal digits for all
175
+ // percent-encodings.
176
+ $ path = preg_replace_callback ('/%[a-fA-F\d]{2}/ ' , function ($ matches ) {
177
+ return strtoupper ($ matches [0 ]);
178
+ }, $ path );
179
+ }
180
+
148
181
$ path_parts = explode ('/ ' , $ path );
149
182
$ final_parts = array ();
150
183
@@ -177,6 +210,42 @@ public function getNormalized($resolvePathNavigation = true, $includeHash = true
177
210
return $ url ;
178
211
}
179
212
213
/**
 * Checks if the URL object produces a valid URL.
 *
 * The normalized URL is run through FILTER_VALIDATE_URL. Host characters that
 * the filter rejects but that are tolerated here (currently only "_") are
 * swapped for "-" while the check runs. The original host is put back before
 * returning, also when normalization throws. When no character had to be
 * swapped, the object is not touched at all.
 *
 * @return boolean false when there is no host (probably a relative path) or
 *                 when the filter rejects the normalized URL, true otherwise
 */
public function isValid()
{
    $originalHost = $this->getHost();

    if (empty($originalHost)) {
        // probably a relative path
        return false;
    }

    // Add more compatibility chars in the array below.
    // FILTER_VALIDATE_URL validates against http://www.faqs.org/rfcs/rfc2396.html,
    // which, for example, treats underscore("_") as invalid for hosts.
    $charsToReplace = ['_'];
    $replacementChar = '-';

    // do we expect to have multibyte string for URL?
    // filter_var will also fail with multibyte string as URL
    $hostForCheck = str_replace($charsToReplace, $replacementChar, $originalHost);
    $hostSwapped = $hostForCheck !== $originalHost;

    try {
        if ($hostSwapped) {
            $this->setHost($hostForCheck);
        }

        return filter_var($this->getNormalized(), FILTER_VALIDATE_URL) !== false;
    } finally {
        // Restore the original host, but only if it was actually replaced,
        // so a plain validity check leaves the object untouched.
        if ($hostSwapped) {
            $this->setHost($originalHost);
        }
    }
}
248
+
180
249
private function normalizeQueryStr ($ queryStr ) {
181
250
$ queryStr = rawurldecode ($ queryStr );
182
251
$ newQueryStr = "" ;
0 commit comments