diff --git a/README.md b/README.md index 8461279..5e401ff 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Based on the [wikipedia paper][wiki] and the [RFC 3986 document][rfc]. ## Changelog +* **v1.2.0** : Switch to `net/url` url escaping. * **v1.1.1** : Fix failing test due to Go1.12 changes (thanks to @ianlancetaylor). * **2016-11-14 (v1.1.0)** : IDN: Conform to RFC 5895: Fold character width (thanks to @beeker1121). * **2016-07-27 (v1.0.0)** : Normalize IDN to ASCII (thanks to @zenovich). diff --git a/purell.go b/purell.go index 3b086ce..8d3f888 100644 --- a/purell.go +++ b/purell.go @@ -90,8 +90,8 @@ var rxHostInteriorDots = regexp.MustCompile(`\.+`) var rxEmptyPort = regexp.MustCompile(`:+$`) // Map of flags to implementation function. -// FlagDecodeUnnecessaryEscapes has no action, since it is done automatically -// by parsing the string as an URL. Same for FlagUppercaseEscapes and FlagRemoveEmptyQuerySeparator. +// FlagDecodeUnnecessaryEscapes, FlagEncodeNecessaryEscapes, and FlagUppercaseEscapes +// have no action, since they are done automatically by parsing the string as an URL. // Since maps have undefined traversing order, make a slice of ordered keys var flagsOrder = []NormalizationFlags{ @@ -105,6 +105,7 @@ var flagsOrder = []NormalizationFlags{ FlagRemoveDuplicateSlashes, FlagRemoveWWW, FlagAddWWW, + FlagRemoveEmptyQuerySeparator, FlagSortQuery, FlagDecodeDWORDHost, FlagDecodeOctalHost, @@ -127,6 +128,7 @@ var flags = map[NormalizationFlags]func(*url.URL){ FlagRemoveDuplicateSlashes: removeDuplicateSlashes, FlagRemoveWWW: removeWWW, FlagAddWWW: addWWW, + FlagRemoveEmptyQuerySeparator: removeEmptyQuerySeparator, FlagSortQuery: sortQuery, FlagDecodeDWORDHost: decodeDWORDHost, FlagDecodeOctalHost: decodeOctalHost, @@ -175,12 +177,14 @@ func NormalizeURLString(u string, f NormalizationFlags) (string, error) { // NormalizeURL returns the normalized string. // It takes a parsed URL object as input, as well as the normalization flags. func NormalizeURL(u *url.URL, f NormalizationFlags) string { + u.RawPath = "" for _, k := range flagsOrder { if f&k == k { flags[k](u) } } - return escapeURL(u) + + return u.String() } func lowercaseScheme(u *url.URL) { @@ -225,9 +229,7 @@ func addTrailingSlash(u *url.URL) { u.Path += "/" } } else if l = len(u.Host); l > 0 { - if !strings.HasSuffix(u.Host, "/") { - u.Host += "/" - } + u.Path = "/" } } @@ -293,6 +295,10 @@ func addWWW(u *url.URL) { } } +func removeEmptyQuerySeparator(u *url.URL) { + u.ForceQuery = false +} + func sortQuery(u *url.URL) { q := u.Query() diff --git a/purell_test.go b/purell_test.go index a1264a9..2682dad 100644 --- a/purell_test.go +++ b/purell_test.go @@ -1,10 +1,8 @@ package purell import ( - "fmt" "net/url" "testing" - "unicode" ) type testCase struct { @@ -748,6 +746,11 @@ func runCase(tc *testCase, t *testing.T) { } } +/* +* NOTE: these tests as written do not test URL path escaping, since they include chars +* read as query strings &c. We recommend using the default net/url escaping code; if you +* need something more permissive, you'll need to rely on urlesc. + func TestDecodeUnnecessaryEscapesAll(t *testing.T) { var url = "http://host/" @@ -787,3 +790,4 @@ func TestEncodeNecessaryEscapesAll(t *testing.T) { t.Errorf("EncodeNecessaryEscapesAll:\nwant\n%s\ngot\n%s", want, s) } } +*/ diff --git a/urlesc.go b/urlesc.go deleted file mode 100644 index 53d815f..0000000 --- a/urlesc.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file implements query escaping as per RFC 3986. -// It contains some parts of the net/url package, modified so as to allow -// some reserved characters incorrectly escaped by net/url. -// See https://github.com/golang/go/issues/5684 -package purell - -import ( - "bytes" - "net/url" - "strings" -) - -type encoding int - -const ( - encodePath encoding = 1 + iota - encodeUserPassword - encodeQueryComponent - encodeFragment -) - -// Return true if the specified character should be escaped when -// appearing in a URL string, according to RFC 3986. -func shouldEscape(c byte, mode encoding) bool { - // §2.3 Unreserved characters (alphanum) - if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { - return false - } - - switch c { - case '-', '.', '_', '~': // §2.3 Unreserved characters (mark) - return false - - // §2.2 Reserved characters (reserved) - case ':', '/', '?', '#', '[', ']', '@', // gen-delims - '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims - // Different sections of the URL allow a few of - // the reserved characters to appear unescaped. - switch mode { - case encodePath: // §3.3 - // The RFC allows sub-delims and : @. - // '/', '[' and ']' can be used to assign meaning to individual path - // segments. This package only manipulates the path as a whole, - // so we allow those as well. That leaves only ? and # to escape. - return c == '?' || c == '#' - - case encodeUserPassword: // §3.2.1 - // The RFC allows : and sub-delims in - // userinfo. The parsing of userinfo treats ':' as special so we must escape - // all the gen-delims. - return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@' - - case encodeQueryComponent: // §3.4 - // The RFC allows / and ?. - return c != '/' && c != '?' - - case encodeFragment: // §4.1 - // The RFC text is silent but the grammar allows - // everything, so escape nothing but # - return c == '#' - } - } - - // Everything else must be escaped. - return true -} - -func escape(s string, mode encoding) string { - spaceCount, hexCount := 0, 0 - for i := 0; i < len(s); i++ { - c := s[i] - if shouldEscape(c, mode) { - if c == ' ' && mode == encodeQueryComponent { - spaceCount++ - } else { - hexCount++ - } - } - } - - if spaceCount == 0 && hexCount == 0 { - return s - } - - t := make([]byte, len(s)+2*hexCount) - j := 0 - for i := 0; i < len(s); i++ { - switch c := s[i]; { - case c == ' ' && mode == encodeQueryComponent: - t[j] = '+' - j++ - case shouldEscape(c, mode): - t[j] = '%' - t[j+1] = "0123456789ABCDEF"[c>>4] - t[j+2] = "0123456789ABCDEF"[c&15] - j += 3 - default: - t[j] = s[i] - j++ - } - } - return string(t) -} - -var uiReplacer = strings.NewReplacer( - "%21", "!", - "%27", "'", - "%28", "(", - "%29", ")", - "%2A", "*", -) - -// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986. -func unescapeUserinfo(s string) string { - return uiReplacer.Replace(s) -} - -// Escape reassembles the URL into a valid URL string. -// The general form of the result is one of: -// -// scheme:opaque -// scheme://userinfo@host/path?query#fragment -// -// If u.Opaque is non-empty, String uses the first form; -// otherwise it uses the second form. -// -// In the second form, the following rules apply: -// - if u.Scheme is empty, scheme: is omitted. -// - if u.User is nil, userinfo@ is omitted. -// - if u.Host is empty, host/ is omitted. -// - if u.Scheme and u.Host are empty and u.User is nil, -// the entire scheme://userinfo@host/ is omitted. -// - if u.Host is non-empty and u.Path begins with a /, -// the form host/path does not add its own /. -// - if u.RawQuery is empty, ?query is omitted. -// - if u.Fragment is empty, #fragment is omitted. -func escapeURL(u *url.URL) string { - var buf bytes.Buffer - if u.Scheme != "" { - buf.WriteString(u.Scheme) - buf.WriteByte(':') - } - if u.Opaque != "" { - buf.WriteString(u.Opaque) - } else { - if u.Scheme != "" || u.Host != "" || u.User != nil { - buf.WriteString("//") - if ui := u.User; ui != nil { - buf.WriteString(unescapeUserinfo(ui.String())) - buf.WriteByte('@') - } - if h := u.Host; h != "" { - buf.WriteString(h) - } - } - if u.Path != "" && u.Path[0] != '/' && u.Host != "" { - buf.WriteByte('/') - } - buf.WriteString(escape(u.Path, encodePath)) - } - if u.RawQuery != "" { - buf.WriteByte('?') - buf.WriteString(u.RawQuery) - } - if u.Fragment != "" { - buf.WriteByte('#') - buf.WriteString(escape(u.Fragment, encodeFragment)) - } - return buf.String() -} diff --git a/urlesc_test.go b/urlesc_test.go deleted file mode 100644 index f6d0fb4..0000000 --- a/urlesc_test.go +++ /dev/null @@ -1,592 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package purell - -import ( - "net/url" - "testing" -) - -type URLTest struct { - in string - out *url.URL - roundtrip string // expected result of reserializing the URL; empty means same as "in". -} - -var urltests = []URLTest{ - // no path - { - "http://www.google.com", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - }, - "", - }, - // path - { - "http://www.google.com/", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/", - }, - "", - }, - // path with hex escaping - { - "http://www.google.com/file%20one%26two", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/file one&two", - }, - "http://www.google.com/file%20one&two", - }, - // user - { - "ftp://webmaster@www.google.com/", - &url.URL{ - Scheme: "ftp", - User: url.User("webmaster"), - Host: "www.google.com", - Path: "/", - }, - "", - }, - // escape sequence in username - { - "ftp://john%20doe@www.google.com/", - &url.URL{ - Scheme: "ftp", - User: url.User("john doe"), - Host: "www.google.com", - Path: "/", - }, - "ftp://john%20doe@www.google.com/", - }, - // query - { - "http://www.google.com/?q=go+language", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/", - RawQuery: "q=go+language", - }, - "", - }, - // query with hex escaping: NOT parsed - { - "http://www.google.com/?q=go%20language", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/", - RawQuery: "q=go%20language", - }, - "", - }, - // %20 outside query - { - "http://www.google.com/a%20b?q=c+d", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/a b", - RawQuery: "q=c+d", - }, - "", - }, - // path without leading /, so no parsing - { - "http:www.google.com/?q=go+language", - &url.URL{ - Scheme: "http", - Opaque: "www.google.com/", - RawQuery: "q=go+language", - }, - "http:www.google.com/?q=go+language", - }, - // path without leading /, so no parsing - { - "http:%2f%2fwww.google.com/?q=go+language", - &url.URL{ - Scheme: "http", - Opaque: "%2f%2fwww.google.com/", - RawQuery: "q=go+language", - }, - "http:%2f%2fwww.google.com/?q=go+language", - }, - // non-authority with path - { - "mailto:/webmaster@golang.org", - &url.URL{ - Scheme: "mailto", - Path: "/webmaster@golang.org", - }, - "mailto:///webmaster@golang.org", // unfortunate compromise - }, - // non-authority - { - "mailto:webmaster@golang.org", - &url.URL{ - Scheme: "mailto", - Opaque: "webmaster@golang.org", - }, - "", - }, - // unescaped :// in query should not create a scheme - { - "/foo?query=http://bad", - &url.URL{ - Path: "/foo", - RawQuery: "query=http://bad", - }, - "", - }, - // leading // without scheme should create an authority - { - "//foo", - &url.URL{ - Host: "foo", - }, - "", - }, - // leading // without scheme, with userinfo, path, and query - { - "//user@foo/path?a=b", - &url.URL{ - User: url.User("user"), - Host: "foo", - Path: "/path", - RawQuery: "a=b", - }, - "", - }, - // Three leading slashes isn't an authority, but doesn't return an error. - // (We can't return an error, as this code is also used via - // ServeHTTP -> ReadRequest -> Parse, which is arguably a - // different URL parsing context, but currently shares the - // same codepath) - { - "///threeslashes", - &url.URL{ - Path: "///threeslashes", - }, - "", - }, - { - "http://user:password@google.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword("user", "password"), - Host: "google.com", - }, - "http://user:password@google.com", - }, - // unescaped @ in username should not confuse host - { - "http://j@ne:password@google.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword("j@ne", "password"), - Host: "google.com", - }, - "http://j%40ne:password@google.com", - }, - // unescaped @ in password should not confuse host - { - "http://jane:p@ssword@google.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword("jane", "p@ssword"), - Host: "google.com", - }, - "http://jane:p%40ssword@google.com", - }, - { - "http://j@ne:password@google.com/p@th?q=@go", - &url.URL{ - Scheme: "http", - User: url.UserPassword("j@ne", "password"), - Host: "google.com", - Path: "/p@th", - RawQuery: "q=@go", - }, - "http://j%40ne:password@google.com/p@th?q=@go", - }, - { - "http://www.google.com/?q=go+language#foo", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/", - RawQuery: "q=go+language", - Fragment: "foo", - }, - "", - }, - { - "http://www.google.com/?q=go+language#foo%26bar", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/", - RawQuery: "q=go+language", - Fragment: "foo&bar", - }, - "http://www.google.com/?q=go+language#foo&bar", - }, - { - "file:///home/adg/rabbits", - &url.URL{ - Scheme: "file", - Host: "", - Path: "/home/adg/rabbits", - }, - "file:///home/adg/rabbits", - }, - // "Windows" paths are no exception to the rule. - // See golang.org/issue/6027, especially comment #9. - { - "file:///C:/FooBar/Baz.txt", - &url.URL{ - Scheme: "file", - Host: "", - Path: "/C:/FooBar/Baz.txt", - }, - "file:///C:/FooBar/Baz.txt", - }, - // case-insensitive scheme - { - "MaIlTo:webmaster@golang.org", - &url.URL{ - Scheme: "mailto", - Opaque: "webmaster@golang.org", - }, - "mailto:webmaster@golang.org", - }, - // Relative path - { - "a/b/c", - &url.URL{ - Path: "a/b/c", - }, - "a/b/c", - }, - // escaped '?' in username and password - { - "http://%3Fam:pa%3Fsword@google.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword("?am", "pa?sword"), - Host: "google.com", - }, - "", - }, - // escaped '?' and '#' in path - { - "http://example.com/%3F%23", - &url.URL{ - Scheme: "http", - Host: "example.com", - Path: "?#", - }, - "", - }, - // unescaped [ ] ! ' ( ) * in path - { - "http://example.com/[]!'()*", - &url.URL{ - Scheme: "http", - Host: "example.com", - Path: "[]!'()*", - }, - "http://example.com/[]!'()*", - }, - // escaped : / ? # [ ] @ in username and password - { - "http://%3A%2F%3F:%23%5B%5D%40@example.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword(":/?", "#[]@"), - Host: "example.com", - }, - "", - }, - // unescaped ! $ & ' ( ) * + , ; = in username and password - { - "http://!$&'():*+,;=@example.com", - &url.URL{ - Scheme: "http", - User: url.UserPassword("!$&'()", "*+,;="), - Host: "example.com", - }, - "", - }, - // unescaped = : / . ? = in query component - { - "http://example.com/?q=http://google.com/?q=", - &url.URL{ - Scheme: "http", - Host: "example.com", - Path: "/", - RawQuery: "q=http://google.com/?q=", - }, - "", - }, - // unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment - { - "http://example.com/#:/?%23[]@!$&'()*+,;=", - &url.URL{ - Scheme: "http", - Host: "example.com", - Path: "/", - Fragment: ":/?#[]@!$&'()*+,;=", - }, - "", - }, -} - -func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) { - for _, tt := range tests { - u, err := parse(tt.in) - if err != nil { - t.Errorf("%s(%q) returned error %s", name, tt.in, err) - continue - } - expected := tt.in - if len(tt.roundtrip) > 0 { - expected = tt.roundtrip - } - s := escapeURL(u) - if s != expected { - t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected) - } - } -} - -func TestURLString(t *testing.T) { - DoTestString(t, url.Parse, "Parse", urltests) - - // no leading slash on path should prepend - // slash on String() call - noslash := URLTest{ - "http://www.google.com/search", - &url.URL{ - Scheme: "http", - Host: "www.google.com", - Path: "search", - }, - "", - } - s := escapeURL(noslash.out) - if s != noslash.in { - t.Errorf("Expected %s; go %s", noslash.in, s) - } -} - -var resolveReferenceTests = []struct { - base, rel, expected string -}{ - // Absolute URL references - {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, - {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, - {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, - - // Path-absolute references - {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, - {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, - {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, - - // Scheme-relative - {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, - - // Path-relative references: - - // ... current directory - {"http://foo.com", ".", "http://foo.com/"}, - {"http://foo.com/bar", ".", "http://foo.com/"}, - {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, - - // ... going down - {"http://foo.com", "bar", "http://foo.com/bar"}, - {"http://foo.com/", "bar", "http://foo.com/bar"}, - {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, - - // ... going up - {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, - {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, - {"http://foo.com/bar", "..", "http://foo.com/"}, - {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, - // ".." in the middle (issue 3560) - {"http://foo.com/bar/baz", "quux/dotdot/../tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/../tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/.././tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/./../tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/././../../tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/./.././../tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/dotdot/./../../.././././tail", "http://foo.com/bar/quux/tail"}, - {"http://foo.com/bar/baz", "quux/./dotdot/../dotdot/../dot/./tail/..", "http://foo.com/bar/quux/dot/"}, - - // Remove any dot-segments prior to forming the target URI. - // http://tools.ietf.org/html/rfc3986#section-5.2.4 - {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/baz"}, - - // Triple dot isn't special - {"http://foo.com/bar", "...", "http://foo.com/..."}, - - // Fragment - {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, - - // RFC 3986: Normal Examples - // http://tools.ietf.org/html/rfc3986#section-5.4.1 - {"http://a/b/c/d;p?q", "g:h", "g:h"}, - {"http://a/b/c/d;p?q", "g", "http://a/b/c/g"}, - {"http://a/b/c/d;p?q", "./g", "http://a/b/c/g"}, - {"http://a/b/c/d;p?q", "g/", "http://a/b/c/g/"}, - {"http://a/b/c/d;p?q", "/g", "http://a/g"}, - {"http://a/b/c/d;p?q", "//g", "http://g"}, - {"http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y"}, - {"http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y"}, - {"http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s"}, - {"http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s"}, - {"http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s"}, - {"http://a/b/c/d;p?q", ";x", "http://a/b/c/;x"}, - {"http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x"}, - {"http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s"}, - {"http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q"}, - {"http://a/b/c/d;p?q", ".", "http://a/b/c/"}, - {"http://a/b/c/d;p?q", "./", "http://a/b/c/"}, - {"http://a/b/c/d;p?q", "..", "http://a/b/"}, - {"http://a/b/c/d;p?q", "../", "http://a/b/"}, - {"http://a/b/c/d;p?q", "../g", "http://a/b/g"}, - {"http://a/b/c/d;p?q", "../..", "http://a/"}, - {"http://a/b/c/d;p?q", "../../", "http://a/"}, - {"http://a/b/c/d;p?q", "../../g", "http://a/g"}, - - // RFC 3986: Abnormal Examples - // http://tools.ietf.org/html/rfc3986#section-5.4.2 - {"http://a/b/c/d;p?q", "../../../g", "http://a/g"}, - {"http://a/b/c/d;p?q", "../../../../g", "http://a/g"}, - {"http://a/b/c/d;p?q", "/./g", "http://a/g"}, - {"http://a/b/c/d;p?q", "/../g", "http://a/g"}, - {"http://a/b/c/d;p?q", "g.", "http://a/b/c/g."}, - {"http://a/b/c/d;p?q", ".g", "http://a/b/c/.g"}, - {"http://a/b/c/d;p?q", "g..", "http://a/b/c/g.."}, - {"http://a/b/c/d;p?q", "..g", "http://a/b/c/..g"}, - {"http://a/b/c/d;p?q", "./../g", "http://a/b/g"}, - {"http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/"}, - {"http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h"}, - {"http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h"}, - {"http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y"}, - {"http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y"}, - {"http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x"}, - {"http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x"}, - {"http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x"}, - {"http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x"}, - - // Extras. - {"https://a/b/c/d;p?q", "//g?q", "https://g?q"}, - {"https://a/b/c/d;p?q", "//g#s", "https://g#s"}, - {"https://a/b/c/d;p?q", "//g/d/e/f?y#s", "https://g/d/e/f?y#s"}, - {"https://a/b/c/d;p#s", "?y", "https://a/b/c/d;p?y"}, - {"https://a/b/c/d;p?q#s", "?y", "https://a/b/c/d;p?y"}, -} - -func TestResolveReference(t *testing.T) { - mustParse := func(url_ string) *url.URL { - u, err := url.Parse(url_) - if err != nil { - t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err) - } - return u - } - opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"} - for _, test := range resolveReferenceTests { - base := mustParse(test.base) - rel := mustParse(test.rel) - url := base.ResolveReference(rel) - if escapeURL(url) != test.expected { - t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, escapeURL(url)) - } - // Ensure that new instances are returned. - if base == url { - t.Errorf("Expected URL.ResolveReference to return new URL instance.") - } - // Test the convenience wrapper too. - url, err := base.Parse(test.rel) - if err != nil { - t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err) - } else if escapeURL(url) != test.expected { - t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, escapeURL(url)) - } else if base == url { - // Ensure that new instances are returned for the wrapper too. - t.Errorf("Expected URL.Parse to return new URL instance.") - } - // Ensure Opaque resets the URL. - url = base.ResolveReference(opaque) - if *url != *opaque { - t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque) - } - // Test the convenience wrapper with an opaque URL too. - url, err = base.Parse("scheme:opaque") - if err != nil { - t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err) - } else if *url != *opaque { - t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque) - } else if base == url { - // Ensure that new instances are returned, again. - t.Errorf("Expected URL.Parse to return new URL instance.") - } - } -} - -type shouldEscapeTest struct { - in byte - mode encoding - escape bool -} - -var shouldEscapeTests = []shouldEscapeTest{ - // Unreserved characters (§2.3) - {'a', encodePath, false}, - {'a', encodeUserPassword, false}, - {'a', encodeQueryComponent, false}, - {'a', encodeFragment, false}, - {'z', encodePath, false}, - {'A', encodePath, false}, - {'Z', encodePath, false}, - {'0', encodePath, false}, - {'9', encodePath, false}, - {'-', encodePath, false}, - {'-', encodeUserPassword, false}, - {'-', encodeQueryComponent, false}, - {'-', encodeFragment, false}, - {'.', encodePath, false}, - {'_', encodePath, false}, - {'~', encodePath, false}, - - // User information (§3.2.1) - {':', encodeUserPassword, true}, - {'/', encodeUserPassword, true}, - {'?', encodeUserPassword, true}, - {'@', encodeUserPassword, true}, - {'$', encodeUserPassword, false}, - {'&', encodeUserPassword, false}, - {'+', encodeUserPassword, false}, - {',', encodeUserPassword, false}, - {';', encodeUserPassword, false}, - {'=', encodeUserPassword, false}, -} - -func TestShouldEscape(t *testing.T) { - for _, tt := range shouldEscapeTests { - if shouldEscape(tt.in, tt.mode) != tt.escape { - t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape) - } - } -}