// Warning: use this code carefully, and at your own risk.
package main
import (
"fmt"
"net/url"
"strings"
)
/*
After hours of searching, I couldn't find any method that produces exactly the same result as the JS encodeURIComponent function.
In my situation I needed to write a signing method that encodes user input exactly the way JS encodeURIComponent does.
This function solved my problem.
*/
// main demonstrates the three encodings side by side: the raw output of
// url.Values.Encode, the same text after compatibleRFC3986Encode, and that
// result after compatibleJSEncodeURIComponent. Each stage is printed on its
// own line.
func main() {
	const sample = "+!+'( )*-._~0-👿 👿9a-zA-Z 中文测试 test with ❤️ !@#$%^&&*()~<>?/.,;'[][]:{{}|{}|"

	form := url.Values{}
	form.Set("test_string", sample)

	// Stage 1: Go's form encoding.
	goEncoded := form.Encode()
	fmt.Println(goEncoded)

	// Stage 2: "+" rewritten as "%20".
	rfcEncoded := compatibleRFC3986Encode(goEncoded)
	fmt.Println("RFC3986", rfcEncoded)

	// Stage 3: applied on top of stage 2, restoring the characters that
	// encodeURIComponent leaves unescaped.
	jsEncoded := compatibleJSEncodeURIComponent(rfcEncoded)
	fmt.Println("JS encodeURIComponent", jsEncoded)
}
// compatibleRFC3986Encode returns str with every "+" rewritten as "%20".
//
// It is meant to be applied to text that is already URL-encoded (main feeds
// it the output of url.Values.Encode), where "+" stands for a space. It does
// not encode anything itself.
func compatibleRFC3986Encode(str string) string {
	return strings.ReplaceAll(str, "+", "%20")
}
// compatibleJSEncodeURIComponent adjusts an already URL-encoded string so it
// looks like the output of JavaScript's encodeURIComponent, which is more
// lenient than Go's encoder.
//
// "+" becomes "%20", and the escapes %21, %27, %28, %29 and %2A are turned
// back into the literal characters ! ' ( ) and *. Only the upper-case hex
// spellings are matched. The substitutions run one after another in table
// order.
func compatibleJSEncodeURIComponent(str string) string {
	substitutions := [...][2]string{
		{"+", "%20"},
		{"%21", "!"},
		{"%27", "'"},
		{"%28", "("},
		{"%29", ")"},
		{"%2A", "*"},
	}

	out := str
	for _, sub := range substitutions {
		out = strings.ReplaceAll(out, sub[0], sub[1])
	}
	return out
}
* encoded into %2A
# encoded into %23
% encoded into %25
< encoded into %3C
> encoded into %3E
+ encoded into %2B
enter key (#13#10) is encoded into %0D%0A
package main
import (
"fmt"
"strconv"
)
// encoding names the part of a URL a string belongs to. shouldEscape, escape
// and unescape take it to pick their escaping rules.
type encoding int

// The URL sections the escaping helpers distinguish. Numbering starts at 1,
// so the zero value of encoding is not one of these modes.
const (
	encodePath encoding = iota + 1
	encodeHost
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)
// EscapeError is the error reported for a malformed percent-escape. Its
// value is the offending text.
type EscapeError string

// Error implements the error interface. The message is the fixed prefix
// "invalid URL escape " followed by the offending text in Go-quoted form.
func (e EscapeError) Error() string {
	offending := strconv.Quote(string(e))
	return "invalid URL escape " + offending
}
// ishex reports whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Both letter cases are accepted. Any byte that is not a hex digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
// shouldEscape reports whether byte c must be percent-escaped when it
// appears in the part of a URL selected by mode, following RFC 3986.
//
// Note that shouldEscape does not yet check all reserved characters
// correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
	// §2.3 Unreserved characters: letters, digits and the marks - _ . ~
	// are never escaped.
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
		return false
	case c == '-' || c == '_' || c == '.' || c == '~':
		return false
	}

	if mode == encodeHost {
		// §3.2.2 Host allows the sub-delims
		//	"!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
		// as part of reg-name. ':' is added because :port is treated as part
		// of the host, and '[' ']' because [ipv6]:port is too.
		switch c {
		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']':
			return false
		}
	}

	// §2.2 Reserved characters get per-section treatment below; every other
	// byte that reached this point is escaped.
	reserved := false
	switch c {
	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@':
		reserved = true
	}
	if !reserved {
		return true
	}

	// Different sections of the URL allow a few of the reserved characters
	// to appear unescaped.
	switch mode {
	case encodePath: // §3.3
		// The RFC allows : @ & = + $ but saves / ; , for assigning meaning
		// to individual path segments. The path is handled as a whole here,
		// so those are allowed as well. That leaves only ? to escape.
		return c == '?'
	case encodeUserPassword: // §3.2.1
		// The RFC allows ; : & = + $ and , in userinfo, so only @ / and ?
		// would need escaping. Userinfo parsing treats ':' as special, so
		// it is escaped too.
		return c == '@' || c == '/' || c == '?' || c == ':'
	case encodeFragment: // §4.1
		// The RFC text is silent but the grammar allows everything, so
		// nothing is escaped.
		return false
	}

	// encodeQueryComponent (§3.4 reserves everything) and any remaining
	// mode: escape.
	return true
}
// escape returns s with every byte for which shouldEscape(c, mode) is true
// replaced by its "%XX" form, using upper-case hex digits. In
// encodeQueryComponent mode a space is written as '+' instead. When no byte
// needs changing, s itself is returned.
func escape(s string, mode encoding) string {
	const hexDigits = "0123456789ABCDEF"
	plusForSpace := mode == encodeQueryComponent

	// First pass: find out whether anything changes and how much the
	// result grows. A "%XX" escape adds two bytes; a '+' adds none.
	changed, growth := false, 0
	for i := 0; i < len(s); i++ {
		if !shouldEscape(s[i], mode) {
			continue
		}
		changed = true
		if !(plusForSpace && s[i] == ' ') {
			growth += 2
		}
	}
	if !changed {
		return s
	}

	// Second pass: build the escaped text.
	out := make([]byte, 0, len(s)+growth)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if plusForSpace && c == ' ' {
			out = append(out, '+')
			continue
		}
		if shouldEscape(c, mode) {
			out = append(out, '%', hexDigits[c>>4], hexDigits[c&15])
			continue
		}
		out = append(out, c)
	}
	return string(out)
}
// unescape decodes the percent-escapes in s. mode names the section of the
// URL being decoded; it only matters for '+', which becomes a space in
// encodeQueryComponent mode and stays '+' otherwise.
//
// A '%' that is not followed by two hex digits makes unescape return an
// EscapeError carrying the offending text (at most three bytes, starting at
// the '%') together with an empty string. When s has nothing to decode it is
// returned as is.
func unescape(s string, mode encoding) (string, error) {
	plusIsSpace := mode == encodeQueryComponent

	// Validation pass: count the escapes and note whether any '+' needs
	// rewriting.
	escapes := 0
	rewritePlus := false
	for i := 0; i < len(s); {
		c := s[i]
		if c == '%' {
			escapes++
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				bad := s[i:]
				if len(bad) > 3 {
					bad = bad[:3]
				}
				return "", EscapeError(bad)
			}
			i += 3
			continue
		}
		if c == '+' {
			rewritePlus = plusIsSpace
		}
		i++
	}
	if escapes == 0 && !rewritePlus {
		return s, nil
	}

	// Decoding pass: every "%XX" shrinks to one byte.
	out := make([]byte, 0, len(s)-2*escapes)
	for i := 0; i < len(s); {
		c := s[i]
		if c == '%' {
			out = append(out, unhex(s[i+1])<<4|unhex(s[i+2]))
			i += 3
			continue
		}
		if c == '+' && plusIsSpace {
			c = ' '
		}
		out = append(out, c)
		i++
	}
	return string(out), nil
}
// EncodeUriComponent percent-encodes rawString the way JavaScript's
// encodeURIComponent does: every byte is written as "%XX" (upper-case hex)
// except ASCII letters, decimal digits and the characters
//
//	- _ . ! ~ * ' ( )
//
// The previous implementation went through escape in encodeFragment mode,
// which left reserved characters such as '/', '&', '=', '+' and '?'
// unescaped while escaping '!', '*', '\'', '(' and ')'. That is the opposite
// of encodeURIComponent on both counts.
//
// Non-ASCII text is encoded byte by byte, so a UTF-8 string yields the same
// escapes as in JavaScript. If nothing needs escaping, rawString is returned
// unchanged.
func EncodeUriComponent(rawString string) string {
	const upperHex = "0123456789ABCDEF"

	// keep reports whether encodeURIComponent leaves c as it is.
	keep := func(c byte) bool {
		switch {
		case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
			return true
		}
		switch c {
		case '-', '_', '.', '!', '~', '*', '\'', '(', ')':
			return true
		}
		return false
	}

	// Count the bytes to escape so the buffer can be sized exactly.
	escapes := 0
	for i := 0; i < len(rawString); i++ {
		if !keep(rawString[i]) {
			escapes++
		}
	}
	if escapes == 0 {
		return rawString
	}

	out := make([]byte, 0, len(rawString)+2*escapes)
	for i := 0; i < len(rawString); i++ {
		c := rawString[i]
		if keep(c) {
			out = append(out, c)
			continue
		}
		out = append(out, '%', upperHex[c>>4], upperHex[c&15])
	}
	return string(out)
}
// DecodeUriCompontent decodes the percent-escapes in encoded, as
// JavaScript's decodeURIComponent does. A malformed escape yields an
// EscapeError from unescape. (The misspelling in the name is kept so existing
// callers continue to compile.)
//
// A literal '+' is returned as '+'. The earlier version decoded in
// encodeQueryComponent mode, where unescape turns '+' into a space; that
// differs from decodeURIComponent and corrupted any input containing an
// unescaped plus sign.
func DecodeUriCompontent(encoded string) (string, error) {
	// unescape rewrites '+' only when mode == encodeQueryComponent, so any
	// other mode preserves it.
	return unescape(encoded, encodeFragment)
}
// https://golang.org/src/net/url/url.go
// http://remove-line-numbers.ruurtjan.com/
// main round-trips a sample string through EncodeUriComponent and
// DecodeUriCompontent, printing the encoded form and then the decoded form.
// A decoding failure is reported instead of being silently dropped.
func main() {
	// Results can be compared with http://www.url-encode-decode.com/
	origin := "äöüHel/lo world"

	encoded := EncodeUriComponent(origin)
	fmt.Println(encoded)

	decoded, err := DecodeUriCompontent(encoded)
	if err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Println(decoded)
}
8条答案
按热度按时间o0lyfsai1#
您可以使用net/url模块进行所有您想要的URL编码。它不会为URL的各个部分分解单独的编码函数,您必须让它构建整个URL。在看了源代码之后,我认为它做得非常好,符合标准。
下面是一个例子(playground link)
其输出为:
5lwkijsr2#
MDN on encodeURIComponent:
encodeURIComponent转义除以下字符之外的所有字符:字母,十进制数字,
'-', '_', '.', '!', '~', '*', ''', '(', ')'
从Go语言的url.QueryEscape实现(具体来说,是
shouldEscape
私有函数)中,转义除以下字符之外的所有字符:字母、十进制数字、'-', '_', '.', '~'
。与Javascript不同,Go的QueryEscape()将转义
'!', '*', ''', '(', ')'
。基本上,Go语言的版本是严格符合RFC-3986的。JavaScript的比较宽松。来自MDN:如果希望更严格地遵守RFC 3986(该规范保留了!、'、(、)和 *),即使这些字符没有正式的URI定界用途,也可以安全地使用以下代码:
qojgxg4l3#
到了Go 1.8,这种情况发生了变化。除了旧的
QueryEscape
之外,我们现在还可以使用PathEscape
来编码路径分量,以及与之对应的解码函数PathUnescape
。nnvyjq4y4#
不如这样:
iovurdzv5#
为了模仿Javascript的
encodeURIComponent()
,我创建了一个字符串助手函数。示例:将
"My String"
变为"My%20String"
https://github.com/mrap/stringutil/blob/master/urlencode.go
vyswwuz26#
如果有人想得到与JS encodeURIComponent完全一致的结果,可以试试我的函数;写法不太优雅,但很好用。
https://gist.github.com/czyang/7ae30f4f625fee14cfc40c143e1b78bf
lfapxunr7#
希望这个能帮上忙
xkftehaa8#
下面是escape和unescape的实现(摘自go源代码):