321 lines
8.0 KiB
Go
321 lines
8.0 KiB
Go
|
package stringutil
|
||
|
|
||
|
import (
|
||
|
"sync"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
var (
|
||
|
mu sync.Mutex
|
||
|
|
||
|
// Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
|
||
|
commonInitialismMap = map[string]struct{}{
|
||
|
"API": struct{}{},
|
||
|
"ASCII": struct{}{},
|
||
|
"CPU": struct{}{},
|
||
|
"CSRF": struct{}{},
|
||
|
"CSS": struct{}{},
|
||
|
"DNS": struct{}{},
|
||
|
"EOF": struct{}{},
|
||
|
"GUID": struct{}{},
|
||
|
"HTML": struct{}{},
|
||
|
"HTTP": struct{}{},
|
||
|
"HTTPS": struct{}{},
|
||
|
"ID": struct{}{},
|
||
|
"IP": struct{}{},
|
||
|
"JSON": struct{}{},
|
||
|
"LHS": struct{}{},
|
||
|
"QPS": struct{}{},
|
||
|
"RAM": struct{}{},
|
||
|
"RHS": struct{}{},
|
||
|
"RPC": struct{}{},
|
||
|
"SLA": struct{}{},
|
||
|
"SMTP": struct{}{},
|
||
|
"SQL": struct{}{},
|
||
|
"SSH": struct{}{},
|
||
|
"TCP": struct{}{},
|
||
|
"TLS": struct{}{},
|
||
|
"TTL": struct{}{},
|
||
|
"UDP": struct{}{},
|
||
|
"UI": struct{}{},
|
||
|
"UID": struct{}{},
|
||
|
"UUID": struct{}{},
|
||
|
"URI": struct{}{},
|
||
|
"URL": struct{}{},
|
||
|
"UTF8": struct{}{},
|
||
|
"VM": struct{}{},
|
||
|
"XML": struct{}{},
|
||
|
"XSRF": struct{}{},
|
||
|
"XSS": struct{}{},
|
||
|
}
|
||
|
commonInitialisms = keys(commonInitialismMap)
|
||
|
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
|
||
|
longestLen = longestLength(commonInitialisms)
|
||
|
shortestLen = shortestLength(commonInitialisms, longestLen)
|
||
|
)
|
||
|
|
||
|
// ToUpperCamelCase returns a copy of the string s with all Unicode letters mapped to their camel case.
|
||
|
// It will convert to upper case previous letter of '_' and first letter, and remove letter of '_'.
|
||
|
func ToUpperCamelCase(s string) string {
|
||
|
if s == "" {
|
||
|
return ""
|
||
|
}
|
||
|
upper := true
|
||
|
start := 0
|
||
|
result := make([]byte, 0, len(s))
|
||
|
var runeBuf [utf8.UTFMax]byte
|
||
|
var initialism []byte
|
||
|
for _, c := range s {
|
||
|
if c == '_' {
|
||
|
upper = true
|
||
|
candidate := string(result[start:])
|
||
|
initialism = initialism[:0]
|
||
|
for _, r := range candidate {
|
||
|
if r < utf8.RuneSelf {
|
||
|
initialism = append(initialism, toUpperASCII(byte(r)))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
|
||
|
initialism = append(initialism, runeBuf[:n]...)
|
||
|
}
|
||
|
}
|
||
|
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
|
||
|
result = append(result[:start], initialism...)
|
||
|
}
|
||
|
start = len(result)
|
||
|
continue
|
||
|
}
|
||
|
if upper {
|
||
|
if c < utf8.RuneSelf {
|
||
|
result = append(result, toUpperASCII(byte(c)))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(c))
|
||
|
result = append(result, runeBuf[:n]...)
|
||
|
}
|
||
|
upper = false
|
||
|
continue
|
||
|
}
|
||
|
if c < utf8.RuneSelf {
|
||
|
result = append(result, byte(c))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], c)
|
||
|
result = append(result, runeBuf[:n]...)
|
||
|
}
|
||
|
}
|
||
|
candidate := string(result[start:])
|
||
|
initialism = initialism[:0]
|
||
|
for _, r := range candidate {
|
||
|
if r < utf8.RuneSelf {
|
||
|
initialism = append(initialism, toUpperASCII(byte(r)))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
|
||
|
initialism = append(initialism, runeBuf[:n]...)
|
||
|
}
|
||
|
}
|
||
|
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
|
||
|
result = append(result[:start], initialism...)
|
||
|
}
|
||
|
return string(result)
|
||
|
}
|
||
|
|
||
|
// ToUpperCamelCaseASCII is similar to ToUpperCamelCase, but optimized for
|
||
|
// only the ASCII characters.
|
||
|
// ToUpperCamelCaseASCII is faster than ToUpperCamelCase, but doesn't work if
|
||
|
// contains non-ASCII characters.
|
||
|
func ToUpperCamelCaseASCII(s string) string {
|
||
|
if s == "" {
|
||
|
return ""
|
||
|
}
|
||
|
upper := true
|
||
|
start := 0
|
||
|
result := make([]byte, 0, len(s))
|
||
|
var initialism []byte
|
||
|
for i := 0; i < len(s); i++ {
|
||
|
c := s[i]
|
||
|
if c == '_' {
|
||
|
upper = true
|
||
|
candidate := result[start:]
|
||
|
initialism = initialism[:0]
|
||
|
for _, b := range candidate {
|
||
|
initialism = append(initialism, toUpperASCII(b))
|
||
|
}
|
||
|
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
|
||
|
result = append(result[:start], initialism...)
|
||
|
}
|
||
|
start = len(result)
|
||
|
continue
|
||
|
}
|
||
|
if upper {
|
||
|
result = append(result, toUpperASCII(c))
|
||
|
upper = false
|
||
|
continue
|
||
|
}
|
||
|
result = append(result, c)
|
||
|
}
|
||
|
candidate := result[start:]
|
||
|
initialism = initialism[:0]
|
||
|
for _, b := range candidate {
|
||
|
initialism = append(initialism, toUpperASCII(b))
|
||
|
}
|
||
|
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
|
||
|
result = append(result[:start], initialism...)
|
||
|
}
|
||
|
return string(result)
|
||
|
}
|
||
|
|
||
|
// ToSnakeCase returns a copy of the string s with all Unicode letters mapped to their snake case.
|
||
|
// It will insert letter of '_' at position of previous letter of uppercase and all
|
||
|
// letters convert to lower case.
|
||
|
// ToSnakeCase does not insert '_' letter into a common initialism word like ID, URL and so on.
|
||
|
func ToSnakeCase(s string) string {
|
||
|
if s == "" {
|
||
|
return ""
|
||
|
}
|
||
|
result := make([]byte, 0, len(s))
|
||
|
var runeBuf [utf8.UTFMax]byte
|
||
|
var j, skipCount int
|
||
|
for i, c := range s {
|
||
|
if i < skipCount {
|
||
|
continue
|
||
|
}
|
||
|
if unicode.IsUpper(c) {
|
||
|
if i != 0 {
|
||
|
result = append(result, '_')
|
||
|
}
|
||
|
next := nextIndex(j, len(s))
|
||
|
if length := commonInitialism.Lookup(s[j:next]); length > 0 {
|
||
|
for _, r := range s[j : j+length] {
|
||
|
if r < utf8.RuneSelf {
|
||
|
result = append(result, toLowerASCII(byte(r)))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
|
||
|
result = append(result, runeBuf[:n]...)
|
||
|
}
|
||
|
}
|
||
|
j += length - 1
|
||
|
skipCount = i + length
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
if c < utf8.RuneSelf {
|
||
|
result = append(result, toLowerASCII(byte(c)))
|
||
|
} else {
|
||
|
n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
|
||
|
result = append(result, runeBuf[:n]...)
|
||
|
}
|
||
|
j++
|
||
|
}
|
||
|
return string(result)
|
||
|
}
|
||
|
|
||
|
// ToSnakeCaseASCII is similar to ToSnakeCase, but optimized for only the ASCII
|
||
|
// characters.
|
||
|
// ToSnakeCaseASCII is faster than ToSnakeCase, but doesn't work correctly if
|
||
|
// contains non-ASCII characters.
|
||
|
func ToSnakeCaseASCII(s string) string {
|
||
|
if s == "" {
|
||
|
return ""
|
||
|
}
|
||
|
result := make([]byte, 0, len(s))
|
||
|
for i := 0; i < len(s); i++ {
|
||
|
c := s[i]
|
||
|
if isUpperASCII(c) {
|
||
|
if i != 0 {
|
||
|
result = append(result, '_')
|
||
|
}
|
||
|
if k := i + shortestLen - 1; k < len(s) && isUpperASCII(s[k]) {
|
||
|
if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
|
||
|
for j, buf := 0, s[i:i+length]; j < len(buf); j++ {
|
||
|
result = append(result, toLowerASCII(buf[j]))
|
||
|
}
|
||
|
i += length - 1
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
result = append(result, toLowerASCII(c))
|
||
|
}
|
||
|
return string(result)
|
||
|
}
|
||
|
|
||
|
// AddCommonInitialism adds ss to list of common initialisms.
|
||
|
func AddCommonInitialism(ss ...string) {
|
||
|
mu.Lock()
|
||
|
defer mu.Unlock()
|
||
|
for _, s := range ss {
|
||
|
commonInitialismMap[s] = struct{}{}
|
||
|
}
|
||
|
commonInitialisms = keys(commonInitialismMap)
|
||
|
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
|
||
|
longestLen = longestLength(commonInitialisms)
|
||
|
shortestLen = shortestLength(commonInitialisms, longestLen)
|
||
|
}
|
||
|
|
||
|
// DelCommonInitialism deletes ss from list of common initialisms.
|
||
|
func DelCommonInitialism(ss ...string) {
|
||
|
mu.Lock()
|
||
|
defer mu.Unlock()
|
||
|
for _, s := range ss {
|
||
|
delete(commonInitialismMap, s)
|
||
|
}
|
||
|
commonInitialisms = keys(commonInitialismMap)
|
||
|
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
|
||
|
longestLen = longestLength(commonInitialisms)
|
||
|
shortestLen = shortestLength(commonInitialisms, longestLen)
|
||
|
}
|
||
|
|
||
|
// isUpperASCII reports whether c is one of the ASCII letters 'A' to 'Z'.
func isUpperASCII(c byte) bool {
	if c < 'A' {
		return false
	}
	return c <= 'Z'
}
|
||
|
|
||
|
// isLowerASCII reports whether c is one of the ASCII letters 'a' to 'z'.
func isLowerASCII(c byte) bool {
	if c < 'a' {
		return false
	}
	return c <= 'z'
}
|
||
|
|
||
|
func toUpperASCII(c byte) byte {
|
||
|
if isLowerASCII(c) {
|
||
|
return c - ('a' - 'A')
|
||
|
}
|
||
|
return c
|
||
|
}
|
||
|
|
||
|
func toLowerASCII(c byte) byte {
|
||
|
if isUpperASCII(c) {
|
||
|
return c + 'a' - 'A'
|
||
|
}
|
||
|
return c
|
||
|
}
|
||
|
|
||
|
func nextIndex(i, maxlen int) int {
|
||
|
if n := i + longestLen; n < maxlen {
|
||
|
return n
|
||
|
}
|
||
|
return maxlen
|
||
|
}
|
||
|
|
||
|
// keys returns the keys of m as a newly allocated slice. The order of the
// elements follows map iteration and is therefore unspecified.
func keys(m map[string]struct{}) []string {
	names := make([]string, len(m))
	i := 0
	for name := range m {
		names[i] = name
		i++
	}
	return names
}
|
||
|
|
||
|
// shortestLength returns the smallest rune count found among strs, or
// shortest if no element of strs has fewer runes than that. Lengths are
// measured in runes, not bytes.
func shortestLength(strs []string, shortest int) int {
	least := shortest
	for i := range strs {
		n := utf8.RuneCountInString(strs[i])
		if n >= least {
			continue
		}
		least = n
	}
	return least
}
|
||
|
|
||
|
// longestLength returns the largest rune count found among strs, or 0 when
// strs is empty. Lengths are measured in runes, not bytes.
func longestLength(strs []string) (longest int) {
	for i := range strs {
		n := utf8.RuneCountInString(strs[i])
		if n <= longest {
			continue
		}
		longest = n
	}
	return longest
}
|