649 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			649 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright 2012 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
// +build ignore
 | 
						|
 | 
						|
package main
 | 
						|
 | 
						|
// This program generates table.go and table_test.go.
 | 
						|
// Invoke as
 | 
						|
//
 | 
						|
//	go run gen.go |gofmt >table.go
 | 
						|
//	go run gen.go -test |gofmt >table_test.go
 | 
						|
 | 
						|
import (
 | 
						|
	"flag"
 | 
						|
	"fmt"
 | 
						|
	"math/rand"
 | 
						|
	"os"
 | 
						|
	"sort"
 | 
						|
	"strings"
 | 
						|
)
 | 
						|
 | 
						|
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
//
// Hyphens are dropped. The first rune of s and the rune following each
// hyphen are upper-cased when they are ASCII lower-case letters. Every other
// rune, including non-ASCII ones, is copied through unchanged.
func identifier(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	upper := true // the next rune starts a new word
	for _, r := range s {
		if r == '-' {
			upper = true
			continue
		}
		if upper && 'a' <= r && r <= 'z' {
			r -= 'a' - 'A'
		}
		upper = false
		// WriteRune keeps multi-byte runes intact; converting the rune to a
		// single byte would truncate anything outside ASCII.
		b.WriteRune(r)
	}
	return b.String()
}
 | 
						|
 | 
						|
// test selects what main prints: when set, the testAtomList source destined
// for table_test.go; otherwise the atom table destined for table.go.
var test = flag.Bool("test", false, "generate table_test.go")
 | 
						|
 | 
						|
func main() {
 | 
						|
	flag.Parse()
 | 
						|
 | 
						|
	var all []string
 | 
						|
	all = append(all, elements...)
 | 
						|
	all = append(all, attributes...)
 | 
						|
	all = append(all, eventHandlers...)
 | 
						|
	all = append(all, extra...)
 | 
						|
	sort.Strings(all)
 | 
						|
 | 
						|
	if *test {
 | 
						|
		fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
 | 
						|
		fmt.Printf("package atom\n\n")
 | 
						|
		fmt.Printf("var testAtomList = []string{\n")
 | 
						|
		for _, s := range all {
 | 
						|
			fmt.Printf("\t%q,\n", s)
 | 
						|
		}
 | 
						|
		fmt.Printf("}\n")
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	// uniq - lists have dups
 | 
						|
	// compute max len too
 | 
						|
	maxLen := 0
 | 
						|
	w := 0
 | 
						|
	for _, s := range all {
 | 
						|
		if w == 0 || all[w-1] != s {
 | 
						|
			if maxLen < len(s) {
 | 
						|
				maxLen = len(s)
 | 
						|
			}
 | 
						|
			all[w] = s
 | 
						|
			w++
 | 
						|
		}
 | 
						|
	}
 | 
						|
	all = all[:w]
 | 
						|
 | 
						|
	// Find hash that minimizes table size.
 | 
						|
	var best *table
 | 
						|
	for i := 0; i < 1000000; i++ {
 | 
						|
		if best != nil && 1<<(best.k-1) < len(all) {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		h := rand.Uint32()
 | 
						|
		for k := uint(0); k <= 16; k++ {
 | 
						|
			if best != nil && k >= best.k {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			var t table
 | 
						|
			if t.init(h, k, all) {
 | 
						|
				best = &t
 | 
						|
				break
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if best == nil {
 | 
						|
		fmt.Fprintf(os.Stderr, "failed to construct string table\n")
 | 
						|
		os.Exit(1)
 | 
						|
	}
 | 
						|
 | 
						|
	// Lay out strings, using overlaps when possible.
 | 
						|
	layout := append([]string{}, all...)
 | 
						|
 | 
						|
	// Remove strings that are substrings of other strings
 | 
						|
	for changed := true; changed; {
 | 
						|
		changed = false
 | 
						|
		for i, s := range layout {
 | 
						|
			if s == "" {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			for j, t := range layout {
 | 
						|
				if i != j && t != "" && strings.Contains(s, t) {
 | 
						|
					changed = true
 | 
						|
					layout[j] = ""
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// Join strings where one suffix matches another prefix.
 | 
						|
	for {
 | 
						|
		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
 | 
						|
		// maximizing overlap length k.
 | 
						|
		besti := -1
 | 
						|
		bestj := -1
 | 
						|
		bestk := 0
 | 
						|
		for i, s := range layout {
 | 
						|
			if s == "" {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			for j, t := range layout {
 | 
						|
				if i == j {
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
 | 
						|
					if s[len(s)-k:] == t[:k] {
 | 
						|
						besti = i
 | 
						|
						bestj = j
 | 
						|
						bestk = k
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if bestk > 0 {
 | 
						|
			layout[besti] += layout[bestj][bestk:]
 | 
						|
			layout[bestj] = ""
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		break
 | 
						|
	}
 | 
						|
 | 
						|
	text := strings.Join(layout, "")
 | 
						|
 | 
						|
	atom := map[string]uint32{}
 | 
						|
	for _, s := range all {
 | 
						|
		off := strings.Index(text, s)
 | 
						|
		if off < 0 {
 | 
						|
			panic("lost string " + s)
 | 
						|
		}
 | 
						|
		atom[s] = uint32(off<<8 | len(s))
 | 
						|
	}
 | 
						|
 | 
						|
	// Generate the Go code.
 | 
						|
	fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
 | 
						|
	fmt.Printf("package atom\n\nconst (\n")
 | 
						|
	for _, s := range all {
 | 
						|
		fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
 | 
						|
	}
 | 
						|
	fmt.Printf(")\n\n")
 | 
						|
 | 
						|
	fmt.Printf("const hash0 = %#x\n\n", best.h0)
 | 
						|
	fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
 | 
						|
 | 
						|
	fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
 | 
						|
	for i, s := range best.tab {
 | 
						|
		if s == "" {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
 | 
						|
	}
 | 
						|
	fmt.Printf("}\n")
 | 
						|
	datasize := (1 << best.k) * 4
 | 
						|
 | 
						|
	fmt.Printf("const atomText =\n")
 | 
						|
	textsize := len(text)
 | 
						|
	for len(text) > 60 {
 | 
						|
		fmt.Printf("\t%q +\n", text[:60])
 | 
						|
		text = text[60:]
 | 
						|
	}
 | 
						|
	fmt.Printf("\t%q\n\n", text)
 | 
						|
 | 
						|
	fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
 | 
						|
}
 | 
						|
 | 
						|
// byLen orders a slice of strings from longest to shortest.
type byLen []string

// Len returns the number of strings in x.
func (x byLen) Len() int {
	return len(x)
}

// Less reports whether x[i] is strictly longer than x[j].
func (x byLen) Less(i, j int) bool {
	return len(x[j]) < len(x[i])
}

// Swap exchanges the strings at positions i and j.
func (x byLen) Swap(i, j int) {
	x[j], x[i] = x[i], x[j]
}
 | 
						|
 | 
						|
// fnv computes the FNV hash of s, starting from the caller-supplied value h
// rather than a fixed offset basis. Each byte is XORed into the hash, which
// is then multiplied by the 32-bit FNV prime.
func fnv(h uint32, s string) uint32 {
	const prime = 16777619
	for _, c := range []byte(s) {
		h = (h ^ uint32(c)) * prime
	}
	return h
}
 | 
						|
 | 
						|
// A table represents an attempt at constructing the lookup table.
 | 
						|
// The lookup table uses cuckoo hashing, meaning that each string
 | 
						|
// can be found in one of two positions.
 | 
						|
type table struct {
 | 
						|
	h0   uint32
 | 
						|
	k    uint
 | 
						|
	mask uint32
 | 
						|
	tab  []string
 | 
						|
}
 | 
						|
 | 
						|
// hash returns the two hashes for s.
 | 
						|
func (t *table) hash(s string) (h1, h2 uint32) {
 | 
						|
	h := fnv(t.h0, s)
 | 
						|
	h1 = h & t.mask
 | 
						|
	h2 = (h >> 16) & t.mask
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
// init initializes the table with the given parameters.
 | 
						|
// h0 is the initial hash value,
 | 
						|
// k is the number of bits of hash value to use, and
 | 
						|
// x is the list of strings to store in the table.
 | 
						|
// init returns false if the table cannot be constructed.
 | 
						|
func (t *table) init(h0 uint32, k uint, x []string) bool {
 | 
						|
	t.h0 = h0
 | 
						|
	t.k = k
 | 
						|
	t.tab = make([]string, 1<<k)
 | 
						|
	t.mask = 1<<k - 1
 | 
						|
	for _, s := range x {
 | 
						|
		if !t.insert(s) {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// insert inserts s in the table.
 | 
						|
func (t *table) insert(s string) bool {
 | 
						|
	h1, h2 := t.hash(s)
 | 
						|
	if t.tab[h1] == "" {
 | 
						|
		t.tab[h1] = s
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	if t.tab[h2] == "" {
 | 
						|
		t.tab[h2] = s
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	if t.push(h1, 0) {
 | 
						|
		t.tab[h1] = s
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	if t.push(h2, 0) {
 | 
						|
		t.tab[h2] = s
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// push attempts to push aside the entry in slot i.
 | 
						|
func (t *table) push(i uint32, depth int) bool {
 | 
						|
	if depth > len(t.tab) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	s := t.tab[i]
 | 
						|
	h1, h2 := t.hash(s)
 | 
						|
	j := h1 + h2 - i
 | 
						|
	if t.tab[j] != "" && !t.push(j, depth+1) {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	t.tab[j] = s
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 21 February 2015" version.

// elements lists the HTML element names, grouped here by initial letter.
var elements = []string{
	"a", "abbr", "address", "area", "article", "aside", "audio",
	"b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
	"canvas", "caption", "cite", "code", "col", "colgroup", "command",
	"data", "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt",
	"em", "embed",
	"fieldset", "figcaption", "figure", "footer", "form",
	"h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
	"i", "iframe", "img", "input", "ins",
	"kbd", "keygen",
	"label", "legend", "li", "link",
	"map", "mark", "menu", "menuitem", "meta", "meter",
	"nav", "noscript",
	"object", "ol", "optgroup", "option", "output",
	"p", "param", "pre", "progress",
	"q",
	"rp", "rt", "ruby",
	"s", "samp", "script", "section", "select", "small", "source", "span",
	"strong", "style", "sub", "summary", "sup",
	"table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
	"time", "title", "tr", "track",
	"u", "ul",
	"var", "video",
	"wbr",
}
 | 
						|
 | 
						|
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3

// attributes lists the HTML attribute keys, grouped here by initial letter.
// The order of the entries is kept exactly as it was; the list is not
// strictly alphabetical.
var attributes = []string{
	"abbr", "accept", "accept-charset", "accesskey", "action", "alt", "async",
	"autocomplete", "autofocus", "autoplay",
	"challenge", "charset", "checked", "cite", "class", "cols", "colspan",
	"command", "content", "contenteditable", "contextmenu", "controls",
	"coords", "crossorigin",
	"data", "datetime", "default", "defer", "dir", "dirname", "disabled",
	"download", "draggable", "dropzone",
	"enctype",
	"for", "form", "formaction", "formenctype", "formmethod", "formnovalidate", "formtarget",
	"headers", "height", "hidden", "high", "href", "hreflang", "http-equiv",
	"icon", "id", "inputmode", "ismap", "itemid", "itemprop", "itemref", "itemscope", "itemtype",
	"keytype", "kind",
	"label", "lang", "list", "loop", "low",
	"manifest", "max", "maxlength", "media", "mediagroup", "method", "min",
	"minlength", "multiple", "muted",
	"name", "novalidate",
	"open", "optimum",
	"pattern", "ping", "placeholder", "poster", "preload",
	"radiogroup", "readonly", "rel", "required", "reversed", "rows", "rowspan",
	"sandbox", "spellcheck", "scope", "scoped", "seamless", "selected", "shape",
	"size", "sizes", "sortable", "sorted", "span", "src", "srcdoc", "srclang",
	"start", "step", "style",
	"tabindex", "target", "title", "translate", "type", "typemustmatch",
	"usemap",
	"value",
	"width", "wrap",
}
 | 
						|
 | 
						|
// eventHandlers lists the "on..." event handler attribute names, grouped
// here by the letter following "on". The order of the entries is kept
// exactly as it was; the list is not strictly alphabetical.
var eventHandlers = []string{
	"onabort", "onautocomplete", "onautocompleteerror", "onafterprint",
	"onbeforeprint", "onbeforeunload", "onblur",
	"oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick",
	"onclose", "oncontextmenu", "oncuechange",
	"ondblclick", "ondrag", "ondragend", "ondragenter", "ondragleave",
	"ondragover", "ondragstart", "ondrop", "ondurationchange",
	"onemptied", "onended", "onerror",
	"onfocus",
	"onhashchange",
	"oninput", "oninvalid",
	"onkeydown", "onkeypress", "onkeyup",
	"onlanguagechange", "onload", "onloadeddata", "onloadedmetadata", "onloadstart",
	"onmessage", "onmousedown", "onmousemove", "onmouseout", "onmouseover",
	"onmouseup", "onmousewheel",
	"onoffline", "ononline",
	"onpagehide", "onpageshow", "onpause", "onplay", "onplaying", "onpopstate", "onprogress",
	"onratechange", "onreset", "onresize",
	"onscroll", "onseeked", "onseeking", "onselect", "onshow", "onsort",
	"onstalled", "onstorage", "onsubmit", "onsuspend",
	"ontimeupdate", "ontoggle",
	"onunload",
	"onvolumechange",
	"onwaiting",
}
 | 
						|
 | 
						|
// extra are ad-hoc values not covered by any of the lists above.
// Entries are grouped here by initial letter.
var extra = []string{
	"align", "annotation", "annotation-xml", "applet",
	"basefont", "bgsound", "big", "blink",
	"center", "color",
	"desc",
	"face", "font",
	// HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive,
	// so both spellings are listed.
	"foreignObject", "foreignobject",
	"frame", "frameset",
	"image", "isindex",
	"listing",
	"malignmark", "marquee", "math", "mglyph", "mi", "mn", "mo", "ms", "mtext",
	"nobr", "noembed", "noframes",
	"plaintext", "prompt", "public",
	"spacer", "strike", "svg", "system",
	"tt",
	"xmp",
}
 |