You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

648 lines
10 KiB

  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. // This program generates table.go and table_test.go.
  7. // Invoke as
  8. //
  9. // go run gen.go |gofmt >table.go
  10. // go run gen.go -test |gofmt >table_test.go
  11. import (
  12. "flag"
  13. "fmt"
  14. "math/rand"
  15. "os"
  16. "sort"
  17. "strings"
  18. )
  19. // identifier converts s to a Go exported identifier.
  20. // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
  21. func identifier(s string) string {
  22. b := make([]byte, 0, len(s))
  23. cap := true
  24. for _, c := range s {
  25. if c == '-' {
  26. cap = true
  27. continue
  28. }
  29. if cap && 'a' <= c && c <= 'z' {
  30. c -= 'a' - 'A'
  31. }
  32. cap = false
  33. b = append(b, byte(c))
  34. }
  35. return string(b)
  36. }
  37. var test = flag.Bool("test", false, "generate table_test.go")
  38. func main() {
  39. flag.Parse()
  40. var all []string
  41. all = append(all, elements...)
  42. all = append(all, attributes...)
  43. all = append(all, eventHandlers...)
  44. all = append(all, extra...)
  45. sort.Strings(all)
  46. if *test {
  47. fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
  48. fmt.Printf("package atom\n\n")
  49. fmt.Printf("var testAtomList = []string{\n")
  50. for _, s := range all {
  51. fmt.Printf("\t%q,\n", s)
  52. }
  53. fmt.Printf("}\n")
  54. return
  55. }
  56. // uniq - lists have dups
  57. // compute max len too
  58. maxLen := 0
  59. w := 0
  60. for _, s := range all {
  61. if w == 0 || all[w-1] != s {
  62. if maxLen < len(s) {
  63. maxLen = len(s)
  64. }
  65. all[w] = s
  66. w++
  67. }
  68. }
  69. all = all[:w]
  70. // Find hash that minimizes table size.
  71. var best *table
  72. for i := 0; i < 1000000; i++ {
  73. if best != nil && 1<<(best.k-1) < len(all) {
  74. break
  75. }
  76. h := rand.Uint32()
  77. for k := uint(0); k <= 16; k++ {
  78. if best != nil && k >= best.k {
  79. break
  80. }
  81. var t table
  82. if t.init(h, k, all) {
  83. best = &t
  84. break
  85. }
  86. }
  87. }
  88. if best == nil {
  89. fmt.Fprintf(os.Stderr, "failed to construct string table\n")
  90. os.Exit(1)
  91. }
  92. // Lay out strings, using overlaps when possible.
  93. layout := append([]string{}, all...)
  94. // Remove strings that are substrings of other strings
  95. for changed := true; changed; {
  96. changed = false
  97. for i, s := range layout {
  98. if s == "" {
  99. continue
  100. }
  101. for j, t := range layout {
  102. if i != j && t != "" && strings.Contains(s, t) {
  103. changed = true
  104. layout[j] = ""
  105. }
  106. }
  107. }
  108. }
  109. // Join strings where one suffix matches another prefix.
  110. for {
  111. // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
  112. // maximizing overlap length k.
  113. besti := -1
  114. bestj := -1
  115. bestk := 0
  116. for i, s := range layout {
  117. if s == "" {
  118. continue
  119. }
  120. for j, t := range layout {
  121. if i == j {
  122. continue
  123. }
  124. for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
  125. if s[len(s)-k:] == t[:k] {
  126. besti = i
  127. bestj = j
  128. bestk = k
  129. }
  130. }
  131. }
  132. }
  133. if bestk > 0 {
  134. layout[besti] += layout[bestj][bestk:]
  135. layout[bestj] = ""
  136. continue
  137. }
  138. break
  139. }
  140. text := strings.Join(layout, "")
  141. atom := map[string]uint32{}
  142. for _, s := range all {
  143. off := strings.Index(text, s)
  144. if off < 0 {
  145. panic("lost string " + s)
  146. }
  147. atom[s] = uint32(off<<8 | len(s))
  148. }
  149. // Generate the Go code.
  150. fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
  151. fmt.Printf("package atom\n\nconst (\n")
  152. for _, s := range all {
  153. fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
  154. }
  155. fmt.Printf(")\n\n")
  156. fmt.Printf("const hash0 = %#x\n\n", best.h0)
  157. fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
  158. fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
  159. for i, s := range best.tab {
  160. if s == "" {
  161. continue
  162. }
  163. fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
  164. }
  165. fmt.Printf("}\n")
  166. datasize := (1 << best.k) * 4
  167. fmt.Printf("const atomText =\n")
  168. textsize := len(text)
  169. for len(text) > 60 {
  170. fmt.Printf("\t%q +\n", text[:60])
  171. text = text[60:]
  172. }
  173. fmt.Printf("\t%q\n\n", text)
  174. fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
  175. }
  176. type byLen []string
  177. func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
  178. func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  179. func (x byLen) Len() int { return len(x) }
  180. // fnv computes the FNV hash with an arbitrary starting value h.
  181. func fnv(h uint32, s string) uint32 {
  182. for i := 0; i < len(s); i++ {
  183. h ^= uint32(s[i])
  184. h *= 16777619
  185. }
  186. return h
  187. }
  188. // A table represents an attempt at constructing the lookup table.
  189. // The lookup table uses cuckoo hashing, meaning that each string
  190. // can be found in one of two positions.
  191. type table struct {
  192. h0 uint32
  193. k uint
  194. mask uint32
  195. tab []string
  196. }
  197. // hash returns the two hashes for s.
  198. func (t *table) hash(s string) (h1, h2 uint32) {
  199. h := fnv(t.h0, s)
  200. h1 = h & t.mask
  201. h2 = (h >> 16) & t.mask
  202. return
  203. }
  204. // init initializes the table with the given parameters.
  205. // h0 is the initial hash value,
  206. // k is the number of bits of hash value to use, and
  207. // x is the list of strings to store in the table.
  208. // init returns false if the table cannot be constructed.
  209. func (t *table) init(h0 uint32, k uint, x []string) bool {
  210. t.h0 = h0
  211. t.k = k
  212. t.tab = make([]string, 1<<k)
  213. t.mask = 1<<k - 1
  214. for _, s := range x {
  215. if !t.insert(s) {
  216. return false
  217. }
  218. }
  219. return true
  220. }
  221. // insert inserts s in the table.
  222. func (t *table) insert(s string) bool {
  223. h1, h2 := t.hash(s)
  224. if t.tab[h1] == "" {
  225. t.tab[h1] = s
  226. return true
  227. }
  228. if t.tab[h2] == "" {
  229. t.tab[h2] = s
  230. return true
  231. }
  232. if t.push(h1, 0) {
  233. t.tab[h1] = s
  234. return true
  235. }
  236. if t.push(h2, 0) {
  237. t.tab[h2] = s
  238. return true
  239. }
  240. return false
  241. }
  242. // push attempts to push aside the entry in slot i.
  243. func (t *table) push(i uint32, depth int) bool {
  244. if depth > len(t.tab) {
  245. return false
  246. }
  247. s := t.tab[i]
  248. h1, h2 := t.hash(s)
  249. j := h1 + h2 - i
  250. if t.tab[j] != "" && !t.push(j, depth+1) {
  251. return false
  252. }
  253. t.tab[j] = s
  254. return true
  255. }
  256. // The lists of element names and attribute keys were taken from
  257. // https://html.spec.whatwg.org/multipage/indices.html#index
  258. // as of the "HTML Living Standard - Last Updated 21 February 2015" version.
  259. var elements = []string{
  260. "a",
  261. "abbr",
  262. "address",
  263. "area",
  264. "article",
  265. "aside",
  266. "audio",
  267. "b",
  268. "base",
  269. "bdi",
  270. "bdo",
  271. "blockquote",
  272. "body",
  273. "br",
  274. "button",
  275. "canvas",
  276. "caption",
  277. "cite",
  278. "code",
  279. "col",
  280. "colgroup",
  281. "command",
  282. "data",
  283. "datalist",
  284. "dd",
  285. "del",
  286. "details",
  287. "dfn",
  288. "dialog",
  289. "div",
  290. "dl",
  291. "dt",
  292. "em",
  293. "embed",
  294. "fieldset",
  295. "figcaption",
  296. "figure",
  297. "footer",
  298. "form",
  299. "h1",
  300. "h2",
  301. "h3",
  302. "h4",
  303. "h5",
  304. "h6",
  305. "head",
  306. "header",
  307. "hgroup",
  308. "hr",
  309. "html",
  310. "i",
  311. "iframe",
  312. "img",
  313. "input",
  314. "ins",
  315. "kbd",
  316. "keygen",
  317. "label",
  318. "legend",
  319. "li",
  320. "link",
  321. "map",
  322. "mark",
  323. "menu",
  324. "menuitem",
  325. "meta",
  326. "meter",
  327. "nav",
  328. "noscript",
  329. "object",
  330. "ol",
  331. "optgroup",
  332. "option",
  333. "output",
  334. "p",
  335. "param",
  336. "pre",
  337. "progress",
  338. "q",
  339. "rp",
  340. "rt",
  341. "ruby",
  342. "s",
  343. "samp",
  344. "script",
  345. "section",
  346. "select",
  347. "small",
  348. "source",
  349. "span",
  350. "strong",
  351. "style",
  352. "sub",
  353. "summary",
  354. "sup",
  355. "table",
  356. "tbody",
  357. "td",
  358. "template",
  359. "textarea",
  360. "tfoot",
  361. "th",
  362. "thead",
  363. "time",
  364. "title",
  365. "tr",
  366. "track",
  367. "u",
  368. "ul",
  369. "var",
  370. "video",
  371. "wbr",
  372. }
  373. // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
  374. var attributes = []string{
  375. "abbr",
  376. "accept",
  377. "accept-charset",
  378. "accesskey",
  379. "action",
  380. "alt",
  381. "async",
  382. "autocomplete",
  383. "autofocus",
  384. "autoplay",
  385. "challenge",
  386. "charset",
  387. "checked",
  388. "cite",
  389. "class",
  390. "cols",
  391. "colspan",
  392. "command",
  393. "content",
  394. "contenteditable",
  395. "contextmenu",
  396. "controls",
  397. "coords",
  398. "crossorigin",
  399. "data",
  400. "datetime",
  401. "default",
  402. "defer",
  403. "dir",
  404. "dirname",
  405. "disabled",
  406. "download",
  407. "draggable",
  408. "dropzone",
  409. "enctype",
  410. "for",
  411. "form",
  412. "formaction",
  413. "formenctype",
  414. "formmethod",
  415. "formnovalidate",
  416. "formtarget",
  417. "headers",
  418. "height",
  419. "hidden",
  420. "high",
  421. "href",
  422. "hreflang",
  423. "http-equiv",
  424. "icon",
  425. "id",
  426. "inputmode",
  427. "ismap",
  428. "itemid",
  429. "itemprop",
  430. "itemref",
  431. "itemscope",
  432. "itemtype",
  433. "keytype",
  434. "kind",
  435. "label",
  436. "lang",
  437. "list",
  438. "loop",
  439. "low",
  440. "manifest",
  441. "max",
  442. "maxlength",
  443. "media",
  444. "mediagroup",
  445. "method",
  446. "min",
  447. "minlength",
  448. "multiple",
  449. "muted",
  450. "name",
  451. "novalidate",
  452. "open",
  453. "optimum",
  454. "pattern",
  455. "ping",
  456. "placeholder",
  457. "poster",
  458. "preload",
  459. "radiogroup",
  460. "readonly",
  461. "rel",
  462. "required",
  463. "reversed",
  464. "rows",
  465. "rowspan",
  466. "sandbox",
  467. "spellcheck",
  468. "scope",
  469. "scoped",
  470. "seamless",
  471. "selected",
  472. "shape",
  473. "size",
  474. "sizes",
  475. "sortable",
  476. "sorted",
  477. "span",
  478. "src",
  479. "srcdoc",
  480. "srclang",
  481. "start",
  482. "step",
  483. "style",
  484. "tabindex",
  485. "target",
  486. "title",
  487. "translate",
  488. "type",
  489. "typemustmatch",
  490. "usemap",
  491. "value",
  492. "width",
  493. "wrap",
  494. }
  495. var eventHandlers = []string{
  496. "onabort",
  497. "onautocomplete",
  498. "onautocompleteerror",
  499. "onafterprint",
  500. "onbeforeprint",
  501. "onbeforeunload",
  502. "onblur",
  503. "oncancel",
  504. "oncanplay",
  505. "oncanplaythrough",
  506. "onchange",
  507. "onclick",
  508. "onclose",
  509. "oncontextmenu",
  510. "oncuechange",
  511. "ondblclick",
  512. "ondrag",
  513. "ondragend",
  514. "ondragenter",
  515. "ondragleave",
  516. "ondragover",
  517. "ondragstart",
  518. "ondrop",
  519. "ondurationchange",
  520. "onemptied",
  521. "onended",
  522. "onerror",
  523. "onfocus",
  524. "onhashchange",
  525. "oninput",
  526. "oninvalid",
  527. "onkeydown",
  528. "onkeypress",
  529. "onkeyup",
  530. "onlanguagechange",
  531. "onload",
  532. "onloadeddata",
  533. "onloadedmetadata",
  534. "onloadstart",
  535. "onmessage",
  536. "onmousedown",
  537. "onmousemove",
  538. "onmouseout",
  539. "onmouseover",
  540. "onmouseup",
  541. "onmousewheel",
  542. "onoffline",
  543. "ononline",
  544. "onpagehide",
  545. "onpageshow",
  546. "onpause",
  547. "onplay",
  548. "onplaying",
  549. "onpopstate",
  550. "onprogress",
  551. "onratechange",
  552. "onreset",
  553. "onresize",
  554. "onscroll",
  555. "onseeked",
  556. "onseeking",
  557. "onselect",
  558. "onshow",
  559. "onsort",
  560. "onstalled",
  561. "onstorage",
  562. "onsubmit",
  563. "onsuspend",
  564. "ontimeupdate",
  565. "ontoggle",
  566. "onunload",
  567. "onvolumechange",
  568. "onwaiting",
  569. }
  570. // extra are ad-hoc values not covered by any of the lists above.
  571. var extra = []string{
  572. "align",
  573. "annotation",
  574. "annotation-xml",
  575. "applet",
  576. "basefont",
  577. "bgsound",
  578. "big",
  579. "blink",
  580. "center",
  581. "color",
  582. "desc",
  583. "face",
  584. "font",
  585. "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
  586. "foreignobject",
  587. "frame",
  588. "frameset",
  589. "image",
  590. "isindex",
  591. "listing",
  592. "malignmark",
  593. "marquee",
  594. "math",
  595. "mglyph",
  596. "mi",
  597. "mn",
  598. "mo",
  599. "ms",
  600. "mtext",
  601. "nobr",
  602. "noembed",
  603. "noframes",
  604. "plaintext",
  605. "prompt",
  606. "public",
  607. "spacer",
  608. "strike",
  609. "svg",
  610. "system",
  611. "tt",
  612. "xmp",
  613. }