You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
6.9 KiB

  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package html
  5. import (
  6. "bufio"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "strings"
  11. )
  12. type writer interface {
  13. io.Writer
  14. io.ByteWriter
  15. WriteString(string) (int, error)
  16. }
  17. // Render renders the parse tree n to the given writer.
  18. //
  19. // Rendering is done on a 'best effort' basis: calling Parse on the output of
  20. // Render will always result in something similar to the original tree, but it
  21. // is not necessarily an exact clone unless the original tree was 'well-formed'.
  22. // 'Well-formed' is not easily specified; the HTML5 specification is
  23. // complicated.
  24. //
  25. // Calling Parse on arbitrary input typically results in a 'well-formed' parse
  26. // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
  27. // For example, in a 'well-formed' parse tree, no <a> element is a child of
  28. // another <a> element: parsing "<a><a>" results in two sibling elements.
  29. // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
  30. // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
  31. // children; the <a> is reparented to the <table>'s parent. However, calling
  32. // Parse on "<a><table><a>" does not return an error, but the result has an <a>
  33. // element with an <a> child, and is therefore not 'well-formed'.
  34. //
  35. // Programmatically constructed trees are typically also 'well-formed', but it
  36. // is possible to construct a tree that looks innocuous but, when rendered and
  37. // re-parsed, results in a different tree. A simple example is that a solitary
  38. // text node would become a tree containing <html>, <head> and <body> elements.
  39. // Another example is that the programmatic equivalent of "a<head>b</head>c"
  40. // becomes "<html><head><head/><body>abc</body></html>".
  41. func Render(w io.Writer, n *Node) error {
  42. if x, ok := w.(writer); ok {
  43. return render(x, n)
  44. }
  45. buf := bufio.NewWriter(w)
  46. if err := render(buf, n); err != nil {
  47. return err
  48. }
  49. return buf.Flush()
  50. }
  51. // plaintextAbort is returned from render1 when a <plaintext> element
  52. // has been rendered. No more end tags should be rendered after that.
  53. var plaintextAbort = errors.New("html: internal error (plaintext abort)")
  54. func render(w writer, n *Node) error {
  55. err := render1(w, n)
  56. if err == plaintextAbort {
  57. err = nil
  58. }
  59. return err
  60. }
  61. func render1(w writer, n *Node) error {
  62. // Render non-element nodes; these are the easy cases.
  63. switch n.Type {
  64. case ErrorNode:
  65. return errors.New("html: cannot render an ErrorNode node")
  66. case TextNode:
  67. return escape(w, n.Data)
  68. case DocumentNode:
  69. for c := n.FirstChild; c != nil; c = c.NextSibling {
  70. if err := render1(w, c); err != nil {
  71. return err
  72. }
  73. }
  74. return nil
  75. case ElementNode:
  76. // No-op.
  77. case CommentNode:
  78. if _, err := w.WriteString("<!--"); err != nil {
  79. return err
  80. }
  81. if _, err := w.WriteString(n.Data); err != nil {
  82. return err
  83. }
  84. if _, err := w.WriteString("-->"); err != nil {
  85. return err
  86. }
  87. return nil
  88. case DoctypeNode:
  89. if _, err := w.WriteString("<!DOCTYPE "); err != nil {
  90. return err
  91. }
  92. if _, err := w.WriteString(n.Data); err != nil {
  93. return err
  94. }
  95. if n.Attr != nil {
  96. var p, s string
  97. for _, a := range n.Attr {
  98. switch a.Key {
  99. case "public":
  100. p = a.Val
  101. case "system":
  102. s = a.Val
  103. }
  104. }
  105. if p != "" {
  106. if _, err := w.WriteString(" PUBLIC "); err != nil {
  107. return err
  108. }
  109. if err := writeQuoted(w, p); err != nil {
  110. return err
  111. }
  112. if s != "" {
  113. if err := w.WriteByte(' '); err != nil {
  114. return err
  115. }
  116. if err := writeQuoted(w, s); err != nil {
  117. return err
  118. }
  119. }
  120. } else if s != "" {
  121. if _, err := w.WriteString(" SYSTEM "); err != nil {
  122. return err
  123. }
  124. if err := writeQuoted(w, s); err != nil {
  125. return err
  126. }
  127. }
  128. }
  129. return w.WriteByte('>')
  130. default:
  131. return errors.New("html: unknown node type")
  132. }
  133. // Render the <xxx> opening tag.
  134. if err := w.WriteByte('<'); err != nil {
  135. return err
  136. }
  137. if _, err := w.WriteString(n.Data); err != nil {
  138. return err
  139. }
  140. for _, a := range n.Attr {
  141. if err := w.WriteByte(' '); err != nil {
  142. return err
  143. }
  144. if a.Namespace != "" {
  145. if _, err := w.WriteString(a.Namespace); err != nil {
  146. return err
  147. }
  148. if err := w.WriteByte(':'); err != nil {
  149. return err
  150. }
  151. }
  152. if _, err := w.WriteString(a.Key); err != nil {
  153. return err
  154. }
  155. if _, err := w.WriteString(`="`); err != nil {
  156. return err
  157. }
  158. if err := escape(w, a.Val); err != nil {
  159. return err
  160. }
  161. if err := w.WriteByte('"'); err != nil {
  162. return err
  163. }
  164. }
  165. if voidElements[n.Data] {
  166. if n.FirstChild != nil {
  167. return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
  168. }
  169. _, err := w.WriteString("/>")
  170. return err
  171. }
  172. if err := w.WriteByte('>'); err != nil {
  173. return err
  174. }
  175. // Add initial newline where there is danger of a newline beging ignored.
  176. if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
  177. switch n.Data {
  178. case "pre", "listing", "textarea":
  179. if err := w.WriteByte('\n'); err != nil {
  180. return err
  181. }
  182. }
  183. }
  184. // Render any child nodes.
  185. switch n.Data {
  186. case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
  187. for c := n.FirstChild; c != nil; c = c.NextSibling {
  188. if c.Type == TextNode {
  189. if _, err := w.WriteString(c.Data); err != nil {
  190. return err
  191. }
  192. } else {
  193. if err := render1(w, c); err != nil {
  194. return err
  195. }
  196. }
  197. }
  198. if n.Data == "plaintext" {
  199. // Don't render anything else. <plaintext> must be the
  200. // last element in the file, with no closing tag.
  201. return plaintextAbort
  202. }
  203. default:
  204. for c := n.FirstChild; c != nil; c = c.NextSibling {
  205. if err := render1(w, c); err != nil {
  206. return err
  207. }
  208. }
  209. }
  210. // Render the </xxx> closing tag.
  211. if _, err := w.WriteString("</"); err != nil {
  212. return err
  213. }
  214. if _, err := w.WriteString(n.Data); err != nil {
  215. return err
  216. }
  217. return w.WriteByte('>')
  218. }
  219. // writeQuoted writes s to w surrounded by quotes. Normally it will use double
  220. // quotes, but if s contains a double quote, it will use single quotes.
  221. // It is used for writing the identifiers in a doctype declaration.
  222. // In valid HTML, they can't contain both types of quotes.
  223. func writeQuoted(w writer, s string) error {
  224. var q byte = '"'
  225. if strings.Contains(s, `"`) {
  226. q = '\''
  227. }
  228. if err := w.WriteByte(q); err != nil {
  229. return err
  230. }
  231. if _, err := w.WriteString(s); err != nil {
  232. return err
  233. }
  234. if err := w.WriteByte(q); err != nil {
  235. return err
  236. }
  237. return nil
  238. }
  239. // Section 12.1.2, "Elements", gives this list of void elements. Void elements
  240. // are those that can't have any contents.
  241. var voidElements = map[string]bool{
  242. "area": true,
  243. "base": true,
  244. "br": true,
  245. "col": true,
  246. "command": true,
  247. "embed": true,
  248. "hr": true,
  249. "img": true,
  250. "input": true,
  251. "keygen": true,
  252. "link": true,
  253. "meta": true,
  254. "param": true,
  255. "source": true,
  256. "track": true,
  257. "wbr": true,
  258. }