You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

253 lines
7.6 KiB

  1. // Copyright (c) 2014, David Kitchen <david@buro9.com>
  2. //
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice, this
  9. // list of conditions and the following disclaimer.
  10. //
  11. // * Redistributions in binary form must reproduce the above copyright notice,
  12. // this list of conditions and the following disclaimer in the documentation
  13. // and/or other materials provided with the distribution.
  14. //
  15. // * Neither the name of the organisation (Microcosm) nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23. // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25. // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26. // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27. // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. package bluemonday
  30. import (
  31. "regexp"
  32. )
  33. // StrictPolicy returns an empty policy, which will effectively strip all HTML
  34. // elements and their attributes from a document.
  35. func StrictPolicy() *Policy {
  36. return NewPolicy()
  37. }
  38. // StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
  39. func StripTagsPolicy() *Policy {
  40. return StrictPolicy()
  41. }
  42. // UGCPolicy returns a policy aimed at user generated content that is a result
  43. // of HTML WYSIWYG tools and Markdown conversions.
  44. //
  45. // This is expected to be a fairly rich document where as much markup as
  46. // possible should be retained. Markdown permits raw HTML so we are basically
  47. // providing a policy to sanitise HTML5 documents safely but with the
  48. // least intrusion on the formatting expectations of the user.
  49. func UGCPolicy() *Policy {
  50. p := NewPolicy()
  51. ///////////////////////
  52. // Global attributes //
  53. ///////////////////////
  54. // "class" is not permitted as we are not allowing users to style their own
  55. // content
  56. p.AllowStandardAttributes()
  57. //////////////////////////////
  58. // Global URL format policy //
  59. //////////////////////////////
  60. p.AllowStandardURLs()
  61. ////////////////////////////////
  62. // Declarations and structure //
  63. ////////////////////////////////
  64. // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
  65. // expecting user generated content to be a fragment of HTML and not a full
  66. // document.
  67. //////////////////////////
  68. // Sectioning root tags //
  69. //////////////////////////
  70. // "article" and "aside" are permitted and takes no attributes
  71. p.AllowElements("article", "aside")
  72. // "body" is not permitted as we are expecting user generated content to be a fragment
  73. // of HTML and not a full document.
  74. // "details" is permitted, including the "open" attribute which can either
  75. // be blank or the value "open".
  76. p.AllowAttrs(
  77. "open",
  78. ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
  79. // "fieldset" is not permitted as we are not allowing forms to be created.
  80. // "figure" is permitted and takes no attributes
  81. p.AllowElements("figure")
  82. // "nav" is not permitted as it is assumed that the site (and not the user)
  83. // has defined navigation elements
  84. // "section" is permitted and takes no attributes
  85. p.AllowElements("section")
  86. // "summary" is permitted and takes no attributes
  87. p.AllowElements("summary")
  88. //////////////////////////
  89. // Headings and footers //
  90. //////////////////////////
  91. // "footer" is not permitted as we expect user content to be a fragment and
  92. // not structural to this extent
  93. // "h1" through "h6" are permitted and take no attributes
  94. p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
  95. // "header" is not permitted as we expect user content to be a fragment and
  96. // not structural to this extent
  97. // "hgroup" is permitted and takes no attributes
  98. p.AllowElements("hgroup")
  99. /////////////////////////////////////
  100. // Content grouping and separating //
  101. /////////////////////////////////////
  102. // "blockquote" is permitted, including the "cite" attribute which must be
  103. // a standard URL.
  104. p.AllowAttrs("cite").OnElements("blockquote")
  105. // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
  106. p.AllowElements("br", "div", "hr", "p", "span", "wbr")
  107. ///////////
  108. // Links //
  109. ///////////
  110. // "a" is permitted
  111. p.AllowAttrs("href").OnElements("a")
  112. // "area" is permitted along with the attributes that map image maps work
  113. p.AllowAttrs("name").Matching(
  114. regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
  115. ).OnElements("map")
  116. p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
  117. p.AllowAttrs("coords").Matching(
  118. regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
  119. ).OnElements("area")
  120. p.AllowAttrs("href").OnElements("area")
  121. p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
  122. p.AllowAttrs("shape").Matching(
  123. regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
  124. ).OnElements("area")
  125. p.AllowAttrs("usemap").Matching(
  126. regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
  127. ).OnElements("img")
  128. // "link" is not permitted
  129. /////////////////////
  130. // Phrase elements //
  131. /////////////////////
  132. // The following are all inline phrasing elements
  133. p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
  134. "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
  135. // "q" is permitted and "cite" is a URL and handled by URL policies
  136. p.AllowAttrs("cite").OnElements("q")
  137. // "time" is permitted
  138. p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
  139. ////////////////////
  140. // Style elements //
  141. ////////////////////
  142. // block and inline elements that impart no semantic meaning but style the
  143. // document
  144. p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
  145. // "style" is not permitted as we are not yet sanitising CSS and it is an
  146. // XSS attack vector
  147. //////////////////////
  148. // HTML5 Formatting //
  149. //////////////////////
  150. // "bdi" "bdo" are permitted
  151. p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
  152. // "rp" "rt" "ruby" are permitted
  153. p.AllowElements("rp", "rt", "ruby")
  154. ///////////////////////////
  155. // HTML5 Change tracking //
  156. ///////////////////////////
  157. // "del" "ins" are permitted
  158. p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
  159. p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
  160. ///////////
  161. // Lists //
  162. ///////////
  163. p.AllowLists()
  164. ////////////
  165. // Tables //
  166. ////////////
  167. p.AllowTables()
  168. ///////////
  169. // Forms //
  170. ///////////
  171. // By and large, forms are not permitted. However there are some form
  172. // elements that can be used to present data, and we do permit those
  173. //
  174. // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
  175. // "textarea" "optgroup" "option" are all not permitted
  176. // "meter" is permitted
  177. p.AllowAttrs(
  178. "value",
  179. "min",
  180. "max",
  181. "low",
  182. "high",
  183. "optimum",
  184. ).Matching(Number).OnElements("meter")
  185. // "progress" is permitted
  186. p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
  187. //////////////////////
  188. // Embedded content //
  189. //////////////////////
  190. // Vast majority not permitted
  191. // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
  192. // "video" are all not permitted
  193. p.AllowImages()
  194. return p
  195. }