|
|
- // Copyright (c) 2014, David Kitchen <david@buro9.com>
- //
- // All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice, this
- // list of conditions and the following disclaimer.
- //
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- //
- // * Neither the name of the organisation (Microcosm) nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- package bluemonday
-
- import (
- "regexp"
- )
-
- // StrictPolicy returns an empty policy, which will effectively strip all HTML
- // elements and their attributes from a document.
- func StrictPolicy() *Policy {
- return NewPolicy()
- }
-
- // StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
- func StripTagsPolicy() *Policy {
- return StrictPolicy()
- }
-
- // UGCPolicy returns a policy aimed at user generated content that is a result
- // of HTML WYSIWYG tools and Markdown conversions.
- //
- // This is expected to be a fairly rich document where as much markup as
- // possible should be retained. Markdown permits raw HTML so we are basically
- // providing a policy to sanitise HTML5 documents safely but with the
- // least intrusion on the formatting expectations of the user.
- func UGCPolicy() *Policy {
-
- p := NewPolicy()
-
- ///////////////////////
- // Global attributes //
- ///////////////////////
-
- // "class" is not permitted as we are not allowing users to style their own
- // content
-
- p.AllowStandardAttributes()
-
- //////////////////////////////
- // Global URL format policy //
- //////////////////////////////
-
- p.AllowStandardURLs()
-
- ////////////////////////////////
- // Declarations and structure //
- ////////////////////////////////
-
- // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
- // expecting user generated content to be a fragment of HTML and not a full
- // document.
-
- //////////////////////////
- // Sectioning root tags //
- //////////////////////////
-
- // "article" and "aside" are permitted and takes no attributes
- p.AllowElements("article", "aside")
-
- // "body" is not permitted as we are expecting user generated content to be a fragment
- // of HTML and not a full document.
-
- // "details" is permitted, including the "open" attribute which can either
- // be blank or the value "open".
- p.AllowAttrs(
- "open",
- ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
-
- // "fieldset" is not permitted as we are not allowing forms to be created.
-
- // "figure" is permitted and takes no attributes
- p.AllowElements("figure")
-
- // "nav" is not permitted as it is assumed that the site (and not the user)
- // has defined navigation elements
-
- // "section" is permitted and takes no attributes
- p.AllowElements("section")
-
- // "summary" is permitted and takes no attributes
- p.AllowElements("summary")
-
- //////////////////////////
- // Headings and footers //
- //////////////////////////
-
- // "footer" is not permitted as we expect user content to be a fragment and
- // not structural to this extent
-
- // "h1" through "h6" are permitted and take no attributes
- p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
-
- // "header" is not permitted as we expect user content to be a fragment and
- // not structural to this extent
-
- // "hgroup" is permitted and takes no attributes
- p.AllowElements("hgroup")
-
- /////////////////////////////////////
- // Content grouping and separating //
- /////////////////////////////////////
-
- // "blockquote" is permitted, including the "cite" attribute which must be
- // a standard URL.
- p.AllowAttrs("cite").OnElements("blockquote")
-
- // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
- p.AllowElements("br", "div", "hr", "p", "span", "wbr")
-
- ///////////
- // Links //
- ///////////
-
- // "a" is permitted
- p.AllowAttrs("href").OnElements("a")
-
- // "area" is permitted along with the attributes that map image maps work
- p.AllowAttrs("name").Matching(
- regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
- ).OnElements("map")
- p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
- p.AllowAttrs("coords").Matching(
- regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
- ).OnElements("area")
- p.AllowAttrs("href").OnElements("area")
- p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
- p.AllowAttrs("shape").Matching(
- regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
- ).OnElements("area")
- p.AllowAttrs("usemap").Matching(
- regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
- ).OnElements("img")
-
- // "link" is not permitted
-
- /////////////////////
- // Phrase elements //
- /////////////////////
-
- // The following are all inline phrasing elements
- p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
- "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
-
- // "q" is permitted and "cite" is a URL and handled by URL policies
- p.AllowAttrs("cite").OnElements("q")
-
- // "time" is permitted
- p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
-
- ////////////////////
- // Style elements //
- ////////////////////
-
- // block and inline elements that impart no semantic meaning but style the
- // document
- p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
-
- // "style" is not permitted as we are not yet sanitising CSS and it is an
- // XSS attack vector
-
- //////////////////////
- // HTML5 Formatting //
- //////////////////////
-
- // "bdi" "bdo" are permitted
- p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
-
- // "rp" "rt" "ruby" are permitted
- p.AllowElements("rp", "rt", "ruby")
-
- ///////////////////////////
- // HTML5 Change tracking //
- ///////////////////////////
-
- // "del" "ins" are permitted
- p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
- p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
-
- ///////////
- // Lists //
- ///////////
-
- p.AllowLists()
-
- ////////////
- // Tables //
- ////////////
-
- p.AllowTables()
-
- ///////////
- // Forms //
- ///////////
-
- // By and large, forms are not permitted. However there are some form
- // elements that can be used to present data, and we do permit those
- //
- // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
- // "textarea" "optgroup" "option" are all not permitted
-
- // "meter" is permitted
- p.AllowAttrs(
- "value",
- "min",
- "max",
- "low",
- "high",
- "optimum",
- ).Matching(Number).OnElements("meter")
-
- // "progress" is permitted
- p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
-
- //////////////////////
- // Embedded content //
- //////////////////////
-
- // Vast majority not permitted
- // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
- // "video" are all not permitted
-
- p.AllowImages()
-
- return p
- }
|