You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

133 lines
3.2 KiB

  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package mdstripper
  5. import (
  6. "bytes"
  7. "sync"
  8. "io"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/markup/common"
  11. "github.com/yuin/goldmark"
  12. "github.com/yuin/goldmark/ast"
  13. "github.com/yuin/goldmark/extension"
  14. "github.com/yuin/goldmark/parser"
  15. "github.com/yuin/goldmark/renderer"
  16. "github.com/yuin/goldmark/renderer/html"
  17. "github.com/yuin/goldmark/text"
  18. )
  19. type stripRenderer struct {
  20. links []string
  21. empty bool
  22. }
  23. func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error {
  24. return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
  25. if !entering {
  26. return ast.WalkContinue, nil
  27. }
  28. switch v := n.(type) {
  29. case *ast.Text:
  30. if !v.IsRaw() {
  31. _, prevSibIsText := n.PreviousSibling().(*ast.Text)
  32. coalesce := prevSibIsText
  33. r.processString(
  34. w,
  35. v.Text(source),
  36. coalesce)
  37. if v.SoftLineBreak() {
  38. r.doubleSpace(w)
  39. }
  40. }
  41. return ast.WalkContinue, nil
  42. case *ast.Link:
  43. r.processLink(w, v.Destination)
  44. return ast.WalkSkipChildren, nil
  45. case *ast.AutoLink:
  46. r.processLink(w, v.URL(source))
  47. return ast.WalkSkipChildren, nil
  48. }
  49. return ast.WalkContinue, nil
  50. })
  51. }
  52. func (r *stripRenderer) doubleSpace(w io.Writer) {
  53. if !r.empty {
  54. _, _ = w.Write([]byte{'\n'})
  55. }
  56. }
  57. func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) {
  58. // Always break-up words
  59. if !coalesce {
  60. r.doubleSpace(w)
  61. }
  62. _, _ = w.Write(text)
  63. r.empty = false
  64. }
  65. func (r *stripRenderer) processLink(w io.Writer, link []byte) {
  66. // Links are processed out of band
  67. r.links = append(r.links, string(link))
  68. }
  69. // GetLinks returns the list of link data collected while parsing
  70. func (r *stripRenderer) GetLinks() []string {
  71. return r.links
  72. }
  73. // AddOptions adds given option to this renderer.
  74. func (r *stripRenderer) AddOptions(...renderer.Option) {
  75. // no-op
  76. }
  77. // StripMarkdown parses markdown content by removing all markup and code blocks
  78. // in order to extract links and other references
  79. func StripMarkdown(rawBytes []byte) (string, []string) {
  80. buf, links := StripMarkdownBytes(rawBytes)
  81. return string(buf), links
  82. }
  83. var stripParser parser.Parser
  84. var once = sync.Once{}
  85. // StripMarkdownBytes parses markdown content by removing all markup and code blocks
  86. // in order to extract links and other references
  87. func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
  88. once.Do(func() {
  89. gdMarkdown := goldmark.New(
  90. goldmark.WithExtensions(extension.Table,
  91. extension.Strikethrough,
  92. extension.TaskList,
  93. extension.DefinitionList,
  94. common.FootnoteExtension,
  95. common.Linkify,
  96. ),
  97. goldmark.WithParserOptions(
  98. parser.WithAttribute(),
  99. parser.WithAutoHeadingID(),
  100. ),
  101. goldmark.WithRendererOptions(
  102. html.WithUnsafe(),
  103. ),
  104. )
  105. stripParser = gdMarkdown.Parser()
  106. })
  107. stripper := &stripRenderer{
  108. links: make([]string, 0, 10),
  109. empty: true,
  110. }
  111. reader := text.NewReader(rawBytes)
  112. doc := stripParser.Parse(reader)
  113. var buf bytes.Buffer
  114. if err := stripper.Render(&buf, rawBytes, doc); err != nil {
  115. log.Error("Unable to strip: %v", err)
  116. }
  117. return buf.Bytes(), stripper.GetLinks()
  118. }