You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

187 lines
5.2 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package runes
  5. import (
  6. "unicode/utf8"
  7. "golang.org/x/text/transform"
  8. )
  9. // Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
  10. // This is done for various reasons:
  11. // - To retain the semantics of the Nop transformer: if input is passed to a Nop
  12. // one would expect it to be unchanged.
  13. // - It would be very expensive to pass a converted RuneError to a transformer:
  14. // a transformer might need more source bytes after RuneError, meaning that
  15. // the only way to pass it safely is to create a new buffer and manage the
  16. // intermingling of RuneErrors and normal input.
  17. // - Many transformers leave ill-formed UTF-8 as is, so this is not
  18. // inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
  19. // logical consequence of the operation (as for Map) or if it otherwise would
  20. // pose security concerns (as for Remove).
  21. // - An alternative would be to return an error on ill-formed UTF-8, but this
  22. // would be inconsistent with other operations.
  23. // If returns a transformer that applies tIn to consecutive runes for which
  24. // s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
  25. // is called on tIn and tNotIn at the start of each run. A Nop transformer will
  26. // substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
  27. // to RuneError to determine which transformer to apply, but is passed as is to
  28. // the respective transformer.
  29. func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
  30. if tIn == nil && tNotIn == nil {
  31. return Transformer{transform.Nop}
  32. }
  33. if tIn == nil {
  34. tIn = transform.Nop
  35. }
  36. if tNotIn == nil {
  37. tNotIn = transform.Nop
  38. }
  39. sIn, ok := tIn.(transform.SpanningTransformer)
  40. if !ok {
  41. sIn = dummySpan{tIn}
  42. }
  43. sNotIn, ok := tNotIn.(transform.SpanningTransformer)
  44. if !ok {
  45. sNotIn = dummySpan{tNotIn}
  46. }
  47. a := &cond{
  48. tIn: sIn,
  49. tNotIn: sNotIn,
  50. f: s.Contains,
  51. }
  52. a.Reset()
  53. return Transformer{a}
  54. }
  55. type dummySpan struct{ transform.Transformer }
  56. func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
  57. return 0, transform.ErrEndOfSpan
  58. }
  59. type cond struct {
  60. tIn, tNotIn transform.SpanningTransformer
  61. f func(rune) bool
  62. check func(rune) bool // current check to perform
  63. t transform.SpanningTransformer // current transformer to use
  64. }
  65. // Reset implements transform.Transformer.
  66. func (t *cond) Reset() {
  67. t.check = t.is
  68. t.t = t.tIn
  69. t.t.Reset() // notIn will be reset on first usage.
  70. }
  71. func (t *cond) is(r rune) bool {
  72. if t.f(r) {
  73. return true
  74. }
  75. t.check = t.isNot
  76. t.t = t.tNotIn
  77. t.tNotIn.Reset()
  78. return false
  79. }
  80. func (t *cond) isNot(r rune) bool {
  81. if !t.f(r) {
  82. return true
  83. }
  84. t.check = t.is
  85. t.t = t.tIn
  86. t.tIn.Reset()
  87. return false
  88. }
  89. // This implementation of Span doesn't help all too much, but it needs to be
  90. // there to satisfy this package's Transformer interface.
  91. // TODO: there are certainly room for improvements, though. For example, if
  92. // t.t == transform.Nop (which will a common occurrence) it will save a bundle
  93. // to special-case that loop.
  94. func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
  95. p := 0
  96. for n < len(src) && err == nil {
  97. // Don't process too much at a time as the Spanner that will be
  98. // called on this block may terminate early.
  99. const maxChunk = 4096
  100. max := len(src)
  101. if v := n + maxChunk; v < max {
  102. max = v
  103. }
  104. atEnd := false
  105. size := 0
  106. current := t.t
  107. for ; p < max; p += size {
  108. r := rune(src[p])
  109. if r < utf8.RuneSelf {
  110. size = 1
  111. } else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
  112. if !atEOF && !utf8.FullRune(src[p:]) {
  113. err = transform.ErrShortSrc
  114. break
  115. }
  116. }
  117. if !t.check(r) {
  118. // The next rune will be the start of a new run.
  119. atEnd = true
  120. break
  121. }
  122. }
  123. n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
  124. n += n2
  125. if err2 != nil {
  126. return n, err2
  127. }
  128. // At this point either err != nil or t.check will pass for the rune at p.
  129. p = n + size
  130. }
  131. return n, err
  132. }
  133. func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  134. p := 0
  135. for nSrc < len(src) && err == nil {
  136. // Don't process too much at a time, as the work might be wasted if the
  137. // destination buffer isn't large enough to hold the result or a
  138. // transform returns an error early.
  139. const maxChunk = 4096
  140. max := len(src)
  141. if n := nSrc + maxChunk; n < len(src) {
  142. max = n
  143. }
  144. atEnd := false
  145. size := 0
  146. current := t.t
  147. for ; p < max; p += size {
  148. r := rune(src[p])
  149. if r < utf8.RuneSelf {
  150. size = 1
  151. } else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
  152. if !atEOF && !utf8.FullRune(src[p:]) {
  153. err = transform.ErrShortSrc
  154. break
  155. }
  156. }
  157. if !t.check(r) {
  158. // The next rune will be the start of a new run.
  159. atEnd = true
  160. break
  161. }
  162. }
  163. nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
  164. nDst += nDst2
  165. nSrc += nSrc2
  166. if err2 != nil {
  167. return nDst, nSrc, err2
  168. }
  169. // At this point either err != nil or t.check will pass for the rune at p.
  170. p = nSrc + size
  171. }
  172. return nDst, nSrc, err
  173. }