You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

151 lines
2.6 KiB

10 years ago
  1. package mahonia
  2. // This file is based on bufio.Reader in the Go standard library,
  3. // which has the following copyright notice:
  4. // Copyright 2009 The Go Authors. All rights reserved.
  5. // Use of this source code is governed by a BSD-style
  6. // license that can be found in the LICENSE file.
  7. import (
  8. "io"
  9. "unicode/utf8"
  10. )
  11. const (
  12. defaultBufSize = 4096
  13. )
  14. // Reader implements character-set decoding for an io.Reader object.
  15. type Reader struct {
  16. buf []byte
  17. rd io.Reader
  18. decode Decoder
  19. r, w int
  20. err error
  21. }
  22. // NewReader creates a new Reader that uses the receiver to decode text.
  23. func (d Decoder) NewReader(rd io.Reader) *Reader {
  24. b := new(Reader)
  25. b.buf = make([]byte, defaultBufSize)
  26. b.rd = rd
  27. b.decode = d
  28. return b
  29. }
  30. // fill reads a new chunk into the buffer.
  31. func (b *Reader) fill() {
  32. // Slide existing data to beginning.
  33. if b.r > 0 {
  34. copy(b.buf, b.buf[b.r:b.w])
  35. b.w -= b.r
  36. b.r = 0
  37. }
  38. // Read new data.
  39. n, e := b.rd.Read(b.buf[b.w:])
  40. b.w += n
  41. if e != nil {
  42. b.err = e
  43. }
  44. }
  45. // Read reads data into p.
  46. // It returns the number of bytes read into p.
  47. // It calls Read at most once on the underlying Reader,
  48. // hence n may be less than len(p).
  49. // At EOF, the count will be zero and err will be os.EOF.
  50. func (b *Reader) Read(p []byte) (n int, err error) {
  51. n = len(p)
  52. filled := false
  53. if n == 0 {
  54. return 0, b.err
  55. }
  56. if b.w == b.r {
  57. if b.err != nil {
  58. return 0, b.err
  59. }
  60. if n > len(b.buf) {
  61. // Large read, empty buffer.
  62. // Allocate a larger buffer for efficiency.
  63. b.buf = make([]byte, n)
  64. }
  65. b.fill()
  66. filled = true
  67. if b.w == b.r {
  68. return 0, b.err
  69. }
  70. }
  71. i := 0
  72. for i < n {
  73. rune, size, status := b.decode(b.buf[b.r:b.w])
  74. if status == STATE_ONLY {
  75. b.r += size
  76. continue
  77. }
  78. if status == NO_ROOM {
  79. if b.err != nil {
  80. rune = 0xfffd
  81. size = b.w - b.r
  82. if size == 0 {
  83. break
  84. }
  85. status = INVALID_CHAR
  86. } else if filled {
  87. break
  88. } else {
  89. b.fill()
  90. filled = true
  91. continue
  92. }
  93. }
  94. if i+utf8.RuneLen(rune) > n {
  95. break
  96. }
  97. b.r += size
  98. if rune < 128 {
  99. p[i] = byte(rune)
  100. i++
  101. } else {
  102. i += utf8.EncodeRune(p[i:], rune)
  103. }
  104. }
  105. return i, nil
  106. }
  107. // ReadRune reads a single Unicode character and returns the
  108. // rune and its size in bytes.
  109. func (b *Reader) ReadRune() (c rune, size int, err error) {
  110. read:
  111. c, size, status := b.decode(b.buf[b.r:b.w])
  112. if status == NO_ROOM && b.err == nil {
  113. b.fill()
  114. goto read
  115. }
  116. if status == STATE_ONLY {
  117. b.r += size
  118. goto read
  119. }
  120. if b.r == b.w {
  121. return 0, 0, b.err
  122. }
  123. if status == NO_ROOM {
  124. c = 0xfffd
  125. size = b.w - b.r
  126. status = INVALID_CHAR
  127. }
  128. b.r += size
  129. return c, size, nil
  130. }