You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

211 lines
4.5 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package japanese
  5. import (
  6. "errors"
  7. "unicode/utf8"
  8. "golang.org/x/text/encoding"
  9. "golang.org/x/text/encoding/internal"
  10. "golang.org/x/text/encoding/internal/identifier"
  11. "golang.org/x/text/transform"
  12. )
  13. // EUCJP is the EUC-JP encoding.
  14. var EUCJP encoding.Encoding = &eucJP
  15. var eucJP = internal.Encoding{
  16. &internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}},
  17. "EUC-JP",
  18. identifier.EUCPkdFmtJapanese,
  19. }
  20. var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
  21. type eucJPDecoder struct{ transform.NopResetter }
  22. func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  23. r, size := rune(0), 0
  24. loop:
  25. for ; nSrc < len(src); nSrc += size {
  26. switch c0 := src[nSrc]; {
  27. case c0 < utf8.RuneSelf:
  28. r, size = rune(c0), 1
  29. case c0 == 0x8e:
  30. if nSrc+1 >= len(src) {
  31. err = transform.ErrShortSrc
  32. break loop
  33. }
  34. c1 := src[nSrc+1]
  35. if c1 < 0xa1 || 0xdf < c1 {
  36. err = errInvalidEUCJP
  37. break loop
  38. }
  39. r, size = rune(c1)+(0xff61-0xa1), 2
  40. case c0 == 0x8f:
  41. if nSrc+2 >= len(src) {
  42. err = transform.ErrShortSrc
  43. break loop
  44. }
  45. c1 := src[nSrc+1]
  46. if c1 < 0xa1 || 0xfe < c1 {
  47. err = errInvalidEUCJP
  48. break loop
  49. }
  50. c2 := src[nSrc+2]
  51. if c2 < 0xa1 || 0xfe < c2 {
  52. err = errInvalidEUCJP
  53. break loop
  54. }
  55. r, size = '\ufffd', 3
  56. if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
  57. r = rune(jis0212Decode[i])
  58. if r == 0 {
  59. r = '\ufffd'
  60. }
  61. }
  62. case 0xa1 <= c0 && c0 <= 0xfe:
  63. if nSrc+1 >= len(src) {
  64. err = transform.ErrShortSrc
  65. break loop
  66. }
  67. c1 := src[nSrc+1]
  68. if c1 < 0xa1 || 0xfe < c1 {
  69. err = errInvalidEUCJP
  70. break loop
  71. }
  72. r, size = '\ufffd', 2
  73. if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
  74. r = rune(jis0208Decode[i])
  75. if r == 0 {
  76. r = '\ufffd'
  77. }
  78. }
  79. default:
  80. err = errInvalidEUCJP
  81. break loop
  82. }
  83. if nDst+utf8.RuneLen(r) > len(dst) {
  84. err = transform.ErrShortDst
  85. break loop
  86. }
  87. nDst += utf8.EncodeRune(dst[nDst:], r)
  88. }
  89. if atEOF && err == transform.ErrShortSrc {
  90. err = errInvalidEUCJP
  91. }
  92. return nDst, nSrc, err
  93. }
  94. type eucJPEncoder struct{ transform.NopResetter }
  95. func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  96. r, size := rune(0), 0
  97. for ; nSrc < len(src); nSrc += size {
  98. r = rune(src[nSrc])
  99. // Decode a 1-byte rune.
  100. if r < utf8.RuneSelf {
  101. size = 1
  102. } else {
  103. // Decode a multi-byte rune.
  104. r, size = utf8.DecodeRune(src[nSrc:])
  105. if size == 1 {
  106. // All valid runes of size 1 (those below utf8.RuneSelf) were
  107. // handled above. We have invalid UTF-8 or we haven't seen the
  108. // full character yet.
  109. if !atEOF && !utf8.FullRune(src[nSrc:]) {
  110. err = transform.ErrShortSrc
  111. break
  112. }
  113. }
  114. // func init checks that the switch covers all tables.
  115. switch {
  116. case encode0Low <= r && r < encode0High:
  117. if r = rune(encode0[r-encode0Low]); r != 0 {
  118. goto write2or3
  119. }
  120. case encode1Low <= r && r < encode1High:
  121. if r = rune(encode1[r-encode1Low]); r != 0 {
  122. goto write2or3
  123. }
  124. case encode2Low <= r && r < encode2High:
  125. if r = rune(encode2[r-encode2Low]); r != 0 {
  126. goto write2or3
  127. }
  128. case encode3Low <= r && r < encode3High:
  129. if r = rune(encode3[r-encode3Low]); r != 0 {
  130. goto write2or3
  131. }
  132. case encode4Low <= r && r < encode4High:
  133. if r = rune(encode4[r-encode4Low]); r != 0 {
  134. goto write2or3
  135. }
  136. case encode5Low <= r && r < encode5High:
  137. if 0xff61 <= r && r < 0xffa0 {
  138. goto write2
  139. }
  140. if r = rune(encode5[r-encode5Low]); r != 0 {
  141. goto write2or3
  142. }
  143. }
  144. err = internal.ErrASCIIReplacement
  145. break
  146. }
  147. if nDst >= len(dst) {
  148. err = transform.ErrShortDst
  149. break
  150. }
  151. dst[nDst] = uint8(r)
  152. nDst++
  153. continue
  154. write2or3:
  155. if r>>tableShift == jis0208 {
  156. if nDst+2 > len(dst) {
  157. err = transform.ErrShortDst
  158. break
  159. }
  160. } else {
  161. if nDst+3 > len(dst) {
  162. err = transform.ErrShortDst
  163. break
  164. }
  165. dst[nDst] = 0x8f
  166. nDst++
  167. }
  168. dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask
  169. dst[nDst+1] = 0xa1 + uint8(r)&codeMask
  170. nDst += 2
  171. continue
  172. write2:
  173. if nDst+2 > len(dst) {
  174. err = transform.ErrShortDst
  175. break
  176. }
  177. dst[nDst+0] = 0x8e
  178. dst[nDst+1] = uint8(r - (0xff61 - 0xa1))
  179. nDst += 2
  180. continue
  181. }
  182. return nDst, nSrc, err
  183. }
  184. func init() {
  185. // Check that the hard-coded encode switch covers all tables.
  186. if numEncodeTables != 6 {
  187. panic("bad numEncodeTables")
  188. }
  189. }