You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

102 lines
1.7 KiB

10 years ago
  1. package mahonia
  2. import (
  3. "unicode/utf8"
  4. )
  5. // Converters for the EUC-JP encoding
  6. func init() {
  7. RegisterCharset(&Charset{
  8. Name: "EUC-JP",
  9. Aliases: []string{"extended_unix_code_packed_format_for_japanese", "cseucpkdfmtjapanese"},
  10. NewDecoder: func() Decoder {
  11. return decodeEucJP
  12. },
  13. NewEncoder: func() Encoder {
  14. jis0208Table.Reverse()
  15. jis0212Table.Reverse()
  16. return encodeEucJP
  17. },
  18. })
  19. }
  20. func decodeEucJP(p []byte) (c rune, size int, status Status) {
  21. if len(p) == 0 {
  22. return 0, 0, NO_ROOM
  23. }
  24. b := p[0]
  25. switch {
  26. case b < 0x80:
  27. return rune(b), 1, SUCCESS
  28. case b == 0x8e:
  29. if len(p) < 2 {
  30. return 0, 0, NO_ROOM
  31. }
  32. b2 := p[1]
  33. if b2 < 0xa1 || b2 > 0xdf {
  34. return utf8.RuneError, 1, INVALID_CHAR
  35. }
  36. return rune(b2) + (0xff61 - 0xa1), 2, SUCCESS
  37. case b == 0x8f:
  38. if len(p) < 3 {
  39. return 0, 0, NO_ROOM
  40. }
  41. c, size, status = jis0212Table.DecodeHigh(p[1:3])
  42. if status == SUCCESS {
  43. size = 3
  44. }
  45. return
  46. case 0xa1 <= b && b <= 0xfe:
  47. return jis0208Table.DecodeHigh(p)
  48. }
  49. return utf8.RuneError, 1, INVALID_CHAR
  50. }
  51. func encodeEucJP(p []byte, c rune) (size int, status Status) {
  52. if len(p) == 0 {
  53. return 0, NO_ROOM
  54. }
  55. if c < 0x80 {
  56. p[0] = byte(c)
  57. return 1, SUCCESS
  58. }
  59. if len(p) < 2 {
  60. return 0, NO_ROOM
  61. }
  62. if c > 0xffff {
  63. p[0] = '?'
  64. return 1, INVALID_CHAR
  65. }
  66. if 0xff61 <= c && c <= 0xff9f {
  67. p[0] = 0x8e
  68. p[1] = byte(c - (0xff61 - 0xa1))
  69. return 2, SUCCESS
  70. }
  71. size, status = jis0208Table.EncodeHigh(p, c)
  72. if status == SUCCESS {
  73. return size, status
  74. }
  75. size, status = jis0212Table.EncodeHigh(p[1:], c)
  76. switch status {
  77. case SUCCESS:
  78. p[0] = 0x8f
  79. return size + 1, SUCCESS
  80. case INVALID_CHAR:
  81. p[0] = '?'
  82. return 1, INVALID_CHAR
  83. }
  84. return size, status
  85. }