You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 lines
2.2 KiB

10 years ago
  1. package mahonia
  2. import (
  3. "unicode/utf8"
  4. )
  5. // converters for ISO-2022-JP encoding
  6. const esc = 27 // ASCII ESC (0x1B): first byte of the three-byte escape sequences matched and emitted in init below
  7. func init() {
  8. type jpEncoding int
  9. const (
  10. ascii jpEncoding = iota
  11. jisX0201Roman
  12. jisX0208
  13. )
  14. RegisterCharset(&Charset{
  15. Name: "ISO-2022-JP",
  16. NewDecoder: func() Decoder {
  17. encoding := ascii
  18. return func(p []byte) (c rune, size int, status Status) {
  19. if len(p) == 0 {
  20. return 0, 0, NO_ROOM
  21. }
  22. b := p[0]
  23. if b == esc {
  24. if len(p) < 3 {
  25. return 0, 0, NO_ROOM
  26. }
  27. switch p[1] {
  28. case '(':
  29. switch p[2] {
  30. case 'B':
  31. encoding = ascii
  32. return 0, 3, STATE_ONLY
  33. case 'J':
  34. encoding = jisX0201Roman
  35. return 0, 3, STATE_ONLY
  36. }
  37. case '$':
  38. switch p[2] {
  39. case '@', 'B':
  40. encoding = jisX0208
  41. return 0, 3, STATE_ONLY
  42. }
  43. }
  44. }
  45. switch encoding {
  46. case ascii:
  47. if b > 127 {
  48. return utf8.RuneError, 1, INVALID_CHAR
  49. }
  50. return rune(b), 1, SUCCESS
  51. case jisX0201Roman:
  52. if b > 127 {
  53. return utf8.RuneError, 1, INVALID_CHAR
  54. }
  55. switch b {
  56. case '\\':
  57. return 0xA5, 1, SUCCESS
  58. case '~':
  59. return 0x203E, 1, SUCCESS
  60. }
  61. return rune(b), 1, SUCCESS
  62. case jisX0208:
  63. return jis0208Table.DecodeLow(p)
  64. }
  65. panic("unreachable")
  66. }
  67. },
  68. NewEncoder: func() Encoder {
  69. jis0208Table.Reverse()
  70. encoding := ascii
  71. return func(p []byte, c rune) (size int, status Status) {
  72. if len(p) == 0 {
  73. return 0, NO_ROOM
  74. }
  75. if c < 128 {
  76. if encoding != ascii {
  77. if len(p) < 4 {
  78. return 0, NO_ROOM
  79. }
  80. p[0], p[1], p[2] = esc, '(', 'B'
  81. p[3] = byte(c)
  82. encoding = ascii
  83. return 4, SUCCESS
  84. }
  85. p[0] = byte(c)
  86. return 1, SUCCESS
  87. }
  88. if c > 65535 {
  89. return 0, INVALID_CHAR
  90. }
  91. jis := jis0208Table.FromUnicode[c]
  92. if jis == [2]byte{0, 0} && c != rune(jis0208Table.Data[0][0]) {
  93. return 0, INVALID_CHAR
  94. }
  95. if encoding != jisX0208 {
  96. if len(p) < 3 {
  97. return 0, NO_ROOM
  98. }
  99. p[0], p[1], p[2] = esc, '$', 'B'
  100. encoding = jisX0208
  101. return 3, STATE_ONLY
  102. }
  103. p[0] = jis[0] + 0x21
  104. p[1] = jis[1] + 0x21
  105. return 2, SUCCESS
  106. }
  107. },
  108. })
  109. }