You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

88 lines
2.1 KiB

10 years ago
  1. package mahonia
  2. import (
  3. "sync"
  4. "unicode/utf8"
  5. )
  6. // A kutenTable holds the data for a double-byte character set, arranged by ku
  7. // (区, zone) and ten (点, position). These can be converted to various actual
  8. // encoding schemes.
  9. type kutenTable struct {
  10. // Data[ku][ten] is the unicode value for the character at that zone and
  11. // position.
  12. Data [94][94]uint16
  13. // FromUnicode holds the ku and ten for each Unicode code point.
  14. // It is not available until Reverse() has been called.
  15. FromUnicode [][2]byte
  16. // once is used to synchronize the generation of FromUnicode.
  17. once sync.Once
  18. }
  19. // Reverse generates FromUnicode.
  20. func (t *kutenTable) Reverse() {
  21. t.once.Do(func() {
  22. t.FromUnicode = make([][2]byte, 65536)
  23. for ku := range t.Data {
  24. for ten, unicode := range t.Data[ku] {
  25. t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)}
  26. }
  27. }
  28. })
  29. }
  30. // DecodeLow decodes a character from an encoding that does not have the high
  31. // bit set.
  32. func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) {
  33. if len(p) < 2 {
  34. return 0, 0, NO_ROOM
  35. }
  36. ku := p[0] - 0x21
  37. ten := p[1] - 0x21
  38. if ku > 93 || ten > 93 {
  39. return utf8.RuneError, 1, INVALID_CHAR
  40. }
  41. u := t.Data[ku][ten]
  42. if u == 0 {
  43. return utf8.RuneError, 1, INVALID_CHAR
  44. }
  45. return rune(u), 2, SUCCESS
  46. }
  47. // DecodeHigh decodes a character from an encoding that has the high bit set.
  48. func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) {
  49. if len(p) < 2 {
  50. return 0, 0, NO_ROOM
  51. }
  52. ku := p[0] - 0xa1
  53. ten := p[1] - 0xa1
  54. if ku > 93 || ten > 93 {
  55. return utf8.RuneError, 1, INVALID_CHAR
  56. }
  57. u := t.Data[ku][ten]
  58. if u == 0 {
  59. return utf8.RuneError, 1, INVALID_CHAR
  60. }
  61. return rune(u), 2, SUCCESS
  62. }
  63. // EncodeHigh encodes a character in an encoding that has the high bit set.
  64. func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) {
  65. if len(p) < 2 {
  66. return 0, NO_ROOM
  67. }
  68. if c > 0xffff {
  69. p[0] = '?'
  70. return 1, INVALID_CHAR
  71. }
  72. kuten := t.FromUnicode[c]
  73. if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) {
  74. p[0] = '?'
  75. return 1, INVALID_CHAR
  76. }
  77. p[0] = kuten[0] + 0xa1
  78. p[1] = kuten[1] + 0xa1
  79. return 2, SUCCESS
  80. }