You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

170 lines
2.9 KiB

10 years ago
  1. package mahonia
  2. import (
  3. "unicode/utf16"
  4. )
  5. func init() {
  6. for i := 0; i < len(utf16Charsets); i++ {
  7. RegisterCharset(&utf16Charsets[i])
  8. }
  9. }
  10. var utf16Charsets = []Charset{
  11. {
  12. Name: "UTF-16",
  13. NewDecoder: func() Decoder {
  14. var decodeRune Decoder
  15. return func(p []byte) (c rune, size int, status Status) {
  16. if decodeRune == nil {
  17. // haven't read the BOM yet
  18. if len(p) < 2 {
  19. status = NO_ROOM
  20. return
  21. }
  22. switch {
  23. case p[0] == 0xfe && p[1] == 0xff:
  24. decodeRune = decodeUTF16beRune
  25. return 0, 2, STATE_ONLY
  26. case p[0] == 0xff && p[1] == 0xfe:
  27. decodeRune = decodeUTF16leRune
  28. return 0, 2, STATE_ONLY
  29. default:
  30. decodeRune = decodeUTF16beRune
  31. }
  32. }
  33. return decodeRune(p)
  34. }
  35. },
  36. NewEncoder: func() Encoder {
  37. wroteBOM := false
  38. return func(p []byte, c rune) (size int, status Status) {
  39. if !wroteBOM {
  40. if len(p) < 2 {
  41. status = NO_ROOM
  42. return
  43. }
  44. p[0] = 0xfe
  45. p[1] = 0xff
  46. wroteBOM = true
  47. return 2, STATE_ONLY
  48. }
  49. return encodeUTF16beRune(p, c)
  50. }
  51. },
  52. },
  53. {
  54. Name: "UTF-16BE",
  55. NewDecoder: func() Decoder { return decodeUTF16beRune },
  56. NewEncoder: func() Encoder { return encodeUTF16beRune },
  57. },
  58. {
  59. Name: "UTF-16LE",
  60. NewDecoder: func() Decoder { return decodeUTF16leRune },
  61. NewEncoder: func() Encoder { return encodeUTF16leRune },
  62. },
  63. }
  64. func decodeUTF16beRune(p []byte) (r rune, size int, status Status) {
  65. if len(p) < 2 {
  66. status = NO_ROOM
  67. return
  68. }
  69. c := rune(p[0])<<8 + rune(p[1])
  70. if utf16.IsSurrogate(c) {
  71. if len(p) < 4 {
  72. status = NO_ROOM
  73. return
  74. }
  75. c2 := rune(p[2])<<8 + rune(p[3])
  76. c = utf16.DecodeRune(c, c2)
  77. if c == 0xfffd {
  78. return c, 2, INVALID_CHAR
  79. } else {
  80. return c, 4, SUCCESS
  81. }
  82. }
  83. return c, 2, SUCCESS
  84. }
  85. func encodeUTF16beRune(p []byte, c rune) (size int, status Status) {
  86. if c < 0x10000 {
  87. if len(p) < 2 {
  88. status = NO_ROOM
  89. return
  90. }
  91. p[0] = byte(c >> 8)
  92. p[1] = byte(c)
  93. return 2, SUCCESS
  94. }
  95. if len(p) < 4 {
  96. status = NO_ROOM
  97. return
  98. }
  99. s1, s2 := utf16.EncodeRune(c)
  100. p[0] = byte(s1 >> 8)
  101. p[1] = byte(s1)
  102. p[2] = byte(s2 >> 8)
  103. p[3] = byte(s2)
  104. return 4, SUCCESS
  105. }
  106. func decodeUTF16leRune(p []byte) (r rune, size int, status Status) {
  107. if len(p) < 2 {
  108. status = NO_ROOM
  109. return
  110. }
  111. c := rune(p[1])<<8 + rune(p[0])
  112. if utf16.IsSurrogate(c) {
  113. if len(p) < 4 {
  114. status = NO_ROOM
  115. return
  116. }
  117. c2 := rune(p[3])<<8 + rune(p[2])
  118. c = utf16.DecodeRune(c, c2)
  119. if c == 0xfffd {
  120. return c, 2, INVALID_CHAR
  121. } else {
  122. return c, 4, SUCCESS
  123. }
  124. }
  125. return c, 2, SUCCESS
  126. }
  127. func encodeUTF16leRune(p []byte, c rune) (size int, status Status) {
  128. if c < 0x10000 {
  129. if len(p) < 2 {
  130. status = NO_ROOM
  131. return
  132. }
  133. p[1] = byte(c >> 8)
  134. p[0] = byte(c)
  135. return 2, SUCCESS
  136. }
  137. if len(p) < 4 {
  138. status = NO_ROOM
  139. return
  140. }
  141. s1, s2 := utf16.EncodeRune(c)
  142. p[1] = byte(s1 >> 8)
  143. p[0] = byte(s1)
  144. p[3] = byte(s2 >> 8)
  145. p[2] = byte(s2)
  146. return 4, SUCCESS
  147. }