You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
1.3 KiB

10 years ago
  1. package mahonia
  2. // Converters for the EUC-KR encoding.
  3. import (
  4. "unicode/utf8"
  5. )
  6. func init() {
  7. RegisterCharset(&Charset{
  8. Name: "EUC-KR",
  9. Aliases: []string{
  10. "ibm-1363",
  11. "KS_C_5601-1987",
  12. "KS_C_5601-1989",
  13. "KSC_5601",
  14. "Korean",
  15. "iso-ir-149",
  16. "cp1363",
  17. "5601",
  18. "ksc",
  19. "windows-949",
  20. "ibm-970",
  21. "cp970",
  22. "970",
  23. "cp949",
  24. },
  25. NewDecoder: func() Decoder {
  26. return decodeEucKr
  27. },
  28. NewEncoder: func() Encoder {
  29. eucKrOnce.Do(reverseEucKrTable)
  30. return encodeEucKr
  31. },
  32. })
  33. }
  34. func decodeEucKr(p []byte) (c rune, size int, status Status) {
  35. if len(p) == 0 {
  36. return 0, 0, NO_ROOM
  37. }
  38. b := p[0]
  39. if b < 0x80 {
  40. return rune(b), 1, SUCCESS
  41. }
  42. if len(p) < 2 {
  43. return 0, 0, NO_ROOM
  44. }
  45. euc := int(b)<<8 + int(p[1])
  46. c = rune(eucKrToUnicode[euc])
  47. if c == 0 {
  48. return utf8.RuneError, 2, INVALID_CHAR
  49. }
  50. return c, 2, SUCCESS
  51. }
  52. func encodeEucKr(p []byte, c rune) (size int, status Status) {
  53. if len(p) == 0 {
  54. return 0, NO_ROOM
  55. }
  56. if c < 0x80 {
  57. p[0] = byte(c)
  58. return 1, SUCCESS
  59. }
  60. if len(p) < 2 {
  61. return 0, NO_ROOM
  62. }
  63. if c > 0xffff {
  64. p[0] = '?'
  65. return 1, INVALID_CHAR
  66. }
  67. euc := unicodeToEucKr[c]
  68. if euc == 0 {
  69. p[0] = '?'
  70. return 1, INVALID_CHAR
  71. }
  72. p[0] = byte(euc >> 8)
  73. p[1] = byte(euc)
  74. return 2, SUCCESS
  75. }