You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.4 KiB

10 years ago
10 years ago
// Package mahonia is a character-set conversion library for Go.
//
// Deprecated: use code.google.com/p/go.text/encoding (now golang.org/x/text/encoding),
// perhaps along with code.google.com/p/go.net/html/charset (now golang.org/x/net/html/charset).
  5. package mahonia
  6. import (
  7. "bytes"
  8. "unicode"
  9. )
  10. // Status is the type for the status return value from a Decoder or Encoder.
  11. type Status int
  12. const (
  13. // SUCCESS means that the character was converted with no problems.
  14. SUCCESS = Status(iota)
  15. // INVALID_CHAR means that the source contained invalid bytes, or that the character
  16. // could not be represented in the destination encoding.
  17. // The Encoder or Decoder should have output a substitute character.
  18. INVALID_CHAR
  19. // NO_ROOM means there were not enough input bytes to form a complete character,
  20. // or there was not enough room in the output buffer to write a complete character.
  21. // No bytes were written, and no internal state was changed in the Encoder or Decoder.
  22. NO_ROOM
  23. // STATE_ONLY means that bytes were read or written indicating a state transition,
  24. // but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
  25. STATE_ONLY
  26. )
// A Decoder is a function that decodes a character set, one character at a time.
// It works much like utf8.DecodeRune, but has an additional status return value:
// given the encoded input p it returns the decoded rune c, a byte count size
// (presumably the number of bytes of p consumed, by analogy with
// utf8.DecodeRune), and a Status describing the outcome.
type Decoder func(p []byte) (c rune, size int, status Status)
// An Encoder is a function that encodes a character set, one character at a time.
// It works much like utf8.EncodeRune, but has an additional status return value:
// it encodes the rune c into the buffer p and returns a byte count size
// (presumably the number of bytes written to p, by analogy with
// utf8.EncodeRune) together with a Status describing the outcome.
type Encoder func(p []byte, c rune) (size int, status Status)
// A Charset represents a character set that can be converted, and contains
// constructor functions for the Decoders and Encoders that convert text
// from and to that character set.
type Charset struct {
	// Name is the character set's canonical name.
	Name string

	// Aliases is a list of alternate names for the character set.
	Aliases []string

	// NewDecoder returns a Decoder to convert from the charset to Unicode.
	NewDecoder func() Decoder

	// NewEncoder returns an Encoder to convert from Unicode to the charset.
	NewEncoder func() Encoder
}
  45. // The charsets are stored in charsets under their canonical names.
  46. var charsets = make(map[string]*Charset)
  47. // aliases maps their aliases to their canonical names.
  48. var aliases = make(map[string]string)
  49. // simplifyName converts a name to lower case and removes non-alphanumeric characters.
  50. // This is how the names are used as keys to the maps.
  51. func simplifyName(name string) string {
  52. var buf bytes.Buffer
  53. for _, c := range name {
  54. switch {
  55. case unicode.IsDigit(c):
  56. buf.WriteRune(c)
  57. case unicode.IsLetter(c):
  58. buf.WriteRune(unicode.ToLower(c))
  59. default:
  60. }
  61. }
  62. return buf.String()
  63. }
  64. // RegisterCharset adds a charset to the charsetMap.
  65. func RegisterCharset(cs *Charset) {
  66. name := cs.Name
  67. charsets[name] = cs
  68. aliases[simplifyName(name)] = name
  69. for _, alias := range cs.Aliases {
  70. aliases[simplifyName(alias)] = name
  71. }
  72. }
  73. // GetCharset fetches a charset by name.
  74. // If the name is not found, it returns nil.
  75. func GetCharset(name string) *Charset {
  76. return charsets[aliases[simplifyName(name)]]
  77. }
  78. // NewDecoder returns a Decoder to decode the named charset.
  79. // If the name is not found, it returns nil.
  80. func NewDecoder(name string) Decoder {
  81. cs := GetCharset(name)
  82. if cs == nil {
  83. return nil
  84. }
  85. return cs.NewDecoder()
  86. }
  87. // NewEncoder returns an Encoder to encode the named charset.
  88. func NewEncoder(name string) Encoder {
  89. cs := GetCharset(name)
  90. if cs == nil {
  91. return nil
  92. }
  93. return cs.NewEncoder()
  94. }