|
|
- # frozen_string_literal: true
-
- require 'singleton'
-
- # See also `app/javascript/features/account/util/bio_metadata.js`.
-
# Recognises a small YAML-like frontmatter block inside a bio and splits it
# into the remaining text plus a list of `[key, value]` metadata pairs.
#
# All patterns are assembled from the small building blocks below with
# `unirex` (compile a pattern string) and `rexstr` (wrap an existing Regexp's
# source in a non-capturing group so it can be concatenated safely).
class FrontmatterHandler
  include Singleton

  #  CONVENIENCE FUNCTIONS  #

  # Compiles +str+ into a Regexp with the MULTILINE option (`.` also matches
  # line breaks).
  #
  # Only two arguments are passed to Regexp.new: the historical third
  # argument was ignored unless it was 'n'/'N', and Ruby 3.3+ rejects it.
  def self.unirex(str)
    Regexp.new str, Regexp::MULTILINE
  end

  # Returns the source of +exp+ wrapped in a non-capturing group, so that
  # alternations inside it do not leak into the surrounding pattern.
  def self.rexstr(exp)
    '(?:' + exp.source + ')'
  end

  #  CHARACTER CLASSES  #

  # NOTE(review): in Ruby `^` and `$` match at every line boundary, not only
  # at the very start/end of the string, so frontmatter is also recognised at
  # the start of a later line. Confirm whether `\A` / `\z` were intended.
  DOCUMENT_START = /^/
  DOCUMENT_END = /$/
  ALLOWED_CHAR = # `c-printable` in the YAML 1.2 spec.
    /[\t\n\r\u{20}-\u{7e}\u{85}\u{a0}-\u{d7ff}\u{e000}-\u{fffd}\u{10000}-\u{10ffff}]/u
  WHITE_SPACE = /[ \t]/
  INDENTATION = / */
  # HTML `<br>` tags count as line breaks in addition to CR / LF / CRLF.
  LINE_BREAK = /\r?\n|\r|<br\s*\/?>/
  ESCAPE_CHAR = /[0abt\tnvfre "\/\\N_LP]/
  HEXADECIMAL_CHARS = /[0-9a-fA-F]/
  INDICATOR = /[-?:,\[\]{}&#*!|>'"%@`]/
  FLOW_CHAR = /[,\[\]{}]/

  #  NEGATED CHARACTER CLASSES  #

  # Each of these matches exactly one character that does NOT start a match
  # of the corresponding positive pattern.
  NOT_WHITE_SPACE = unirex '(?!' + rexstr(WHITE_SPACE) + ').'
  NOT_LINE_BREAK = unirex '(?!' + rexstr(LINE_BREAK) + ').'
  NOT_INDICATOR = unirex '(?!' + rexstr(INDICATOR) + ').'
  NOT_FLOW_CHAR = unirex '(?!' + rexstr(FLOW_CHAR) + ').'
  NOT_ALLOWED_CHAR = unirex '(?!' + rexstr(ALLOWED_CHAR) + ').'

  #  BASIC CONSTRUCTS  #

  ANY_WHITE_SPACE = unirex rexstr(WHITE_SPACE) + '*'
  ANY_ALLOWED_CHARS = unirex rexstr(ALLOWED_CHAR) + '*'
  NEW_LINE = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK)
  )
  SOME_NEW_LINES = unirex(
    '(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+'
  )
  # Frontmatter may be preceded by an opening `<p ...>` tag.
  POSSIBLE_STARTS = unirex(
    rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?'
  )
  # Frontmatter ends at one or more line breaks, at DOCUMENT_END, or at `</p>`.
  POSSIBLE_ENDS = unirex(
    rexstr(SOME_NEW_LINES) + '|' +
    rexstr(DOCUMENT_END) + '|' +
    rexstr(/<\/p>/)
  )
  # A backslash followed by a single escape character or by a
  # `xNN` / `uNNNN` / `UNNNNNNNN` hexadecimal escape.
  CHARACTER_ESCAPE = unirex(
    rexstr(/\\/) +
    '(?:' +
      rexstr(ESCAPE_CHAR) + '|' +
      rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' +
      rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' +
      rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' +
    ')'
  )
  # One character inside a double-quoted string: anything on the same line
  # except `"` and `\`, or a complete escape sequence.
  ESCAPED_CHAR = unirex(
    rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' +
    rexstr(CHARACTER_ESCAPE)
  )
  ANY_ESCAPED_CHARS = unirex(
    rexstr(ESCAPED_CHAR) + '*'
  )
  # One character inside a single-quoted string: any non-apostrophe on the
  # same line, or a doubled apostrophe.
  ESCAPED_APOS = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/)
  )
  ANY_ESCAPED_APOS = unirex(
    rexstr(ESCAPED_APOS) + '*'
  )
  FIRST_KEY_CHAR = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    rexstr(NOT_INDICATOR) + '|' +
    rexstr(/[?:-]/) +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    '(?=' + rexstr(NOT_FLOW_CHAR) + ')'
  )
  # Same as FIRST_KEY_CHAR, except that flow indicators are allowed in values.
  FIRST_VALUE_CHAR = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    rexstr(NOT_INDICATOR) + '|' +
    rexstr(/[?:-]/) +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  )
  LATER_KEY_CHAR = unirex(
    rexstr(WHITE_SPACE) + '|' +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    '(?=' + rexstr(NOT_FLOW_CHAR) + ')' +
    rexstr(/[^:#]#?/) + '|' +
    rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  )
  # Same as LATER_KEY_CHAR, except that flow indicators are allowed in values.
  LATER_VALUE_CHAR = unirex(
    rexstr(WHITE_SPACE) + '|' +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    rexstr(/[^:#]#?/) + '|' +
    rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  )

  #  YAML CONSTRUCTS  #

  YAML_START = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(/---/)
  )
  YAML_END = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/)
  )
  # Zero-width check that an opening marker is eventually followed, on its
  # own line, by a closing marker, with only allowed characters in between.
  YAML_LOOKAHEAD = unirex(
    '(?=' +
      rexstr(YAML_START) +
      rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) +
      rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) +
    ')'
  )
  YAML_DOUBLE_QUOTE = unirex(
    rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/)
  )
  YAML_SINGLE_QUOTE = unirex(
    rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/)
  )
  YAML_SIMPLE_KEY = unirex(
    rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*'
  )
  YAML_SIMPLE_VALUE = unirex(
    rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*'
  )
  YAML_KEY = unirex(
    rexstr(YAML_DOUBLE_QUOTE) + '|' +
    rexstr(YAML_SINGLE_QUOTE) + '|' +
    rexstr(YAML_SIMPLE_KEY)
  )
  YAML_VALUE = unirex(
    rexstr(YAML_DOUBLE_QUOTE) + '|' +
    rexstr(YAML_SINGLE_QUOTE) + '|' +
    rexstr(YAML_SIMPLE_VALUE)
  )
  # A colon followed by at least one white-space character.
  YAML_SEPARATOR = unirex(
    rexstr(ANY_WHITE_SPACE) +
    ':' + rexstr(WHITE_SPACE) +
    rexstr(ANY_WHITE_SPACE)
  )
  # Captures the raw key and the raw value (quotes included) as two groups.
  YAML_LINE = unirex(
    '(' + rexstr(YAML_KEY) + ')' +
    rexstr(YAML_SEPARATOR) +
    '(' + rexstr(YAML_VALUE) + ')'
  )

  #  FRONTMATTER REGEX  #

  # An opening marker, at most five `key: value` lines (one plus `{0,4}`)
  # that all share the indentation captured as group 1, and a closing marker.
  YAML_FRONTMATTER = unirex(
    rexstr(POSSIBLE_STARTS) +
    rexstr(YAML_LOOKAHEAD) +
    rexstr(YAML_START) + rexstr(SOME_NEW_LINES) +
    '(?:' +
      '(' + rexstr(INDENTATION) + ')' +
      rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
      '(?:' +
        '\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
      '){0,4}' +
    ')?' +
    rexstr(YAML_END) + rexstr(POSSIBLE_ENDS)
  )

  #  SEARCHES  #

  # Finds one `key: value` line; group 1 is the raw key, group 2 the raw value.
  FIND_YAML_LINES = unirex(
    rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE)
  )

  #  STRING PROCESSING  #

  # Replacement text for every single-character escape of a double-quoted
  # string, keyed by the character that follows the backslash.
  SIMPLE_ESCAPES = {
    '0' => "\u{00}",
    'a' => "\u{07}",
    'b' => "\u{08}",
    't' => "\u{09}",
    "\t" => "\u{09}",
    'n' => "\u{0a}",
    'v' => "\u{0b}",
    'f' => "\u{0c}",
    'r' => "\u{0d}",
    'e' => "\u{1b}",
    ' ' => "\u{20}",
    '"' => "\u{22}",
    '/' => "\u{2f}",
    '\\' => "\u{5c}",
    'N' => "\u{85}",
    '_' => "\u{a0}",
    'L' => "\u{2028}",
    'P' => "\u{2029}"
  }.freeze

  # A backslash followed by a hexadecimal escape (groups 1-3 hold the digits)
  # or by any single character (group 4).
  ESCAPE_SEQUENCE =
    /\\(?:x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4})|U([0-9a-fA-F]{8})|(.))/m

  # Converts a raw key or value, as captured by YAML_LINE, into its text.
  #
  # * Double-quoted: the quotes are stripped and escape sequences decoded.
  # * Single-quoted: the quotes are stripped and `''` becomes `'`.
  # * Anything else is returned unchanged.
  #
  # Escapes are decoded in a single left-to-right pass so that an escaped
  # backslash followed by a letter (`\\n`) yields a backslash and that
  # letter rather than being re-read as a second escape. Unknown escapes
  # and hexadecimal escapes that do not name a valid code point are left
  # as written.
  def process_string(str)
    case str[0]
    when '"'
      str[1..-2].gsub(ESCAPE_SEQUENCE) do |escape|
        match = Regexp.last_match
        hex = match[1] || match[2] || match[3]
        if hex
          decode_codepoint(hex, escape)
        else
          SIMPLE_ESCAPES.fetch(match[4], escape)
        end
      end
    when "'"
      str[1..-2].gsub(/''/, "'")
    else
      str
    end
  end

  #  BIO PROCESSING  #

  # Splits +content+ into its text and its frontmatter metadata.
  #
  # Returns a Hash with:
  #   :text     - a copy of +content+ with `&quot;` / `&apos;` decoded and the
  #               frontmatter (from its opening marker to the end of the
  #               match) removed; a leading `<p ...>` tag is kept.
  #   :metadata - an Array of `[key, value]` String pairs, in document order;
  #               empty when no frontmatter is found.
  def process_bio(content)
    # The bio may contain HTML (the patterns above accept `<p>` and `<br>`),
    # so quotes can arrive as entities; decode them so that quoted YAML
    # strings are recognised. gsub also guarantees we work on a copy.
    text = content.gsub(/&quot;/, '"').gsub(/&apos;/, "'")
    result = { text: text, metadata: [] }

    frontmatter = YAML_FRONTMATTER.match(text)
    return result unless frontmatter

    yaml = frontmatter[0]
    # Remove from the opening marker (skipping any leading `<p ...>` tag) to
    # the end of the match. Offsets come from the match itself, so an
    # unrelated `---` earlier in the text cannot shift the removed span.
    start = frontmatter.begin(0) + (YAML_START =~ yaml)
    text[start...frontmatter.end(0)] = ''

    result[:metadata] = yaml.scan(FIND_YAML_LINES).map do |key, value|
      [process_string(key), process_string(value)]
    end
    result
  end

  private

  # Decodes the hexadecimal digits +hex+ into the matching UTF-8 character,
  # or returns +fallback+ when they do not name a valid Unicode scalar value
  # (Integer#chr would raise RangeError for those).
  def decode_codepoint(hex, fallback)
    codepoint = hex.to_i(16)
    return fallback if codepoint > 0x10ffff
    return fallback if (0xd800..0xdfff).cover?(codepoint)

    codepoint.chr(Encoding::UTF_8)
  end
end
|