You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

103 lines
4.6 KiB

  1. module Twitter::TwitterText
  class Configuration
    # Reports whether emoji parsing is enabled; this definition always
    # answers +false+.
    # NOTE(review): presumably this replaces a reader defined on the upstream
    # Configuration class -- confirm against the library this file patches.
    def emoji_parsing_enabled
      false
    end
  end
  7. class Regex
  8. REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou
  9. REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
  10. REGEXEN[:valid_url_balanced_parens] = /
  11. \(
  12. (?:
  13. #{REGEXEN[:valid_general_url_path_chars]}+
  14. |
  15. # allow one nested level of balanced parentheses
  16. (?:
  17. #{REGEXEN[:valid_general_url_path_chars]}*
  18. \(
  19. #{REGEXEN[:valid_general_url_path_chars]}+
  20. \)
  21. #{REGEXEN[:valid_general_url_path_chars]}*
  22. )
  23. )
  24. \)
  25. /iox
  26. REGEXEN[:valid_url_path] = /(?:
  27. (?:
  28. #{REGEXEN[:valid_general_url_path_chars]}*
  29. (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
  30. #{REGEXEN[:valid_url_path_ending_chars]}
  31. )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
  32. )/iox
  33. REGEXEN[:valid_url] = %r{
  34. ( # $1 total match
  35. (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
  36. ( # $3 URL
  37. ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher|gemini):\/\/)? # $4 Protocol (optional)
  38. (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
  39. (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
  40. (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
  41. (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
  42. )
  43. )
  44. }iox
  45. REGEXEN[:validate_nodeid] = /(?:
  46. #{REGEXEN[:validate_url_unreserved]}|
  47. #{REGEXEN[:validate_url_pct_encoded]}|
  48. [!$()*+,;=]
  49. )/iox
  50. REGEXEN[:validate_resid] = /(?:
  51. #{REGEXEN[:validate_url_unreserved]}|
  52. #{REGEXEN[:validate_url_pct_encoded]}|
  53. #{REGEXEN[:validate_url_sub_delims]}
  54. )/iox
  55. REGEXEN[:xmpp_uri] = %r{
  56. (xmpp:) # Protocol
  57. (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
  58. (#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
  59. (#{REGEXEN[:valid_domain]}) # Domain in path
  60. (/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
  61. (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
  62. }iox
  63. REGEXEN[:magnet_uri] = %r{
  64. (magnet:) # Protocol
  65. (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
  66. }iox
  67. REGEXEN[:valid_extended_uri] = %r{
  68. ( # $1 total match
  69. (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
  70. ( # $3 URL
  71. (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
  72. )
  73. )
  74. }iox
  75. end
  module Extractor
    # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
    # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
    # XMPP or magnet URIs an empty array will be returned.
    #
    # <tt>text</tt> is coerced with <tt>to_s</tt> before any inspection, so
    # non-String input (e.g. a Symbol) is handled instead of raising.
    # <tt>_options</tt> is accepted for signature compatibility and is not used.
    #
    # Each element of the returned array is a hash of the form
    # <tt>{ url: String, indices: [start, end] }</tt>, where the indices are
    # the values reported by <tt>char_begin</tt>/<tt>char_end</tt> for the
    # third capture group of the match.
    #
    # If a block is given then it will be called for each extracted URI
    # (XMPP or magnet) with the URI and its start and end indices.
    def extract_extra_uris_with_indices(text, _options = {}) # :yields: uri, start, end
      # Coerce once and use the same string for both the cheap pre-check and
      # the scan; every pattern of interest contains a scheme separator.
      haystack = text.to_s
      return [] unless haystack.include?(":")

      urls = []
      haystack.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
        match = Regexp.last_match
        # Group 3 is the URI itself, without the preceding character.
        urls << {
          url: match[3],
          indices: [match.char_begin(3), match.char_end(3)]
        }
      end

      if block_given?
        urls.each { |entry| yield entry[:url], entry[:indices].first, entry[:indices].last }
      end
      urls
    end
  end
  98. end