You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

622 lines
14 KiB

  1. package cascadia
  2. import (
  3. "bytes"
  4. "fmt"
  5. "regexp"
  6. "strings"
  7. "golang.org/x/net/html"
  8. )
  9. // the Selector type, and functions for creating them
  10. // A Selector is a function which tells whether a node matches or not.
  11. type Selector func(*html.Node) bool
  12. // hasChildMatch returns whether n has any child that matches a.
  13. func hasChildMatch(n *html.Node, a Selector) bool {
  14. for c := n.FirstChild; c != nil; c = c.NextSibling {
  15. if a(c) {
  16. return true
  17. }
  18. }
  19. return false
  20. }
  21. // hasDescendantMatch performs a depth-first search of n's descendants,
  22. // testing whether any of them match a. It returns true as soon as a match is
  23. // found, or false if no match is found.
  24. func hasDescendantMatch(n *html.Node, a Selector) bool {
  25. for c := n.FirstChild; c != nil; c = c.NextSibling {
  26. if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
  27. return true
  28. }
  29. }
  30. return false
  31. }
  32. // Compile parses a selector and returns, if successful, a Selector object
  33. // that can be used to match against html.Node objects.
  34. func Compile(sel string) (Selector, error) {
  35. p := &parser{s: sel}
  36. compiled, err := p.parseSelectorGroup()
  37. if err != nil {
  38. return nil, err
  39. }
  40. if p.i < len(sel) {
  41. return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
  42. }
  43. return compiled, nil
  44. }
  45. // MustCompile is like Compile, but panics instead of returning an error.
  46. func MustCompile(sel string) Selector {
  47. compiled, err := Compile(sel)
  48. if err != nil {
  49. panic(err)
  50. }
  51. return compiled
  52. }
  53. // MatchAll returns a slice of the nodes that match the selector,
  54. // from n and its children.
  55. func (s Selector) MatchAll(n *html.Node) []*html.Node {
  56. return s.matchAllInto(n, nil)
  57. }
  58. func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
  59. if s(n) {
  60. storage = append(storage, n)
  61. }
  62. for child := n.FirstChild; child != nil; child = child.NextSibling {
  63. storage = s.matchAllInto(child, storage)
  64. }
  65. return storage
  66. }
  67. // Match returns true if the node matches the selector.
  68. func (s Selector) Match(n *html.Node) bool {
  69. return s(n)
  70. }
  71. // MatchFirst returns the first node that matches s, from n and its children.
  72. func (s Selector) MatchFirst(n *html.Node) *html.Node {
  73. if s.Match(n) {
  74. return n
  75. }
  76. for c := n.FirstChild; c != nil; c = c.NextSibling {
  77. m := s.MatchFirst(c)
  78. if m != nil {
  79. return m
  80. }
  81. }
  82. return nil
  83. }
  84. // Filter returns the nodes in nodes that match the selector.
  85. func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
  86. for _, n := range nodes {
  87. if s(n) {
  88. result = append(result, n)
  89. }
  90. }
  91. return result
  92. }
  93. // typeSelector returns a Selector that matches elements with a given tag name.
  94. func typeSelector(tag string) Selector {
  95. tag = toLowerASCII(tag)
  96. return func(n *html.Node) bool {
  97. return n.Type == html.ElementNode && n.Data == tag
  98. }
  99. }
  100. // toLowerASCII returns s with all ASCII capital letters lowercased.
  101. func toLowerASCII(s string) string {
  102. var b []byte
  103. for i := 0; i < len(s); i++ {
  104. if c := s[i]; 'A' <= c && c <= 'Z' {
  105. if b == nil {
  106. b = make([]byte, len(s))
  107. copy(b, s)
  108. }
  109. b[i] = s[i] + ('a' - 'A')
  110. }
  111. }
  112. if b == nil {
  113. return s
  114. }
  115. return string(b)
  116. }
  117. // attributeSelector returns a Selector that matches elements
  118. // where the attribute named key satisifes the function f.
  119. func attributeSelector(key string, f func(string) bool) Selector {
  120. key = toLowerASCII(key)
  121. return func(n *html.Node) bool {
  122. if n.Type != html.ElementNode {
  123. return false
  124. }
  125. for _, a := range n.Attr {
  126. if a.Key == key && f(a.Val) {
  127. return true
  128. }
  129. }
  130. return false
  131. }
  132. }
  133. // attributeExistsSelector returns a Selector that matches elements that have
  134. // an attribute named key.
  135. func attributeExistsSelector(key string) Selector {
  136. return attributeSelector(key, func(string) bool { return true })
  137. }
  138. // attributeEqualsSelector returns a Selector that matches elements where
  139. // the attribute named key has the value val.
  140. func attributeEqualsSelector(key, val string) Selector {
  141. return attributeSelector(key,
  142. func(s string) bool {
  143. return s == val
  144. })
  145. }
  146. // attributeNotEqualSelector returns a Selector that matches elements where
  147. // the attribute named key does not have the value val.
  148. func attributeNotEqualSelector(key, val string) Selector {
  149. key = toLowerASCII(key)
  150. return func(n *html.Node) bool {
  151. if n.Type != html.ElementNode {
  152. return false
  153. }
  154. for _, a := range n.Attr {
  155. if a.Key == key && a.Val == val {
  156. return false
  157. }
  158. }
  159. return true
  160. }
  161. }
  162. // attributeIncludesSelector returns a Selector that matches elements where
  163. // the attribute named key is a whitespace-separated list that includes val.
  164. func attributeIncludesSelector(key, val string) Selector {
  165. return attributeSelector(key,
  166. func(s string) bool {
  167. for s != "" {
  168. i := strings.IndexAny(s, " \t\r\n\f")
  169. if i == -1 {
  170. return s == val
  171. }
  172. if s[:i] == val {
  173. return true
  174. }
  175. s = s[i+1:]
  176. }
  177. return false
  178. })
  179. }
  180. // attributeDashmatchSelector returns a Selector that matches elements where
  181. // the attribute named key equals val or starts with val plus a hyphen.
  182. func attributeDashmatchSelector(key, val string) Selector {
  183. return attributeSelector(key,
  184. func(s string) bool {
  185. if s == val {
  186. return true
  187. }
  188. if len(s) <= len(val) {
  189. return false
  190. }
  191. if s[:len(val)] == val && s[len(val)] == '-' {
  192. return true
  193. }
  194. return false
  195. })
  196. }
  197. // attributePrefixSelector returns a Selector that matches elements where
  198. // the attribute named key starts with val.
  199. func attributePrefixSelector(key, val string) Selector {
  200. return attributeSelector(key,
  201. func(s string) bool {
  202. if strings.TrimSpace(s) == "" {
  203. return false
  204. }
  205. return strings.HasPrefix(s, val)
  206. })
  207. }
  208. // attributeSuffixSelector returns a Selector that matches elements where
  209. // the attribute named key ends with val.
  210. func attributeSuffixSelector(key, val string) Selector {
  211. return attributeSelector(key,
  212. func(s string) bool {
  213. if strings.TrimSpace(s) == "" {
  214. return false
  215. }
  216. return strings.HasSuffix(s, val)
  217. })
  218. }
  219. // attributeSubstringSelector returns a Selector that matches nodes where
  220. // the attribute named key contains val.
  221. func attributeSubstringSelector(key, val string) Selector {
  222. return attributeSelector(key,
  223. func(s string) bool {
  224. if strings.TrimSpace(s) == "" {
  225. return false
  226. }
  227. return strings.Contains(s, val)
  228. })
  229. }
  230. // attributeRegexSelector returns a Selector that matches nodes where
  231. // the attribute named key matches the regular expression rx
  232. func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
  233. return attributeSelector(key,
  234. func(s string) bool {
  235. return rx.MatchString(s)
  236. })
  237. }
  238. // intersectionSelector returns a selector that matches nodes that match
  239. // both a and b.
  240. func intersectionSelector(a, b Selector) Selector {
  241. return func(n *html.Node) bool {
  242. return a(n) && b(n)
  243. }
  244. }
  245. // unionSelector returns a selector that matches elements that match
  246. // either a or b.
  247. func unionSelector(a, b Selector) Selector {
  248. return func(n *html.Node) bool {
  249. return a(n) || b(n)
  250. }
  251. }
  252. // negatedSelector returns a selector that matches elements that do not match a.
  253. func negatedSelector(a Selector) Selector {
  254. return func(n *html.Node) bool {
  255. if n.Type != html.ElementNode {
  256. return false
  257. }
  258. return !a(n)
  259. }
  260. }
  261. // writeNodeText writes the text contained in n and its descendants to b.
  262. func writeNodeText(n *html.Node, b *bytes.Buffer) {
  263. switch n.Type {
  264. case html.TextNode:
  265. b.WriteString(n.Data)
  266. case html.ElementNode:
  267. for c := n.FirstChild; c != nil; c = c.NextSibling {
  268. writeNodeText(c, b)
  269. }
  270. }
  271. }
  272. // nodeText returns the text contained in n and its descendants.
  273. func nodeText(n *html.Node) string {
  274. var b bytes.Buffer
  275. writeNodeText(n, &b)
  276. return b.String()
  277. }
  278. // nodeOwnText returns the contents of the text nodes that are direct
  279. // children of n.
  280. func nodeOwnText(n *html.Node) string {
  281. var b bytes.Buffer
  282. for c := n.FirstChild; c != nil; c = c.NextSibling {
  283. if c.Type == html.TextNode {
  284. b.WriteString(c.Data)
  285. }
  286. }
  287. return b.String()
  288. }
  289. // textSubstrSelector returns a selector that matches nodes that
  290. // contain the given text.
  291. func textSubstrSelector(val string) Selector {
  292. return func(n *html.Node) bool {
  293. text := strings.ToLower(nodeText(n))
  294. return strings.Contains(text, val)
  295. }
  296. }
  297. // ownTextSubstrSelector returns a selector that matches nodes that
  298. // directly contain the given text
  299. func ownTextSubstrSelector(val string) Selector {
  300. return func(n *html.Node) bool {
  301. text := strings.ToLower(nodeOwnText(n))
  302. return strings.Contains(text, val)
  303. }
  304. }
  305. // textRegexSelector returns a selector that matches nodes whose text matches
  306. // the specified regular expression
  307. func textRegexSelector(rx *regexp.Regexp) Selector {
  308. return func(n *html.Node) bool {
  309. return rx.MatchString(nodeText(n))
  310. }
  311. }
  312. // ownTextRegexSelector returns a selector that matches nodes whose text
  313. // directly matches the specified regular expression
  314. func ownTextRegexSelector(rx *regexp.Regexp) Selector {
  315. return func(n *html.Node) bool {
  316. return rx.MatchString(nodeOwnText(n))
  317. }
  318. }
  319. // hasChildSelector returns a selector that matches elements
  320. // with a child that matches a.
  321. func hasChildSelector(a Selector) Selector {
  322. return func(n *html.Node) bool {
  323. if n.Type != html.ElementNode {
  324. return false
  325. }
  326. return hasChildMatch(n, a)
  327. }
  328. }
  329. // hasDescendantSelector returns a selector that matches elements
  330. // with any descendant that matches a.
  331. func hasDescendantSelector(a Selector) Selector {
  332. return func(n *html.Node) bool {
  333. if n.Type != html.ElementNode {
  334. return false
  335. }
  336. return hasDescendantMatch(n, a)
  337. }
  338. }
  339. // nthChildSelector returns a selector that implements :nth-child(an+b).
  340. // If last is true, implements :nth-last-child instead.
  341. // If ofType is true, implements :nth-of-type instead.
  342. func nthChildSelector(a, b int, last, ofType bool) Selector {
  343. return func(n *html.Node) bool {
  344. if n.Type != html.ElementNode {
  345. return false
  346. }
  347. parent := n.Parent
  348. if parent == nil {
  349. return false
  350. }
  351. if parent.Type == html.DocumentNode {
  352. return false
  353. }
  354. i := -1
  355. count := 0
  356. for c := parent.FirstChild; c != nil; c = c.NextSibling {
  357. if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
  358. continue
  359. }
  360. count++
  361. if c == n {
  362. i = count
  363. if !last {
  364. break
  365. }
  366. }
  367. }
  368. if i == -1 {
  369. // This shouldn't happen, since n should always be one of its parent's children.
  370. return false
  371. }
  372. if last {
  373. i = count - i + 1
  374. }
  375. i -= b
  376. if a == 0 {
  377. return i == 0
  378. }
  379. return i%a == 0 && i/a >= 0
  380. }
  381. }
  382. // simpleNthChildSelector returns a selector that implements :nth-child(b).
  383. // If ofType is true, implements :nth-of-type instead.
  384. func simpleNthChildSelector(b int, ofType bool) Selector {
  385. return func(n *html.Node) bool {
  386. if n.Type != html.ElementNode {
  387. return false
  388. }
  389. parent := n.Parent
  390. if parent == nil {
  391. return false
  392. }
  393. if parent.Type == html.DocumentNode {
  394. return false
  395. }
  396. count := 0
  397. for c := parent.FirstChild; c != nil; c = c.NextSibling {
  398. if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
  399. continue
  400. }
  401. count++
  402. if c == n {
  403. return count == b
  404. }
  405. if count >= b {
  406. return false
  407. }
  408. }
  409. return false
  410. }
  411. }
  412. // simpleNthLastChildSelector returns a selector that implements
  413. // :nth-last-child(b). If ofType is true, implements :nth-last-of-type
  414. // instead.
  415. func simpleNthLastChildSelector(b int, ofType bool) Selector {
  416. return func(n *html.Node) bool {
  417. if n.Type != html.ElementNode {
  418. return false
  419. }
  420. parent := n.Parent
  421. if parent == nil {
  422. return false
  423. }
  424. if parent.Type == html.DocumentNode {
  425. return false
  426. }
  427. count := 0
  428. for c := parent.LastChild; c != nil; c = c.PrevSibling {
  429. if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
  430. continue
  431. }
  432. count++
  433. if c == n {
  434. return count == b
  435. }
  436. if count >= b {
  437. return false
  438. }
  439. }
  440. return false
  441. }
  442. }
  443. // onlyChildSelector returns a selector that implements :only-child.
  444. // If ofType is true, it implements :only-of-type instead.
  445. func onlyChildSelector(ofType bool) Selector {
  446. return func(n *html.Node) bool {
  447. if n.Type != html.ElementNode {
  448. return false
  449. }
  450. parent := n.Parent
  451. if parent == nil {
  452. return false
  453. }
  454. if parent.Type == html.DocumentNode {
  455. return false
  456. }
  457. count := 0
  458. for c := parent.FirstChild; c != nil; c = c.NextSibling {
  459. if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
  460. continue
  461. }
  462. count++
  463. if count > 1 {
  464. return false
  465. }
  466. }
  467. return count == 1
  468. }
  469. }
  470. // inputSelector is a Selector that matches input, select, textarea and button elements.
  471. func inputSelector(n *html.Node) bool {
  472. return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
  473. }
  474. // emptyElementSelector is a Selector that matches empty elements.
  475. func emptyElementSelector(n *html.Node) bool {
  476. if n.Type != html.ElementNode {
  477. return false
  478. }
  479. for c := n.FirstChild; c != nil; c = c.NextSibling {
  480. switch c.Type {
  481. case html.ElementNode, html.TextNode:
  482. return false
  483. }
  484. }
  485. return true
  486. }
  487. // descendantSelector returns a Selector that matches an element if
  488. // it matches d and has an ancestor that matches a.
  489. func descendantSelector(a, d Selector) Selector {
  490. return func(n *html.Node) bool {
  491. if !d(n) {
  492. return false
  493. }
  494. for p := n.Parent; p != nil; p = p.Parent {
  495. if a(p) {
  496. return true
  497. }
  498. }
  499. return false
  500. }
  501. }
  502. // childSelector returns a Selector that matches an element if
  503. // it matches d and its parent matches a.
  504. func childSelector(a, d Selector) Selector {
  505. return func(n *html.Node) bool {
  506. return d(n) && n.Parent != nil && a(n.Parent)
  507. }
  508. }
  509. // siblingSelector returns a Selector that matches an element
  510. // if it matches s2 and in is preceded by an element that matches s1.
  511. // If adjacent is true, the sibling must be immediately before the element.
  512. func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
  513. return func(n *html.Node) bool {
  514. if !s2(n) {
  515. return false
  516. }
  517. if adjacent {
  518. for n = n.PrevSibling; n != nil; n = n.PrevSibling {
  519. if n.Type == html.TextNode || n.Type == html.CommentNode {
  520. continue
  521. }
  522. return s1(n)
  523. }
  524. return false
  525. }
  526. // Walk backwards looking for element that matches s1
  527. for c := n.PrevSibling; c != nil; c = c.PrevSibling {
  528. if s1(c) {
  529. return true
  530. }
  531. }
  532. return false
  533. }
  534. }
  535. // rootSelector implements :root
  536. func rootSelector(n *html.Node) bool {
  537. if n.Type != html.ElementNode {
  538. return false
  539. }
  540. if n.Parent == nil {
  541. return false
  542. }
  543. return n.Parent.Type == html.DocumentNode
  544. }