Parser.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser;
  11. use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
  12. use Symfony\Component\CssSelector\Node;
  13. use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
  14. /**
  15. * CSS selector parser.
  16. *
  17. * This component is a port of the Python cssselect library,
  18. * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
  19. *
  20. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  21. *
  22. * @internal
  23. */
  class Parser implements ParserInterface
  {
      /**
       * Tokenizer used by parse() to turn the selector source into a token stream.
       *
       * @var Tokenizer
       */
      private $tokenizer;

      /**
       * @param Tokenizer|null $tokenizer Tokenizer to use; a new Tokenizer is created when null is given
       */
      public function __construct(?Tokenizer $tokenizer = null)
      {
          $this->tokenizer = $tokenizer ?? new Tokenizer();
      }

      /**
       * {@inheritdoc}
       */
      public function parse(string $source): array
      {
          $reader = new Reader($source);
          $stream = $this->tokenizer->tokenize($reader);

          return $this->parseSelectorList($stream);
      }

      /**
       * Parses the arguments for ":nth-child()" and friends.
       *
       * The token values are concatenated and interpreted as an "an+b" style
       * expression. The keywords "odd", "even" and "n" map to [2, 1], [2, 0]
       * and [1, 0]; an expression without "n" is a plain offset [0, b].
       *
       * @param Token[] $tokens
       *
       * @return int[] A two-element list [a, b]
       *
       * @throws SyntaxErrorException When a string token is given or a part of the expression is not numeric
       */
      public static function parseSeries(array $tokens): array
      {
          foreach ($tokens as $token) {
              if ($token->isString()) {
                  throw SyntaxErrorException::stringAsFunctionArgument();
              }
          }

          $joined = trim(implode('', array_map(static function (Token $token) {
              return $token->getValue();
          }, $tokens)));

          // Converts one numeric part of the expression, rejecting anything that is not a number.
          $int = static function ($string) {
              if (!is_numeric($string)) {
                  throw SyntaxErrorException::stringAsFunctionArgument();
              }

              return (int) $string;
          };

          switch (true) {
              case 'odd' === $joined:
                  return [2, 1];
              case 'even' === $joined:
                  return [2, 0];
              case 'n' === $joined:
                  return [1, 0];
              case !str_contains($joined, 'n'):
                  return [0, $int($joined)];
          }

          $split = explode('n', $joined);
          $first = $split[0];
          $second = $split[1] ?? '';

          // Compare against '' explicitly: the string '0' is falsy in PHP, so a
          // truthiness test would turn "0n+5" into [1, 5] instead of [0, 5].
          if ('' === $first) {
              $a = 1;
          } elseif ('-' === $first || '+' === $first) {
              // A bare sign means a coefficient of -1 or +1.
              $a = $int($first.'1');
          } else {
              $a = $int($first);
          }

          $b = '' === $second ? 0 : $int($second);

          return [$a, $b];
      }

      /**
       * Parses a comma-separated list of selectors until no further "," follows.
       *
       * @return Node\SelectorNode[]
       *
       * @throws SyntaxErrorException
       */
      private function parseSelectorList(TokenStream $stream): array
      {
          $stream->skipWhitespace();
          $selectors = [];

          while (true) {
              $selectors[] = $this->parseSelectorNode($stream);

              if (!$stream->getPeek()->isDelimiter([','])) {
                  break;
              }

              // Consume the "," and any whitespace before the next selector.
              $stream->getNext();
              $stream->skipWhitespace();
          }

          return $selectors;
      }

      /**
       * Parses one complete selector: simple selectors chained by combinators.
       *
       * Stops at the end of the input or at a "," delimiter. Simple selectors
       * separated only by whitespace are joined with the " " combinator.
       *
       * @throws SyntaxErrorException When a pseudo-element is followed by further selector parts
       */
      private function parseSelectorNode(TokenStream $stream): Node\SelectorNode
      {
          [$result, $pseudoElement] = $this->parseSimpleSelector($stream);

          while (true) {
              $stream->skipWhitespace();
              $peek = $stream->getPeek();

              if ($peek->isFileEnd() || $peek->isDelimiter([','])) {
                  break;
              }

              // A pseudo-element is only allowed as the very last part of a selector.
              if (null !== $pseudoElement) {
                  throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
              }

              if ($peek->isDelimiter(['+', '>', '~'])) {
                  $combinator = $stream->getNext()->getValue();
                  $stream->skipWhitespace();
              } else {
                  // No explicit combinator: the whitespace skipped above acts as one.
                  $combinator = ' ';
              }

              [$nextSelector, $pseudoElement] = $this->parseSimpleSelector($stream);
              $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
          }

          return new Node\SelectorNode($result, $pseudoElement);
      }

      /**
       * Parses next simple node (hash, class, pseudo, negation).
       *
       * @param bool $insideNegation Whether the selector is the argument of ":not()";
       *                             a ")" delimiter then ends the selector
       *
       * @return array A two-element list: the parsed node and the pseudo-element name (or null)
       *
       * @throws SyntaxErrorException
       */
      private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false): array
      {
          $stream->skipWhitespace();

          // Snapshot of the used-token count, compared again at the end to detect
          // that nothing at all was consumed for this selector.
          $selectorStart = \count($stream->getUsed());
          $result = $this->parseElementNode($stream);
          $pseudoElement = null;

          while (true) {
              $peek = $stream->getPeek();
              if ($peek->isWhitespace()
                  || $peek->isFileEnd()
                  || $peek->isDelimiter([',', '+', '>', '~'])
                  || ($insideNegation && $peek->isDelimiter([')']))
              ) {
                  break;
              }

              // Anything following a pseudo-element within the same simple selector is an error.
              if (null !== $pseudoElement) {
                  throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
              }

              if ($peek->isHash()) {
                  $result = new Node\HashNode($result, $stream->getNext()->getValue());
              } elseif ($peek->isDelimiter(['.'])) {
                  $stream->getNext();
                  $result = new Node\ClassNode($result, $stream->getNextIdentifier());
              } elseif ($peek->isDelimiter(['['])) {
                  $stream->getNext();
                  $result = $this->parseAttributeNode($result, $stream);
              } elseif ($peek->isDelimiter([':'])) {
                  $stream->getNext();

                  // "::name" is a pseudo-element.
                  if ($stream->getPeek()->isDelimiter([':'])) {
                      $stream->getNext();
                      $pseudoElement = $stream->getNextIdentifier();

                      continue;
                  }

                  $identifier = $stream->getNextIdentifier();
                  if (\in_array(strtolower($identifier), ['first-line', 'first-letter', 'before', 'after'], true)) {
                      // Special case: CSS 2.1 pseudo-elements can have a single ':'.
                      // Any new pseudo-element must have two.
                      $pseudoElement = $identifier;

                      continue;
                  }

                  // No "(" after the name: a plain pseudo-class.
                  if (!$stream->getPeek()->isDelimiter(['('])) {
                      $result = new Node\PseudoNode($result, $identifier);

                      continue;
                  }

                  // Consume the "(" and any whitespace before the first argument.
                  $stream->getNext();
                  $stream->skipWhitespace();

                  if ('not' === strtolower($identifier)) {
                      if ($insideNegation) {
                          throw SyntaxErrorException::nestedNot();
                      }

                      [$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true);
                      $next = $stream->getNext();

                      if (null !== $argumentPseudoElement) {
                          throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
                      }

                      if (!$next->isDelimiter([')'])) {
                          throw SyntaxErrorException::unexpectedToken('")"', $next);
                      }

                      $result = new Node\NegationNode($result, $argument);
                  } else {
                      $result = new Node\FunctionNode($result, $identifier, $this->parseFunctionArguments($stream));
                  }
              } else {
                  throw SyntaxErrorException::unexpectedToken('selector', $peek);
              }
          }

          if (\count($stream->getUsed()) === $selectorStart) {
              throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
          }

          return [$result, $pseudoElement];
      }

      /**
       * Collects the argument tokens of a functional pseudo-class up to and including the closing ")".
       *
       * Expects the opening "(" to have been consumed already. Identifiers,
       * strings, numbers and the "+" / "-" delimiters are accepted as arguments;
       * whitespace between them is skipped.
       *
       * @return Token[]
       *
       * @throws SyntaxErrorException When an unsupported token is met or no argument is given
       */
      private function parseFunctionArguments(TokenStream $stream): array
      {
          $arguments = [];

          while (true) {
              $stream->skipWhitespace();
              $next = $stream->getNext();

              if ($next->isDelimiter([')'])
                  && !$next->isIdentifier()
                  && !$next->isString()
                  && !$next->isNumber()
              ) {
                  break;
              }

              if ($next->isIdentifier()
                  || $next->isString()
                  || $next->isNumber()
                  || $next->isDelimiter(['+', '-'])
              ) {
                  $arguments[] = $next;

                  continue;
              }

              throw SyntaxErrorException::unexpectedToken('an argument', $next);
          }

          if ([] === $arguments) {
              // $next is the closing ")" here; it is reported as the offending token.
              throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
          }

          return $arguments;
      }

      /**
       * Parses an optional element selector: "name", "*", "ns|name" or "*|name".
       *
       * When the stream starts with neither an identifier nor "*", nothing is
       * consumed and an ElementNode with null namespace and element is returned.
       */
      private function parseElementNode(TokenStream $stream): Node\ElementNode
      {
          $peek = $stream->getPeek();

          if ($peek->isIdentifier() || $peek->isDelimiter(['*'])) {
              if ($peek->isIdentifier()) {
                  $namespace = $stream->getNext()->getValue();
              } else {
                  // "*" is consumed and represented as null.
                  $stream->getNext();
                  $namespace = null;
              }

              if ($stream->getPeek()->isDelimiter(['|'])) {
                  // The first part really was a namespace prefix; the element follows the "|".
                  $stream->getNext();
                  $element = $stream->getNextIdentifierOrStar();
              } else {
                  // No "|": what was read is the element itself, without namespace.
                  $element = $namespace;
                  $namespace = null;
              }
          } else {
              $element = $namespace = null;
          }

          return new Node\ElementNode($namespace, $element);
      }

      /**
       * Parses the content of an attribute selector; the opening "[" must already be consumed.
       *
       * Handles an optional "ns|" prefix, the bare existence form "[attr]" and the
       * operators "=", "^=", "$=", "*=", "~=", "|=" and "!=" followed by a string,
       * identifier or number value and the closing "]".
       *
       * @throws SyntaxErrorException
       */
      private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream): Node\AttributeNode
      {
          $stream->skipWhitespace();
          $attribute = $stream->getNextIdentifierOrStar();

          // A null attribute (NOTE(review): presumably the "*" case — confirm against
          // TokenStream::getNextIdentifierOrStar()) is only valid as a namespace prefix.
          if (null === $attribute && !$stream->getPeek()->isDelimiter(['|'])) {
              throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
          }

          if ($stream->getPeek()->isDelimiter(['|'])) {
              $stream->getNext();

              if ($stream->getPeek()->isDelimiter(['='])) {
                  // "attr|=" is the "|=" operator, not a namespace separator.
                  $namespace = null;
                  $stream->getNext();
                  $operator = '|=';
              } else {
                  $namespace = $attribute;
                  $attribute = $stream->getNextIdentifier();
                  $operator = null;
              }
          } else {
              $namespace = $operator = null;
          }

          if (null === $operator) {
              $stream->skipWhitespace();
              $next = $stream->getNext();

              if ($next->isDelimiter([']'])) {
                  return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
              } elseif ($next->isDelimiter(['='])) {
                  $operator = '=';
              } elseif ($next->isDelimiter(['^', '$', '*', '~', '|', '!'])
                  && $stream->getPeek()->isDelimiter(['='])
              ) {
                  // Two-character operator: the current delimiter followed by "=".
                  $operator = $next->getValue().'=';
                  $stream->getNext();
              } else {
                  throw SyntaxErrorException::unexpectedToken('operator', $next);
              }
          }

          $stream->skipWhitespace();
          $value = $stream->getNext();

          if ($value->isNumber()) {
              // if the value is a number, it's casted into a string
              $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
          }

          if (!($value->isIdentifier() || $value->isString())) {
              throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
          }

          $stream->skipWhitespace();
          $next = $stream->getNext();

          if (!$next->isDelimiter([']'])) {
              throw SyntaxErrorException::unexpectedToken('"]"', $next);
          }

          return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
      }
  }