AbstractUriElement.php 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\DomCrawler;
  11. /**
  12. * Any HTML element that can link to a URI.
  13. *
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  abstract class AbstractUriElement
  {
      /**
       * @var \DOMElement
       */
      protected $node;

      /**
       * @var string|null The method to use for the element
       */
      protected $method;

      /**
       * @var string|null The URI of the page where the element is embedded (or the base href)
       */
      protected $currentUri;

      /**
       * Stores the node (through setNode()), the upper-cased method and the base URI,
       * then verifies that the element URI can be resolved.
       *
       * @param \DOMElement $node       A \DOMElement instance
       * @param string|null $currentUri The URI of the page where the link is embedded (or the base href)
       * @param string|null $method     The method to use for the link (GET by default)
       *
       * @throws \InvalidArgumentException if the element URI has no scheme and $currentUri
       *                                   does not start with "http" or "file"
       */
      public function __construct(\DOMElement $node, ?string $currentUri = null, ?string $method = 'GET')
      {
          // setNode() is implemented by subclasses and may reject the node.
          $this->setNode($node);
          $this->method = $method ? strtoupper($method) : null;
          $this->currentUri = $currentUri;

          // A URI without a scheme needs an absolute base to be resolved against.
          $elementUriIsRelative = null === parse_url(trim($this->getRawUri()), \PHP_URL_SCHEME);
          $baseUriIsAbsolute = null !== $this->currentUri
              && \in_array(strtolower(substr($this->currentUri, 0, 4)), ['http', 'file'], true);

          if ($elementUriIsRelative && !$baseUriIsAbsolute) {
              throw new \InvalidArgumentException(sprintf('The URL of the element is relative, so you must define its base URI passing an absolute URL to the constructor of the "%s" class ("%s" was passed).', __CLASS__, $this->currentUri));
          }
      }

      /**
       * Gets the node associated with this link.
       *
       * @return \DOMElement A \DOMElement instance
       */
      public function getNode()
      {
          return $this->node;
      }

      /**
       * Gets the method associated with this link.
       *
       * @return string The upper-cased method, or "GET" when none was set
       */
      public function getMethod()
      {
          return $this->method ?? 'GET';
      }

      /**
       * Gets the URI associated with this link, resolved against the current URI.
       *
       * Resolution order: URI with a scheme, empty URI, anchor, query string,
       * scheme-relative URI ("//host/..."), absolute path, relative path.
       *
       * @return string The URI
       */
      public function getUri()
      {
          $uri = trim($this->getRawUri());

          // absolute URL?
          if (null !== parse_url($uri, \PHP_URL_SCHEME)) {
              return $uri;
          }

          // empty URI (strict check: "0" is a valid relative reference, not an empty one)
          if ('' === $uri) {
              return $this->currentUri;
          }

          // an anchor
          if ('#' === $uri[0]) {
              return $this->cleanupAnchor($this->currentUri).$uri;
          }

          $baseUri = $this->cleanupUri($this->currentUri);

          if ('?' === $uri[0]) {
              return $baseUri.$uri;
          }

          // absolute URL with relative schema
          if (str_starts_with($uri, '//')) {
              return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
          }

          // keep only "scheme://authority" of the base URI
          $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);

          // absolute path
          if ('/' === $uri[0]) {
              return $baseUri.$uri;
          }

          // relative path
          // parse_url() yields null when the base has no path (e.g. "http://host?x=1");
          // cast so that the string functions below never receive null.
          $path = (string) parse_url(substr($this->currentUri, \strlen($baseUri)), \PHP_URL_PATH);
          // drop the last segment of the base path before appending the relative URI
          $directory = substr($path, 0, (int) strrpos($path, '/'));
          $path = $this->canonicalizePath($directory.'/'.$uri);

          return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
      }

      /**
       * Returns raw URI data.
       *
       * @return string
       */
      abstract protected function getRawUri();

      /**
       * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
       *
       * "." segments are dropped and ".." segments remove the preceding segment.
       *
       * @param string $path URI path
       *
       * @return string
       */
      protected function canonicalizePath($path)
      {
          if ('' === $path || '/' === $path) {
              return $path;
          }

          // a trailing dot gets a slash appended so that it is processed as a segment of its own
          if (str_ends_with($path, '.')) {
              $path .= '/';
          }

          $output = [];

          foreach (explode('/', $path) as $segment) {
              if ('..' === $segment) {
                  array_pop($output);
              } elseif ('.' !== $segment) {
                  $output[] = $segment;
              }
          }

          return implode('/', $output);
      }

      /**
       * Sets current \DOMElement instance.
       *
       * @param \DOMElement $node A \DOMElement instance
       *
       * @throws \LogicException If given node is not an anchor
       */
      abstract protected function setNode(\DOMElement $node);

      /**
       * Removes the query string and the anchor from the given uri.
       */
      private function cleanupUri(string $uri): string
      {
          return $this->cleanupQuery($this->cleanupAnchor($uri));
      }

      /**
       * Removes the query string from the uri.
       */
      private function cleanupQuery(string $uri): string
      {
          if (false !== $pos = strpos($uri, '?')) {
              return substr($uri, 0, $pos);
          }

          return $uri;
      }

      /**
       * Removes the anchor from the uri.
       */
      private function cleanupAnchor(string $uri): string
      {
          if (false !== $pos = strpos($uri, '#')) {
              return substr($uri, 0, $pos);
          }

          return $uri;
      }
  }