LinkTrait.php 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2014 Carsten Brandt
  4. * @license https://github.com/cebe/markdown/blob/master/LICENSE
  5. * @link https://github.com/cebe/markdown#readme
  6. */
  7. namespace cebe\markdown\inline;
  // Work around https://github.com/facebook/hhvm/issues/1120:
  // make sure the ENT_HTML401 constant exists before it is used below.
  if (!defined('ENT_HTML401')) {
      define('ENT_HTML401', 0);
  }
  10. /**
  11. * Adds links and images as well as URL markers.
  12. *
  13. * This trait conflicts with the HtmlTrait. If both are used together,
  14. * you have to define a resolution, by defining the HtmlTrait::parseInlineHtml
  15. * as private so it is not used directly:
  16. *
  17. * ```php
  18. * use block\HtmlTrait {
  19. * parseInlineHtml as private parseInlineHtml;
  20. * }
  21. * ```
  22. *
  23. * If the method exists it is called internally by this trait.
  24. *
  25. * Also make sure to reset references on prepare():
  26. *
  27. * ```php
  28. * protected function prepare()
  29. * {
  30. * // reset references
  31. * $this->references = [];
  32. * }
  33. * ```
  34. */
  trait LinkTrait
  {
      /**
       * @var array references defined in this document, indexed by lowercased label.
       * Each entry is an array with a `url` and, when one was given, a `title`.
       */
      protected $references = [];

      /**
       * Remove the backslash from escaped characters.
       *
       * Every character listed in `$this->escapeCharacters` is unescaped when it is
       * preceded by a backslash. The property is not declared by this trait and has
       * to be provided by the class using it.
       *
       * @param string $text
       * @return string
       */
      protected function replaceEscape($text)
      {
          $strtr = [];
          foreach ($this->escapeCharacters as $char) {
              $strtr["\\$char"] = $char;
          }
          return strtr($text, $strtr);
      }

      /**
       * Parses a link indicated by `[`.
       *
       * No link is produced when `parseLink` already appears in `$this->context`
       * after its first entry; in that case, or when the text is not a valid link,
       * the leading run of `[` characters is returned as plain text.
       *
       * @marker [
       * @param string $markdown the text starting at the `[` marker.
       * @return array the parsed element and the number of bytes consumed.
       */
      protected function parseLink($markdown)
      {
          $nested = in_array('parseLink', array_slice($this->context, 1), true);
          if (!$nested && ($parts = $this->parseLinkOrImage($markdown)) !== false) {
              list($text, $url, $title, $offset, $key) = $parts;
              return [
                  [
                      'link',
                      'text' => $this->parseInline($text),
                      'url' => $url,
                      'title' => $title,
                      'refkey' => $key,
                      'orig' => substr($markdown, 0, $offset),
                  ],
                  $offset
              ];
          }

          // remove all starting [ markers to avoid next one to be parsed as link
          $result = '[';
          $i = 1;
          while (isset($markdown[$i]) && $markdown[$i] === '[') {
              $result .= '[';
              $i++;
          }
          return [['text', $result], $i];
      }

      /**
       * Parses an image indicated by `![`.
       *
       * The part after the `!` is handed to parseLinkOrImage(); offsets are shifted
       * by one to account for the `!`.
       *
       * @marker ![
       * @param string $markdown the text starting at the `![` marker.
       * @return array the parsed element and the number of bytes consumed.
       */
      protected function parseImage($markdown)
      {
          if (($parts = $this->parseLinkOrImage(substr($markdown, 1))) !== false) {
              list($text, $url, $title, $offset, $key) = $parts;
              return [
                  [
                      'image',
                      'text' => $text,
                      'url' => $url,
                      'title' => $title,
                      'refkey' => $key,
                      'orig' => substr($markdown, 0, $offset + 1),
                  ],
                  $offset + 1
              ];
          }

          // remove all starting [ markers to avoid next one to be parsed as link
          $result = '!';
          $i = 1;
          while (isset($markdown[$i]) && $markdown[$i] === '[') {
              $result .= '[';
              $i++;
          }
          return [['text', $result], $i];
      }

      /**
       * Shared parser for the `[text](url "title")` and `[text][key]` forms.
       *
       * @param string $markdown the text starting at the opening `[`.
       * @return array|false `[text, url, title, offset, referenceKey]` or `false` when
       * no bracketed text could be matched. For inline links the reference key is
       * `null`; for reference style links url and title are `null` and the key is the
       * lowercased explicit label, or the lowercased text when no label is given.
       */
      protected function parseLinkOrImage($markdown)
      {
          // TODO improve bracket regex
          if (strpos($markdown, ']') === false
              || !preg_match('/\[((?>[^\]\[]+|(?R))*)\]/', $markdown, $textMatches)
          ) {
              return false;
          }

          $text = $textMatches[1];
          $offset = strlen($textMatches[0]);
          $markdown = substr($markdown, $offset);

          // Same pattern as before, written without the insignificant whitespace and
          // inline comments that the /x modifier ignored.
          $pattern = '/(?(R)'
              // in case of recursion match (nested) parentheses
              . '\(((?>[^\s()]+)|(?R))*\)'
              . '|'
              // else match a link with optional title
              . '^\(\s*(((?>[^\s()]+)|(?R))*)(\s+"(.*?)")?\s*\)'
              . ')/x';

          if (preg_match($pattern, $markdown, $refMatches)) {
              // inline link
              // compare against '' instead of empty() so that a title of "0" is kept
              $hasTitle = isset($refMatches[5]) && $refMatches[5] !== '';
              return [
                  $text,
                  isset($refMatches[2]) ? $this->replaceEscape($refMatches[2]) : '', // url
                  $hasTitle ? $refMatches[5] : null, // title
                  $offset + strlen($refMatches[0]), // offset
                  null, // reference key
              ];
          }

          if (preg_match('/^([ \n]?\[(.*?)\])?/s', $markdown, $refMatches)) {
              // reference style link
              // an explicit label of "0" is a valid label, so do not use empty() here
              $hasLabel = isset($refMatches[2]) && $refMatches[2] !== '';
              $key = strtolower($hasLabel ? $refMatches[2] : $text);
              return [
                  $text,
                  null, // url
                  null, // title
                  $offset + strlen($refMatches[0]), // offset
                  $key,
              ];
          }

          return false;
      }

      /**
       * Parses inline HTML.
       *
       * Recognises `<user@host.tld>` and `<scheme://...>` autolinks unless `parseLink`
       * is in `$this->context`. Anything else containing a `>` is passed to
       * `parseInlineHtml()` when the using class has such a method; otherwise the
       * `<` is emitted as `&lt;`.
       *
       * @marker <
       * @param string $text the text starting at the `<` marker.
       * @return array the parsed element and the number of bytes consumed.
       */
      protected function parseLt($text)
      {
          if (strpos($text, '>') !== false) {
              if (!in_array('parseLink', $this->context, true)) { // do not allow links in links
                  if (preg_match('/^<([^\s>]*?@[^\s]*?\.\w+?)>/', $text, $matches)) {
                      // email address
                      return [
                          ['email', $this->replaceEscape($matches[1])],
                          strlen($matches[0])
                      ];
                  } elseif (preg_match('/^<([a-z]{3,}:\/\/[^\s]+?)>/', $text, $matches)) {
                      // URL
                      return [
                          ['url', $this->replaceEscape($matches[1])],
                          strlen($matches[0])
                      ];
                  }
              }
              // try inline HTML if it was neither a URL nor email if HtmlTrait is included.
              if (method_exists($this, 'parseInlineHtml')) {
                  return $this->parseInlineHtml($text);
              }
          }
          return [['text', '&lt;'], 1];
      }

      /**
       * Renders an `email` element as a `mailto:` link.
       *
       * @param array $block the element; `$block[1]` holds the address.
       * @return string
       */
      protected function renderEmail($block)
      {
          // The address is matched with a permissive pattern and may contain `"`.
          // Inside the href attribute quotes must be escaped, otherwise the value
          // could terminate the attribute and inject further attributes.
          $href = htmlspecialchars($block[1], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8');
          $label = htmlspecialchars($block[1], ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
          return "<a href=\"mailto:$href\">$label</a>";
      }

      /**
       * Renders a `url` element as a link.
       *
       * The link text shows the URL-decoded form when that is valid UTF-8 and the
       * raw URL otherwise.
       *
       * @param array $block the element; `$block[1]` holds the URL.
       * @return string
       */
      protected function renderUrl($block)
      {
          $url = htmlspecialchars($block[1], ENT_COMPAT | ENT_HTML401, 'UTF-8');
          $decodedUrl = urldecode($block[1]);
          // an empty pattern with the u modifier only matches valid UTF-8 subjects
          $secureUrlText = preg_match('//u', $decodedUrl) ? $decodedUrl : $block[1];
          $text = htmlspecialchars($secureUrlText, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
          return "<a href=\"$url\">$text</a>";
      }

      /**
       * Looks up a reference definition.
       *
       * The key is tried as given first and then with every run of whitespace
       * collapsed into a single space.
       *
       * @param string $key
       * @return array|false the reference data or `false` when the key is unknown.
       */
      protected function lookupReference($key)
      {
          if (isset($this->references[$key])) {
              return $this->references[$key];
          }
          $normalizedKey = preg_replace('/\s+/', ' ', $key);
          if (isset($this->references[$normalizedKey])) {
              return $this->references[$normalizedKey];
          }
          return false;
      }

      /**
       * Renders a `link` element.
       *
       * For reference style links the url and title are taken from the matching
       * reference. When the reference is unknown the original markdown is emitted
       * instead, with everything after a leading `[` parsed as inline markdown again.
       *
       * @param array $block
       * @return string
       */
      protected function renderLink($block)
      {
          if (isset($block['refkey'])) {
              if (($ref = $this->lookupReference($block['refkey'])) !== false) {
                  $block = array_merge($block, $ref);
              } else {
                  if (strncmp($block['orig'], '[', 1) === 0) {
                      return '[' . $this->renderAbsy($this->parseInline(substr($block['orig'], 1)));
                  }
                  return $block['orig'];
              }
          }

          // compare against '' instead of empty() so that a title of "0" is rendered
          $hasTitle = isset($block['title']) && $block['title'] !== '';
          return '<a href="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
              . ($hasTitle ? ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"' : '')
              . '>' . $this->renderAbsy($block['text']) . '</a>';
      }

      /**
       * Renders an `image` element.
       *
       * Reference handling mirrors renderLink(). The tag is closed with `>` when
       * `$this->html5` is truthy and with ` />` otherwise; that property has to be
       * provided by the class using this trait.
       *
       * @param array $block
       * @return string
       */
      protected function renderImage($block)
      {
          if (isset($block['refkey'])) {
              if (($ref = $this->lookupReference($block['refkey'])) !== false) {
                  $block = array_merge($block, $ref);
              } else {
                  if (strncmp($block['orig'], '![', 2) === 0) {
                      return '![' . $this->renderAbsy($this->parseInline(substr($block['orig'], 2)));
                  }
                  return $block['orig'];
              }
          }

          // compare against '' instead of empty() so that a title of "0" is rendered
          $hasTitle = isset($block['title']) && $block['title'] !== '';
          return '<img src="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
              . ' alt="' . htmlspecialchars($block['text'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"'
              . ($hasTitle ? ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"' : '')
              . ($this->html5 ? '>' : ' />');
      }

      // references

      /**
       * Checks whether a line is a link reference definition.
       *
       * The title may be wrapped in quotes or in parentheses, matching what
       * consumeReference() accepts.
       *
       * @param string $line
       * @return bool
       */
      protected function identifyReference($line)
      {
          return isset($line[0])
              && ($line[0] === ' ' || $line[0] === '[')
              && preg_match('/^ {0,3}\[[^\[](.*?)\]:\s*([^\s]+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*$/', $line);
      }

      /**
       * Consume link references
       *
       * Stores every consecutive reference definition starting at `$current` in
       * `$this->references` under its lowercased label. A title may follow the URL
       * on the same line or stand alone on the next line.
       *
       * @param array $lines
       * @param int $current index of the first line to consume.
       * @return array an array whose first element is always `false` and whose second
       * element is the index of the last line that was consumed.
       */
      protected function consumeReference($lines, $current)
      {
          while (isset($lines[$current]) && preg_match('/^ {0,3}\[(.+?)\]:\s*(.+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*$/', $lines[$current], $matches)) {
              $label = strtolower($matches[1]);

              $this->references[$label] = [
                  'url' => $this->replaceEscape($matches[2]),
              ];
              if (isset($matches[3])) {
                  $this->references[$label]['title'] = $matches[3];
              } elseif (isset($lines[$current + 1]) && preg_match('/^\s+[\(\'"](.+?)[\)\'"]\s*$/', $lines[$current + 1], $titleMatches)) {
                  // title may be on the next line
                  $this->references[$label]['title'] = $titleMatches[1];
                  $current++;
              }
              $current++;
          }
          return [false, --$current];
      }

      abstract protected function parseInline($text);
      abstract protected function renderAbsy($blocks);
  }