123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- <?php
- /**
- * @copyright Copyright (c) 2014 Carsten Brandt
- * @license https://github.com/cebe/markdown/blob/master/LICENSE
- * @link https://github.com/cebe/markdown#readme
- */
- namespace cebe\markdown;
- use ReflectionMethod;
/**
 * A generic parser for markdown-like languages.
 *
 * Parsing happens in two stages: the input is first converted into an abstract
 * syntax tree (nested arrays whose element `0` is the node type), and that tree
 * is then rendered by dispatching every node to a `render<Type>()` method.
 *
 * Concrete languages extend this class and add:
 *
 * - protected `identify<Type>()` / `consume<Type>()` / `render<Type>()` methods for block elements,
 * - protected `parse<Something>()` methods annotated with `@marker` for inline elements.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
abstract class Parser
{
    /**
     * @var integer the maximum nesting level for language elements.
     */
    public $maximumNestingLevel = 32;

    /**
     * @var array the current context the parser is in.
     * The innermost element type (or inline parser method name) is at index 0.
     * TODO remove in favor of absy
     */
    protected $context = [];

    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
    ];

    /**
     * @var integer the current nesting depth of `parseBlocks()` / `parseInline()` calls.
     */
    private $_depth = 0;

    /**
     * @var array|null cached list of block types, populated lazily by `blockTypes()`.
     */
    private $_blockTypes;

    /**
     * @var array inline markers found in the text being parsed, grouped by their first
     * character: `[firstChar => [marker => methodName, ...], ...]`. Within each group the
     * longest markers come first.
     */
    private $_inlineMarkers = [];

    /**
     * Parses the given text considering the full language.
     *
     * This includes parsing block elements as well as inline elements.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parse($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // nothing to parse, but prepare() has already run, so keep the pair balanced
            $this->cleanup();
            return '';
        }

        // normalize all line ending styles to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * Parses a paragraph without block elements (block elements are ignored).
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parseParagraph($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // nothing to parse, but prepare() has already run, so keep the pair balanced
            $this->cleanup();
            return '';
        }

        // normalize all line ending styles to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
     */
    protected function prepare()
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup()
    {
    }

    // block parsing

    /**
     * Returns the block types detected from protected `identify<Type>()` methods.
     *
     * The list is computed once per instance via reflection and cached afterwards.
     *
     * @return array a list of block element types available (lower case, sorted).
     */
    protected function blockTypes()
    {
        if ($this->_blockTypes === null) {
            // detect block types via "identify" functions
            $reflection = new \ReflectionClass($this);
            $this->_blockTypes = array_filter(array_map(function ($method) {
                $name = $method->getName();
                return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
            }, $reflection->getMethods(\ReflectionMethod::IS_PROTECTED)));

            // sort() also re-indexes the array after array_filter() left gaps
            sort($this->_blockTypes);
        }
        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses the registered block types to
     * detect the type of this line.
     *
     * The `identify<Type>()` methods are tried in the order returned by `blockTypes()`;
     * the first one returning a truthy value wins.
     *
     * @param array $lines
     * @param integer $current
     * @return string name of the block type in lower case
     */
    protected function detectLineType($lines, $current)
    {
        $line = $lines[$current];
        foreach ($this->blockTypes() as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }
        // consider the line a normal paragraph if no other block type matches
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * @param array $lines the lines to convert into blocks
     * @return array the list of parsed blocks
     */
    protected function parseBlocks($lines)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', implode("\n", $lines)]];
        }
        $this->_depth++;

        $blocks = [];

        // convert lines to blocks
        for ($i = 0, $count = count($lines); $i < $count; $i++) {
            if (rtrim($lines[$i]) === '') {
                // skip empty lines
                continue;
            }
            // identify a blocks beginning and parse the content;
            // the consume method reports the last line it used, so $i jumps ahead
            list($block, $i) = $this->parseBlock($lines, $i);
            if ($block !== false) {
                $blocks[] = $block;
            }
        }

        $this->_depth--;

        return $blocks;
    }

    /**
     * Parses the block at current line by identifying the block type and parsing the content
     *
     * @param array $lines
     * @param integer $current
     * @return array Array of two elements, the first element contains the block,
     * the second contains the next line index to be parsed.
     */
    protected function parseBlock($lines, $current)
    {
        // identify block type for this line
        $blockType = $this->detectLineType($lines, $current);

        // call consume method for the detected block type to consume further lines
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a list of syntax tree nodes by dispatching each one to `render<Type>()`.
     *
     * While a node is being rendered its type is the first element of `$context`.
     *
     * @param array $blocks the nodes to render, each having its type at index 0
     * @return string the concatenated markup of all nodes
     */
    protected function renderAbsy($blocks)
    {
        $output = '';
        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);
            $output .= $this->{'render' . $block[0]}($block);
            array_shift($this->context);
        }
        return $output;
    }

    /**
     * Consume lines for a paragraph
     *
     * A paragraph extends up to (not including) the next blank line.
     *
     * @param array $lines
     * @param integer $current
     * @return array the paragraph block and the index of the last consumed line
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until newline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) === '') {
                break;
            }
            $content[] = $lines[$i];
        }
        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points one past the last consumed line
        return [$block, $i - 1];
    }

    /**
     * Render a paragraph block
     *
     * @param array $block
     * @return string
     */
    protected function renderParagraph($block)
    {
        return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
    }

    // inline parsing

    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * you may want to use [[renderText]] to deal with them.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse` that
     * also have an `@marker` annotation in PHPDoc.
     *
     * @return array a map of markers to parser methods
     */
    protected function inlineMarkers()
    {
        $markers = [];
        // detect "parse" functions
        $reflection = new \ReflectionClass($this);
        foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();
            if (strncmp($methodName, 'parse', 5) !== 0) {
                continue;
            }
            preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
            foreach ($matches[1] as $match) {
                $markers[$match] = $methodName;
            }
        }
        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse
     *
     * Add all markers that are present in markdown.
     * Check is done to avoid iterations in parseInline(), good for huge markdown files
     *
     * Markers are grouped by their first character and every group is ordered
     * longest marker first, so that a short marker never shadows a longer one
     * starting with the same characters.
     *
     * @param string $text
     */
    protected function prepareMarkers($text)
    {
        $this->_inlineMarkers = [];

        foreach ($this->inlineMarkers() as $marker => $method) {
            // purely numeric markers arrive as integer array keys
            $marker = (string) $marker;
            if ($marker === '' || strpos($text, $marker) === false) {
                continue;
            }
            $this->_inlineMarkers[$marker[0]][$marker] = $method;
        }

        // put the longest marker first in every group
        foreach (array_keys($this->_inlineMarkers) as $first) {
            uksort($this->_inlineMarkers[$first], static function ($a, $b) {
                return strlen((string) $b) - strlen((string) $a);
            });
        }
    }

    /**
     * Parses inline elements of the language.
     *
     * The text is scanned for the first characters of the prepared markers. At each hit
     * the marker handlers of that character are tried, longest marker first. Text between
     * markers, and marker characters that no handler claims, are emitted as `text` nodes.
     *
     * @param string $text the inline text to parse.
     * @return array
     */
    protected function parseInline($text)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', $text]];
        }
        $this->_depth++;

        $markers = implode('', array_keys($this->_inlineMarkers));

        $paragraph = [];

        // compare against '' explicitly: empty() would also reject the marker string "0"
        while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {
            // $found is the tail of $text starting at the first marker character
            $pos = strlen($text) - strlen($found);
            // add the text up to next marker to the paragraph
            if ($pos !== 0) {
                $paragraph[] = ['text', substr($text, 0, $pos)];
            }
            $text = $found;

            $parsed = false;
            foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                $marker = (string) $marker;
                if (strncmp($text, $marker, strlen($marker)) === 0) {
                    // parse the marker
                    array_unshift($this->context, $method);
                    list($output, $offset) = $this->$method($text);
                    array_shift($this->context);

                    $paragraph[] = $output;
                    $text = substr($text, $offset);
                    $parsed = true;
                    break;
                }
            }
            if (!$parsed) {
                // no handler matched here, emit the character as plain text and move on
                $paragraph[] = ['text', substr($text, 0, 1)];
                $text = substr($text, 1);
            }
        }

        $paragraph[] = ['text', $text];

        $this->_depth--;

        return $paragraph;
    }

    /**
     * Parses escaped special characters.
     *
     * A backslash followed by one of the `$escapeCharacters` yields that character
     * alone; any other backslash is kept as literal text.
     *
     * @marker \
     * @param string $text the text starting at the backslash
     * @return array the text node and the number of characters consumed
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            return [['text', $text[1]], 2];
        }
        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections for example to highlight keywords or
     * do special escaping.
     *
     * @param array $block the text node, with the text itself at index 1
     * @return string
     */
    protected function renderText($block)
    {
        return $block[1];
    }
}
|