wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php
Различия
Показаны различия между двумя версиями страницы.
Следующая версия | Предыдущая версия | ||
wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php [2025/01/16 13:19] – создано vladpolskiy | wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php [2025/01/16 18:55] (текущий) – vladpolskiy | ||
---|---|---|---|
Строка 1: | Строка 1: | ||
- | 25 | + | <code php Lexer.php [enable_line_numbers=" |
+ | <?php | ||
+ | /** | ||
+ | * Lexer adapted from Simple Test: http:// | ||
+ | * For an intro to the Lexer see: | ||
+ | * https:// | ||
+ | * | ||
+ | * @author Marcus Baker http:// | ||
+ | */ | ||
+ | |||
+ | namespace dokuwiki\Parsing\Lexer; | ||
+ | |||
+ | /** | ||
+ | * Accepts text and breaks it into tokens. | ||
+ | * | ||
+ | * Some optimisation to make sure the content is only scanned by the PHP regex | ||
+ | * parser once. Lexer modes must not start with leading underscores. | ||
+ | */ | ||
+ | class Lexer | ||
+ | { | ||
+ | /** @var ParallelRegex[] */ | ||
+ | protected $regexes; | ||
+ | /** @var \Doku_Handler */ | ||
+ | protected $handler; | ||
+ | /** @var StateStack */ | ||
+ | protected $modeStack; | ||
+ | /** @var array mode " | ||
+ | protected $mode_handlers; | ||
+ | /** @var bool case sensitive? */ | ||
+ | protected $case; | ||
+ | |||
+ | /** | ||
+ | * Sets up the lexer in case insensitive matching by default. | ||
+ | * | ||
+ | * @param \Doku_Handler $handler | ||
+ | * @param string $start | ||
+ | * @param boolean $case True for case sensitive. | ||
+ | */ | ||
+ | public function __construct($handler, | ||
+ | { | ||
+ | $this-> | ||
+ | $this-> | ||
+ | $this-> | ||
+ | $this-> | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Adds a token search pattern for a particular parsing mode. | ||
+ | * | ||
+ | * The pattern does not change the current mode. | ||
+ | * | ||
+ | * @param string $pattern | ||
+ | | ||
+ | * @param string $mode | ||
+ | | ||
+ | | ||
+ | */ | ||
+ | public function addPattern($pattern, | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | $this-> | ||
+ | } | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Adds a pattern that will enter a new parsing mode. | ||
+ | * | ||
+ | * Useful for entering parenthesis, | ||
+ | * | ||
+ | * @param string $pattern | ||
+ | * @param string $mode | ||
+ | * @param string $new_mode | ||
+ | */ | ||
+ | public function addEntryPattern($pattern, | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | $this-> | ||
+ | } | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Adds a pattern that will exit the current mode and re-enter the previous one. | ||
+ | * | ||
+ | * @param string $pattern | ||
+ | * @param string $mode Mode to leave. | ||
+ | */ | ||
+ | public function addExitPattern($pattern, | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | $this-> | ||
+ | } | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Adds a pattern that has a special mode. | ||
+ | * | ||
+ | * Acts as an entry and exit pattern in one go, effectively calling a special | ||
+ | * parser handler for this token only. | ||
+ | * | ||
+ | * @param string $pattern | ||
+ | * @param string $mode | ||
+ | * @param string $special | ||
+ | */ | ||
+ | public function addSpecialPattern($pattern, | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | $this-> | ||
+ | } | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Adds a mapping from a mode to another handler. | ||
+ | * | ||
+ | * @param string $mode Mode to be remapped. | ||
+ | * @param string $handler | ||
+ | */ | ||
+ | public function mapHandler($mode, | ||
+ | { | ||
+ | $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Splits the page text into tokens. | ||
+ | * | ||
+ | * Will fail if the handlers report an error or if no content is consumed. If successful then each | ||
+ | * unparsed and parsed token invokes a call to the held listener. | ||
+ | * | ||
+ | * @param string $raw Raw HTML text. | ||
+ | * @return boolean | ||
+ | */ | ||
+ | public function parse($raw) | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | return false; | ||
+ | } | ||
+ | $initialLength = strlen($raw); | ||
+ | $length = $initialLength; | ||
+ | $pos = 0; | ||
+ | while (is_array($parsed = $this-> | ||
+ | list($unmatched, | ||
+ | $currentLength = strlen($raw); | ||
+ | $matchPos = $initialLength - $currentLength - strlen($matched); | ||
+ | if (! $this-> | ||
+ | return false; | ||
+ | } | ||
+ | if ($currentLength == $length) { | ||
+ | return false; | ||
+ | } | ||
+ | $length = $currentLength; | ||
+ | $pos = $initialLength - $currentLength; | ||
+ | } | ||
+ | if (!$parsed) { | ||
+ | return false; | ||
+ | } | ||
+ | return $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Gives plugins access to the mode stack | ||
+ | * | ||
+ | * @return StateStack | ||
+ | */ | ||
+ | public function getModeStack() | ||
+ | { | ||
+ | return $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Sends the matched token and any leading unmatched | ||
+ | * text to the parser changing the lexer to a new | ||
+ | * mode if one is listed. | ||
+ | * | ||
+ | * @param string $unmatched Unmatched leading portion. | ||
+ | * @param string $matched Actual token match. | ||
+ | * @param bool|string $mode Mode after match. A boolean false mode causes no change. | ||
+ | * @param int $initialPos | ||
+ | * @param int $matchPos Current byte index location in raw doc that's being parsed | ||
+ | * @return boolean | ||
+ | */ | ||
+ | protected function dispatchTokens($unmatched, | ||
+ | { | ||
+ | if (! $this-> | ||
+ | return false; | ||
+ | } | ||
+ | if ($this-> | ||
+ | if (! $this-> | ||
+ | return false; | ||
+ | } | ||
+ | return $this-> | ||
+ | } | ||
+ | if ($this-> | ||
+ | $this-> | ||
+ | if (! $this-> | ||
+ | return false; | ||
+ | } | ||
+ | return $this-> | ||
+ | } | ||
+ | if (is_string($mode)) { | ||
+ | $this-> | ||
+ | return $this-> | ||
+ | } | ||
+ | return $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Tests to see if the new mode is actually to leave the current mode and pop an item from the matching | ||
+ | * mode stack. | ||
+ | * | ||
+ | * @param string $mode Mode to test. | ||
+ | * @return boolean | ||
+ | */ | ||
+ | protected function isModeEnd($mode) | ||
+ | { | ||
+ | return ($mode === " | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Test to see if the mode is one where this mode is entered for this token only and automatically | ||
+ | * leaves immediately afterwards. | ||
+ | * | ||
+ | * @param string $mode Mode to test. | ||
+ | * @return boolean | ||
+ | */ | ||
+ | protected function isSpecialMode($mode) | ||
+ | { | ||
+ | return (strncmp($mode, | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Strips the magic underscore marking single token modes. | ||
+ | * | ||
+ | * @param string $mode Mode to decode. | ||
+ | * @return string | ||
+ | */ | ||
+ | protected function decodeSpecial($mode) | ||
+ | { | ||
+ | return substr($mode, | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Calls the parser method named after the current mode. | ||
+ | * | ||
+ | * Empty content will be ignored. The lexer has a parser handler for each mode in the lexer. | ||
+ | * | ||
+ | * @param string $content Text parsed. | ||
+ | * @param boolean $is_match Token is recognised rather | ||
+ | | ||
+ | * @param int $pos Current byte index location in raw doc | ||
+ | | ||
+ | * @return bool | ||
+ | */ | ||
+ | protected function invokeHandler($content, | ||
+ | { | ||
+ | if (($content === "" | ||
+ | return true; | ||
+ | } | ||
+ | $handler = $this-> | ||
+ | if (isset($this-> | ||
+ | $handler = $this-> | ||
+ | } | ||
+ | |||
+ | // modes starting with plugin_ are all handled by the same | ||
+ | // handler but with an additional parameter | ||
+ | if (substr($handler, | ||
+ | list($handler, | ||
+ | return $this-> | ||
+ | } | ||
+ | |||
+ | return $this-> | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Tries to match a chunk of text and if successful removes the recognised chunk and any leading | ||
+ | * unparsed data. Empty strings will not be matched. | ||
+ | * | ||
+ | * @param string $raw The subject to parse. This is the content that will be eaten. | ||
+ | * @return array|bool | ||
+ | | ||
+ | | ||
+ | */ | ||
+ | protected function reduce(& | ||
+ | { | ||
+ | if (! isset($this-> | ||
+ | return false; | ||
+ | } | ||
+ | if ($raw === "" | ||
+ | return true; | ||
+ | } | ||
+ | if ($action = $this-> | ||
+ | list($unparsed, | ||
+ | return array($unparsed, | ||
+ | } | ||
+ | return true; | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Escapes regex characters other than (, ) and / | ||
+ | * | ||
+ | * @param string $str | ||
+ | * @return string | ||
+ | */ | ||
+ | public static function escape($str) | ||
+ | { | ||
+ | $chars = array( | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | '/ | ||
+ | ); | ||
+ | |||
+ | $escaped = array( | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ); | ||
+ | return preg_replace($chars, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | </ |
wiki/xref/dokuwiki/inc/parsing/lexer/lexer.php.1737022779.txt.gz · Последнее изменение: — vladpolskiy