wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php
Различия
Показаны различия между двумя версиями страницы.
| Предыдущая версия справа и слева · Предыдущая версия · Следующая версия | Предыдущая версия | ||
| wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php [2025/01/16 13:23] – vladpolskiy | wiki:xref:dokuwiki:inc:parsing:lexer:lexer.php [2025/01/16 18:55] (текущий) – vladpolskiy | ||
|---|---|---|---|
| Строка 1: | Строка 1: | ||
| - | <code php Lexer.php> | + | <code php Lexer.php |
| - | Вставить код | + | <?php |
| + | /** | ||
| + | * Lexer adapted from Simple Test: http:// | ||
| + | * For an intro to the Lexer see: | ||
| + | * https:// | ||
| + | * | ||
| + | * @author Marcus Baker http:// | ||
| + | */ | ||
| + | |||
| + | namespace dokuwiki\Parsing\Lexer; | ||
| + | |||
| + | /** | ||
| + | * Accepts text and breaks it into tokens. | ||
| + | * | ||
| + | * Some optimisation to make sure the content is only scanned by the PHP regex | ||
| + | * parser once. Lexer modes must not start with leading underscores. | ||
| + | */ | ||
| + | class Lexer | ||
| + | { | ||
| + | /** @var ParallelRegex[] */ | ||
| + | protected $regexes; | ||
| + | /** @var \Doku_Handler */ | ||
| + | protected $handler; | ||
| + | /** @var StateStack */ | ||
| + | protected $modeStack; | ||
| + | /** @var array mode " | ||
| + | protected $mode_handlers; | ||
| + | /** @var bool case sensitive? */ | ||
| + | protected $case; | ||
| + | |||
| + | /** | ||
| + | * Sets up the lexer in case insensitive matching by default. | ||
| + | * | ||
| + | * @param \Doku_Handler $handler | ||
| + | * @param string $start | ||
| + | * @param boolean $case True for case sensitive. | ||
| + | */ | ||
| + | public function __construct($handler, | ||
| + | { | ||
| + | $this-> | ||
| + | $this-> | ||
| + | $this-> | ||
| + | $this-> | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Adds a token search pattern for a particular parsing mode. | ||
| + | * | ||
| + | * The pattern does not change the current mode. | ||
| + | * | ||
| + | * @param string $pattern | ||
| + | | ||
| + | * @param string $mode | ||
| + | | ||
| + | | ||
| + | */ | ||
| + | public function addPattern($pattern, | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | $this-> | ||
| + | } | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Adds a pattern that will enter a new parsing mode. | ||
| + | * | ||
| + | * Useful for entering parenthesis, | ||
| + | * | ||
| + | * @param string $pattern | ||
| + | * @param string $mode | ||
| + | * @param string $new_mode | ||
| + | */ | ||
| + | public function addEntryPattern($pattern, | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | $this-> | ||
| + | } | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Adds a pattern that will exit the current mode and re-enter the previous one. | ||
| + | * | ||
| + | * @param string $pattern | ||
| + | * @param string $mode Mode to leave. | ||
| + | */ | ||
| + | public function addExitPattern($pattern, | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | $this-> | ||
| + | } | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Adds a pattern that has a special mode. | ||
| + | * | ||
| + | * Acts as an entry and exit pattern in one go, effectively calling a special | ||
| + | * parser handler for this token only. | ||
| + | * | ||
| + | * @param string $pattern | ||
| + | * @param string $mode | ||
| + | * @param string $special | ||
| + | */ | ||
| + | public function addSpecialPattern($pattern, | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | $this-> | ||
| + | } | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Adds a mapping from a mode to another handler. | ||
| + | * | ||
| + | * @param string $mode Mode to be remapped. | ||
| + | * @param string $handler | ||
| + | */ | ||
| + | public function mapHandler($mode, | ||
| + | { | ||
| + | $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Splits the page text into tokens. | ||
| + | * | ||
| + | * Will fail if the handlers report an error or if no content is consumed. If successful then each | ||
| + | * unparsed and parsed token invokes a call to the held listener. | ||
| + | * | ||
| + | * @param string $raw Raw HTML text. | ||
| + | * @return boolean | ||
| + | */ | ||
| + | public function parse($raw) | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | return false; | ||
| + | } | ||
| + | $initialLength = strlen($raw); | ||
| + | $length = $initialLength; | ||
| + | $pos = 0; | ||
| + | while (is_array($parsed = $this-> | ||
| + | list($unmatched, | ||
| + | $currentLength = strlen($raw); | ||
| + | $matchPos = $initialLength - $currentLength - strlen($matched); | ||
| + | if (! $this-> | ||
| + | return false; | ||
| + | } | ||
| + | if ($currentLength == $length) { | ||
| + | return false; | ||
| + | } | ||
| + | $length = $currentLength; | ||
| + | $pos = $initialLength - $currentLength; | ||
| + | } | ||
| + | if (!$parsed) { | ||
| + | return false; | ||
| + | } | ||
| + | return $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Gives plugins access to the mode stack | ||
| + | * | ||
| + | * @return StateStack | ||
| + | */ | ||
| + | public function getModeStack() | ||
| + | { | ||
| + | return $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Sends the matched token and any leading unmatched | ||
| + | * text to the parser changing the lexer to a new | ||
| + | * mode if one is listed. | ||
| + | * | ||
| + | * @param string $unmatched Unmatched leading portion. | ||
| + | * @param string $matched Actual token match. | ||
| + | * @param bool|string $mode Mode after match. A boolean false mode causes no change. | ||
| + | * @param int $initialPos | ||
| + | * @param int $matchPos Current byte index location in raw doc that is being parsed | ||
| + | * @return boolean | ||
| + | */ | ||
| + | protected function dispatchTokens($unmatched, | ||
| + | { | ||
| + | if (! $this-> | ||
| + | return false; | ||
| + | } | ||
| + | if ($this-> | ||
| + | if (! $this-> | ||
| + | return false; | ||
| + | } | ||
| + | return $this-> | ||
| + | } | ||
| + | if ($this-> | ||
| + | $this-> | ||
| + | if (! $this-> | ||
| + | return false; | ||
| + | } | ||
| + | return $this-> | ||
| + | } | ||
| + | if (is_string($mode)) { | ||
| + | $this-> | ||
| + | return $this-> | ||
| + | } | ||
| + | return $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Tests to see if the new mode is actually to leave the current mode and pop an item from the matching | ||
| + | * mode stack. | ||
| + | * | ||
| + | * @param string $mode Mode to test. | ||
| + | * @return boolean | ||
| + | */ | ||
| + | protected function isModeEnd($mode) | ||
| + | { | ||
| + | return ($mode === " | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Test to see if the mode is one where this mode is entered for this token only and automatically | ||
| + | * leaves immediately afterwards. | ||
| + | * | ||
| + | * @param string $mode Mode to test. | ||
| + | * @return boolean | ||
| + | */ | ||
| + | protected function isSpecialMode($mode) | ||
| + | { | ||
| + | return (strncmp($mode, | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Strips the magic underscore marking single token modes. | ||
| + | * | ||
| + | * @param string $mode Mode to decode. | ||
| + | * @return string | ||
| + | */ | ||
| + | protected function decodeSpecial($mode) | ||
| + | { | ||
| + | return substr($mode, | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Calls the parser method named after the current mode. | ||
| + | * | ||
| + | * Empty content will be ignored. The lexer has a parser handler for each mode in the lexer. | ||
| + | * | ||
| + | * @param string $content Text parsed. | ||
| + | * @param boolean $is_match Token is recognised rather | ||
| + | | ||
| + | * @param int $pos Current byte index location in raw doc | ||
| + | | ||
| + | * @return bool | ||
| + | */ | ||
| + | protected function invokeHandler($content, | ||
| + | { | ||
| + | if (($content === "" | ||
| + | return true; | ||
| + | } | ||
| + | $handler = $this-> | ||
| + | if (isset($this-> | ||
| + | $handler = $this-> | ||
| + | } | ||
| + | |||
| + | // modes starting with plugin_ are all handled by the same | ||
| + | // handler but with an additional parameter | ||
| + | if (substr($handler, | ||
| + | list($handler, | ||
| + | return $this-> | ||
| + | } | ||
| + | |||
| + | return $this-> | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Tries to match a chunk of text and if successful removes the recognised chunk and any leading | ||
| + | * unparsed data. Empty strings will not be matched. | ||
| + | * | ||
| + | * @param string $raw The subject to parse. This is the content that will be eaten. | ||
| + | * @return array|bool | ||
| + | | ||
| + | | ||
| + | */ | ||
| + | protected function reduce(& | ||
| + | { | ||
| + | if (! isset($this-> | ||
| + | return false; | ||
| + | } | ||
| + | if ($raw === "" | ||
| + | return true; | ||
| + | } | ||
| + | if ($action = $this-> | ||
| + | list($unparsed, | ||
| + | return array($unparsed, | ||
| + | } | ||
| + | return true; | ||
| + | } | ||
| + | |||
| + | /** | ||
| + | * Escapes regex characters other than (, ) and / | ||
| + | * | ||
| + | * @param string $str | ||
| + | * @return string | ||
| + | */ | ||
| + | public static function escape($str) | ||
| + | { | ||
| + | $chars = array( | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | '/ | ||
| + | ); | ||
| + | |||
| + | $escaped = array( | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | ); | ||
| + | return preg_replace($chars, | ||
| + | } | ||
| + | } | ||
| </ | </ | ||
wiki/xref/dokuwiki/inc/parsing/lexer/lexer.php.1737023006.txt.gz · Последнее изменение: — vladpolskiy
