| 1: | <?php declare(strict_types = 1); | 
| 2: |  | 
| 3: | namespace PHPStan\PhpDocParser\Lexer; | 
| 4: |  | 
| 5: | use PHPStan\PhpDocParser\ParserConfig; | 
| 6: | use function implode; | 
| 7: | use function preg_match_all; | 
| 8: | use const PREG_SET_ORDER; | 
| 9: |  | 
| 10: |  | 
| 11: |  | 
| 12: |  | 
/**
 * Regex-driven lexer that splits a string into a flat list of typed tokens.
 *
 * All token patterns are combined into one anchored alternation; the token
 * type of each match is recovered from the PCRE (*MARK) verb attached to the
 * alternative that matched.
 */
class Lexer
{

    // Token type identifiers. The numeric values double as the (*MARK) names
    // embedded in the generated regular expression.
    public const TOKEN_REFERENCE = 0;
    public const TOKEN_UNION = 1;
    public const TOKEN_INTERSECTION = 2;
    public const TOKEN_NULLABLE = 3;
    public const TOKEN_OPEN_PARENTHESES = 4;
    public const TOKEN_CLOSE_PARENTHESES = 5;
    public const TOKEN_OPEN_ANGLE_BRACKET = 6;
    public const TOKEN_CLOSE_ANGLE_BRACKET = 7;
    public const TOKEN_OPEN_SQUARE_BRACKET = 8;
    public const TOKEN_CLOSE_SQUARE_BRACKET = 9;
    public const TOKEN_COMMA = 10;
    public const TOKEN_VARIADIC = 11;
    public const TOKEN_DOUBLE_COLON = 12;
    public const TOKEN_DOUBLE_ARROW = 13;
    public const TOKEN_EQUAL = 14;
    public const TOKEN_OPEN_PHPDOC = 15;
    public const TOKEN_CLOSE_PHPDOC = 16;
    public const TOKEN_PHPDOC_TAG = 17;
    public const TOKEN_DOCTRINE_TAG = 18;
    public const TOKEN_FLOAT = 19;
    public const TOKEN_INTEGER = 20;
    public const TOKEN_SINGLE_QUOTED_STRING = 21;
    public const TOKEN_DOUBLE_QUOTED_STRING = 22;
    public const TOKEN_DOCTRINE_ANNOTATION_STRING = 23;
    public const TOKEN_IDENTIFIER = 24;
    public const TOKEN_THIS_VARIABLE = 25;
    public const TOKEN_VARIABLE = 26;
    public const TOKEN_HORIZONTAL_WS = 27;
    public const TOKEN_PHPDOC_EOL = 28;
    public const TOKEN_OTHER = 29;
    public const TOKEN_END = 30;
    public const TOKEN_COLON = 31;
    public const TOKEN_WILDCARD = 32;
    public const TOKEN_OPEN_CURLY_BRACKET = 33;
    public const TOKEN_CLOSE_CURLY_BRACKET = 34;
    public const TOKEN_NEGATED = 35;
    public const TOKEN_ARROW = 36;

    public const TOKEN_COMMENT = 37;

    /**
     * Display label for every token type, keyed by the token type constant.
     */
    public const TOKEN_LABELS = [
        self::TOKEN_REFERENCE => '\'&\'',
        self::TOKEN_UNION => '\'|\'',
        self::TOKEN_INTERSECTION => '\'&\'',
        self::TOKEN_NULLABLE => '\'?\'',
        self::TOKEN_NEGATED => '\'!\'',
        self::TOKEN_OPEN_PARENTHESES => '\'(\'',
        self::TOKEN_CLOSE_PARENTHESES => '\')\'',
        self::TOKEN_OPEN_ANGLE_BRACKET => '\'<\'',
        self::TOKEN_CLOSE_ANGLE_BRACKET => '\'>\'',
        self::TOKEN_OPEN_SQUARE_BRACKET => '\'[\'',
        self::TOKEN_CLOSE_SQUARE_BRACKET => '\']\'',
        self::TOKEN_OPEN_CURLY_BRACKET => '\'{\'',
        self::TOKEN_CLOSE_CURLY_BRACKET => '\'}\'',
        self::TOKEN_COMMA => '\',\'',
        self::TOKEN_COMMENT => '\'//\'',
        self::TOKEN_COLON => '\':\'',
        self::TOKEN_VARIADIC => '\'...\'',
        self::TOKEN_DOUBLE_COLON => '\'::\'',
        self::TOKEN_DOUBLE_ARROW => '\'=>\'',
        self::TOKEN_ARROW => '\'->\'',
        self::TOKEN_EQUAL => '\'=\'',
        self::TOKEN_OPEN_PHPDOC => '\'/**\'',
        self::TOKEN_CLOSE_PHPDOC => '\'*/\'',
        self::TOKEN_PHPDOC_TAG => 'TOKEN_PHPDOC_TAG',
        self::TOKEN_DOCTRINE_TAG => 'TOKEN_DOCTRINE_TAG',
        self::TOKEN_PHPDOC_EOL => 'TOKEN_PHPDOC_EOL',
        self::TOKEN_FLOAT => 'TOKEN_FLOAT',
        self::TOKEN_INTEGER => 'TOKEN_INTEGER',
        self::TOKEN_SINGLE_QUOTED_STRING => 'TOKEN_SINGLE_QUOTED_STRING',
        self::TOKEN_DOUBLE_QUOTED_STRING => 'TOKEN_DOUBLE_QUOTED_STRING',
        self::TOKEN_DOCTRINE_ANNOTATION_STRING => 'TOKEN_DOCTRINE_ANNOTATION_STRING',
        self::TOKEN_IDENTIFIER => 'type',
        self::TOKEN_THIS_VARIABLE => '\'$this\'',
        self::TOKEN_VARIABLE => 'variable',
        self::TOKEN_HORIZONTAL_WS => 'TOKEN_HORIZONTAL_WS',
        self::TOKEN_OTHER => 'TOKEN_OTHER',
        self::TOKEN_END => 'TOKEN_END',
        self::TOKEN_WILDCARD => '*',
    ];

    // Positions of the individual fields inside each token produced by tokenize().
    public const VALUE_OFFSET = 0;
    public const TYPE_OFFSET = 1;
    public const LINE_OFFSET = 2;

    /** Injected configuration; stored only, not read by tokenize() or generateRegexp(). */
    private ParserConfig $config;

    /** Combined token regular expression, built on the first tokenize() call. */
    private ?string $regexp = null;

    public function __construct(ParserConfig $config)
    {
        $this->config = $config;
    }

    /**
     * Splits the input into tokens.
     *
     * Each token is a triple indexed by VALUE_OFFSET (matched text),
     * TYPE_OFFSET (one of the TOKEN_* constants) and LINE_OFFSET (line number,
     * starting at 1). The line counter advances after every TOKEN_PHPDOC_EOL
     * token, so the EOL token itself is reported on the line it terminates.
     * A TOKEN_END token with an empty value is always appended last.
     *
     * @return list<array{string, int, int}>
     */
    public function tokenize(string $s): array
    {
        // Build the combined pattern lazily and reuse it for later calls.
        $this->regexp ??= $this->generateRegexp();

        preg_match_all($this->regexp, $s, $matches, PREG_SET_ORDER);

        $tokens = [];
        $currentLine = 1;

        foreach ($matches as $match) {
            // The (*MARK) name of the alternative that matched is the token type.
            $tokenType = (int) $match['MARK'];

            $tokens[] = [
                self::VALUE_OFFSET => $match[0],
                self::TYPE_OFFSET => $tokenType,
                self::LINE_OFFSET => $currentLine,
            ];

            if ($tokenType === self::TOKEN_PHPDOC_EOL) {
                $currentLine++;
            }
        }

        $tokens[] = [
            self::VALUE_OFFSET => '',
            self::TYPE_OFFSET => self::TOKEN_END,
            self::LINE_OFFSET => $currentLine,
        ];

        return $tokens;
    }

    /**
     * Builds the single regular expression used by tokenize().
     *
     * Every token pattern becomes one alternative tagged with (*MARK:<type>).
     * Alternatives are tried in the order listed here, so the order is
     * significant: e.g. TOKEN_REFERENCE precedes TOKEN_INTERSECTION,
     * TOKEN_DOUBLE_ARROW precedes TOKEN_EQUAL, TOKEN_CLOSE_PHPDOC precedes
     * TOKEN_WILDCARD, and TOKEN_OTHER comes last as the catch-all.
     */
    private function generateRegexp(): string
    {
        $tokenPatterns = [
            self::TOKEN_HORIZONTAL_WS => '[\\x09\\x20]++',

            self::TOKEN_IDENTIFIER => '(?:[\\\\]?+[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF-]*+)++',
            self::TOKEN_THIS_VARIABLE => '\\$this(?![0-9a-z_\\x80-\\xFF])',
            self::TOKEN_VARIABLE => '\\$[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF]*+',

            // '&' counts as a reference only when followed (after optional
            // whitespace) by one of . , = ) or by a variable other than $this.
            self::TOKEN_REFERENCE => '&(?=\\s*+(?:[.,=)]|(?:\\$(?!this(?![0-9a-z_\\x80-\\xFF])))))',
            self::TOKEN_UNION => '\\|',
            self::TOKEN_INTERSECTION => '&',
            self::TOKEN_NULLABLE => '\\?',
            self::TOKEN_NEGATED => '!',

            self::TOKEN_OPEN_PARENTHESES => '\\(',
            self::TOKEN_CLOSE_PARENTHESES => '\\)',
            self::TOKEN_OPEN_ANGLE_BRACKET => '<',
            self::TOKEN_CLOSE_ANGLE_BRACKET => '>',
            self::TOKEN_OPEN_SQUARE_BRACKET => '\\[',
            self::TOKEN_CLOSE_SQUARE_BRACKET => '\\]',
            self::TOKEN_OPEN_CURLY_BRACKET => '\\{',
            self::TOKEN_CLOSE_CURLY_BRACKET => '\\}',

            self::TOKEN_COMMA => ',',
            self::TOKEN_COMMENT => '\/\/[^\\r\\n]*(?=\n|\r|\*/)',
            self::TOKEN_VARIADIC => '\\.\\.\\.',
            self::TOKEN_DOUBLE_COLON => '::',
            self::TOKEN_DOUBLE_ARROW => '=>',
            self::TOKEN_ARROW => '->',
            self::TOKEN_EQUAL => '=',
            self::TOKEN_COLON => ':',

            self::TOKEN_OPEN_PHPDOC => '/\\*\\*(?=\\s)\\x20?+',
            self::TOKEN_CLOSE_PHPDOC => '\\*/',
            self::TOKEN_PHPDOC_TAG => '@(?:[a-z][a-z0-9-\\\\]+:)?[a-z][a-z0-9-\\\\]*+',
            self::TOKEN_DOCTRINE_TAG => '@[a-z_\\\\][a-z0-9_\:\\\\]*[a-z_][a-z0-9_]*',
            // Line break plus the leading whitespace and optional '*' gutter
            // of the following line (but never the '*' of a closing '*/').
            self::TOKEN_PHPDOC_EOL => '\\r?+\\n[\\x09\\x20]*+(?:\\*(?!/)\\x20?+)?',

            self::TOKEN_FLOAT => '[+\-]?(?:(?:[0-9]++(_[0-9]++)*\\.[0-9]*+(_[0-9]++)*(?:e[+\-]?[0-9]++(_[0-9]++)*)?)|(?:[0-9]*+(_[0-9]++)*\\.[0-9]++(_[0-9]++)*(?:e[+\-]?[0-9]++(_[0-9]++)*)?)|(?:[0-9]++(_[0-9]++)*e[+\-]?[0-9]++(_[0-9]++)*))',
            self::TOKEN_INTEGER => '[+\-]?(?:(?:0b[0-1]++(_[0-1]++)*)|(?:0o[0-7]++(_[0-7]++)*)|(?:0x[0-9a-f]++(_[0-9a-f]++)*)|(?:[0-9]++(_[0-9]++)*))',
            self::TOKEN_SINGLE_QUOTED_STRING => '\'(?:\\\\[^\\r\\n]|[^\'\\r\\n\\\\])*+\'',
            self::TOKEN_DOUBLE_QUOTED_STRING => '"(?:\\\\[^\\r\\n]|[^"\\r\\n\\\\])*+"',
            self::TOKEN_DOCTRINE_ANNOTATION_STRING => '"(?:""|[^"])*+"',

            self::TOKEN_WILDCARD => '\\*',

            // Catch-all: any run of non-whitespace that does not start a '*/'.
            self::TOKEN_OTHER => '(?:(?!\\*/)[^\\s])++',
        ];

        $alternatives = [];
        foreach ($tokenPatterns as $tokenType => $tokenPattern) {
            $alternatives[] = '(?:' . $tokenPattern . ')(*MARK:' . $tokenType . ')';
        }

        // Modifiers: A = anchored at the current offset, s = dot matches
        // newlines, i = case-insensitive.
        return '~' . implode('|', $alternatives) . '~Asi';
    }

}
| 200: |  |