Commit version 24.12.13800

This commit is contained in:
2025-01-06 17:35:06 -05:00
parent b7f6a79c2c
commit 55d9218816
6133 changed files with 4239740 additions and 1374287 deletions

View File

@ -70,6 +70,7 @@ final class Tokenizer
'CONVERT',
'CREATE',
'CROSS',
'CURRENT ROW',
'CURRENT_TIMESTAMP',
'DATABASE',
'DATABASES',
@ -108,11 +109,13 @@ final class Tokenizer
'FAST',
'FIELDS',
'FILE',
'FILTER',
'FIRST',
'FIXED',
'FLUSH',
'FOR',
'FORCE',
'FOLLOWING',
'FOREIGN',
'FULL',
'FULLTEXT',
@ -120,7 +123,8 @@ final class Tokenizer
'GLOBAL',
'GRANT',
'GRANTS',
'GROUP_CONCAT',
'GROUP',
'GROUPS',
'HEAP',
'HIGH_PRIORITY',
'HOSTS',
@ -180,6 +184,7 @@ final class Tokenizer
'MYISAM',
'NAMES',
'NATURAL',
'NO OTHERS',
'NOT',
'NOW()',
'NULL',
@ -192,12 +197,14 @@ final class Tokenizer
'ON UPDATE',
'ON DELETE',
'OUTFILE',
'OVER',
'PACK_KEYS',
'PAGE',
'PARTIAL',
'PARTITION',
'PARTITIONS',
'PASSWORD',
'PRECEDING',
'PRIMARY',
'PRIVILEGES',
'PROCEDURE',
@ -213,6 +220,7 @@ final class Tokenizer
'READ',
'READ_ONLY',
'READ_WRITE',
'RECURSIVE',
'REFERENCES',
'REGEXP',
'RELOAD',
@ -277,6 +285,7 @@ final class Tokenizer
'TEMPORARY',
'TERMINATED',
'THEN',
'TIES',
'TO',
'TRAILING',
'TRANSACTIONAL',
@ -284,6 +293,7 @@ final class Tokenizer
'TRUNCATE',
'TYPE',
'TYPES',
'UNBOUNDED',
'UNCOMMITTED',
'UNIQUE',
'UNLOCK',
@ -307,6 +317,7 @@ final class Tokenizer
* @var string[]
*/
private $reservedToplevel = [
'WITH',
'SELECT',
'FROM',
'WHERE',
@ -327,6 +338,11 @@ final class Tokenizer
'UNION',
'EXCEPT',
'INTERSECT',
'PARTITION BY',
'ROWS',
'RANGE',
'GROUPS',
'WINDOW',
];
/** @var string[] */
@ -341,6 +357,7 @@ final class Tokenizer
'XOR',
'OR',
'AND',
'EXCLUDE',
];
/** @var string[] */
@ -351,6 +368,7 @@ final class Tokenizer
'ADDTIME',
'AES_DECRYPT',
'AES_ENCRYPT',
'APPROX_COUNT_DISTINCT',
'AREA',
'ASBINARY',
'ASCII',
@ -380,6 +398,7 @@ final class Tokenizer
'CHARACTER_LENGTH',
'CHARSET',
'CHAR_LENGTH',
'CHECKSUM_AGG',
'COALESCE',
'COERCIBILITY',
'COLLATION',
@ -395,8 +414,10 @@ final class Tokenizer
'COS',
'COT',
'COUNT',
'COUNT_BIG',
'CRC32',
'CROSSES',
'CUME_DIST',
'CURDATE',
'CURRENT_DATE',
'CURRENT_TIME',
@ -418,6 +439,7 @@ final class Tokenizer
'DECODE',
'DEFAULT',
'DEGREES',
'DENSE_RANK',
'DES_DECRYPT',
'DES_ENCRYPT',
'DIFFERENCE',
@ -437,6 +459,7 @@ final class Tokenizer
'EXTRACTVALUE',
'FIELD',
'FIND_IN_SET',
'FIRST_VALUE',
'FLOOR',
'FORMAT',
'FOUND_ROWS',
@ -457,6 +480,8 @@ final class Tokenizer
'GET_LOCK',
'GLENGTH',
'GREATEST',
'GROUPING',
'GROUPING_ID',
'GROUP_CONCAT',
'GROUP_UNIQUE_USERS',
'HEX',
@ -478,9 +503,12 @@ final class Tokenizer
'ISSIMPLE',
'IS_FREE_LOCK',
'IS_USED_LOCK',
'LAG',
'LAST_DAY',
'LAST_INSERT_ID',
'LAST_VALUE',
'LCASE',
'LEAD',
'LEAST',
'LEFT',
'LENGTH',
@ -489,6 +517,7 @@ final class Tokenizer
'LINESTRING',
'LINESTRINGFROMTEXT',
'LINESTRINGFROMWKB',
'LISTAGG',
'LN',
'LOAD_FILE',
'LOCALTIME',
@ -536,6 +565,8 @@ final class Tokenizer
'MULTIPOLYGONFROMTEXT',
'MULTIPOLYGONFROMWKB',
'NAME_CONST',
'NTH_VALUE',
'NTILE',
'NULLIF',
'NUMGEOMETRIES',
'NUMINTERIORRINGS',
@ -546,6 +577,9 @@ final class Tokenizer
'ORD',
'OVERLAPS',
'PASSWORD',
'PERCENT_RANK',
'PERCENTILE_CONT',
'PERCENTILE_DISC',
'PERIOD_ADD',
'PERIOD_DIFF',
'PI',
@ -566,6 +600,7 @@ final class Tokenizer
'QUOTE',
'RADIANS',
'RAND',
'RANK',
'RELATED',
'RELEASE_LOCK',
'REPEAT',
@ -574,6 +609,7 @@ final class Tokenizer
'RIGHT',
'ROUND',
'ROW_COUNT',
'ROW_NUMBER',
'RPAD',
'RTRIM',
'SCHEMA',
@ -591,9 +627,12 @@ final class Tokenizer
'SRID',
'STARTPOINT',
'STD',
'STDEV',
'STDEVP',
'STDDEV',
'STDDEV_POP',
'STDDEV_SAMP',
'STRING_AGG',
'STRCMP',
'STR_TO_DATE',
'SUBDATE',
@ -630,7 +669,9 @@ final class Tokenizer
'UTC_TIME',
'UTC_TIMESTAMP',
'UUID',
'VAR',
'VARIANCE',
'VARP',
'VAR_POP',
'VAR_SAMP',
'VERSION',
@ -669,6 +710,7 @@ final class Tokenizer
private $boundaries = [
',',
';',
'::', // PostgreSQL cast operator
':',
')',
'(',
@ -727,7 +769,7 @@ final class Tokenizer
*
* @param string $string The SQL string
*/
public function tokenize(string $string) : Cursor
public function tokenize(string $string): Cursor
{
$tokens = [];
@ -774,7 +816,7 @@ final class Tokenizer
*
* @return Token The next token found at the start of the string, carrying its type and value.
*/
private function createNextToken(string $string, Token $previous = null) : Token
private function createNextToken(string $string, ?Token $previous = null): Token
{
$matches = [];
// Whitespace
@ -783,9 +825,11 @@ final class Tokenizer
}
// Comment
if ($string[0] === '#' ||
(isset($string[1]) && ($string[0]==='-' && $string[1]==='-') ||
(isset($string[1]) && $string[0]==='/' && $string[1]==='*'))) {
if (
$string[0] === '#' ||
(isset($string[1]) && (($string[0] === '-' && $string[1] === '-') ||
($string[0] === '/' && $string[1] === '*')))
) {
// Comment until end of line
if ($string[0] === '-' || $string[0] === '#') {
$last = strpos($string, "\n");
@ -805,9 +849,9 @@ final class Tokenizer
}
// Quoted String
if ($string[0]==='"' || $string[0]==='\'' || $string[0]==='`' || $string[0]==='[') {
if ($string[0] === '"' || $string[0] === '\'' || $string[0] === '`' || $string[0] === '[') {
return new Token(
($string[0]==='`' || $string[0]==='['
($string[0] === '`' || $string[0] === '['
? Token::TOKEN_TYPE_BACKTICK_QUOTE
: Token::TOKEN_TYPE_QUOTE),
$this->getQuotedString($string)
@ -820,7 +864,7 @@ final class Tokenizer
$type = Token::TOKEN_TYPE_VARIABLE;
// If the variable name is quoted
if ($string[1]==='"' || $string[1]==='\'' || $string[1]==='`') {
if ($string[1] === '"' || $string[1] === '\'' || $string[1] === '`') {
$value = $string[0] . $this->getQuotedString(substr($string, 1));
} else {
// Non-quoted variable name
@ -836,11 +880,13 @@ final class Tokenizer
}
// Number (decimal, binary, or hex)
if (preg_match(
'/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|"\'`|' . $this->regexBoundaries . ')/',
$string,
$matches
)) {
if (
preg_match(
'/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|"\'`|' . $this->regexBoundaries . ')/',
$string,
$matches
)
) {
return new Token(Token::TOKEN_TYPE_NUMBER, $matches[1]);
}
@ -854,38 +900,44 @@ final class Tokenizer
if (! $previous || $previous->value() !== '.') {
$upper = strtoupper($string);
// Top Level Reserved Word
if (preg_match(
'/^(' . $this->regexReservedToplevel . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)) {
if (
preg_match(
'/^(' . $this->regexReservedToplevel . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)
) {
return new Token(
Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
substr($string, 0, strlen($matches[1]))
substr($upper, 0, strlen($matches[1]))
);
}
// Newline Reserved Word
if (preg_match(
'/^(' . $this->regexReservedNewline . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)) {
if (
preg_match(
'/^(' . $this->regexReservedNewline . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)
) {
return new Token(
Token::TOKEN_TYPE_RESERVED_NEWLINE,
substr($string, 0, strlen($matches[1]))
substr($upper, 0, strlen($matches[1]))
);
}
// Other Reserved Word
if (preg_match(
'/^(' . $this->regexReserved . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)) {
if (
preg_match(
'/^(' . $this->regexReserved . ')($|\s|' . $this->regexBoundaries . ')/',
$upper,
$matches
)
) {
return new Token(
Token::TOKEN_TYPE_RESERVED,
substr($string, 0, strlen($matches[1]))
substr($upper, 0, strlen($matches[1]))
);
}
}
@ -897,7 +949,7 @@ final class Tokenizer
if (preg_match('/^(' . $this->regexFunction . '[(]|\s|[)])/', $upper, $matches)) {
return new Token(
Token::TOKEN_TYPE_RESERVED,
substr($string, 0, strlen($matches[1])-1)
substr($upper, 0, strlen($matches[1]) - 1)
);
}
@ -914,14 +966,14 @@ final class Tokenizer
*
* @return string[] The quoted strings
*/
private function quoteRegex(array $strings): array
{
    // Escape each entry so it is matched literally when embedded in a
    // pattern. '/' is passed as the delimiter so that it is escaped too,
    // because the patterns in this class are written as '/.../'.
    // array_map() with a single array keeps the original keys and order.
    return array_map(static function (string $string): string {
        return preg_quote($string, '/');
    }, $strings);
}
private function getQuotedString(string $string) : string
private function getQuotedString(string $string): string
{
$ret = '';
@ -930,14 +982,16 @@ final class Tokenizer
// 2. square bracket quoted string (SQL Server) using ]] to escape
// 3. double quoted string using "" or \" to escape
// 4. single quoted string using '' or \' to escape
if (preg_match(
'/^(((`[^`]*($|`))+)|
if (
preg_match(
'/^(((`[^`]*($|`))+)|
((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|
(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|
((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/sx',
$string,
$matches
)) {
$string,
$matches
)
) {
$ret = $matches[1];
}