initial commit; version 22.5.12042

This commit is contained in:
2022-12-12 23:28:25 -05:00
commit af1b03d79f
17653 changed files with 22692970 additions and 0 deletions

View File

@ -0,0 +1,79 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
use function sprintf;
use const PHP_EOL;
/**
 * Highlighter for terminal output: wraps token values in ANSI escape
 * sequences and terminates each highlighted value with the reset sequence.
 */
final class CliHighlighter implements Highlighter
{
    const HIGHLIGHT_FUNCTIONS = 'functions';

    /**
     * Escape sequence to emit in front of a value, keyed by highlight name.
     *
     * @var array<string, string>
     */
    private $escapeSequences;

    /**
     * @param array<string, string> $escapeSequences Overrides for the default sequences,
     *                                               keyed by the HIGHLIGHT_* constants
     */
    public function __construct(array $escapeSequences = [])
    {
        $defaults = [
            self::HIGHLIGHT_QUOTE => "\x1b[34;1m",
            self::HIGHLIGHT_BACKTICK_QUOTE => "\x1b[35;1m",
            self::HIGHLIGHT_RESERVED => "\x1b[37m",
            self::HIGHLIGHT_BOUNDARY => '',
            self::HIGHLIGHT_NUMBER => "\x1b[32;1m",
            self::HIGHLIGHT_WORD => '',
            self::HIGHLIGHT_ERROR => "\x1b[31;1;7m",
            self::HIGHLIGHT_COMMENT => "\x1b[30;1m",
            self::HIGHLIGHT_VARIABLE => "\x1b[36;1m",
            self::HIGHLIGHT_FUNCTIONS => "\x1b[37m",
        ];

        // Array union: caller-supplied keys win, defaults only fill the gaps.
        $this->escapeSequences = $escapeSequences + $defaults;
    }

    /**
     * Wraps a token value in the escape sequence configured for its type.
     *
     * Parentheses boundaries and token types without a highlight mapping are
     * returned untouched.
     */
    public function highlightToken(int $type, string $value) : string
    {
        $isParenthesis = $value === '(' || $value === ')';
        if ($isParenthesis && $type === Token::TOKEN_TYPE_BOUNDARY) {
            return $value;
        }

        $sequence = $this->prefix($type);

        return $sequence === null ? $value : $sequence . $value . "\x1b[0m";
    }

    /**
     * Looks up the escape sequence for a token type.
     *
     * @return string|null The sequence, or null when the type has no highlight mapping
     */
    private function prefix(int $type)
    {
        $highlight = self::TOKEN_TYPE_TO_HIGHLIGHT[$type] ?? null;
        if ($highlight === null) {
            return null;
        }

        return $this->escapeSequences[$highlight];
    }

    /**
     * Renders a value with the error sequence, preceded by a line break.
     */
    public function highlightError(string $value) : string
    {
        return PHP_EOL . $this->escapeSequences[self::HIGHLIGHT_ERROR] . $value . "\x1b[0m";
    }

    /**
     * Error messages are rendered exactly like erroneous tokens.
     */
    public function highlightErrorMessage(string $value) : string
    {
        return $this->highlightError($value);
    }

    /**
     * Finishes the output by appending a trailing newline.
     */
    public function output(string $string) : string
    {
        return $string . "\n";
    }
}

View File

@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
/**
 * Forward/backward iterator over a token list.
 *
 * The cursor starts in front of the first token; every call to next() or
 * previous() moves the position before reading.
 */
final class Cursor
{
    /**
     * Index of the token returned last; -1 before the first read.
     *
     * @var int
     */
    private $position = -1;

    /** @var Token[] */
    private $tokens;

    /**
     * @param Token[] $tokens
     */
    public function __construct(array $tokens)
    {
        $this->tokens = $tokens;
    }

    /**
     * Advances to the next token, skipping tokens of the given type.
     *
     * @param int|null $exceptTokenType Token type to skip over, or null to skip nothing
     *
     * @return Token|null The token found, or null once the end of the list is passed
     */
    public function next(int $exceptTokenType = null)
    {
        for (;;) {
            $this->position++;
            $candidate = $this->tokens[$this->position] ?? null;
            if (! $candidate) {
                return null;
            }

            if ($exceptTokenType === null || ! $candidate->isOfType($exceptTokenType)) {
                return $candidate;
            }
        }
    }

    /**
     * Steps back to the previous token, skipping tokens of the given type.
     *
     * @param int|null $exceptTokenType Token type to skip over, or null to skip nothing
     *
     * @return Token|null The token found, or null once the start of the list is passed
     */
    public function previous(int $exceptTokenType = null)
    {
        for (;;) {
            $this->position--;
            $candidate = $this->tokens[$this->position] ?? null;
            if (! $candidate) {
                return null;
            }

            if ($exceptTokenType === null || ! $candidate->isOfType($exceptTokenType)) {
                return $candidate;
            }
        }
    }

    /**
     * Creates an independent cursor over the same tokens at the same position,
     * so that look-ahead/look-behind does not move this cursor.
     */
    public function subCursor() : self
    {
        $copy = new self($this->tokens);
        $copy->position = $this->position;

        return $copy;
    }
}

View File

@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
/**
 * Contract for the output decorators used by the formatter: each
 * implementation decides how tokens, errors and the final string are rendered.
 */
interface Highlighter
{
    // Names of the highlight categories an implementation can style.
    const HIGHLIGHT_BOUNDARY = 'boundary';
    const HIGHLIGHT_WORD = 'word';
    const HIGHLIGHT_BACKTICK_QUOTE = 'backtickQuote';
    const HIGHLIGHT_QUOTE = 'quote';
    const HIGHLIGHT_RESERVED = 'reserved';
    const HIGHLIGHT_NUMBER = 'number';
    const HIGHLIGHT_VARIABLE = 'variable';
    const HIGHLIGHT_COMMENT = 'comment';
    const HIGHLIGHT_ERROR = 'error';

    // Maps token types to highlight categories. Token types that are absent
    // here (whitespace, error) have no category of their own.
    const TOKEN_TYPE_TO_HIGHLIGHT = [
        Token::TOKEN_TYPE_BOUNDARY => self::HIGHLIGHT_BOUNDARY,
        Token::TOKEN_TYPE_WORD => self::HIGHLIGHT_WORD,
        Token::TOKEN_TYPE_BACKTICK_QUOTE => self::HIGHLIGHT_BACKTICK_QUOTE,
        Token::TOKEN_TYPE_QUOTE => self::HIGHLIGHT_QUOTE,
        Token::TOKEN_TYPE_RESERVED => self::HIGHLIGHT_RESERVED,
        Token::TOKEN_TYPE_RESERVED_TOPLEVEL => self::HIGHLIGHT_RESERVED,
        Token::TOKEN_TYPE_RESERVED_NEWLINE => self::HIGHLIGHT_RESERVED,
        Token::TOKEN_TYPE_NUMBER => self::HIGHLIGHT_NUMBER,
        Token::TOKEN_TYPE_VARIABLE => self::HIGHLIGHT_VARIABLE,
        Token::TOKEN_TYPE_COMMENT => self::HIGHLIGHT_COMMENT,
        Token::TOKEN_TYPE_BLOCK_COMMENT => self::HIGHLIGHT_COMMENT,
    ];

    /**
     * Renders a single token according to its type.
     *
     * @param int    $type  One of the Token::TOKEN_TYPE_* constants
     * @param string $value The raw token text
     */
    public function highlightToken(int $type, string $value) : string;

    /**
     * Renders a token that was identified as the cause of a problem.
     */
    public function highlightError(string $value) : string;

    /**
     * Renders a human-readable error message.
     */
    public function highlightErrorMessage(string $value) : string;

    /**
     * Applies the final, implementation-specific treatment to the assembled output.
     *
     * @param string $string The fully assembled string
     *
     * @return string The string ready to be handed to the caller
     */
    public function output(string $string) : string;
}

View File

@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
use function htmlentities;
use function sprintf;
use function trim;
use const ENT_COMPAT;
use const ENT_IGNORE;
use const PHP_EOL;
/**
 * Highlighter for HTML output: wraps token values in <span> elements that
 * carry configurable attributes and optionally encloses the result in <pre>.
 */
final class HtmlHighlighter implements Highlighter
{
    const HIGHLIGHT_PRE = 'pre';

    /**
     * This flag tells us if queries need to be enclosed in <pre> tags
     *
     * @var bool
     */
    private $usePre;

    /**
     * Raw attribute string to place inside the opening tag, keyed by highlight name.
     *
     * @var array<string, string>
     */
    private $htmlAttributes;

    /**
     * @param array<string, string> $htmlAttributes Overrides for the default attributes, keyed by
     *                                              the HIGHLIGHT_* constants; values are emitted verbatim
     * @param bool                  $usePre         Whether output() wraps the result in a <pre> element
     */
    public function __construct(array $htmlAttributes = [], bool $usePre = true)
    {
        // Array union: caller-supplied keys win, defaults only fill the gaps.
        $this->htmlAttributes = $htmlAttributes + [
            self::HIGHLIGHT_QUOTE => 'style="color: blue;"',
            self::HIGHLIGHT_BACKTICK_QUOTE => 'style="color: purple;"',
            self::HIGHLIGHT_RESERVED => 'style="font-weight:bold;"',
            self::HIGHLIGHT_BOUNDARY => '',
            self::HIGHLIGHT_NUMBER => 'style="color: green;"',
            self::HIGHLIGHT_WORD => 'style="color: #333;"',
            self::HIGHLIGHT_ERROR => 'style="background-color: red;"',
            self::HIGHLIGHT_COMMENT => 'style="color: #aaa;"',
            self::HIGHLIGHT_VARIABLE => 'style="color: orange;"',
            self::HIGHLIGHT_PRE => 'style="color: black; background-color: white;"',
        ];
        $this->usePre = $usePre;
    }

    /**
     * HTML-encodes a token value and wraps it in a <span> styled for its type.
     *
     * Parentheses boundaries and token types without a highlight mapping are
     * returned encoded but unwrapped.
     */
    public function highlightToken(int $type, string $value) : string
    {
        // NOTE(review): ENT_IGNORE silently drops invalid byte sequences; the PHP
        // manual discourages it in favour of ENT_SUBSTITUTE. Left as-is because
        // switching changes the emitted output.
        $value = htmlentities($value, ENT_COMPAT | ENT_IGNORE, 'UTF-8');

        if ($type === Token::TOKEN_TYPE_BOUNDARY && ($value === '(' || $value === ')')) {
            return $value;
        }

        $attributes = $this->attributes($type);
        if ($attributes === null) {
            return $value;
        }

        return '<span ' . $attributes . '>' . $value . '</span>';
    }

    /**
     * Looks up the attribute string for a token type.
     *
     * @return string|null The attributes, or null when the type has no highlight mapping
     */
    public function attributes(int $type)
    {
        if (! isset(self::TOKEN_TYPE_TO_HIGHLIGHT[$type])) {
            return null;
        }

        return $this->htmlAttributes[self::TOKEN_TYPE_TO_HIGHLIGHT[$type]];
    }

    /**
     * Renders a value inside an error <span>, preceded by a line break.
     *
     * The value is inserted as given; it is not HTML-encoded here.
     */
    public function highlightError(string $value) : string
    {
        return sprintf(
            '%s<span %s>%s</span>',
            PHP_EOL,
            $this->htmlAttributes[self::HIGHLIGHT_ERROR],
            $value
        );
    }

    /**
     * Error messages are rendered exactly like erroneous tokens.
     */
    public function highlightErrorMessage(string $value) : string
    {
        return $this->highlightError($value);
    }

    /**
     * Trims the assembled markup, makes long IN (...) lists collapsible and
     * optionally wraps everything in a <pre> element.
     */
    public function output(string $string) : string
    {
        $string = trim($string);

        // Wrap the contents of "IN (...)" lists in a truncating <div> so long
        // lists do not blow up the page. The onclick handler revealHiddenOverflow()
        // is presumably provided by the embedding page - it is not defined here.
        $truncated = preg_replace(
            '!(IN</span>\s*)(\()([^\)]+)(\))!',
            '$1$2<div class="text-truncate" onclick="revealHiddenOverflow(this)">$3</div>$4',
            $string
        );

        // preg_replace() returns null on a PCRE error (e.g. a backtrack or JIT
        // stack limit on a huge query). Keep the untruncated markup in that case
        // instead of losing the query or violating the string return type.
        if ($truncated !== null) {
            $string = $truncated;
        }

        if (! $this->usePre) {
            return $string;
        }

        return '<pre ' . $this->htmlAttributes[self::HIGHLIGHT_PRE] . '>' . $string . '</pre>';
    }
}

View File

@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
/**
 * Highlighter that adds no decoration at all; used to obtain plain text.
 */
final class NullHighlighter implements Highlighter
{
    /**
     * Returns the token text unchanged, whatever its type.
     */
    public function highlightToken(int $type, string $value) : string
    {
        return $value;
    }

    /**
     * Returns the erroneous token text unchanged.
     */
    public function highlightError(string $value) : string
    {
        return $value;
    }

    /**
     * Returns the message with a single leading space so that it is set
     * apart from the text it is appended to.
     */
    public function highlightErrorMessage(string $value) : string
    {
        $separator = ' ';

        return $separator . $value;
    }

    /**
     * Returns the assembled string unchanged.
     */
    public function output(string $string) : string
    {
        return $string;
    }
}

View File

@ -0,0 +1,423 @@
<?php
declare(strict_types=1);
/**
* SQL Formatter is a collection of utilities for debugging SQL queries.
* It includes methods for formatting, syntax highlighting, removing comments, etc.
*
* @link http://github.com/jdorn/sql-formatter
*/
namespace Doctrine\SqlFormatter;
use function array_search;
use function array_shift;
use function array_unshift;
use function assert;
use function current;
use function preg_replace;
use function reset;
use function rtrim;
use function str_repeat;
use function str_replace;
use function strlen;
use function strtoupper;
use function trim;
use const PHP_SAPI;
final class SqlFormatter
{
/** @var Highlighter */
private $highlighter;
/** @var Tokenizer */
private $tokenizer;
/**
 * @param Highlighter|null $highlighter Highlighter to render with; when omitted, the CLI
 *                                      highlighter is used under the "cli" SAPI and the
 *                                      HTML highlighter everywhere else
 */
public function __construct(Highlighter $highlighter = null)
{
    if ($highlighter === null) {
        $highlighter = PHP_SAPI === 'cli' ? new CliHighlighter() : new HtmlHighlighter();
    }

    $this->highlighter = $highlighter;
    $this->tokenizer = new Tokenizer();
}
/**
 * Format the whitespace in a SQL string to make it easier to read.
 *
 * Every non-whitespace token is rendered through the configured highlighter
 * and laid out with newlines and indentation; the assembled text is finally
 * passed through the highlighter's output() method.
 *
 * @param string $string       The SQL string
 * @param string $indentString The string that replaces each tab of indentation in the result
 *
 * @return string The formatted SQL as rendered by the configured highlighter
 */
public function format(string $string, string $indentString = ' ') : string
{
    // This variable will be populated with the formatted output
    $return = '';

    // Use an actual tab while formatting and then switch out with $indentString at the end
    $tab = "\t";

    $indentLevel = 0;
    $newline = false;
    $inlineParentheses = false;
    $increaseSpecialIndent = false;
    $increaseBlockIndent = false;
    // Stack of open indent kinds ('special' or 'block'), innermost first
    $indentTypes = [];
    $addedNewline = false;
    $inlineCount = 0;
    $inlineIndented = false;
    $clauseLimit = false;

    // Tokenize String
    $cursor = $this->tokenizer->tokenize($string);

    // Format token by token
    while ($token = $cursor->next(Token::TOKEN_TYPE_WHITESPACE)) {
        $highlighted = $this->highlighter->highlightToken(
            $token->type(),
            $token->value()
        );

        // If we are increasing the special indent level now
        if ($increaseSpecialIndent) {
            $indentLevel++;
            $increaseSpecialIndent = false;
            array_unshift($indentTypes, 'special');
        }

        // If we are increasing the block indent level now
        if ($increaseBlockIndent) {
            $indentLevel++;
            $increaseBlockIndent = false;
            array_unshift($indentTypes, 'block');
        }

        // If we need a new line before the token
        if ($newline) {
            $return = rtrim($return, ' ');
            $return .= "\n" . str_repeat($tab, $indentLevel);
            $newline = false;
            $addedNewline = true;
        } else {
            $addedNewline = false;
        }

        // Display comments directly where they appear in the source
        if ($token->isOfType(Token::TOKEN_TYPE_COMMENT, Token::TOKEN_TYPE_BLOCK_COMMENT)) {
            if ($token->isOfType(Token::TOKEN_TYPE_BLOCK_COMMENT)) {
                $indent = str_repeat($tab, $indentLevel);
                $return = rtrim($return, " \t");
                $return .= "\n" . $indent;
                $highlighted = str_replace("\n", "\n" . $indent, $highlighted);
            }

            $return .= $highlighted;
            $newline = true;
            continue;
        }

        if ($inlineParentheses) {
            // End of inline parentheses
            if ($token->value() === ')') {
                $return = rtrim($return, ' ');

                if ($inlineIndented) {
                    array_shift($indentTypes);
                    $indentLevel--;
                    $return = rtrim($return, ' ');
                    $return .= "\n" . str_repeat($tab, $indentLevel);
                }

                $inlineParentheses = false;

                $return .= $highlighted . ' ';
                continue;
            }

            if ($token->value() === ',') {
                if ($inlineCount >= 30) {
                    $inlineCount = 0;
                    $newline = true;
                }
            }

            $inlineCount += strlen($token->value());
        }

        // Opening parentheses increase the block indent level and start a new line
        if ($token->value() === '(') {
            // First check if this should be an inline parentheses block
            // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
            // Look ahead at most 250 non-whitespace tokens for the closing parenthesis
            $length = 0;
            $subCursor = $cursor->subCursor();
            for ($j = 1; $j <= 250; $j++) {
                // Reached end of string
                $next = $subCursor->next(Token::TOKEN_TYPE_WHITESPACE);
                if (! $next) {
                    break;
                }

                // Reached closing parentheses, able to inline it
                if ($next->value() === ')') {
                    $inlineParentheses = true;
                    $inlineCount = 0;
                    $inlineIndented = false;
                    break;
                }

                // Reached an invalid token for inline parentheses
                if ($next->value() === ';' || $next->value() === '(') {
                    break;
                }

                // Reached an invalid token type for inline parentheses
                if ($next->isOfType(
                    Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
                    Token::TOKEN_TYPE_RESERVED_NEWLINE,
                    Token::TOKEN_TYPE_COMMENT,
                    Token::TOKEN_TYPE_BLOCK_COMMENT
                )) {
                    break;
                }

                $length += strlen($next->value());
            }

            if ($inlineParentheses && $length > 30) {
                $increaseBlockIndent = true;
                $inlineIndented = true;
                $newline = true;
            }

            // Take out the preceding space unless there was whitespace there in the original query
            $prevToken = $cursor->subCursor()->previous();
            if ($prevToken && ! $prevToken->isOfType(Token::TOKEN_TYPE_WHITESPACE)) {
                $return = rtrim($return, ' ');
            }

            if (! $inlineParentheses) {
                $increaseBlockIndent = true;
                // Add a newline after the parentheses
                $newline = true;
            }
        } elseif ($token->value() === ')') {
            // Closing parentheses decrease the block indent level
            // Remove whitespace before the closing parentheses
            $return = rtrim($return, ' ');

            $indentLevel--;

            // Reset indent level: pop every 'special' indent opened inside
            // this block, up to and including the 'block' entry itself
            while ($indentType = array_shift($indentTypes)) {
                if ($indentType !== 'special') {
                    break;
                }

                $indentLevel--;
            }

            if ($indentLevel < 0) {
                // This is an error
                $indentLevel = 0;

                $return .= $this->highlighter->highlightError($token->value());
                continue;
            }

            // Add a newline before the closing parentheses (if not already added)
            if (! $addedNewline) {
                $return .= "\n" . str_repeat($tab, $indentLevel);
            }
        } elseif ($token->isOfType(Token::TOKEN_TYPE_RESERVED_TOPLEVEL)) {
            // Top level reserved words start a new line and increase the special indent level
            $increaseSpecialIndent = true;

            // If the last indent type was 'special', decrease the special indent for this round
            reset($indentTypes);
            if (current($indentTypes) === 'special') {
                $indentLevel--;
                array_shift($indentTypes);
            }

            // Add a newline after the top level reserved word
            $newline = true;
            // Add a newline before the top level reserved word (if not already added)
            if (! $addedNewline) {
                $return = rtrim($return, ' ');
                $return .= "\n" . str_repeat($tab, $indentLevel);
            } else {
                // If we already added a newline, redo the indentation since it may be different now
                $return = rtrim($return, $tab) . str_repeat($tab, $indentLevel);
            }

            if ($token->hasExtraWhitespace()) {
                // Fall back to the unmodified text if PCRE reports an error (null)
                $highlighted = preg_replace('/\s+/', ' ', $highlighted) ?? $highlighted;
            }

            // If SQL 'LIMIT' clause, start variable to reset newline.
            // Token values keep the casing of the input, so compare case-insensitively;
            // otherwise "limit 1, 2" would be laid out differently from "LIMIT 1, 2".
            if (strtoupper($token->value()) === 'LIMIT' && ! $inlineParentheses) {
                $clauseLimit = true;
            }
        } elseif ($clauseLimit &&
            $token->value() !== ',' &&
            ! $token->isOfType(Token::TOKEN_TYPE_NUMBER, Token::TOKEN_TYPE_WHITESPACE)) {
            // Checks if we are out of the limit clause
            $clauseLimit = false;
        } elseif ($token->value() === ',' && ! $inlineParentheses) {
            // Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause)
            // If the previous TOKEN_VALUE is 'LIMIT', resets new line
            if ($clauseLimit === true) {
                $newline = false;
                $clauseLimit = false;
            } else {
                // All other cases of commas
                $newline = true;
            }
        } elseif ($token->isOfType(Token::TOKEN_TYPE_RESERVED_NEWLINE)) {
            // Newline reserved words start a new line
            // Add a newline before the reserved word (if not already added)
            if (! $addedNewline) {
                $return = rtrim($return, ' ');
                $return .= "\n" . str_repeat($tab, $indentLevel);
            }

            if ($token->hasExtraWhitespace()) {
                // Fall back to the unmodified text if PCRE reports an error (null)
                $highlighted = preg_replace('/\s+/', ' ', $highlighted) ?? $highlighted;
            }
        } elseif ($token->isOfType(Token::TOKEN_TYPE_BOUNDARY)) {
            // Multiple boundary characters in a row should not have spaces between them (not including parentheses)
            $prevNotWhitespaceToken = $cursor->subCursor()->previous(Token::TOKEN_TYPE_WHITESPACE);
            if ($prevNotWhitespaceToken && $prevNotWhitespaceToken->isOfType(Token::TOKEN_TYPE_BOUNDARY)) {
                $prevToken = $cursor->subCursor()->previous();
                if ($prevToken && ! $prevToken->isOfType(Token::TOKEN_TYPE_WHITESPACE)) {
                    $return = rtrim($return, ' ');
                }
            }
        }

        // If the token shouldn't have a space before it
        if ($token->value() === '.' ||
            $token->value() === ',' ||
            $token->value() === ';') {
            $return = rtrim($return, ' ');
        }

        $return .= $highlighted . ' ';

        // If the token shouldn't have a space after it
        if ($token->value() === '(' || $token->value() === '.') {
            $return = rtrim($return, ' ');
        }

        // If this is the "-" of a negative number, it shouldn't have a space after it
        if ($token->value() !== '-') {
            continue;
        }

        $nextNotWhitespace = $cursor->subCursor()->next(Token::TOKEN_TYPE_WHITESPACE);
        if (! $nextNotWhitespace || ! $nextNotWhitespace->isOfType(Token::TOKEN_TYPE_NUMBER)) {
            continue;
        }

        $prev = $cursor->subCursor()->previous(Token::TOKEN_TYPE_WHITESPACE);
        if (! $prev) {
            continue;
        }

        // After an operand the "-" is a binary minus and keeps its trailing space
        if ($prev->isOfType(
            Token::TOKEN_TYPE_QUOTE,
            Token::TOKEN_TYPE_BACKTICK_QUOTE,
            Token::TOKEN_TYPE_WORD,
            Token::TOKEN_TYPE_NUMBER
        )) {
            continue;
        }

        $return = rtrim($return, ' ');
    }

    // If there are unmatched parentheses
    if (array_search('block', $indentTypes, true) !== false) {
        $return = rtrim($return, ' ');
        $return .= $this->highlighter->highlightErrorMessage(
            'WARNING: unclosed parentheses or section'
        );
    }

    // Replace tab characters with the configuration tab character
    $return = trim(str_replace("\t", $indentString, $return));

    return $this->highlighter->output($return);
}
/**
 * Add syntax highlighting to a SQL string without changing its layout.
 *
 * Every token, whitespace included, is rendered through the configured
 * highlighter in its original order.
 *
 * @param string $string The SQL string
 *
 * @return string The highlighted SQL as rendered by the configured highlighter
 */
public function highlight(string $string) : string
{
    $highlighted = '';
    $cursor = $this->tokenizer->tokenize($string);

    for ($token = $cursor->next(); $token; $token = $cursor->next()) {
        $highlighted .= $this->highlighter->highlightToken($token->type(), $token->value());
    }

    return $this->highlighter->output($highlighted);
}
/**
 * Compress a query by collapsing white space and removing comments
 *
 * The highlighter is not involved; the result is plain text.
 *
 * @param string $string The SQL string
 *
 * @return string The SQL string without comments and with each run of whitespace reduced to one space
 */
public function compress(string $string) : string
{
    $cursor = $this->tokenizer->tokenize($string);
    $compressed = '';
    // Starts out true so that leading whitespace is dropped
    $lastWasWhitespace = true;

    while ($token = $cursor->next()) {
        // Comments are dropped entirely
        if ($token->isOfType(Token::TOKEN_TYPE_COMMENT, Token::TOKEN_TYPE_BLOCK_COMMENT)) {
            continue;
        }

        if ($token->isOfType(Token::TOKEN_TYPE_WHITESPACE)) {
            // Emit at most one space per run of whitespace tokens
            if ($lastWasWhitespace) {
                continue;
            }

            $lastWasWhitespace = true;
            $token = $token->withValue(' ');
        } else {
            $lastWasWhitespace = false;

            // Multi-word reserved tokens may contain arbitrary whitespace
            // between the words (e.g. "OUTER \n JOIN"); reduce it to one space
            $isReserved = $token->isOfType(
                Token::TOKEN_TYPE_RESERVED,
                Token::TOKEN_TYPE_RESERVED_NEWLINE,
                Token::TOKEN_TYPE_RESERVED_TOPLEVEL
            );
            if ($isReserved) {
                $collapsed = preg_replace('/\s+/', ' ', $token->value());
                assert($collapsed !== null);
                $token = $token->withValue($collapsed);
            }
        }

        $compressed .= $token->value();
    }

    return rtrim($compressed);
}
}

View File

@ -0,0 +1,69 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
use function in_array;
use function strpos;
/**
 * Immutable value object for one lexical token: a type plus the exact text
 * it was created with.
 */
final class Token
{
    // Constants for token types
    const TOKEN_TYPE_WHITESPACE = 0;
    const TOKEN_TYPE_WORD = 1;
    const TOKEN_TYPE_QUOTE = 2;
    const TOKEN_TYPE_BACKTICK_QUOTE = 3;
    const TOKEN_TYPE_RESERVED = 4;
    const TOKEN_TYPE_RESERVED_TOPLEVEL = 5;
    const TOKEN_TYPE_RESERVED_NEWLINE = 6;
    const TOKEN_TYPE_BOUNDARY = 7;
    const TOKEN_TYPE_COMMENT = 8;
    const TOKEN_TYPE_BLOCK_COMMENT = 9;
    const TOKEN_TYPE_NUMBER = 10;
    const TOKEN_TYPE_ERROR = 11;
    const TOKEN_TYPE_VARIABLE = 12;

    // Constants for different components of a token
    const TOKEN_TYPE = 0;
    const TOKEN_VALUE = 1;

    /**
     * One of the TOKEN_TYPE_* constants.
     *
     * @var int
     */
    private $type;

    /**
     * The token text.
     *
     * @var string
     */
    private $value;

    public function __construct(int $type, string $value)
    {
        $this->value = $value;
        $this->type = $type;
    }

    /**
     * Returns the token text.
     */
    public function value() : string
    {
        return $this->value;
    }

    /**
     * Returns the token type (one of the TOKEN_TYPE_* constants).
     */
    public function type() : int
    {
        return $this->type;
    }

    /**
     * Tells whether the token's type is one of the given types.
     *
     * @param int ...$types Candidate TOKEN_TYPE_* values; with no candidates the answer is false
     */
    public function isOfType(int ...$types) : bool
    {
        foreach ($types as $candidate) {
            if ($candidate === $this->type) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tells whether the token text contains a space, a newline or a tab.
     */
    public function hasExtraWhitespace() : bool
    {
        foreach ([' ', "\n", "\t"] as $needle) {
            if (strpos($this->value, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns a new token of the same type carrying the given text; this
     * token is left unchanged.
     */
    public function withValue(string $value) : self
    {
        return new self($this->type, $value);
    }
}

View File

@ -0,0 +1,946 @@
<?php
declare(strict_types=1);
namespace Doctrine\SqlFormatter;
use function array_combine;
use function array_keys;
use function array_map;
use function arsort;
use function assert;
use function implode;
use function preg_match;
use function preg_quote;
use function str_replace;
use function strlen;
use function strpos;
use function strtoupper;
use function substr;
/**
* @internal
*/
final class Tokenizer
{
/**
* Reserved words (for syntax highlighting)
*
* @var string[]
*/
private $reserved = [
'ACCESSIBLE',
'ACTION',
'AFTER',
'AGAINST',
'AGGREGATE',
'ALGORITHM',
'ALL',
'ALTER',
'ANALYSE',
'ANALYZE',
'AS',
'ASC',
'AUTOCOMMIT',
'AUTO_INCREMENT',
'BACKUP',
'BEGIN',
'BETWEEN',
'BINLOG',
'BOTH',
'CASCADE',
'CASE',
'CHANGE',
'CHANGED',
'CHARACTER SET',
'CHARSET',
'CHECK',
'CHECKSUM',
'COLLATE',
'COLLATION',
'COLUMN',
'COLUMNS',
'COMMENT',
'COMMIT',
'COMMITTED',
'COMPRESSED',
'CONCURRENT',
'CONSTRAINT',
'CONTAINS',
'CONVERT',
'CREATE',
'CROSS',
'CURRENT_TIMESTAMP',
'DATABASE',
'DATABASES',
'DAY',
'DAY_HOUR',
'DAY_MINUTE',
'DAY_SECOND',
'DEFAULT',
'DEFINER',
'DELAYED',
'DELETE',
'DESC',
'DESCRIBE',
'DETERMINISTIC',
'DISTINCT',
'DISTINCTROW',
'DIV',
'DO',
'DUMPFILE',
'DUPLICATE',
'DYNAMIC',
'ELSE',
'ENCLOSED',
'END',
'ENGINE',
'ENGINE_TYPE',
'ENGINES',
'ESCAPE',
'ESCAPED',
'EVENTS',
'EXEC',
'EXECUTE',
'EXISTS',
'EXPLAIN',
'EXTENDED',
'FAST',
'FIELDS',
'FILE',
'FIRST',
'FIXED',
'FLUSH',
'FOR',
'FORCE',
'FOREIGN',
'FULL',
'FULLTEXT',
'FUNCTION',
'GLOBAL',
'GRANT',
'GRANTS',
'GROUP_CONCAT',
'HEAP',
'HIGH_PRIORITY',
'HOSTS',
'HOUR',
'HOUR_MINUTE',
'HOUR_SECOND',
'IDENTIFIED',
'IF',
'IFNULL',
'IGNORE',
'IN',
'INDEX',
'INDEXES',
'INFILE',
'INSERT',
'INSERT_ID',
'INSERT_METHOD',
'INTERVAL',
'INTO',
'INVOKER',
'IS',
'ISOLATION',
'KEY',
'KEYS',
'KILL',
'LAST_INSERT_ID',
'LEADING',
'LEVEL',
'LIKE',
'LINEAR',
'LINES',
'LOAD',
'LOCAL',
'LOCK',
'LOCKS',
'LOGS',
'LOW_PRIORITY',
'MARIA',
'MASTER',
'MASTER_CONNECT_RETRY',
'MASTER_HOST',
'MASTER_LOG_FILE',
'MATCH',
'MAX_CONNECTIONS_PER_HOUR',
'MAX_QUERIES_PER_HOUR',
'MAX_ROWS',
'MAX_UPDATES_PER_HOUR',
'MAX_USER_CONNECTIONS',
'MEDIUM',
'MERGE',
'MINUTE',
'MINUTE_SECOND',
'MIN_ROWS',
'MODE',
'MONTH',
'MRG_MYISAM',
'MYISAM',
'NAMES',
'NATURAL',
'NOT',
'NOW()',
'NULL',
'OFFSET',
'ON',
'OPEN',
'OPTIMIZE',
'OPTION',
'OPTIONALLY',
'ON UPDATE',
'ON DELETE',
'OUTFILE',
'PACK_KEYS',
'PAGE',
'PARTIAL',
'PARTITION',
'PARTITIONS',
'PASSWORD',
'PRIMARY',
'PRIVILEGES',
'PROCEDURE',
'PROCESS',
'PROCESSLIST',
'PURGE',
'QUICK',
'RANGE',
'RAID0',
'RAID_CHUNKS',
'RAID_CHUNKSIZE',
'RAID_TYPE',
'READ',
'READ_ONLY',
'READ_WRITE',
'REFERENCES',
'REGEXP',
'RELOAD',
'RENAME',
'REPAIR',
'REPEATABLE',
'REPLACE',
'REPLICATION',
'RESET',
'RESTORE',
'RESTRICT',
'RETURN',
'RETURNS',
'REVOKE',
'RLIKE',
'ROLLBACK',
'ROW',
'ROWS',
'ROW_FORMAT',
'SECOND',
'SECURITY',
'SEPARATOR',
'SERIALIZABLE',
'SESSION',
'SHARE',
'SHOW',
'SHUTDOWN',
'SLAVE',
'SONAME',
'SOUNDS',
'SQL',
'SQL_AUTO_IS_NULL',
'SQL_BIG_RESULT',
'SQL_BIG_SELECTS',
'SQL_BIG_TABLES',
'SQL_BUFFER_RESULT',
'SQL_CALC_FOUND_ROWS',
'SQL_LOG_BIN',
'SQL_LOG_OFF',
'SQL_LOG_UPDATE',
'SQL_LOW_PRIORITY_UPDATES',
'SQL_MAX_JOIN_SIZE',
'SQL_QUOTE_SHOW_CREATE',
'SQL_SAFE_UPDATES',
'SQL_SELECT_LIMIT',
'SQL_SLAVE_SKIP_COUNTER',
'SQL_SMALL_RESULT',
'SQL_WARNINGS',
'SQL_CACHE',
'SQL_NO_CACHE',
'START',
'STARTING',
'STATUS',
'STOP',
'STORAGE',
'STRAIGHT_JOIN',
'STRING',
'STRIPED',
'SUPER',
'TABLE',
'TABLES',
'TEMPORARY',
'TERMINATED',
'THEN',
'TO',
'TRAILING',
'TRANSACTIONAL',
'TRUE',
'TRUNCATE',
'TYPE',
'TYPES',
'UNCOMMITTED',
'UNIQUE',
'UNLOCK',
'UNSIGNED',
'USAGE',
'USE',
'USING',
'VARIABLES',
'VIEW',
'WHEN',
'WITH',
'WORK',
'WRITE',
'YEAR_MONTH',
];
/**
* For SQL formatting
* These keywords will all be on their own line
*
* @var string[]
*/
private $reservedToplevel = [
'SELECT',
'FROM',
'WHERE',
'SET',
'ORDER BY',
'GROUP BY',
'LIMIT',
'DROP',
'VALUES',
'UPDATE',
'HAVING',
'ADD',
'CHANGE',
'MODIFY',
'ALTER TABLE',
'DELETE FROM',
'UNION ALL',
'UNION',
'EXCEPT',
'INTERSECT',
];
/** @var string[] */
private $reservedNewline = [
'LEFT OUTER JOIN',
'RIGHT OUTER JOIN',
'LEFT JOIN',
'RIGHT JOIN',
'OUTER JOIN',
'INNER JOIN',
'JOIN',
'XOR',
'OR',
'AND',
];
/** @var string[] */
private $functions = [
'ABS',
'ACOS',
'ADDDATE',
'ADDTIME',
'AES_DECRYPT',
'AES_ENCRYPT',
'AREA',
'ASBINARY',
'ASCII',
'ASIN',
'ASTEXT',
'ATAN',
'ATAN2',
'AVG',
'BDMPOLYFROMTEXT',
'BDMPOLYFROMWKB',
'BDPOLYFROMTEXT',
'BDPOLYFROMWKB',
'BENCHMARK',
'BIN',
'BIT_AND',
'BIT_COUNT',
'BIT_LENGTH',
'BIT_OR',
'BIT_XOR',
'BOUNDARY',
'BUFFER',
'CAST',
'CEIL',
'CEILING',
'CENTROID',
'CHAR',
'CHARACTER_LENGTH',
'CHARSET',
'CHAR_LENGTH',
'COALESCE',
'COERCIBILITY',
'COLLATION',
'COMPRESS',
'CONCAT',
'CONCAT_WS',
'CONNECTION_ID',
'CONTAINS',
'CONV',
'CONVERT',
'CONVERT_TZ',
'CONVEXHULL',
'COS',
'COT',
'COUNT',
'CRC32',
'CROSSES',
'CURDATE',
'CURRENT_DATE',
'CURRENT_TIME',
'CURRENT_TIMESTAMP',
'CURRENT_USER',
'CURTIME',
'DATABASE',
'DATE',
'DATEDIFF',
'DATE_ADD',
'DATE_DIFF',
'DATE_FORMAT',
'DATE_SUB',
'DAY',
'DAYNAME',
'DAYOFMONTH',
'DAYOFWEEK',
'DAYOFYEAR',
'DECODE',
'DEFAULT',
'DEGREES',
'DES_DECRYPT',
'DES_ENCRYPT',
'DIFFERENCE',
'DIMENSION',
'DISJOINT',
'DISTANCE',
'ELT',
'ENCODE',
'ENCRYPT',
'ENDPOINT',
'ENVELOPE',
'EQUALS',
'EXP',
'EXPORT_SET',
'EXTERIORRING',
'EXTRACT',
'EXTRACTVALUE',
'FIELD',
'FIND_IN_SET',
'FLOOR',
'FORMAT',
'FOUND_ROWS',
'FROM_DAYS',
'FROM_UNIXTIME',
'GEOMCOLLFROMTEXT',
'GEOMCOLLFROMWKB',
'GEOMETRYCOLLECTION',
'GEOMETRYCOLLECTIONFROMTEXT',
'GEOMETRYCOLLECTIONFROMWKB',
'GEOMETRYFROMTEXT',
'GEOMETRYFROMWKB',
'GEOMETRYN',
'GEOMETRYTYPE',
'GEOMFROMTEXT',
'GEOMFROMWKB',
'GET_FORMAT',
'GET_LOCK',
'GLENGTH',
'GREATEST',
'GROUP_CONCAT',
'GROUP_UNIQUE_USERS',
'HEX',
'HOUR',
'IF',
'IFNULL',
'INET_ATON',
'INET_NTOA',
'INSERT',
'INSTR',
'INTERIORRINGN',
'INTERSECTION',
'INTERSECTS',
'INTERVAL',
'ISCLOSED',
'ISEMPTY',
'ISNULL',
'ISRING',
'ISSIMPLE',
'IS_FREE_LOCK',
'IS_USED_LOCK',
'LAST_DAY',
'LAST_INSERT_ID',
'LCASE',
'LEAST',
'LEFT',
'LENGTH',
'LINEFROMTEXT',
'LINEFROMWKB',
'LINESTRING',
'LINESTRINGFROMTEXT',
'LINESTRINGFROMWKB',
'LN',
'LOAD_FILE',
'LOCALTIME',
'LOCALTIMESTAMP',
'LOCATE',
'LOG',
'LOG10',
'LOG2',
'LOWER',
'LPAD',
'LTRIM',
'MAKEDATE',
'MAKETIME',
'MAKE_SET',
'MASTER_POS_WAIT',
'MAX',
'MBRCONTAINS',
'MBRDISJOINT',
'MBREQUAL',
'MBRINTERSECTS',
'MBROVERLAPS',
'MBRTOUCHES',
'MBRWITHIN',
'MD5',
'MICROSECOND',
'MID',
'MIN',
'MINUTE',
'MLINEFROMTEXT',
'MLINEFROMWKB',
'MOD',
'MONTH',
'MONTHNAME',
'MPOINTFROMTEXT',
'MPOINTFROMWKB',
'MPOLYFROMTEXT',
'MPOLYFROMWKB',
'MULTILINESTRING',
'MULTILINESTRINGFROMTEXT',
'MULTILINESTRINGFROMWKB',
'MULTIPOINT',
'MULTIPOINTFROMTEXT',
'MULTIPOINTFROMWKB',
'MULTIPOLYGON',
'MULTIPOLYGONFROMTEXT',
'MULTIPOLYGONFROMWKB',
'NAME_CONST',
'NULLIF',
'NUMGEOMETRIES',
'NUMINTERIORRINGS',
'NUMPOINTS',
'OCT',
'OCTET_LENGTH',
'OLD_PASSWORD',
'ORD',
'OVERLAPS',
'PASSWORD',
'PERIOD_ADD',
'PERIOD_DIFF',
'PI',
'POINT',
'POINTFROMTEXT',
'POINTFROMWKB',
'POINTN',
'POINTONSURFACE',
'POLYFROMTEXT',
'POLYFROMWKB',
'POLYGON',
'POLYGONFROMTEXT',
'POLYGONFROMWKB',
'POSITION',
'POW',
'POWER',
'QUARTER',
'QUOTE',
'RADIANS',
'RAND',
'RELATED',
'RELEASE_LOCK',
'REPEAT',
'REPLACE',
'REVERSE',
'RIGHT',
'ROUND',
'ROW_COUNT',
'RPAD',
'RTRIM',
'SCHEMA',
'SECOND',
'SEC_TO_TIME',
'SESSION_USER',
'SHA',
'SHA1',
'SIGN',
'SIN',
'SLEEP',
'SOUNDEX',
'SPACE',
'SQRT',
'SRID',
'STARTPOINT',
'STD',
'STDDEV',
'STDDEV_POP',
'STDDEV_SAMP',
'STRCMP',
'STR_TO_DATE',
'SUBDATE',
'SUBSTR',
'SUBSTRING',
'SUBSTRING_INDEX',
'SUBTIME',
'SUM',
'SYMDIFFERENCE',
'SYSDATE',
'SYSTEM_USER',
'TAN',
'TIME',
'TIMEDIFF',
'TIMESTAMP',
'TIMESTAMPADD',
'TIMESTAMPDIFF',
'TIME_FORMAT',
'TIME_TO_SEC',
'TOUCHES',
'TO_DAYS',
'TRIM',
'TRUNCATE',
'UCASE',
'UNCOMPRESS',
'UNCOMPRESSED_LENGTH',
'UNHEX',
'UNIQUE_USERS',
'UNIX_TIMESTAMP',
'UPDATEXML',
'UPPER',
'USER',
'UTC_DATE',
'UTC_TIME',
'UTC_TIMESTAMP',
'UUID',
'VARIANCE',
'VAR_POP',
'VAR_SAMP',
'VERSION',
'WEEK',
'WEEKDAY',
'WEEKOFYEAR',
'WITHIN',
'X',
'Y',
'YEAR',
'YEARWEEK',
];
// Regular expressions for tokenizing
/** @var string */
private $regexBoundaries;
/** @var string */
private $regexReserved;
/** @var string */
private $regexReservedNewline;
/** @var string */
private $regexReservedToplevel;
/** @var string */
private $regexFunction;
/**
* Punctuation that can be used as a boundary between other tokens
*
* @var string[]
*/
private $boundaries = [
',',
';',
':',
')',
'(',
'.',
'=',
'<',
'>',
'+',
'-',
'*',
'/',
'!',
'^',
'%',
'|',
'&',
'#',
];
/**
 * One-time setup: orders the reserved words from longest to shortest and
 * compiles every word list into a regular-expression alternation group.
 */
public function __construct()
{
    // Sort reserved word list from longest word to shortest, 3x faster than usort.
    // Longest first makes the alternation prefer e.g. "CHANGED" over "CHANGE".
    $lengthByWord = array_combine($this->reserved, array_map('strlen', $this->reserved));
    assert($lengthByWord !== false);
    arsort($lengthByWord);
    $this->reserved = array_keys($lengthByWord);

    // Builds "(a|b|c)" from a word list with every word regex-escaped
    $alternation = function (array $words) : string {
        return '(' . implode('|', $this->quoteRegex($words)) . ')';
    };

    $this->regexBoundaries = $alternation($this->boundaries);
    $this->regexReserved = $alternation($this->reserved);

    // In multi-word keywords a single space stands for any run of whitespace
    $this->regexReservedToplevel = str_replace(' ', '\\s+', $alternation($this->reservedToplevel));
    $this->regexReservedNewline = str_replace(' ', '\\s+', $alternation($this->reservedNewline));

    $this->regexFunction = $alternation($this->functions);
}
/**
 * Takes a SQL string and breaks it into tokens.
 *
 * Tokens are cut off the front of the string one at a time. If a step fails
 * to consume anything, the unprocessed remainder is appended as a single
 * error token and tokenizing stops.
 *
 * @param string $string The SQL string
 *
 * @return Cursor A cursor over the resulting Token objects
 */
public function tokenize(string $string) : Cursor
{
    $tokens = [];
    $previous = null;
    $remaining = strlen($string);

    // Length seen by the preceding iteration; starts one above the real
    // length so that the very first progress check passes
    $lengthBefore = $remaining + 1;

    // Keep processing the string until it is empty
    while ($remaining) {
        // If the string stopped shrinking, there was a problem
        if ($lengthBefore <= $remaining) {
            $tokens[] = new Token(Token::TOKEN_TYPE_ERROR, $string);

            return new Cursor($tokens);
        }

        $lengthBefore = $remaining;

        // Cut the next token off the front of the string
        $previous = $this->createNextToken($string, $previous);
        $tokens[] = $previous;

        $consumed = strlen($previous->value());
        $string = substr($string, $consumed);
        $remaining -= $consumed;
    }

    return new Cursor($tokens);
}
/**
 * Return the token found at the very start of a SQL string.
 *
 * Quoted strings, comments, reserved words, whitespace, and punctuation
 * are all their own tokens. The checks run in a fixed order and the first
 * one that matches wins.
 *
 * @param string     $string   The remaining SQL; must not be empty
 * @param Token|null $previous The result of the previous createNextToken() call
 *
 * @return Token The token, whose value is the leading part of $string
 */
private function createNextToken(string $string, Token $previous = null) : Token
{
    $matches = [];

    // Whitespace
    if (preg_match('/^\s+/', $string, $matches)) {
        return new Token(Token::TOKEN_TYPE_WHITESPACE, $matches[0]);
    }

    // Comment
    $startsLineComment = $string[0] === '#'
        || (isset($string[1]) && $string[0] === '-' && $string[1] === '-');
    $startsBlockComment = isset($string[1]) && $string[0] === '/' && $string[1] === '*';

    if ($startsLineComment || $startsBlockComment) {
        if ($startsLineComment) {
            // Comment until end of line (the newline itself is not included)
            $last = strpos($string, "\n");
            $type = Token::TOKEN_TYPE_COMMENT;
        } else {
            // Comment until closing comment tag. An unterminated comment is
            // legal input for a formatter, so it must not trip an assertion;
            // it is handled by the fallback below.
            $pos = strpos($string, '*/', 2);
            $last = $pos === false ? false : $pos + 2;
            $type = Token::TOKEN_TYPE_BLOCK_COMMENT;
        }

        // No terminator found: the comment runs to the end of the input
        if ($last === false) {
            $last = strlen($string);
        }

        return new Token($type, substr($string, 0, $last));
    }

    // Quoted String
    if ($string[0] === '"' || $string[0] === '\'' || $string[0] === '`' || $string[0] === '[') {
        return new Token(
            ($string[0] === '`' || $string[0] === '['
                ? Token::TOKEN_TYPE_BACKTICK_QUOTE
                : Token::TOKEN_TYPE_QUOTE),
            $this->getQuotedString($string)
        );
    }

    // User-defined Variable
    if (($string[0] === '@' || $string[0] === ':') && isset($string[1])) {
        $value = null;
        $type = Token::TOKEN_TYPE_VARIABLE;

        // If the variable name is quoted
        if ($string[1] === '"' || $string[1] === '\'' || $string[1] === '`') {
            $value = $string[0] . $this->getQuotedString(substr($string, 1));
        } else {
            // Non-quoted variable name
            preg_match('/^(' . $string[0] . '[a-zA-Z0-9\._\$]+)/', $string, $matches);
            if ($matches) {
                $value = $matches[1];
            }
        }

        // A lone "@" or ":" is not a variable; fall through to the checks below
        if ($value !== null) {
            return new Token($type, $value);
        }
    }

    // Number (decimal, binary, or hex). It must be followed by the end of the
    // input, whitespace, a quote character or a boundary. The quote characters
    // form a character class: as a bare sequence they would only ever match
    // the three characters "'` in a row.
    if (preg_match(
        '/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|["\'`]|' . $this->regexBoundaries . ')/',
        $string,
        $matches
    )) {
        return new Token(Token::TOKEN_TYPE_NUMBER, $matches[1]);
    }

    // Boundary Character (punctuation and symbols)
    if (preg_match('/^(' . $this->regexBoundaries . ')/', $string, $matches)) {
        return new Token(Token::TOKEN_TYPE_BOUNDARY, $matches[1]);
    }

    // Keywords are matched against the upper-cased input, but the token value
    // is cut from the original string so that its casing is preserved
    $upper = strtoupper($string);

    // A reserved word cannot be preceded by a '.'
    // this makes it so in "mytable.from", "from" is not considered a reserved word
    if (! $previous || $previous->value() !== '.') {
        // Checked in this order: top level, newline, then other reserved words
        $reservedKinds = [
            [$this->regexReservedToplevel, Token::TOKEN_TYPE_RESERVED_TOPLEVEL],
            [$this->regexReservedNewline, Token::TOKEN_TYPE_RESERVED_NEWLINE],
            [$this->regexReserved, Token::TOKEN_TYPE_RESERVED],
        ];

        foreach ($reservedKinds as $reservedKind) {
            $matched = preg_match(
                '/^(' . $reservedKind[0] . ')($|\s|' . $this->regexBoundaries . ')/',
                $upper,
                $matches
            );
            if ($matched) {
                return new Token($reservedKind[1], substr($string, 0, strlen($matches[1])));
            }
        }
    }

    // A function must be succeeded by '('
    // this makes it so "count(" is considered a function, but "count" alone is not.
    // The former "\s" and "[)]" alternatives are dropped: input starting with
    // whitespace or ")" has already been returned above, so they could never match.
    if (preg_match('/^' . $this->regexFunction . '[(]/', $upper, $matches)) {
        // $matches[1] is the function name only; the "(" becomes its own token
        return new Token(
            Token::TOKEN_TYPE_RESERVED,
            substr($string, 0, strlen($matches[1]))
        );
    }

    // Non reserved word: everything up to the next whitespace, quote or boundary
    preg_match('/^(.*?)($|\s|["\'`]|' . $this->regexBoundaries . ')/', $string, $matches);

    return new Token(Token::TOKEN_TYPE_WORD, $matches[1]);
}
/**
 * Escapes every string of a list for literal use inside a regular expression
 * delimited by "/". Array keys are preserved.
 *
 * @param string[] $strings The strings to be quoted
 *
 * @return string[] The quoted strings
 */
private function quoteRegex(array $strings) : array
{
    $quoted = [];
    foreach ($strings as $key => $string) {
        $quoted[$key] = preg_quote($string, '/');
    }

    return $quoted;
}
/**
 * Extracts the quoted string that starts at the beginning of the input.
 *
 * A string whose closing quote is missing is accepted when it runs to the
 * end of the input.
 *
 * @param string $string Input that begins with the opening quote character
 *
 * @return string The quoted string including its quote characters, or '' when nothing matches
 */
private function getQuotedString(string $string) : string
{
    // This checks for the following patterns:
    // 1. backtick quoted string using `` to escape
    // 2. square bracket quoted string (SQL Server) using ]] to escape
    // 3. double quoted string using "" or \" to escape
    // 4. single quoted string using '' or \' to escape
    // The pattern uses the "x" modifier, so the line breaks inside it are ignored.
    $pattern = '/^(((`[^`]*($|`))+)|
((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|
(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|
((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/sx';

    if (! preg_match($pattern, $string, $found)) {
        return '';
    }

    return $found[1];
}
}