-
Notifications
You must be signed in to change notification settings - Fork 59
Expand file tree
/
Copy pathclass-wp-mysql-naive-query-stream.php
More file actions
165 lines (145 loc) · 4.68 KB
/
class-wp-mysql-naive-query-stream.php
File metadata and controls
165 lines (145 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
<?php
/**
* Naively splits an SQL string into a sequence of queries. It
* streams the data so you can process very large chunks of SQL
* without running out of memory.
*
* This class is **naive** because it doesn't understand what a
* valid query is. The lexer does not provide a way to distinguish
* between a syntax error and an incomplete input yet. Lacking this
* information, we assume that no SQL query is larger than 2MB and,
* failing to extract a query from a 2MB buffer, we fail. This heuristic
* is often sufficient, but may fail in pathological cases.
*
* Usage:
*
* $stream = new WP_MySQL_Naive_Query_Stream();
* $stream->append_sql( 'SELECT id FROM users; SELECT * FROM posts;' );
* while ( $stream->next_query() ) {
* $sql_string = $stream->get_query();
* // Process the query.
* }
* $stream->append_sql( 'CREATE TABLE users (id INT, name VARCHAR(255));' );
* while ( $stream->next_query() ) {
* $sql_string = $stream->get_query();
* // Process the query.
* }
* $stream->mark_input_complete();
* $stream->next_query(); // returns false
*/
class WP_MySQL_Naive_Query_Stream {
private $sql_buffer = '';
private $input_complete = false;
private $state = true;
private $last_query = false;
const STATE_QUERY = 'valid';
const STATE_SYNTAX_ERROR = 'syntax_error';
const STATE_PAUSED_ON_INCOMPLETE_INPUT = 'paused_on_incomplete_input';
const STATE_FINISHED = 'finished';
/**
* The maximum size of the buffer to store the SQL input. We don't
* have enough information from the lexer to distinguish between
* an incomplete input and a syntax error so we use a heuristic –
* if we've accumulated more than this amount of SQL input, we assume
* it's a syntax error. That's why this class is called a "naive" query
* stream.
*/
const MAX_SQL_BUFFER_SIZE = 1024 * 1024 * 2;
public function __construct() {}
public function append_sql( string $sql ) {
if($this->input_complete) {
return false;
}
$this->sql_buffer .= $sql;
$this->state = self::STATE_QUERY;
return true;
}
public function is_paused_on_incomplete_input(): bool {
return $this->state === self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
}
public function mark_input_complete() {
$this->input_complete = true;
}
public function next_query() {
$this->last_query = false;
if($this->state === self::STATE_PAUSED_ON_INCOMPLETE_INPUT) {
return false;
}
$result = $this->do_next_query();
if(!$result && strlen($this->sql_buffer) > self::MAX_SQL_BUFFER_SIZE) {
$this->state = self::STATE_SYNTAX_ERROR;
return false;
}
return $result;
}
private function do_next_query() {
$query = [];
$lexer = new WP_MySQL_Lexer( $this->sql_buffer );
while ( $lexer->next_token() ) {
$token = $lexer->get_token();
$query[] = $token;
if ( $token->id === WP_MySQL_Lexer::SEMICOLON_SYMBOL ) {
// Got a complete query!
break;
}
}
// @TODO: expose this method from the lexer
// if($lexer->get_state() === WP_MySQL_Lexer::STATE_SYNTAX_ERROR) {
// return false;
// }
if(!count($query)) {
if ( $this->input_complete ) {
$this->state = self::STATE_FINISHED;
} else {
$this->state = self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
}
return false;
}
// The last token either needs to end with a semicolon, or be the
// last token in the input.
$last_token = $query[count($query) - 1];
if (
$last_token->id !== WP_MySQL_Lexer::SEMICOLON_SYMBOL &&
! $this->input_complete
) {
$this->state = self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
return false;
}
// See if the query has any meaningful tokens. We don't want to return
// to give the caller a comment disguised as a query.
$has_meaningful_tokens = false;
foreach($query as $token) {
if (
$token->id !== WP_MySQL_Lexer::WHITESPACE &&
$token->id !== WP_MySQL_Lexer::COMMENT &&
$token->id !== WP_MySQL_Lexer::MYSQL_COMMENT_START &&
$token->id !== WP_MySQL_Lexer::MYSQL_COMMENT_END &&
$token->id !== WP_MySQL_Lexer::EOF
) {
$has_meaningful_tokens = true;
break;
}
}
if(!$has_meaningful_tokens) {
if ( $this->input_complete ) {
$this->state = self::STATE_FINISHED;
} else {
$this->state = self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
}
return false;
}
// Remove the query from the input buffer and return it.
$last_byte = $last_token->start + $last_token->length;
$query = substr($this->sql_buffer, 0, $last_byte);
$this->sql_buffer = substr($this->sql_buffer, $last_byte);
$this->last_query = $query;
$this->state = self::STATE_QUERY;
return true;
}
public function get_query() {
return $this->last_query;
}
public function get_state() {
return $this->state;
}
}