elen;
bool afterescape = false;
int nquotes = 0;
- int charclass_depth = 0; /* Nesting level of character classes,
- * encompassed by square brackets */
- int charclass_start = 0; /* State of the character class start,
- * for carets */
+ int bracket_depth = 0; /* square bracket nesting level */
+ int charclass_pos = 0; /* position inside a character class */
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
* the relevant part separators in the above expansion. If the result
* of this function is used in a plain regexp match (SIMILAR TO), the
* escape-double-quotes have no effect on the match behavior.
+ *
+ * While we don't fully validate character classes (bracket expressions),
+ * we do need to parse them well enough to know where they end.
+ * "charclass_pos" tracks where we are in a character class.
+ * Its value is uninteresting when bracket_depth is 0.
+ * But when bracket_depth > 0, it will be
+ * 1: right after the opening '[' (a following '^' will negate
+ * the class, while ']' is a literal character)
+ * 2: right after a '^' after the opening '[' (']' is still a literal
+ * character)
+ * 3 or more: further inside the character class (']' ends the class)
*----------
*/
/* fast path */
if (afterescape)
{
- if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */
+ if (pchar == '"' && bracket_depth < 1) /* escape-double-quote? */
{
/* emit appropriate part separator, per notes above */
if (nquotes == 0)
*/
*r++ = '\\';
*r++ = pchar;
+
+ /*
+ * If we encounter an escaped character in a character class,
+ * we are no longer at the beginning.
+ */
+ charclass_pos = 3;
}
afterescape = false;
}
/* SQL escape character; do not send to output */
afterescape = true;
}
- else if (charclass_depth > 0)
+ else if (bracket_depth > 0)
{
+ /* inside a character class */
if (pchar == '\\')
+ {
+ /*
+ * If we're here, backslash is not the SQL escape character,
+ * so treat it as a literal class element, which requires
+ * doubling it. (This matches our behavior for backslashes
+ * outside character classes.)
+ */
*r++ = '\\';
+ }
*r++ = pchar;
- /*
- * Ignore a closing bracket at the start of a character class.
- * Such a bracket is taken literally rather than closing the
- * class. "charclass_start" is 1 right at the beginning of a
- * class and 2 after an initial caret.
- */
- if (pchar == ']' && charclass_start > 2)
- charclass_depth--;
+ /* parse the character class well enough to identify ending ']' */
+ if (pchar == ']' && charclass_pos > 2)
+ {
+ /* found the real end of a bracket pair */
+ bracket_depth--;
+ /* don't reset charclass_pos, this may be an inner bracket */
+ }
else if (pchar == '[')
- charclass_depth++;
+ {
+ /* start of a nested bracket pair */
+ bracket_depth++;
- /*
- * If there is a caret right after the opening bracket, it negates
- * the character class, but a following closing bracket should
- * still be treated as a normal character. That holds only for
- * the first caret, so only the values 1 and 2 mean that closing
- * brackets should be taken literally.
- */
- if (pchar == '^')
- charclass_start++;
+ /*
+ * We are no longer at the beginning of a character class.
+ * (The nested bracket pair is a collating element, not a
+ * character class in its own right.)
+ */
+ charclass_pos = 3;
+ }
+ else if (pchar == '^')
+ {
+ /*
+ * A caret right after the opening bracket negates the
+ * character class. In that case, the following will
+ * increment charclass_pos from 1 to 2, so that a following
+ * ']' is still a literal character and does not end the
+ * character class. If we are further inside a character
+ * class, charclass_pos might get incremented past 3, which is
+ * fine.
+ */
+ charclass_pos++;
+ }
else
- charclass_start = 3; /* definitely past the start */
+ {
+ /*
+ * Anything else (including a backslash or leading ']') is an
+ * element of the character class, so we are no longer at the
+ * beginning of the class.
+ */
+ charclass_pos = 3;
+ }
}
else if (pchar == '[')
{
/* start of a character class */
*r++ = pchar;
- charclass_depth++;
- charclass_start = 1;
+ bracket_depth = 1;
+ charclass_pos = 1;
}
else if (pchar == '%')
{