Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
660438e
Fix UNION grouping for parenthesized queries with DISTINCT->ALL trans…
kyleconroy Jan 15, 2026
cbc4d68
Handle UUID clause in CREATE MATERIALIZED VIEW
kyleconroy Jan 15, 2026
734ba84
Handle SETTINGS/COMMENT order in CREATE TABLE explain output
kyleconroy Jan 15, 2026
6216399
Support parameterized functions in APPLY column transformers
kyleconroy Jan 15, 2026
99534b0
Support ORDER BY in CREATE DATABASE and multiple SETTINGS clauses
kyleconroy Jan 15, 2026
28717df
Support RENAME DATABASE statement
kyleconroy Jan 15, 2026
0855448
Parse dictionary SETTINGS clause and output as Dictionary settings
kyleconroy Jan 15, 2026
17fe6ac
Handle trailing comma in IN expressions as single-element tuple
kyleconroy Jan 15, 2026
2fa01e3
Fix negative number with cast in BETWEEN expressions
kyleconroy Jan 15, 2026
f304b1d
Support underscores in binary and octal literals
kyleconroy Jan 15, 2026
6ad7b56
Handle TernaryExpr with alias in WITH clause
kyleconroy Jan 15, 2026
6046d60
Handle SHOW CHANGED SETTINGS variant in parser (#117)
kyleconroy Jan 15, 2026
4576008
Remove redundant explain_todo for clientError SYNTAX_ERROR stmt (#118)
kyleconroy Jan 15, 2026
5e72b8a
Add short interval unit notations h, m, s, d, w (#119)
kyleconroy Jan 15, 2026
eb9f9a9
Fix ternary operator precedence to be lower than AND (#120)
kyleconroy Jan 15, 2026
96db89a
Strip trailing OK from ClickHouse EXPLAIN output in tests (#121)
kyleconroy Jan 15, 2026
7bf233f
Eliminate unary plus from AST (no-op in ClickHouse) (#122)
kyleconroy Jan 15, 2026
0d9f7c9
Add ALTER TABLE MODIFY QUERY support (#123)
kyleconroy Jan 15, 2026
917864d
Escape single quotes in function aliases for EXPLAIN output
kyleconroy Jan 15, 2026
599dee5
Skip EXPLAIN tests for statements with --{clientError annotations
kyleconroy Jan 15, 2026
9261423
Handle escape sequences in backtick identifiers and sanitize invalid UTF-8
kyleconroy Jan 15, 2026
619c89d
Always output CASE expression alias in EXPLAIN output
kyleconroy Jan 15, 2026
03c44b0
Fix column declaration child order in EXPLAIN output
kyleconroy Jan 15, 2026
75354e3
Handle PARTITION ID syntax in APPLY DELETED MASK command
kyleconroy Jan 15, 2026
67ba7b0
Support FORCE keyword in OPTIMIZE TABLE statement
kyleconroy Jan 15, 2026
317d037
Handle parenthesized literals in nested arrays for EXPLAIN output
kyleconroy Jan 15, 2026
18a6990
Handle USE DATABASE syntax and improve clientError detection
kyleconroy Jan 15, 2026
1e10e2e
Output WINDOW clause before QUALIFY in SelectQuery EXPLAIN
kyleconroy Jan 15, 2026
5b6ea17
Format function calls in AggregateFunction type parameters
kyleconroy Jan 15, 2026
c370f54
Handle QueryParameter with alias in EXPLAIN output
kyleconroy Jan 15, 2026
35d1e5f
Handle FROM (SELECT...) as clause keyword after trailing comma
kyleconroy Jan 15, 2026
613e3e5
Output GROUP BY before ORDER BY in ProjectionSelectQuery
kyleconroy Jan 15, 2026
5121d5c
Fix EPHEMERAL column parsing to not consume COMMENT keyword
kyleconroy Jan 15, 2026
b218c3d
Support CREATE INDEX expression without parentheses
kyleconroy Jan 15, 2026
cbde83c
Fix SYSTEM FLUSH DISTRIBUTED table name parsing
kyleconroy Jan 15, 2026
8738b5d
Add support for ALTER TABLE DROP DETACHED PARTITION
kyleconroy Jan 15, 2026
a9e48cb
Handle IF NOT EXISTS in CREATE WORKLOAD parsing
kyleconroy Jan 15, 2026
20d3016
Allow keywords as column names in ALTER TABLE DROP COLUMN
kyleconroy Jan 15, 2026
50d0966
Handle LIKE expression alias in WITH clause
kyleconroy Jan 15, 2026
b7d0a23
Add backtick quoting for special characters in type parameters
kyleconroy Jan 15, 2026
a26db45
Fix DESCRIBE parsing to handle SETTINGS after FORMAT
kyleconroy Jan 15, 2026
e471e45
Handle PARTITION ID syntax in UPDATE mutation commands
kyleconroy Jan 15, 2026
5e4634c
Add FROM clause parsing for ATTACH TABLE
kyleconroy Jan 15, 2026
9564725
Handle SYNC keyword token in KILL QUERY parsing
kyleconroy Jan 15, 2026
c34e702
Add ON CLUSTER clause parsing to DELETE statement
kyleconroy Jan 15, 2026
3da9497
Fix CRLF line ending comparison in explain tests
kyleconroy Jan 15, 2026
9a96fab
Allow SYNC keyword as implicit alias in expressions
kyleconroy Jan 15, 2026
bcc22f3
Handle ASSUME keyword in ALTER TABLE ADD CONSTRAINT
kyleconroy Jan 15, 2026
75af8d1
Fix SETTINGS clause parsing after MODIFY COLUMN REMOVE
kyleconroy Jan 15, 2026
d8ab7eb
Fix EXPLAIN children count when both options and SETTINGS present
kyleconroy Jan 15, 2026
3d7ed55
Skip FINAL keyword in DESCRIBE to parse SETTINGS clause
kyleconroy Jan 15, 2026
e72bb31
Allow keywords as column names in ALTER UPDATE assignments
kyleconroy Jan 15, 2026
befdafd
Handle alias on IS NULL expressions in explain output
kyleconroy Jan 15, 2026
050d0b2
Allow NOT NULL constraint after DEFAULT expression
kyleconroy Jan 15, 2026
04d68a5
Preserve function name case from SQL source in EXPLAIN AST output
kyleconroy Jan 15, 2026
5ec3094
Handle DISTINCT modifier in parametric function calls
kyleconroy Jan 15, 2026
242667a
Handle implicit aliases in projection SELECT column parsing
kyleconroy Jan 15, 2026
aed95c0
Map CLEAR_PROJECTION to DROP_PROJECTION in EXPLAIN AST output
kyleconroy Jan 15, 2026
5bb6e5e
Recursively check nested arrays for non-literal expressions in EXPLAI…
kyleconroy Jan 15, 2026
d7b6e81
Parse ON CLUSTER before column definitions in CREATE MATERIALIZED VIEW
kyleconroy Jan 15, 2026
353708c
Parse and output COMMENT clause for CREATE DICTIONARY
kyleconroy Jan 15, 2026
cc208ea
Parse MOVE PARTITION TO DISK/VOLUME syntax in ALTER statements
kyleconroy Jan 15, 2026
4b241a1
Add SETTINGS clause support for SYSTEM queries
kyleconroy Jan 15, 2026
d928cce
Add duplicate table output for LOAD/UNLOAD PRIMARY KEY commands
kyleconroy Jan 15, 2026
98e6e8e
Add TRUNCATE DATABASE support
kyleconroy Jan 15, 2026
90ff84d
Add REMOVE TTL support for ALTER TABLE
kyleconroy Jan 15, 2026
d7968a1
Add KILL QUERY SETTINGS support and fix operator mapping
kyleconroy Jan 15, 2026
37d572e
Remove incorrect concat_ws to concat normalization
kyleconroy Jan 15, 2026
a874217
Add IF EXISTS support for RENAME COLUMN in ALTER TABLE
kyleconroy Jan 15, 2026
3956e3f
Preserve tuple SpacedCommas flag in EXPLAIN AST output
kyleconroy Jan 15, 2026
26a512c
Handle all-NULL tuple literals in IN expression EXPLAIN output
kyleconroy Jan 15, 2026
8941020
Continue parsing binary operators after parenthesized ORDER BY expres…
kyleconroy Jan 15, 2026
8790477
Handle INSERT VALUES followed by SELECT on same line (#118)
kyleconroy Jan 15, 2026
8bd61bf
Fix TTL SET clause lookahead to use peekPeek instead of consuming tok…
kyleconroy Jan 15, 2026
5a31cd5
Fix INTERVAL parsing to stop before AND operators (#120)
kyleconroy Jan 15, 2026
aa1b527
Add support for REFRESH clause in CREATE MATERIALIZED VIEW (#121)
kyleconroy Jan 15, 2026
5deac6f
Fix Settings['key'] map access being confused with SETTINGS clause (#…
kyleconroy Jan 15, 2026
caaaad4
Handle double-paren grouping sets as Function tuple (#123)
kyleconroy Jan 15, 2026
5392e89
Fix INTERVAL parsing to handle both embedded and separate units
kyleconroy Jan 15, 2026
a273941
Add support for CREATE WINDOW VIEW parsing
kyleconroy Jan 15, 2026
7a62b45
Support binary expression WITH clauses like (SELECT ...) + (SELECT ..…
kyleconroy Jan 15, 2026
e99acd5
Fix CAST parsing to handle expression type arguments like 'Str'||'ing'
kyleconroy Jan 15, 2026
294c76b
Fix REPLACE transformer consuming comma from SELECT clause
kyleconroy Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Handle escape sequences in backtick identifiers and sanitize invalid UTF-8

1. Lexer: Process escape sequences (\xFF, \0, etc.) in backtick-quoted
   identifiers, matching the behavior of string literals.

2. Explain output:
   - Replace invalid UTF-8 bytes with replacement character (U+FFFD)
   - Display null bytes as escape sequence \0
   - Escape backslashes and single quotes in identifier/alias output

3. Apply sanitization to:
   - Column declarations
   - Identifier names
   - Function aliases
   - Storage definition ORDER BY identifiers

This matches ClickHouse's EXPLAIN AST behavior for handling special
characters in identifiers.

Fixes test: 03356_tables_with_binary_identifiers_invalid_utf8/stmt2

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
  • Loading branch information
kyleconroy and claude committed Jan 15, 2026
commit 9261423ac6624fd5f49f5372ff20ff937c987447
4 changes: 2 additions & 2 deletions internal/explain/explain.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
children++
}
if children > 0 {
fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children)
fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, sanitizeUTF8(col.Name), children)
} else {
fmt.Fprintf(sb, "%sColumnDeclaration %s\n", indent, col.Name)
fmt.Fprintf(sb, "%sColumnDeclaration %s\n", indent, sanitizeUTF8(col.Name))
}
if col.Type != nil {
Node(sb, col.Type, depth+1)
Expand Down
58 changes: 53 additions & 5 deletions internal/explain/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,48 @@ import (
"fmt"
"strconv"
"strings"
"unicode/utf8"

"github.com/sqlc-dev/doubleclick/ast"
)

// sanitizeUTF8 rewrites s for display in EXPLAIN AST output, matching how
// ClickHouse shows special bytes there:
//   - every byte that is not part of a valid UTF-8 encoding becomes the
//     Unicode replacement character (U+FFFD);
//   - every null byte becomes the two-character escape sequence \0.
//
// A string that is already valid UTF-8 and contains no null byte is returned
// unchanged.
func sanitizeUTF8(s string) string {
	// Fast path: nothing to rewrite, so hand back the input as-is.
	if utf8.ValidString(s) && strings.IndexByte(s, 0) < 0 {
		return s
	}

	var out strings.Builder
	// Ranging over a string decodes one rune per step. For a byte that does
	// not begin a valid encoding the loop yields U+FFFD and advances by a
	// single byte, so each invalid byte maps to exactly one replacement rune.
	for _, r := range s {
		if r == 0 {
			// Null byte - emit the textual escape instead of the raw byte.
			out.WriteString("\\0")
			continue
		}
		out.WriteRune(r)
	}
	return out.String()
}

// escapeAlias escapes backslashes and single quotes in alias names for EXPLAIN output
func escapeAlias(alias string) string {
// Escape backslashes first, then single quotes
Expand All @@ -25,21 +63,31 @@ func explainIdentifier(sb *strings.Builder, n *ast.Identifier, indent string) {
}
}

// formatIdentifierName formats an identifier name, handling JSON path notation
// escapeIdentifierPart prepares a single identifier part for EXPLAIN output.
// It first passes s through sanitizeUTF8, then doubles every backslash, and
// finally prefixes every single quote with a backslash.
//
// Because sanitization runs before backslash escaping, a backslash that
// sanitizeUTF8 emits (for example in its \0 escape for a null byte) is
// doubled here as well.
func escapeIdentifierPart(s string) string {
	cleaned := sanitizeUTF8(s)
	// Backslashes must be doubled before quotes are escaped; otherwise the
	// backslash added in front of each quote would itself be doubled.
	withBackslashes := strings.ReplaceAll(cleaned, "\\", "\\\\")
	return strings.ReplaceAll(withBackslashes, "'", "\\'")
}

// formatIdentifierName formats an identifier name, handling JSON path notation,
// sanitizing invalid UTF-8 bytes, and escaping special characters
func formatIdentifierName(n *ast.Identifier) string {
if len(n.Parts) == 0 {
return ""
}
if len(n.Parts) == 1 {
return n.Parts[0]
return escapeIdentifierPart(n.Parts[0])
}
result := n.Parts[0]
result := escapeIdentifierPart(n.Parts[0])
for _, p := range n.Parts[1:] {
// JSON path notation: ^fieldname should be formatted as ^`fieldname`
if strings.HasPrefix(p, "^") {
result += ".^`" + p[1:] + "`"
result += ".^`" + escapeIdentifierPart(p[1:]) + "`"
} else {
result += "." + p
result += "." + escapeIdentifierPart(p)
}
}
return result
Expand Down
8 changes: 4 additions & 4 deletions internal/explain/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (
"github.com/sqlc-dev/doubleclick/ast"
)

// escapeFunctionAlias escapes single quotes in function alias names.
// Unlike escapeAlias (for column aliases), this does NOT escape backslashes
// since ClickHouse EXPLAIN AST preserves backslashes in function aliases.
// escapeFunctionAlias escapes backslashes and single quotes in function alias names.
// This is needed because the lexer processes escape sequences in backtick identifiers.
func escapeFunctionAlias(alias string) string {
return strings.ReplaceAll(alias, "'", "\\'")
result := strings.ReplaceAll(alias, "\\", "\\\\")
return strings.ReplaceAll(result, "'", "\\'")
}

// normalizeIntervalUnit converts interval units to title-cased singular form
Expand Down
4 changes: 2 additions & 2 deletions internal/explain/statements.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,9 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
// When ORDER BY has modifiers (ASC/DESC), wrap in StorageOrderByElement
if n.OrderByHasModifiers {
fmt.Fprintf(sb, "%s StorageOrderByElement (children %d)\n", storageIndent, 1)
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, ident.Name())
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, sanitizeUTF8(ident.Name()))
} else {
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, ident.Name())
fmt.Fprintf(sb, "%s Identifier %s\n", storageIndent, sanitizeUTF8(ident.Name()))
}
} else if lit, ok := n.OrderBy[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple {
// Handle tuple literal - for ORDER BY with modifiers (DESC/ASC),
Expand Down
57 changes: 57 additions & 0 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,63 @@ func (l *Lexer) readBacktickIdentifier() Item {
l.readChar() // skip closing backtick
break
}
if l.ch == '\\' {
l.readChar() // consume backslash
if l.eof {
break
}
// Interpret escape sequence (same as readString)
switch l.ch {
case '\'':
sb.WriteRune('\'')
case '"':
sb.WriteRune('"')
case '\\':
sb.WriteRune('\\')
case '`':
sb.WriteRune('`')
case 'n':
sb.WriteRune('\n')
case 't':
sb.WriteRune('\t')
case 'r':
sb.WriteRune('\r')
case '0':
sb.WriteRune('\x00')
case 'a':
sb.WriteRune('\a')
case 'b':
sb.WriteRune('\b')
case 'f':
sb.WriteRune('\f')
case 'v':
sb.WriteRune('\v')
case 'e':
sb.WriteRune('\x1b') // escape character (ASCII 27)
case 'x':
// Hex escape: \xNN
l.readChar()
if l.eof {
break
}
hex1 := l.ch
l.readChar()
if l.eof {
sb.WriteRune(rune(hexValue(hex1)))
continue
}
hex2 := l.ch
// Convert hex digits to byte
val := hexValue(hex1)*16 + hexValue(hex2)
sb.WriteByte(byte(val))
default:
// Unknown escape, preserve both the backslash and the character
sb.WriteRune('\\')
sb.WriteRune(l.ch)
}
l.readChar()
continue
}
sb.WriteRune(l.ch)
l.readChar()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
{
"explain_todo": {
"stmt2": true
}
}
{}