Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion fyi/semgrep-grammars/src/semgrep-java/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ module.exports = grammar(base_grammar, {

// this is from adding toplevel_explicit_constructor_invocation
[$.type_parameter, $._unannotated_type],

// LANG-467: A bare `$...META` (or `...`) after `{` could be parsed as
// either an enum-constants list element or as the body of
// `enum_body_declarations`. GLR resolves this; the parses are
// operationally equivalent for Semgrep's purposes.
[$.enum_body, $.enum_body_declarations],
]),

rules: {
Expand All @@ -65,9 +71,43 @@ module.exports = grammar(base_grammar, {
$.toplevel_explicit_constructor_invocation,
),

// LANG-467: Allow `...` and `$...META` in the enum-constants list, and
// allow `$...META` (in addition to `...`) inside the post-`;` body region.
// Override `enum_body` wholesale since the constants list itself needs
// ellipsis support, which can't be reached via a `previous`-style
// augmentation.
enum_body: $ => seq(
'{',
commaSep(choice(
$.enum_constant,
$.semgrep_ellipsis,
$.semgrep_named_ellipsis,
)),
optional(','),
optional($.enum_body_declarations),
'}',
),

enum_body_declarations: ($, previous) => choice(
previous,
$.semgrep_ellipsis,
$.semgrep_named_ellipsis,
),

// LANG-466: allow `...` (and `$...META`) interleaved with arrow-rule
// switch entries. The colon-form arm already supports ellipsis via the
// `statement` augmentation; the arrow-rule arm needs explicit support.
switch_block: $ => seq(
'{',
choice(
repeat($.switch_block_statement_group),
repeat(choice(
$.switch_rule,
$.semgrep_ellipsis,
$.semgrep_named_ellipsis,
)),
),
'}',
),

// This is a copy of `explicit_constructor_invocation`,
Expand Down Expand Up @@ -111,10 +151,13 @@ module.exports = grammar(base_grammar, {
'}',
),

// So we can put ellipses within annotation type declarations
// So we can put ellipses within annotation type declarations.
// LANG-469: also allow `$...META` so `@interface $A { $...ELEMENTS }`
// parses cleanly.
annotation_type_element_declaration: ($, previous) => choice(
previous,
$.semgrep_ellipsis,
$.semgrep_named_ellipsis,
),

semgrep_ellipsis: $ => '...',
Expand Down Expand Up @@ -173,9 +216,12 @@ module.exports = grammar(base_grammar, {
field('field', choice($.identifier, $._reserved_identifier, $.this, '...')),
),

// LANG-469: also allow `$...META` so patterns like
// `@$ANN(value = "...", $...PROPS)` parse cleanly.
element_value_pair: ($, previous) => choice(
previous,
$.semgrep_ellipsis,
$.semgrep_named_ellipsis,
),


Expand Down
27 changes: 21 additions & 6 deletions fyi/versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,27 @@ Last change in file:
feat: allow reserved identifiers in `element_value_pair`
---
File: semgrep-grammars/src/semgrep-java/grammar.js
Git repo name: ocaml-tree-sitter-semgrep
Latest commit in repo: 78b9c78b6f1e98cd1f1d51d1436dd8b99ac617a7
Git repo name: agent-a1e125b44664eae6a
Latest commit in repo: 80708da730e76d4fb9aea75470297ae83b28324b
Last change in file:
commit 78b9c78b6f1e98cd1f1d51d1436dd8b99ac617a7
Author: brandonspark <wu.brandonj@gmail.com>
Date: Mon Jun 23 17:14:11 2025 -0700
commit 80708da730e76d4fb9aea75470297ae83b28324b
Author: brandonspark <brandon@semgrep.com>
Date: Wed Apr 29 17:26:27 2026 -0700

allow no smemicolon in toplevel typed metavariable decl
fix(java): allow Semgrep ellipsis in switch/enum/annotation positions

Augments the Java grammar so canonical Semgrep patterns parse cleanly:

- LANG-466: `...` (and `$...META`) between arrow-rule entries in
`switch_block`. The colon-form arm already gets ellipsis via the
`statement` augmentation; this extends the arrow-rule arm.
- LANG-467: `...` and `$...META` inside the enum-constants list of
`enum_body`, plus `$...META` in `enum_body_declarations` (`...` was
already allowed). Adds a GLR conflict between `enum_body` and
`enum_body_declarations` to disambiguate a bare ellipsis after `{`.
- LANG-469: `$...META` as an entry in `annotation_argument_list` (via
`element_value_pair`) and as an annotation type element declaration in
`@interface` bodies (via `annotation_type_element_declaration`).

Adds 5 corpus tests covering the minimal ticket repros.
---
93 changes: 62 additions & 31 deletions lib/Boilerplate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,46 @@ and map_annotation_type_element_declaration (env : env) (x : CST.annotation_type
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Semg_named_ellips tok -> R.Case ("Semg_named_ellips",
(* pattern \$\.\.\.[A-Z_][A-Z_0-9]* *) token env tok
)
)

(* Convert one entry of the enum-constants list into an [R.Case] tagged with
   the name of the matching constructor: a full enum constant, the plain
   Semgrep ellipsis token, or the named Semgrep ellipsis token. *)
and map_anon_choice_enum_cst_1653aac (env : env) (x : CST.anon_choice_enum_cst_1653aac) =
  (* Map an optional child with [f] and wrap the outcome in [R.Option]. *)
  let map_opt f = function
    | None -> R.Option None
    | Some child -> R.Option (Some (f env child))
  in
  match x with
  | `Enum_cst (mods, name, args, body) ->
      (* The four children are mapped in their source order. *)
      let mods = map_opt map_modifiers mods in
      let name =
        (* pattern [\p{XID_Start}_$][\p{XID_Continue}\u00A2_$]* *) token env name
      in
      let args = map_opt map_argument_list args in
      let body = map_opt map_class_body body in
      R.Case ("Enum_cst", R.Tuple [mods; name; args; body])
  | `Semg_ellips tok ->
      R.Case ("Semg_ellips", (* "..." *) token env tok)
  | `Semg_named_ellips tok ->
      R.Case ("Semg_named_ellips",
        (* pattern \$\.\.\.[A-Z_][A-Z_0-9]* *) token env tok)

and map_anon_choice_formal_param_3e261ef (env : env) (x : CST.anon_choice_formal_param_3e261ef) =
Expand Down Expand Up @@ -1134,6 +1174,9 @@ and map_element_value_pair (env : env) (x : CST.element_value_pair) =
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Semg_named_ellips tok -> R.Case ("Semg_named_ellips",
(* pattern \$\.\.\.[A-Z_][A-Z_0-9]* *) token env tok
)
)

and map_enhanced_for_statement (env : env) ((v1, v2, v3, v4, v5, v6, v7, v8, v9) : CST.enhanced_for_statement) =
Expand All @@ -1159,11 +1202,11 @@ and map_enum_body (env : env) ((v1, v2, v3, v4, v5) : CST.enum_body) =
let v2 =
(match v2 with
| Some (v1, v2) -> R.Option (Some (
let v1 = map_enum_constant env v1 in
let v1 = map_anon_choice_enum_cst_1653aac env v1 in
let v2 =
R.List (List.map (fun (v1, v2) ->
let v1 = (* "," *) token env v1 in
let v2 = map_enum_constant env v2 in
let v2 = map_anon_choice_enum_cst_1653aac env v2 in
R.Tuple [v1; v2]
) v2)
in
Expand Down Expand Up @@ -1200,35 +1243,11 @@ and map_enum_body_declarations (env : env) (x : CST.enum_body_declarations) =
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Semg_named_ellips tok -> R.Case ("Semg_named_ellips",
(* pattern \$\.\.\.[A-Z_][A-Z_0-9]* *) token env tok
)
)

(* Convert an enum constant into an [R.Tuple] of its four children:
   optional modifiers, the identifier token, an optional argument list and
   an optional class body. *)
and map_enum_constant (env : env) ((mods, name, args, body) : CST.enum_constant) =
  (* Map an optional child with [f] and wrap the outcome in [R.Option]. *)
  let map_opt f = function
    | None -> R.Option None
    | Some child -> R.Option (Some (f env child))
  in
  (* The four children are mapped in their source order. *)
  let mods = map_opt map_modifiers mods in
  let name =
    (* pattern [\p{XID_Start}_$][\p{XID_Continue}\u00A2_$]* *) token env name
  in
  let args = map_opt map_argument_list args in
  let body = map_opt map_class_body body in
  R.Tuple [mods; name; args; body]

and map_enum_declaration (env : env) ((v1, v2, v3, v4, v5) : CST.enum_declaration) =
let v1 =
(match v1 with
Expand Down Expand Up @@ -2378,8 +2397,20 @@ and map_switch_block (env : env) ((v1, v2, v3) : CST.switch_block) =
| `Rep_switch_blk_stmt_group xs -> R.Case ("Rep_switch_blk_stmt_group",
R.List (List.map (map_switch_block_statement_group env) xs)
)
| `Rep_switch_rule xs -> R.Case ("Rep_switch_rule",
R.List (List.map (map_switch_rule env) xs)
| `Rep_choice_switch_rule xs -> R.Case ("Rep_choice_switch_rule",
R.List (List.map (fun x ->
(match x with
| `Switch_rule x -> R.Case ("Switch_rule",
map_switch_rule env x
)
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Semg_named_ellips tok -> R.Case ("Semg_named_ellips",
(* pattern \$\.\.\.[A-Z_][A-Z_0-9]* *) token env tok
)
)
) xs)
)
)
in
Expand Down
41 changes: 31 additions & 10 deletions lib/CST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@ and annotation_type_element_declaration = [
* Token.t (* ";" *)
)
| `Semg_ellips of Token.t (* "..." *)
| `Semg_named_ellips of semgrep_named_ellipsis (*tok*)
]

(* One entry of the comma-separated list at the start of an enum body.
   It is either an enum constant (optional modifiers, identifier, optional
   argument list, optional class body), the plain Semgrep ellipsis token, or
   the named Semgrep ellipsis token. *)
and anon_choice_enum_cst_1653aac = [
`Enum_cst of (
modifiers option
* identifier (*tok*)
* argument_list option
* class_body option
)
| `Semg_ellips of Token.t (* "..." *)
| `Semg_named_ellips of semgrep_named_ellipsis (*tok*)
]

and anon_choice_formal_param_3e261ef = [
Expand Down Expand Up @@ -509,6 +521,7 @@ and element_value_pair = [
anon_choice_id_662bcdc * Token.t (* "=" *) * element_value
)
| `Semg_ellips of Token.t (* "..." *)
| `Semg_named_ellips of semgrep_named_ellipsis (*tok*)
]

and enhanced_for_statement = (
Expand All @@ -526,8 +539,9 @@ and enhanced_for_statement = (
and enum_body = (
Token.t (* "{" *)
* (
enum_constant
* (Token.t (* "," *) * enum_constant) list (* zero or more *)
anon_choice_enum_cst_1653aac
* (Token.t (* "," *) * anon_choice_enum_cst_1653aac)
list (* zero or more *)
)
option
* Token.t (* "," *) option
Expand All @@ -541,15 +555,9 @@ and enum_body_declarations = [
* class_body_declaration list (* zero or more *)
)
| `Semg_ellips of Token.t (* "..." *)
| `Semg_named_ellips of semgrep_named_ellipsis (*tok*)
]

(* An enum constant: optional modifiers, the identifier token, an optional
   argument list and an optional class body, in that order. *)
and enum_constant = (
modifiers option
* identifier (*tok*)
* argument_list option
* class_body option
)

and enum_declaration = (
modifiers option
* Token.t (* "enum" *)
Expand Down Expand Up @@ -1023,7 +1031,13 @@ and switch_block = (
* [
`Rep_switch_blk_stmt_group of
switch_block_statement_group list (* zero or more *)
| `Rep_switch_rule of switch_rule list (* zero or more *)
| `Rep_choice_switch_rule of
[
`Switch_rule of switch_rule
| `Semg_ellips of Token.t (* "..." *)
| `Semg_named_ellips of semgrep_named_ellipsis (*tok*)
]
list (* zero or more *)
]
* Token.t (* "}" *)
)
Expand Down Expand Up @@ -1374,6 +1388,13 @@ type element_value_array_initializer (* inlined *) = (
* Token.t (* "}" *)
)

(* An enum constant: optional modifiers, the identifier token, an optional
   argument list and an optional class body, in that order.
   NOTE(review): the generator marks this type as inlined; presumably it is
   kept only as a named alias for the tuple that now appears directly as the
   payload of the `Enum_cst case - confirm against the generator output. *)
type enum_constant (* inlined *) = (
modifiers option
* identifier (*tok*)
* argument_list option
* class_body option
)

type instanceof_expression (* inlined *) = (
expression
* Token.t (* "instanceof" *)
Expand Down
Loading