From dadeb7e67953a356ffcad5e487aba3779908fb7a Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sat, 7 Mar 2026 11:51:57 -0500
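Subject: [PATCH 01/28] Use an arena for parser metadata

Allocate comments, magic comments, diagnostics, and line offsets from a
new parser-owned metadata_arena that pm_parser_free releases with a
single pm_arena_free call, replacing the per-object xcalloc/xfree_sized
paths and the per-list free functions. The init/append helpers now take
the arena and return void; the converted call sites no longer check for
allocation failure and rely on pm_arena_alloc/pm_arena_zalloc not
returning NULL.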
---
 include/prism/parser.h                   |   3 +
 include/prism/util/pm_char.h             |   2 +-
 include/prism/util/pm_line_offset_list.h |  23 ++---
 src/prism.c                              | 105 ++++++++---------------
 src/util/pm_char.c                       |   4 +-
 src/util/pm_line_offset_list.c           |  42 +++------
 src/util/pm_strpbrk.c                    |   4 +-
 templates/include/prism/diagnostic.h.erb |  23 ++---
 templates/src/diagnostic.c.erb           |  45 ++--------
 9 files changed, 77 insertions(+), 174 deletions(-)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index d8e7a550e7..caa08538c6 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -639,6 +639,9 @@ struct pm_parser {
     /** The arena used for all AST-lifetime allocations. Caller-owned. */
     pm_arena_t *arena;
 
+    /** The arena used for parser metadata (comments, diagnostics, etc.). Parser-owned; freed by pm_parser_free. */
+    pm_arena_t metadata_arena;
+
     /**
      * The next node identifier that will be assigned. This is a unique
      * identifier used to track nodes such that the syntax tree can be dropped
diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h
index ab1f513a66..f9a556cabe 100644
--- a/include/prism/util/pm_char.h
+++ b/include/prism/util/pm_char.h
@@ -36,7 +36,7 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
  * @return The number of characters at the start of the string that are
  *     whitespace.
  */
-size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_offset_list_t *line_offsets, uint32_t start_offset);
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset);
 
 /**
  * Returns the number of characters at the start of the string that are inline
diff --git a/include/prism/util/pm_line_offset_list.h b/include/prism/util/pm_line_offset_list.h
index 968eeae52d..2b14b060a1 100644
--- a/include/prism/util/pm_line_offset_list.h
+++ b/include/prism/util/pm_line_offset_list.h
@@ -15,6 +15,7 @@
 #define PRISM_LINE_OFFSET_LIST_H
 
 #include "prism/defines.h"
+#include "prism/util/pm_arena.h"
 
 #include <assert.h>
 #include <stdbool.h>
@@ -48,14 +49,13 @@ typedef struct {
 } pm_line_column_t;
 
 /**
- * Initialize a new line offset list with the given capacity. Returns true if
- * the allocation of the offsets succeeds, otherwise returns false.
+ * Initialize a new line offset list with the given capacity.
  *
+ * @param arena The arena to allocate from.
  * @param list The list to initialize.
  * @param capacity The initial capacity of the list.
- * @return True if the allocation of the offsets succeeds, otherwise false.
  */
-bool pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity);
+void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity);
 
 /**
  * Clear out the offsets that have been appended to the list.
@@ -65,15 +65,13 @@ bool pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity);
 void pm_line_offset_list_clear(pm_line_offset_list_t *list);
 
 /**
- * Append a new offset to the list. Returns true if the reallocation of the
- * offsets succeeds (if one was necessary), otherwise returns false.
+ * Append a new offset to the list.
  *
+ * @param arena The arena to allocate from.
  * @param list The list to append to.
  * @param cursor The offset to append.
- * @return True if the reallocation of the offsets succeeds (if one was
- *     necessary), otherwise false.
  */
-bool pm_line_offset_list_append(pm_line_offset_list_t *list, uint32_t cursor);
+void pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor);
 
 /**
  * Returns the line of the given offset. If the offset is not in the list, the
@@ -98,11 +96,4 @@ int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cur
  */
 PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line);
 
-/**
- * Free the internal memory allocated for the list.
- *
- * @param list The list to free.
- */
-void pm_line_offset_list_free(pm_line_offset_list_t *list);
-
 #endif
diff --git a/src/prism.c b/src/prism.c
index 9d58bdb43d..f5902b6f98 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -451,7 +451,7 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
  */
 static inline void
 pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_list_append(&parser->error_list, start, length, diag_id);
+    pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
 }
 
 /**
@@ -494,7 +494,7 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
  * Append an error to the list of errors on the parser using a format string.
  */
 #define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
-    pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
+    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
 
 /**
  * Append an error to the list of errors on the parser using the location of the
@@ -529,7 +529,7 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
  */
 static inline void
 pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id);
+    pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
 }
 
 /**
@@ -555,7 +555,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
  * and the given location.
  */
 #define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
-    pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
+    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
 
 /**
  * Append a warning to the list of warnings on the parser using the location of
@@ -3897,7 +3897,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
             ellipsis = "";
         }
 
-        pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+        pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
         value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
     }
 
@@ -7525,12 +7525,10 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
         pm_string_free(&key);
 
         // Allocate a new magic comment node to append to the parser's list.
-        pm_magic_comment_t *magic_comment;
-        if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
-            magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
-            magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
-            pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
-        }
+        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_zalloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+        magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
+        magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
+        pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
     }
 
     return result;
@@ -9189,8 +9187,7 @@ parser_lex_callback(pm_parser_t *parser) {
  */
 static inline pm_comment_t *
 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
-    pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
-    if (comment == NULL) return NULL;
+    pm_comment_t *comment = (pm_comment_t *) pm_arena_zalloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
 
     *comment = (pm_comment_t) {
         .type = type,
@@ -9213,7 +9210,7 @@ lex_embdoc(pm_parser_t *parser) {
     if (newline == NULL) {
         parser->current.end = parser->end;
     } else {
-        pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
+        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
         parser->current.end = newline + 1;
     }
 
@@ -9223,7 +9220,6 @@ lex_embdoc(pm_parser_t *parser) {
     // Now, create a comment that is going to be attached to the parser.
     const uint8_t *comment_start = parser->current.start;
     pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
-    if (comment == NULL) return PM_TOKEN_EOF;
 
     // Now, loop until we find the end of the embedded documentation or the end
     // of the file.
@@ -9247,7 +9243,7 @@ lex_embdoc(pm_parser_t *parser) {
             if (newline == NULL) {
                 parser->current.end = parser->end;
             } else {
-                pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
+                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
                 parser->current.end = newline + 1;
             }
 
@@ -9267,7 +9263,7 @@ lex_embdoc(pm_parser_t *parser) {
         if (newline == NULL) {
             parser->current.end = parser->end;
         } else {
-            pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
             parser->current.end = newline + 1;
         }
 
@@ -9577,7 +9573,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
             parser_flush_heredoc_end(parser);
         } else {
             // Otherwise, we'll add the newline to the list of newlines.
-            pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
         }
 
         uint8_t delimiter = *parser->current.end;
@@ -9681,7 +9677,7 @@ parser_lex(pm_parser_t *parser) {
                                 parser->heredoc_end = NULL;
                             } else {
                                 parser->current.end += eol_length + 1;
-                                pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                                 space_seen = true;
                             }
                         } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
@@ -9783,7 +9779,7 @@ parser_lex(pm_parser_t *parser) {
                         }
 
                         if (parser->heredoc_end == NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                         }
                     }
 
@@ -10309,7 +10305,7 @@ parser_lex(pm_parser_t *parser) {
                                     } else {
                                         // Otherwise, we want to indicate that the body of the
                                         // heredoc starts on the character after the next newline.
-                                        pm_line_offset_list_append(&parser->line_offsets, U32(body_start - parser->start + 1));
+                                        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
                                         body_start++;
                                     }
 
@@ -10950,7 +10946,7 @@ parser_lex(pm_parser_t *parser) {
                         // correct column information for it.
                         const uint8_t *cursor = parser->current.end;
                         while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, U32(++cursor - parser->start));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
                         }
 
                         parser->current.end = parser->end;
@@ -11011,7 +11007,7 @@ parser_lex(pm_parser_t *parser) {
                     whitespace += 1;
                 }
             } else {
-                whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
             }
 
             if (whitespace > 0) {
@@ -11126,7 +11122,7 @@ parser_lex(pm_parser_t *parser) {
                                 LEX(PM_TOKEN_STRING_CONTENT);
                             } else {
                                 // ... else track the newline.
-                                pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                             }
 
                             parser->current.end++;
@@ -11264,7 +11260,7 @@ parser_lex(pm_parser_t *parser) {
                         // would have already have added the newline to the
                         // list.
                         if (parser->heredoc_end == NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                         }
                     } else {
                         parser->current.end = breakpoint + 1;
@@ -11311,7 +11307,7 @@ parser_lex(pm_parser_t *parser) {
                         // If we've hit a newline, then we need to track that in
                         // the list of newlines.
                         if (parser->heredoc_end == NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
                             parser->current.end = breakpoint + 1;
                             breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
                             break;
@@ -11359,7 +11355,7 @@ parser_lex(pm_parser_t *parser) {
                                     LEX(PM_TOKEN_STRING_CONTENT);
                                 } else {
                                     // ... else track the newline.
-                                    pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                                    pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                                 }
 
                                 parser->current.end++;
@@ -11524,7 +11520,7 @@ parser_lex(pm_parser_t *parser) {
                         // would have already have added the newline to the
                         // list.
                         if (parser->heredoc_end == NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
                         }
                     } else {
                         parser->current.end = breakpoint + 1;
@@ -11576,7 +11572,7 @@ parser_lex(pm_parser_t *parser) {
                         // for the terminator in case the terminator is a
                         // newline character.
                         if (parser->heredoc_end == NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
                             parser->current.end = breakpoint + 1;
                             breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
                             break;
@@ -11630,7 +11626,7 @@ parser_lex(pm_parser_t *parser) {
                                     LEX(PM_TOKEN_STRING_CONTENT);
                                 } else {
                                     // ... else track the newline.
-                                    pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                                    pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                                 }
 
                                 parser->current.end++;
@@ -11759,7 +11755,7 @@ parser_lex(pm_parser_t *parser) {
                         (memcmp(terminator_start, ident_start, ident_length) == 0)
                     ) {
                         if (newline != NULL) {
-                            pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
+                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
                         }
 
                         parser->current.end = terminator_end;
@@ -11831,7 +11827,7 @@ parser_lex(pm_parser_t *parser) {
                             LEX(PM_TOKEN_STRING_CONTENT);
                         }
 
-                        pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
+                        pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
 
                         // If we have a - or ~ heredoc, then we can match after
                         // some leading whitespace.
@@ -11951,7 +11947,7 @@ parser_lex(pm_parser_t *parser) {
                                         const uint8_t *end = parser->current.end;
 
                                         if (parser->heredoc_end == NULL) {
-                                            pm_line_offset_list_append(&parser->line_offsets, U32(end - parser->start + 1));
+                                            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
                                         }
 
                                         // Here we want the buffer to only
@@ -13177,6 +13173,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
         pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
 
         pm_diagnostic_list_append_format(
+            &parser->metadata_arena,
             &parser->warning_list,
             duplicated->location.start,
             duplicated->location.length,
@@ -13200,6 +13197,7 @@ pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *li
 
     if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
         pm_diagnostic_list_append_format(
+            &parser->metadata_arena,
             &parser->warning_list,
             PM_NODE_START(node),
             PM_NODE_LENGTH(node),
@@ -21884,6 +21882,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
 
     *parser = (pm_parser_t) {
         .arena = arena,
+        .metadata_arena = { 0 },
         .node_id = 0,
         .lex_state = PM_LEX_STATE_BEG,
         .enclosure_nesting = 0,
@@ -21957,7 +21956,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
     // guess at the number of newlines that we'll need based on the size of the
     // input.
     size_t newline_size = size / 22;
-    pm_line_offset_list_init(&parser->line_offsets, newline_size < 4 ? 4 : newline_size);
+    pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
 
     // If options were provided to this parse, establish them here.
     if (options != NULL) {
@@ -22096,7 +22095,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
         const uint8_t *newline = next_newline(cursor, parser->end - cursor);
 
         while (newline != NULL) {
-            pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
+            pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
 
             cursor = newline + 1;
             newline = next_newline(cursor, parser->end - cursor);
@@ -22145,48 +22144,14 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch
     parser->encoding_changed_callback = callback;
 }
 
-/**
- * Free all of the memory associated with the comment list.
- */
-static inline void
-pm_comment_list_free(pm_list_t *list) {
-    pm_list_node_t *node, *next;
-
-    for (node = list->head; node != NULL; node = next) {
-        next = node->next;
-
-        pm_comment_t *comment = (pm_comment_t *) node;
-        xfree_sized(comment, sizeof(pm_comment_t));
-    }
-}
-
-/**
- * Free all of the memory associated with the magic comment list.
- */
-static inline void
-pm_magic_comment_list_free(pm_list_t *list) {
-    pm_list_node_t *node, *next;
-
-    for (node = list->head; node != NULL; node = next) {
-        next = node->next;
-
-        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
-        xfree_sized(magic_comment, sizeof(pm_magic_comment_t));
-    }
-}
-
 /**
  * Free any memory associated with the given parser.
  */
 PRISM_EXPORTED_FUNCTION void
 pm_parser_free(pm_parser_t *parser) {
     pm_string_free(&parser->filepath);
-    pm_diagnostic_list_free(&parser->error_list);
-    pm_diagnostic_list_free(&parser->warning_list);
-    pm_comment_list_free(&parser->comment_list);
-    pm_magic_comment_list_free(&parser->magic_comment_list);
     pm_constant_pool_free(&parser->constant_pool);
-    pm_line_offset_list_free(&parser->line_offsets);
+    pm_arena_free(&parser->metadata_arena);
 
     while (parser->current_scope != NULL) {
         // Normally, popping the scope doesn't free the locals since it is
diff --git a/src/util/pm_char.c b/src/util/pm_char.c
index f0baf47784..ff8a88a687 100644
--- a/src/util/pm_char.c
+++ b/src/util/pm_char.c
@@ -83,7 +83,7 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
  * searching past the given maximum number of characters.
  */
 size_t
-pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
+pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
     if (length <= 0) return 0;
 
     uint32_t size = 0;
@@ -91,7 +91,7 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_o
 
     while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
         if (string[size] == '\n') {
-            pm_line_offset_list_append(line_offsets, start_offset + size + 1);
+            pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1);
         }
 
         size++;
diff --git a/src/util/pm_line_offset_list.c b/src/util/pm_line_offset_list.c
index d55b2f6874..c0b41df406 100644
--- a/src/util/pm_line_offset_list.c
+++ b/src/util/pm_line_offset_list.c
@@ -1,20 +1,16 @@
 #include "prism/util/pm_line_offset_list.h"
 
 /**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
+ * Initialize a new line offset list with the given capacity.
  */
-bool
-pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity) {
-    list->offsets = (uint32_t *) xcalloc(capacity, sizeof(uint32_t));
-    if (list->offsets == NULL) return false;
+void
+pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity) {
+    list->offsets = (uint32_t *) pm_arena_zalloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
 
     // This is 1 instead of 0 because we want to include the first line of the
-    // file as having offset 0, which is set because of calloc.
+    // file as having offset 0, which is set because of the zero-initialization.
     list->size = 1;
     list->capacity = capacity;
-
-    return true;
 }
 
 /**
@@ -26,26 +22,22 @@ pm_line_offset_list_clear(pm_line_offset_list_t *list) {
 }
 
 /**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
+ * Append a new offset to the line offset list.
  */
-bool
-pm_line_offset_list_append(pm_line_offset_list_t *list, uint32_t cursor) {
+void
+pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
     if (list->size == list->capacity) {
-        uint32_t *original_offsets = list->offsets;
+        size_t new_capacity = (list->capacity * 3) / 2;
+        uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
 
-        list->capacity = (list->capacity * 3) / 2;
-        list->offsets = (uint32_t *) xcalloc(list->capacity, sizeof(uint32_t));
-        if (list->offsets == NULL) return false;
+        memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t));
 
-        memcpy(list->offsets, original_offsets, list->size * sizeof(uint32_t));
-        xfree_sized(original_offsets, list->size * sizeof(uint32_t));
+        list->offsets = new_offsets;
+        list->capacity = new_capacity;
     }
 
     assert(list->size == 0 || cursor > list->offsets[list->size - 1]);
     list->offsets[list->size++] = cursor;
-
-    return true;
 }
 
 /**
@@ -103,11 +95,3 @@ pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t curs
         .column = cursor - list->offsets[left - 1]
     });
 }
-
-/**
- * Free the internal memory allocated for the newline list.
- */
-void
-pm_line_offset_list_free(pm_line_offset_list_t *list) {
-    xfree_sized(list->offsets, list->capacity * sizeof(uint32_t));
-}
diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
index 60c67b2983..ddd6ef0ead 100644
--- a/src/util/pm_strpbrk.c
+++ b/src/util/pm_strpbrk.c
@@ -5,7 +5,7 @@
  */
 static inline void
 pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) {
-    pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]);
+    pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]);
 }
 
 /**
@@ -19,7 +19,7 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l
         } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
             // Not okay, we already found a Unicode escape sequence and this
             // conflicts.
-            pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+            pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name);
         } else {
             // Should not be anything else.
             assert(false && "unreachable");
diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb
index c1864e6021..935fb663ea 100644
--- a/templates/include/prism/diagnostic.h.erb
+++ b/templates/include/prism/diagnostic.h.erb
@@ -8,6 +8,7 @@
 
 #include "prism/ast.h"
 #include "prism/defines.h"
+#include "prism/util/pm_arena.h"
 #include "prism/util/pm_list.h"
 
 #include <stdbool.h>
@@ -48,13 +49,6 @@ typedef struct {
     /** The message associated with the diagnostic. */
     const char *message;
 
-    /**
-     * Whether or not the memory related to the message of this diagnostic is
-     * owned by this diagnostic. If it is, it needs to be freed when the
-     * diagnostic is freed.
-     */
-    bool owned;
-
     /**
      * The level of the diagnostic, see `pm_error_level_t` and
      * `pm_warning_level_t` for possible values.
@@ -99,32 +93,25 @@ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
  * Append a diagnostic to the given list of diagnostics that is using shared
  * memory for its message.
  *
+ * @param arena The arena to allocate from.
  * @param list The list to append to.
  * @param start The source offset of the start of the diagnostic.
  * @param length The length of the diagnostic.
  * @param diag_id The diagnostic ID.
- * @return Whether the diagnostic was successfully appended.
  */
-bool pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id);
+void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id);
 
 /**
  * Append a diagnostic to the given list of diagnostics that is using a format
  * string for its message.
  *
+ * @param arena The arena to allocate from.
  * @param list The list to append to.
  * @param start The source offset of the start of the diagnostic.
  * @param length The length of the diagnostic.
  * @param diag_id The diagnostic ID.
  * @param ... The arguments to the format string for the message.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...);
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- *
- * @param list The list to deallocate.
  */
-void pm_diagnostic_list_free(pm_list_t *list);
+void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...);
 
 #endif
diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb
index 8fa47590c0..b02714637d 100644
--- a/templates/src/diagnostic.c.erb
+++ b/templates/src/diagnostic.c.erb
@@ -1,4 +1,5 @@
 #include "prism/diagnostic.h"
+#include "prism/util/pm_arena.h"
 
 #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %>
 
@@ -451,29 +452,26 @@ pm_diagnostic_level(pm_diagnostic_id_t diag_id) {
 /**
  * Append an error to the given list of diagnostic.
  */
-bool
-pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
-    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
-    if (diagnostic == NULL) return false;
+void
+pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
 
     *diagnostic = (pm_diagnostic_t) {
         .location = { .start = start, .length = length },
         .diag_id = diag_id,
         .message = pm_diagnostic_message(diag_id),
-        .owned = false,
         .level = pm_diagnostic_level(diag_id)
     };
 
     pm_list_append(list, (pm_list_node_t *) diagnostic);
-    return true;
 }
 
 /**
  * Append a diagnostic to the given list of diagnostics that is using a format
  * string for its message.
  */
-bool
-pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) {
+void
+pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) {
     va_list arguments;
     va_start(arguments, diag_id);
 
@@ -482,20 +480,13 @@ pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t lengt
     va_end(arguments);
 
     if (result < 0) {
-        return false;
+        return;
     }
 
-    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
-    if (diagnostic == NULL) {
-        return false;
-    }
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
 
     size_t message_length = (size_t) (result + 1);
-    char *message = (char *) xmalloc(message_length);
-    if (message == NULL) {
-        xfree_sized(diagnostic, sizeof(pm_diagnostic_t));
-        return false;
-    }
+    char *message = (char *) pm_arena_alloc(arena, message_length, 1);
 
     va_start(arguments, diag_id);
     vsnprintf(message, message_length, format, arguments);
@@ -505,27 +496,9 @@ pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t lengt
         .location = { .start = start, .length = length },
         .diag_id = diag_id,
         .message = message,
-        .owned = true,
         .level = pm_diagnostic_level(diag_id)
     };
 
     pm_list_append(list, (pm_list_node_t *) diagnostic);
-    return true;
 }
 
-/**
- * Deallocate the internal state of the given diagnostic list.
- */
-void
-pm_diagnostic_list_free(pm_list_t *list) {
-    pm_diagnostic_t *node = (pm_diagnostic_t *) list->head;
-
-    while (node != NULL) {
-        pm_diagnostic_t *next = (pm_diagnostic_t *) node->node.next;
-
-        if (node->owned) xfree((void *) node->message);
-        xfree_sized(node, sizeof(pm_diagnostic_t));
-
-        node = next;
-    }
-}

From 390bdaa1f16582a9a7daddbf7add09ce82106981 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sat, 7 Mar 2026 12:09:39 -0500
Subject: [PATCH 02/28] Use the parser arena for the constant pool

---
 include/prism/util/pm_constant_pool.h | 15 ++---
 src/prism.c                           | 21 +++----
 src/util/pm_constant_pool.c           | 81 +++++++--------------------
 3 files changed, 31 insertions(+), 86 deletions(-)

diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h
index 1d4922a661..285a636a3a 100644
--- a/include/prism/util/pm_constant_pool.h
+++ b/include/prism/util/pm_constant_pool.h
@@ -146,7 +146,7 @@ typedef struct {
  * @param capacity The initial capacity of the pool.
  * @return Whether the initialization succeeded.
  */
-bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
+void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity);
 
 /**
  * Return a pointer to the constant indicated by the given constant id.
@@ -177,7 +177,7 @@ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uin
  * @param length The length of the constant.
  * @return The id of the constant.
  */
-pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
 
 /**
  * Insert a constant into a constant pool from memory that is now owned by the
@@ -189,7 +189,7 @@ pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const
  * @param length The length of the constant.
  * @return The id of the constant.
  */
-pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
+pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length);
 
 /**
  * Insert a constant into a constant pool from memory that is constant. Returns
@@ -200,13 +200,6 @@ pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t
  * @param length The length of the constant.
  * @return The id of the constant.
  */
-pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Free the memory associated with a constant pool.
- *
- * @param pool The pool to free.
- */
-void pm_constant_pool_free(pm_constant_pool_t *pool);
+pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
 
 #endif
diff --git a/src/prism.c b/src/prism.c
index f5902b6f98..602e3bfb99 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -1028,7 +1028,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
  */
 static inline pm_constant_id_t
 pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
-    return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
+    return pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
 }
 
 /**
@@ -1036,7 +1036,7 @@ pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8
  */
 static inline pm_constant_id_t
 pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
-    return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
+    return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
 }
 
 /**
@@ -1044,7 +1044,7 @@ pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length)
  */
 static inline pm_constant_id_t
 pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
-    return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
+    return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
 }
 
 /**
@@ -2908,10 +2908,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p
     if (write_constant->length > 0) {
         size_t length = write_constant->length - 1;
 
-        void *memory = xmalloc(length);
+        uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
         memcpy(memory, write_constant->start, length);
 
-        *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
+        *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
     } else {
         // We can get here if the message was missing because of a syntax error.
         *read_name = pm_parser_constant_id_constant(parser, "", 0);
@@ -12543,16 +12543,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
     // append an =.
     pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
     size_t length = constant->length;
-    uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
-    if (name == NULL) return;
+    uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
 
     memcpy(name, constant->start, length);
     name[length] = '=';
 
-    // Now switch the name to the new string.
-    // This silences clang analyzer warning about leak of memory pointed by `name`.
-    // NOLINTNEXTLINE(clang-analyzer-*)
-    *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
+    *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
 }
 
 /**
@@ -21950,7 +21946,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
     // This ratio will need to change if we add more constants to the constant
     // pool for another node type.
     uint32_t constant_size = ((uint32_t) size) / 95;
-    pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
+    pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
 
     // Initialize the newline list. Similar to the constant pool, we're going to
     // guess at the number of newlines that we'll need based on the size of the
@@ -22150,7 +22146,6 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch
 PRISM_EXPORTED_FUNCTION void
 pm_parser_free(pm_parser_t *parser) {
     pm_string_free(&parser->filepath);
-    pm_constant_pool_free(&parser->constant_pool);
     pm_arena_free(&parser->metadata_arena);
 
     while (parser->current_scope != NULL) {
diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c
index f7173dd062..0c9a7dec9a 100644
--- a/src/util/pm_constant_pool.c
+++ b/src/util/pm_constant_pool.c
@@ -115,21 +115,15 @@ is_power_of_two(uint32_t size) {
 /**
  * Resize a constant pool to a given capacity.
  */
-static inline bool
-pm_constant_pool_resize(pm_constant_pool_t *pool) {
+static inline void
+pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) {
     assert(is_power_of_two(pool->capacity));
 
     uint32_t next_capacity = pool->capacity * 2;
-    if (next_capacity < pool->capacity) return false;
-
     const uint32_t mask = next_capacity - 1;
-    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-
-    void *next = xcalloc(next_capacity, element_size);
-    if (next == NULL) return false;
 
-    pm_constant_pool_bucket_t *next_buckets = next;
-    pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
+    pm_constant_pool_bucket_t *next_buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, next_capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+    pm_constant_t *next_constants = (pm_constant_t *) pm_arena_alloc(arena, next_capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
 
     // For each bucket in the current constant pool, find the index in the
     // next constant pool, and insert it.
@@ -157,33 +151,22 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
     // The constants are stable with respect to hash table resizes.
     memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
 
-    // pool->constants and pool->buckets are allocated out of the same chunk
-    // of memory, with the buckets coming first.
-    xfree_sized(pool->buckets, pool->capacity * element_size);
     pool->constants = next_constants;
     pool->buckets = next_buckets;
     pool->capacity = next_capacity;
-    return true;
 }
 
 /**
  * Initialize a new constant pool with a given capacity.
  */
-bool
-pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
-    const uint32_t maximum = (~((uint32_t) 0));
-    if (capacity >= ((maximum / 2) + 1)) return false;
-
+void
+pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity) {
     capacity = next_power_of_two(capacity);
-    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-    void *memory = xcalloc(capacity, element_size);
-    if (memory == NULL) return false;
 
-    pool->buckets = memory;
-    pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
+    pool->buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+    pool->constants = (pm_constant_t *) pm_arena_alloc(arena, capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
     pool->size = 0;
     pool->capacity = capacity;
-    return true;
 }
 
 /**
@@ -224,9 +207,9 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
  * Insert a constant into a constant pool and return its index in the pool.
  */
 static inline pm_constant_id_t
-pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
+pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
     if (pool->size >= (pool->capacity / 4 * 3)) {
-        if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
+        pm_constant_pool_resize(arena, pool);
     }
 
     assert(is_power_of_two(pool->capacity));
@@ -246,17 +229,10 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
             // Since we have found a match, we need to check if this is
             // attempting to insert a shared or an owned constant. We want to
             // prefer shared constants since they don't require allocations.
-            if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
-                // If we're attempting to insert an owned constant and we have
-                // an existing constant, then either way we don't want the given
-                // memory. Either it's duplicated with the existing constant or
-                // it's not necessary because we have a shared version.
-                xfree_sized((void *) start, length);
-            } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+            if (type != PM_CONSTANT_POOL_BUCKET_OWNED && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
                 // If we're attempting to insert a shared constant and the
-                // existing constant is owned, then we can free the owned
-                // constant and replace it with the shared constant.
-                xfree_sized((void *) constant->start, constant->length);
+                // existing constant is owned, then we can replace it with the
+                // shared constant to prefer non-owned references.
                 constant->start = start;
                 bucket->type = (unsigned int) (type & 0x3);
             }
@@ -291,8 +267,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
  * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
+pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
 }
 
 /**
@@ -301,8 +277,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
  * potential calls to resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
+pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
 }
 
 /**
@@ -311,26 +287,7 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t l
  * resize fail.
  */
 pm_constant_id_t
-pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
-    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
+pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
 }
 
-/**
- * Free the memory associated with a constant pool.
- */
-void
-pm_constant_pool_free(pm_constant_pool_t *pool) {
-    // For each constant in the current constant pool, free the contents if the
-    // contents are owned.
-    for (uint32_t index = 0; index < pool->capacity; index++) {
-        pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
-
-        // If an id is set on this constant, then we know we have content here.
-        if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
-            pm_constant_t *constant = &pool->constants[bucket->id - 1];
-            xfree_sized((void *) constant->start, constant->length);
-        }
-    }
-
-    xfree_sized(pool->buckets, pool->capacity * (sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t)));
-}

From 1dd985306f806dc9c275474b01f8c6f4b72a25ef Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 14:26:57 -0400
Subject: [PATCH 03/28] Speed up the constant hash function

---
 src/util/pm_constant_pool.c | 50 +++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c
index 0c9a7dec9a..c8c27a9618 100644
--- a/src/util/pm_constant_pool.c
+++ b/src/util/pm_constant_pool.c
@@ -70,19 +70,55 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
 }
 
 /**
- * A relatively simple hash function (djb2) that is used to hash strings. We are
- * optimizing here for simplicity and speed.
+ * A multiply-xorshift hash that processes input a word at a time. This is
+ * significantly faster than the byte-at-a-time djb2 hash for the short strings
+ * typical in Ruby source (~15 bytes average). Each word is mixed into the hash
+ * by XOR followed by multiplication by a large odd constant, which spreads
+ * entropy across all bits. A final xorshift fold produces the 32-bit result.
  */
 static inline uint32_t
 pm_constant_pool_hash(const uint8_t *start, size_t length) {
-    // This is a prime number used as the initial value for the hash function.
-    uint32_t value = 5381;
+    // This constant is the 64-bit multiplier used by FxHash (rustc-hash), not
+    // a wyhash secret. It is an odd integer with roughly equal 0/1 bits, which
+    // suits the XOR-then-multiply mixing steps below.
+    static const uint64_t secret = 0x517cc1b727220a95ULL;
+    uint64_t hash = (uint64_t) length;
+
+    const uint8_t *ptr = start;
+    size_t remaining = length;
+
+    while (remaining >= 8) {
+        uint64_t word;
+        memcpy(&word, ptr, 8);
+        hash ^= word;
+        hash *= secret;
+        ptr += 8;
+        remaining -= 8;
+    }
+
+    if (remaining >= 4) {
+        uint32_t word;
+        memcpy(&word, ptr, 4);
+        hash ^= (uint64_t) word;
+        hash *= secret;
+        ptr += 4;
+        remaining -= 4;
+    }
+
+    if (remaining >= 2) {
+        hash ^= (uint64_t) ptr[0] | ((uint64_t) ptr[1] << 8);
+        hash *= secret;
+        ptr += 2;
+        remaining -= 2;
+    }
 
-    for (size_t index = 0; index < length; index++) {
-        value = ((value << 5) + value) + start[index];
+    if (remaining >= 1) {
+        hash ^= (uint64_t) ptr[0];
+        hash *= secret;
     }
 
-    return value;
+    hash ^= hash >> 32;
+    return (uint32_t) hash;
 }
 
 /**

From e0708c495c7d5f37f5ddf7251b05f2bcff1151dc Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 13:53:54 -0400
Subject: [PATCH 04/28] Small optimization for parser_lex_magic_comment

---
 src/prism.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 602e3bfb99..97c969ff90 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -7316,11 +7316,13 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
  */
 static inline const uint8_t *
 parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
-    while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
-        if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
-            return cursor;
+    // Scan for '*' as the middle character, since it is rarer than '-' in
+    // typical comments and avoids repeated memchr calls for '-' that hit
+    // dashes in words like "foo-bar".
+    while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
+        if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
+            return cursor - 1;
         }
-        cursor++;
     }
     return NULL;
 }
@@ -7357,6 +7359,13 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
             // have a magic comment.
             return false;
         }
+    } else {
+        // Non-emacs magic comments must contain a colon for `key: value`.
+        // Reject early if there is no colon to avoid scanning the entire
+        // comment character-by-character.
+        if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
+            return false;
+        }
     }
 
     cursor = start;

From c1ad25ebf8b2e5215aaca18d277f459f2df89a90 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 14:58:29 -0400
Subject: [PATCH 05/28] Scan forward through inline whitespace to avoid writing
 to parser->current.end continuously

---
 src/prism.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 97c969ff90..0f21b950dc 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9658,17 +9658,24 @@ parser_lex(pm_parser_t *parser) {
             bool space_seen = false;
 
             // First, we're going to skip past any whitespace at the front of the next
-            // token.
+            // token. Skip runs of inline whitespace in bulk to avoid per-character
+            // stores back to parser->current.end.
             bool chomping = true;
             while (parser->current.end < parser->end && chomping) {
-                switch (*parser->current.end) {
-                    case ' ':
-                    case '\t':
-                    case '\f':
-                    case '\v':
-                        parser->current.end++;
+                {
+                    static const uint8_t inline_whitespace[256] = {
+                        [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
+                    };
+                    const uint8_t *scan = parser->current.end;
+                    while (scan < parser->end && inline_whitespace[*scan]) scan++;
+                    if (scan > parser->current.end) {
+                        parser->current.end = scan;
                         space_seen = true;
-                        break;
+                        continue;
+                    }
+                }
+
+                switch (*parser->current.end) {
                     case '\r':
                         if (match_eol_offset(parser, 1)) {
                             chomping = false;

From fb526a824337f017d22f12689561cdcc4872226d Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 16:10:05 -0400
Subject: [PATCH 06/28] Fast-paths for ASCII-only identifiers

---
 include/prism/defines.h |  31 ++++++
 src/prism.c             | 225 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 256 insertions(+)

diff --git a/include/prism/defines.h b/include/prism/defines.h
index c48a600b21..6afc239035 100644
--- a/include/prism/defines.h
+++ b/include/prism/defines.h
@@ -264,6 +264,37 @@
     #define PRISM_UNLIKELY(x) (x)
 #endif
 
+/**
+ * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning
+ * to find the first non-matching byte in a word. The _BitScanForward64 branch
+ * is limited to 64-bit MSVC targets, the only ones that provide the intrinsic.
+ * Precondition: v must be nonzero. The result is undefined when v == 0
+ * (matching the behavior of __builtin_ctzll and _BitScanForward64).
+ */
+#if defined(__GNUC__) || defined(__clang__)
+    #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v))
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
+    #include <intrin.h>
+    static inline unsigned pm_ctzll(uint64_t v) {
+        unsigned long index;
+        _BitScanForward64(&index, v);
+        return (unsigned) index;
+    }
+#else
+    static inline unsigned
+    pm_ctzll(uint64_t v) {
+        unsigned c = 0;
+        v &= ~v + 1; // isolate lowest set bit; unsigned negate avoids signed-overflow UB
+        if (v & 0x00000000FFFFFFFFULL) c += 0;  else c += 32;
+        if (v & 0x0000FFFF0000FFFFULL) c += 0;  else c += 16;
+        if (v & 0x00FF00FF00FF00FFULL) c += 0;  else c += 8;
+        if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0;  else c += 4;
+        if (v & 0x3333333333333333ULL) c += 0;  else c += 2;
+        if (v & 0x5555555555555555ULL) c += 0;  else c += 1;
+        return c;
+    }
+#endif
+
 /**
  * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
  * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
diff --git a/src/prism.c b/src/prism.c
index 0f21b950dc..dace322ee9 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -1777,6 +1777,227 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
     }
 }
 
+/**
+ * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using
+ * wide operations. Returns the number of leading ASCII identifier bytes.
+ * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8)
+ * with a byte-at-a-time loop.
+ *
+ * Up to four optimized implementations are selected at compile time, with a
+ * no-op fallback for unsupported platforms:
+ *   1. NEON — processes 16 bytes per iteration on aarch64.
+ *   2. SSE2 — processes 16 bytes per iteration on x86-64.
+ *   3. WASM SIMD — processes 16 bytes per iteration on WebAssembly.
+ *   4. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ *   5. No-op — returns 0; the caller's byte-at-a-time loop handles everything.
+ */
+
+#if defined(__aarch64__) && defined(__ARM_NEON)
+#include <arm_neon.h>
+
+static inline size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
+    // Each high nibble is assigned a unique bit; the low nibble table
+    // contains the OR of bits for all high nibbles that have an
+    // identifier character at that low nibble position. A byte is an
+    // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
+    const uint8x16_t low_lut = (uint8x16_t) {
+        0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
+        0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
+    };
+    const uint8x16_t high_lut = (uint8x16_t) {
+        0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+    const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+
+    while (cursor + 16 <= end) {
+        uint8x16_t v = vld1q_u8(cursor);
+
+        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+        uint8x16_t ident = vandq_u8(lo_class, hi_class);
+
+        // Fast check: if the per-byte minimum is nonzero, every byte matched.
+        if (vminvq_u8(ident) != 0) {
+            cursor += 16;
+            continue;
+        }
+
+        // Find the first non-identifier byte (zero in ident).
+        uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
+        uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);
+
+        if (lo != 0) {
+            cursor += pm_ctzll(lo) / 8;
+        } else {
+            uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
+            cursor += 8 + pm_ctzll(hi) / 8;
+        }
+
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#elif defined(__x86_64__) && defined(__SSE2__)
+#include <emmintrin.h>
+
+static inline size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    while (cursor + 16 <= end) {
+        __m128i v = _mm_loadu_si128((const __m128i *) cursor);
+        __m128i zero = _mm_setzero_si128();
+
+        // Unsigned range check via saturating subtraction:
+        //   byte >= lo  ⟺  saturate(lo - byte) == 0
+        //   byte <= hi  ⟺  saturate(byte - hi) == 0
+
+        // Fold case: OR with 0x20 maps A-Z to a-z.
+        __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
+        __m128i letter = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
+
+        __m128i digit = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
+
+        __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));
+
+        __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
+        int mask = _mm_movemask_epi8(ident);
+
+        if (mask == 0xFFFF) {
+            cursor += 16;
+            continue;
+        }
+
+        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#elif defined(__wasm_simd128__)
+#include <wasm_simd128.h>
+
+static inline size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    while (cursor + 16 <= end) {
+        v128_t v = wasm_v128_load(cursor);
+
+        // Range checks via subtract-and-unsigned-compare: (v - lo) < count
+        // is true iff v is in [lo, lo + count). One subtract + one compare
+        // per range instead of two comparisons + AND.
+
+        // Fold case: OR with 0x20 maps A-Z to a-z.
+        v128_t lowered = wasm_v128_or(v, wasm_u8x16_splat(0x20));
+        v128_t letter = wasm_u8x16_lt(
+            wasm_i8x16_sub(lowered, wasm_u8x16_splat(0x61)),
+            wasm_u8x16_splat(0x1A));
+
+        v128_t digit = wasm_u8x16_lt(
+            wasm_i8x16_sub(v, wasm_u8x16_splat(0x30)),
+            wasm_u8x16_splat(0x0A));
+
+        v128_t underscore = wasm_i8x16_eq(v, wasm_u8x16_splat(0x5F));
+
+        v128_t ident = wasm_v128_or(wasm_v128_or(letter, digit), underscore);
+
+        // Fast path: if all 16 bytes are identifier chars, advance.
+        if (wasm_i8x16_all_true(ident)) {
+            cursor += 16;
+            continue;
+        }
+
+        // Extract bitmask only on the exit path to find the first non-match.
+        uint32_t mask = wasm_i8x16_bitmask(ident);
+        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+// The SWAR path uses pm_ctzll to find the first non-matching byte within a
+// word, which only yields the correct byte index on little-endian targets.
+// We gate on a positive little-endian check so that unknown-endianness
+// platforms safely fall through to the no-op fallback.
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+/**
+ * Portable SWAR fallback — processes 8 bytes per iteration.
+ *
+ * The byte-wise range checks avoid cross-byte borrows by pre-setting the high
+ * bit of each byte before subtraction: (byte | 0x80) - lo has a minimum value
+ * of 0x80 - 0x7F = 1, so underflow (and thus a borrow into the next byte) is
+ * impossible. The result has bit 7 set if and only if byte >= lo. The same
+ * reasoning applies to the upper-bound direction.
+ */
+static inline size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    static const uint64_t ones = 0x0101010101010101ULL;
+    static const uint64_t highs = 0x8080808080808080ULL;
+    const uint8_t *cursor = start;
+
+    while (cursor + 8 <= end) {
+        uint64_t word;
+        memcpy(&word, cursor, 8);
+
+        // Bail on any non-ASCII byte.
+        if (word & highs) break;
+
+        uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
+
+        // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
+        // then check the lowercase range once. A-Z maps to a-z; the
+        // only non-letter byte that could alias into [0x61,0x7A] is one
+        // whose original value was in [0x41,0x5A] — which is exactly
+        // the uppercase letters we want to match.
+        uint64_t lowered = word | (ones * 0x20);
+        uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
+
+        // Flag bytes equal to underscore. Every byte of (word XOR 0x5F) is
+        // < 0x80, so adding 0x7F sets bit 7 exactly when the byte is nonzero
+        // and never carries; (x - ones) would borrow and flag '^' after '_'.
+        uint64_t xor_us = word ^ (ones * 0x5F);
+        uint64_t underscore = ~(xor_us + ones * 0x7F) & highs;
+
+        uint64_t ident = digit | letter | underscore;
+
+        if (ident == highs) {
+            cursor += 8;
+            continue;
+        }
+
+        // Find the first non-identifier byte. On little-endian the first
+        // byte sits in the least-significant position.
+        uint64_t not_ident = ~ident & highs;
+        cursor += pm_ctzll(not_ident) / 8;
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#else
+
+// No-op fallback for big-endian or other unsupported platforms.
+// The caller's byte-at-a-time loop handles everything.
+#define scan_identifier_ascii(start, end) ((size_t) 0)
+
+#endif
+
 /**
  * Like the above, this function is also used extremely frequently to lex all of
  * the identifiers in a source file once the first character has been found. So
@@ -8155,6 +8376,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
             current_end += width;
         }
     } else {
+        // Fast path: scan ASCII identifier bytes using wide operations.
+        current_end += scan_identifier_ascii(current_end, end);
+
+        // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
         while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
             current_end += width;
         }

From bfa769271560436cfd032dba4796416b734c3f4f Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 23:04:27 -0400
Subject: [PATCH 07/28] Avoid unnecessary zero-ing of memory

---
 src/prism.c                    | 5 +++--
 src/util/pm_line_offset_list.c | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index dace322ee9..45333b8185 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -7755,7 +7755,8 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
         pm_string_free(&key);
 
         // Allocate a new magic comment node to append to the parser's list.
-        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_zalloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+        magic_comment->node.next = NULL;
         magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
         magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
         pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
@@ -9421,7 +9422,7 @@ parser_lex_callback(pm_parser_t *parser) {
  */
 static inline pm_comment_t *
 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
-    pm_comment_t *comment = (pm_comment_t *) pm_arena_zalloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
+    pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
 
     *comment = (pm_comment_t) {
         .type = type,
diff --git a/src/util/pm_line_offset_list.c b/src/util/pm_line_offset_list.c
index c0b41df406..41d3b2c81d 100644
--- a/src/util/pm_line_offset_list.c
+++ b/src/util/pm_line_offset_list.c
@@ -5,10 +5,10 @@
  */
 void
 pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity) {
-    list->offsets = (uint32_t *) pm_arena_zalloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+    list->offsets = (uint32_t *) pm_arena_alloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
 
-    // This is 1 instead of 0 because we want to include the first line of the
-    // file as having offset 0, which is set because of the zero-initialization.
+    // The first line always has offset 0.
+    list->offsets[0] = 0;
     list->size = 1;
     list->capacity = capacity;
 }

From f94fe6ba02d0fa1dbdf21618fd025dbdd4fdb362 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sun, 8 Mar 2026 23:43:18 -0400
Subject: [PATCH 08/28] Pre-size arena to avoid unnecessary growth

---
 include/prism/util/pm_arena.h | 10 +++++++
 src/prism.c                   |  8 ++++++
 src/util/pm_arena.c           | 51 ++++++++++++++++++++++++++---------
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/include/prism/util/pm_arena.h b/include/prism/util/pm_arena.h
index f376d13459..ac34c9b967 100644
--- a/include/prism/util/pm_arena.h
+++ b/include/prism/util/pm_arena.h
@@ -44,6 +44,16 @@ typedef struct {
     size_t block_count;
 } pm_arena_t;
 
+/**
+ * Ensure the arena has at least `capacity` bytes available in its current
+ * block, allocating a new block if necessary. This is a no-op when `capacity`
+ * is at most the initial block size. Pre-sizing avoids repeated small blocks.
+ *
+ * @param arena The arena to pre-size.
+ * @param capacity The minimum number of bytes to ensure are available.
+ */
+void pm_arena_reserve(pm_arena_t *arena, size_t capacity);
+
 /**
  * Allocate memory from the arena. The returned memory is NOT zeroed. This
  * function is infallible — it aborts on allocation failure.
diff --git a/src/prism.c b/src/prism.c
index 45333b8185..25e11bab36 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -22173,6 +22173,14 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
         .warn_mismatched_indentation = true
     };
 
+    // Pre-size the arenas based on input size to reduce the number of block
+    // allocations (and the kernel page zeroing they trigger). The ratios were
+    // measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
+    // The reserve call is a no-op when the capacity is at or below the default
+    // arena block size, so small inputs don't waste an extra allocation.
+    if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
+    if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
+
     // Initialize the constant pool. We're going to completely guess as to the
     // number of constants that we'll need based on the size of the input. The
     // ratio we chose here is actually less arbitrary than you might think.
diff --git a/src/util/pm_arena.c b/src/util/pm_arena.c
index a9b69b3c8d..5f1050ed03 100644
--- a/src/util/pm_arena.c
+++ b/src/util/pm_arena.c
@@ -1,5 +1,7 @@
 #include "prism/util/pm_arena.h"
 
+#include <assert.h>
+
 /**
  * Compute the block allocation size using offsetof so it is correct regardless
  * of PM_FLEX_ARY_LEN.
@@ -29,6 +31,42 @@ pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) {
     return size > min_size ? size : min_size;
 }
 
+/**
+ * Allocate a new block with the given data capacity and initial usage, link it
+ * into the arena, and return it. Aborts on allocation failure.
+ */
+static pm_arena_block_t *
+pm_arena_new_block(pm_arena_t *arena, size_t data_size, size_t initial_used) {
+    assert(initial_used <= data_size);
+    pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(data_size));
+
+    if (block == NULL) {
+        fprintf(stderr, "prism: out of memory; aborting\n");
+        abort();
+    }
+
+    block->capacity = data_size;
+    block->used = initial_used;
+    block->prev = arena->current;
+    arena->current = block;
+    arena->block_count++;
+
+    return block;
+}
+
+/**
+ * Ensure the arena has at least `capacity` bytes available in its current
+ * block, allocating a new block if necessary. This allows callers to
+ * pre-size the arena to avoid repeated small block allocations.
+ */
+void
+pm_arena_reserve(pm_arena_t *arena, size_t capacity) {
+    if (capacity <= PM_ARENA_INITIAL_SIZE) return;
+    if (arena->current != NULL && (arena->current->capacity - arena->current->used) >= capacity) return;
+
+    pm_arena_new_block(arena, capacity, 0);
+}
+
 /**
  * Allocate memory from the arena. The returned memory is NOT zeroed. This
  * function is infallible — it aborts on allocation failure.
@@ -51,18 +89,7 @@ pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
     // New blocks from xmalloc are max-aligned, so data[] starts aligned for
     // any C type. No padding needed at the start.
     size_t block_data_size = pm_arena_next_block_size(arena, size);
-    pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(block_data_size));
-
-    if (block == NULL) {
-        fprintf(stderr, "prism: out of memory; aborting\n");
-        abort();
-    }
-
-    block->capacity = block_data_size;
-    block->used = size;
-    block->prev = arena->current;
-    arena->current = block;
-    arena->block_count++;
+    pm_arena_block_t *block = pm_arena_new_block(arena, block_data_size, size);
 
     return block->data;
 }

From dfdc9304566d6664df3c69e1881539f5586221fa Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 10:20:07 -0400
Subject: [PATCH 09/28] Force the allocation to be inlined

---
 include/prism/defines.h               | 12 +++++++
 include/prism/util/pm_arena.h         | 42 ++++++++++++++++++++--
 include/prism/util/pm_char.h          |  1 +
 include/prism/util/pm_constant_pool.h |  5 ++-
 src/util/pm_arena.c                   | 52 ++++-----------------------
 5 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/include/prism/defines.h b/include/prism/defines.h
index 6afc239035..017f0b86e0 100644
--- a/include/prism/defines.h
+++ b/include/prism/defines.h
@@ -91,6 +91,18 @@
 #   define inline __inline
 #endif
 
+/**
+ * Force a function to be inlined at every call site. Use sparingly — only for
+ * small, hot functions where the compiler's heuristics fail to inline.
+ */
+#if defined(_MSC_VER)
+#   define PRISM_FORCE_INLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__)
+#   define PRISM_FORCE_INLINE inline __attribute__((always_inline))
+#else
+#   define PRISM_FORCE_INLINE inline
+#endif
+
 /**
  * Old Visual Studio versions before 2015 do not implement sprintf, but instead
  * implement _snprintf. We standard that here.
diff --git a/include/prism/util/pm_arena.h b/include/prism/util/pm_arena.h
index ac34c9b967..175b39c6df 100644
--- a/include/prism/util/pm_arena.h
+++ b/include/prism/util/pm_arena.h
@@ -54,16 +54,42 @@ typedef struct {
  */
 void pm_arena_reserve(pm_arena_t *arena, size_t capacity);
 
+/**
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Do not call directly — use pm_arena_alloc instead.
+ *
+ * @param arena The arena to allocate from.
+ * @param size The number of bytes to allocate.
+ * @returns A pointer to the allocated memory.
+ */
+void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size);
+
 /**
  * Allocate memory from the arena. The returned memory is NOT zeroed. This
  * function is infallible — it aborts on allocation failure.
  *
+ * The fast path (bump pointer within the current block) is inlined at each
+ * call site. The slow path (new block allocation) is out-of-line.
+ *
  * @param arena The arena to allocate from.
  * @param size The number of bytes to allocate.
  * @param alignment The required alignment (must be a power of 2).
  * @returns A pointer to the allocated memory.
  */
-void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment);
+static PRISM_FORCE_INLINE void *
+pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
+    if (arena->current != NULL) {
+        size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1);
+        size_t needed = used_aligned + size;
+
+        if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) {
+            arena->current->used = needed;
+            return arena->current->data + used_aligned;
+        }
+    }
+
+    return pm_arena_alloc_slow(arena, size);
+}
 
 /**
  * Allocate zero-initialized memory from the arena. This function is infallible
@@ -74,7 +100,12 @@ void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment);
  * @param alignment The required alignment (must be a power of 2).
  * @returns A pointer to the allocated, zero-initialized memory.
  */
-void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment);
+static inline void *
+pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) {
+    void *ptr = pm_arena_alloc(arena, size, alignment);
+    memset(ptr, 0, size);
+    return ptr;
+}
 
 /**
  * Allocate memory from the arena and copy the given data into it. This is a
@@ -86,7 +117,12 @@ void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment);
  * @param alignment The required alignment (must be a power of 2).
  * @returns A pointer to the allocated copy.
  */
-void * pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment);
+static inline void *
+pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) {
+    void *dst = pm_arena_alloc(arena, size, alignment);
+    memcpy(dst, src, size);
+    return dst;
+}
 
 /**
  * Free all blocks in the arena. After this call, all pointers returned by
diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h
index f9a556cabe..06728ba938 100644
--- a/include/prism/util/pm_char.h
+++ b/include/prism/util/pm_char.h
@@ -30,6 +30,7 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
  *
  * @param string The string to search.
  * @param length The maximum number of characters to search.
+ * @param arena The arena to allocate from when appending to line_offsets.
  * @param line_offsets The list of newlines to populate.
  * @param start_offset The offset at which the string occurs in the source, for
  *   the purpose of tracking newlines.
diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h
index 285a636a3a..fa74ee7b39 100644
--- a/include/prism/util/pm_constant_pool.h
+++ b/include/prism/util/pm_constant_pool.h
@@ -142,9 +142,9 @@ typedef struct {
 /**
  * Initialize a new constant pool with a given capacity.
  *
+ * @param arena The arena to allocate from.
  * @param pool The pool to initialize.
  * @param capacity The initial capacity of the pool.
- * @return Whether the initialization succeeded.
  */
 void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity);
 
@@ -172,6 +172,7 @@ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uin
  * Insert a constant into a constant pool that is a slice of a source string.
  * Returns the id of the constant, or 0 if any potential calls to resize fail.
  *
+ * @param arena The arena to allocate from.
  * @param pool The pool to insert the constant into.
  * @param start A pointer to the start of the constant.
  * @param length The length of the constant.
@@ -184,6 +185,7 @@ pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_p
  * constant pool. Returns the id of the constant, or 0 if any potential calls to
  * resize fail.
  *
+ * @param arena The arena to allocate from.
  * @param pool The pool to insert the constant into.
  * @param start A pointer to the start of the constant.
  * @param length The length of the constant.
@@ -195,6 +197,7 @@ pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_po
  * Insert a constant into a constant pool from memory that is constant. Returns
  * the id of the constant, or 0 if any potential calls to resize fail.
  *
+ * @param arena The arena to allocate from.
  * @param pool The pool to insert the constant into.
  * @param start A pointer to the start of the constant.
  * @param length The length of the constant.
diff --git a/src/util/pm_arena.c b/src/util/pm_arena.c
index 5f1050ed03..6b07e25210 100644
--- a/src/util/pm_arena.c
+++ b/src/util/pm_arena.c
@@ -24,7 +24,7 @@ static size_t
 pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) {
     size_t size = PM_ARENA_INITIAL_SIZE;
 
-    for (size_t i = PM_ARENA_GROWTH_INTERVAL; i <= arena->block_count; i += PM_ARENA_GROWTH_INTERVAL) {
+    for (size_t exp = PM_ARENA_GROWTH_INTERVAL; exp <= arena->block_count; exp += PM_ARENA_GROWTH_INTERVAL) {
         if (size < PM_ARENA_MAX_SIZE) size *= 2;
     }
 
@@ -36,7 +36,7 @@ pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) {
  * into the arena, and return it. Aborts on allocation failure.
  */
 static pm_arena_block_t *
-pm_arena_new_block(pm_arena_t *arena, size_t data_size, size_t initial_used) {
+pm_arena_block_new(pm_arena_t *arena, size_t data_size, size_t initial_used) {
     assert(initial_used <= data_size);
     pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(data_size));
 
@@ -63,58 +63,20 @@ void
 pm_arena_reserve(pm_arena_t *arena, size_t capacity) {
     if (capacity <= PM_ARENA_INITIAL_SIZE) return;
     if (arena->current != NULL && (arena->current->capacity - arena->current->used) >= capacity) return;
-
-    pm_arena_new_block(arena, capacity, 0);
+    pm_arena_block_new(arena, capacity, 0);
 }
 
 /**
- * Allocate memory from the arena. The returned memory is NOT zeroed. This
- * function is infallible — it aborts on allocation failure.
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Called when the current block has insufficient space.
  */
 void *
-pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
-    // Try current block.
-    if (arena->current != NULL) {
-        size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1);
-        size_t needed = used_aligned + size;
-
-        // Guard against overflow in the alignment or size arithmetic.
-        if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) {
-            arena->current->used = needed;
-            return arena->current->data + used_aligned;
-        }
-    }
-
-    // Allocate new block via xmalloc — memory is NOT zeroed.
-    // New blocks from xmalloc are max-aligned, so data[] starts aligned for
-    // any C type. No padding needed at the start.
+pm_arena_alloc_slow(pm_arena_t *arena, size_t size) {
     size_t block_data_size = pm_arena_next_block_size(arena, size);
-    pm_arena_block_t *block = pm_arena_new_block(arena, block_data_size, size);
-
+    pm_arena_block_t *block = pm_arena_block_new(arena, block_data_size, size);
     return block->data;
 }
 
-/**
- * Allocate zero-initialized memory from the arena. This function is infallible
- * — it aborts on allocation failure.
- */
-void *
-pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) {
-    void *ptr = pm_arena_alloc(arena, size, alignment);
-    memset(ptr, 0, size);
-    return ptr;
-}
-
-/**
- * Allocate memory from the arena and copy the given data into it.
- */
-void *
-pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) {
-    void *dst = pm_arena_alloc(arena, size, alignment);
-    memcpy(dst, src, size);
-    return dst;
-}
-
 /**
  * Free all blocks in the arena.
  */

From 83f54c2dc2fb6b139c32533ffa25181d5a23f380 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 10:39:37 -0400
Subject: [PATCH 10/28] Inline pm_node_list_append, pm_char_is_whitespace, and
 pm_char_is_inline_whitespace

---
 include/prism/node.h         | 19 +++++++++++++-
 include/prism/util/pm_char.h | 49 +++++++++++++++++++++++++-----------
 src/util/pm_char.c           | 19 +-------------
 3 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/include/prism/node.h b/include/prism/node.h
index 253f890055..f02f8ba892 100644
--- a/include/prism/node.h
+++ b/include/prism/node.h
@@ -17,6 +17,16 @@
 #define PM_NODE_LIST_FOREACH(list, index, node) \
     for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
 
+/**
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly — use pm_node_list_append instead.
+ *
+ * @param arena The arena to allocate from.
+ * @param list The list to append to.
+ * @param node The node to append.
+ */
+void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+
 /**
  * Append a new node onto the end of the node list.
  *
@@ -24,7 +34,14 @@
  * @param list The list to append to.
  * @param node The node to append.
  */
-void pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+static PRISM_FORCE_INLINE void
+pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+    if (list->size < list->capacity) {
+        list->nodes[list->size++] = node;
+    } else {
+        pm_node_list_append_slow(arena, list, node);
+    }
+}
 
 /**
  * Prepend a new node onto the beginning of the node list.
diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h
index 06728ba938..f93ba6fe32 100644
--- a/include/prism/util/pm_char.h
+++ b/include/prism/util/pm_char.h
@@ -12,6 +12,40 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+/** Bit flag for whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
+
+/** Bit flag for inline whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+
+/**
+ * A lookup table for classifying bytes. Each entry is a bitfield of
+ * PRISM_CHAR_BIT_* flags. Defined in pm_char.c.
+ */
+extern const uint8_t pm_byte_table[256];
+
+/**
+ * Returns true if the given character is a whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a whitespace character.
+ */
+static PRISM_FORCE_INLINE bool
+pm_char_is_whitespace(const uint8_t b) {
+    return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0;
+}
+
+/**
+ * Returns true if the given character is an inline whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is an inline whitespace character.
+ */
+static PRISM_FORCE_INLINE bool
+pm_char_is_inline_whitespace(const uint8_t b) {
+    return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0;
+}
+
 /**
  * Returns the number of characters at the start of the string that are
  * whitespace. Disallows searching past the given maximum number of characters.
@@ -156,21 +190,6 @@ size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
  */
 size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
 
-/**
- * Returns true if the given character is a whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is a whitespace character.
- */
-bool pm_char_is_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is an inline whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is an inline whitespace character.
- */
-bool pm_char_is_inline_whitespace(const uint8_t b);
 
 /**
  * Returns true if the given character is a binary digit.
diff --git a/src/util/pm_char.c b/src/util/pm_char.c
index ff8a88a687..3308d410b7 100644
--- a/src/util/pm_char.c
+++ b/src/util/pm_char.c
@@ -1,7 +1,5 @@
 #include "prism/util/pm_char.h"
 
-#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
-#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
 #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
 
 #define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
@@ -13,7 +11,7 @@
 #define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
 #define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
 
-static const uint8_t pm_byte_table[256] = {
+const uint8_t pm_byte_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -126,21 +124,6 @@ pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
     return (pm_byte_table[b] & kind) != 0;
 }
 
-/**
- * Returns true if the given character is a whitespace character.
- */
-bool
-pm_char_is_whitespace(const uint8_t b) {
-    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
-}
-
-/**
- * Returns true if the given character is an inline whitespace character.
- */
-bool
-pm_char_is_inline_whitespace(const uint8_t b) {
-    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
 
 /**
  * Scan through the string and return the number of characters at the start of

From a14431c2f1419e688403b754a6c34e04b4dea01e Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 13:12:32 -0400
Subject: [PATCH 11/28] Avoid redundant whitespace scanning in magic comment
 lexing

---
 src/prism.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/prism.c b/src/prism.c
index 25e11bab36..6f47f734cf 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -7587,11 +7587,17 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
         if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
             return false;
         }
+
+        // Advance start past leading whitespace so the main loop begins
+        // directly at the key, avoiding a redundant whitespace scan.
+        start += pm_strspn_whitespace(start, end - start);
     }
 
     cursor = start;
     while (cursor < end) {
-        while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+        if (indicator) {
+            cursor += pm_strspn_whitespace(cursor, end - cursor);
+        }
 
         const uint8_t *key_start = cursor;
         while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;

From b5b88bae803f793151c9c3515cf15e6cb8ecd8b9 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 14:50:22 -0400
Subject: [PATCH 12/28] Potentially skip whitespace scanning for speed

---
 src/prism.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 6f47f734cf..2d9c414e0f 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9894,17 +9894,12 @@ parser_lex(pm_parser_t *parser) {
             // stores back to parser->current.end.
             bool chomping = true;
             while (parser->current.end < parser->end && chomping) {
-                {
-                    static const uint8_t inline_whitespace[256] = {
-                        [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
-                    };
-                    const uint8_t *scan = parser->current.end;
-                    while (scan < parser->end && inline_whitespace[*scan]) scan++;
-                    if (scan > parser->current.end) {
-                        parser->current.end = scan;
-                        space_seen = true;
-                        continue;
-                    }
+                if (pm_char_is_inline_whitespace(*parser->current.end)) {
+                    const uint8_t *scan = parser->current.end + 1;
+                    while (scan < parser->end && pm_char_is_inline_whitespace(*scan)) scan++;
+                    parser->current.end = scan;
+                    space_seen = true;
+                    continue;
                 }
 
                 switch (*parser->current.end) {

From fbcd3fc69e47fa8064c3ce5612625d10d10b896d Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 16:57:44 -0400
Subject: [PATCH 13/28] Inline three more functions, and lower the hash
 threshold for locals

---
 include/prism/util/pm_char.h             | 29 +++++++++++++++---------
 include/prism/util/pm_line_offset_list.h | 18 ++++++++++++++-
 src/prism.c                              |  2 +-
 src/util/pm_char.c                       |  9 --------
 src/util/pm_line_offset_list.c           | 16 ++++++-------
 5 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h
index f93ba6fe32..516390b21c 100644
--- a/include/prism/util/pm_char.h
+++ b/include/prism/util/pm_char.h
@@ -46,6 +46,24 @@ pm_char_is_inline_whitespace(const uint8_t b) {
     return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0;
 }
 
+/**
+ * Returns the number of leading inline whitespace characters (space, tab, VT,
+ * FF, CR) in the string. Scans the byte table directly for use in hot paths.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are inline
+ *     whitespace.
+ */
+static PRISM_FORCE_INLINE size_t
+pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+    if (length <= 0) return 0;
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+    while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_INLINE_WHITESPACE)) size++;
+    return size;
+}
+
 /**
  * Returns the number of characters at the start of the string that are
  * whitespace. Disallows searching past the given maximum number of characters.
@@ -73,17 +91,6 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
  */
 size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset);
 
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are inline
- *     whitespace.
- */
-size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
-
 /**
  * Returns the number of characters at the start of the string that are decimal
  * digits. Disallows searching past the given maximum number of characters.
diff --git a/include/prism/util/pm_line_offset_list.h b/include/prism/util/pm_line_offset_list.h
index 2b14b060a1..62a52da4ec 100644
--- a/include/prism/util/pm_line_offset_list.h
+++ b/include/prism/util/pm_line_offset_list.h
@@ -64,6 +64,15 @@ void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, si
  */
 void pm_line_offset_list_clear(pm_line_offset_list_t *list);
 
+/**
+ * Append a new offset to the list (slow path with resize).
+ *
+ * @param arena The arena to allocate from.
+ * @param list The list to append to.
+ * @param cursor The offset to append.
+ */
+void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor);
+
 /**
  * Append a new offset to the list.
  *
@@ -71,7 +80,14 @@ void pm_line_offset_list_clear(pm_line_offset_list_t *list);
  * @param list The list to append to.
  * @param cursor The offset to append.
  */
-void pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor);
+static PRISM_FORCE_INLINE void
+pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+    if (list->size < list->capacity) {
+        list->offsets[list->size++] = cursor;
+    } else {
+        pm_line_offset_list_append_slow(arena, list, cursor);
+    }
+}
 
 /**
  * Returns the line of the given offset. If the offset is not in the list, the
diff --git a/src/prism.c b/src/prism.c
index 2d9c414e0f..c17397c7b7 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -773,7 +773,7 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan
 /**
  * The point at which the set of locals switches from being a list to a hash.
  */
-#define PM_LOCALS_HASH_THRESHOLD 9
+#define PM_LOCALS_HASH_THRESHOLD 5
 
 static void
 pm_locals_free(pm_locals_t *locals) {
diff --git a/src/util/pm_char.c b/src/util/pm_char.c
index 3308d410b7..fc41b90601 100644
--- a/src/util/pm_char.c
+++ b/src/util/pm_char.c
@@ -98,15 +98,6 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_
     return size;
 }
 
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
-    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
-
 /**
  * Returns the number of characters at the start of the string that are regexp
  * options. Disallows searching past the given maximum number of characters.
diff --git a/src/util/pm_line_offset_list.c b/src/util/pm_line_offset_list.c
index 41d3b2c81d..0648901e29 100644
--- a/src/util/pm_line_offset_list.c
+++ b/src/util/pm_line_offset_list.c
@@ -22,19 +22,17 @@ pm_line_offset_list_clear(pm_line_offset_list_t *list) {
 }
 
 /**
- * Append a new offset to the newline list.
+ * Append a new offset to the newline list (slow path: resize and store).
  */
 void
-pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
-    if (list->size == list->capacity) {
-        size_t new_capacity = (list->capacity * 3) / 2;
-        uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+    size_t new_capacity = (list->capacity * 3) / 2;
+    uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
 
-        memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t));
+    memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t));
 
-        list->offsets = new_offsets;
-        list->capacity = new_capacity;
-    }
+    list->offsets = new_offsets;
+    list->capacity = new_capacity;
 
     assert(list->size == 0 || cursor > list->offsets[list->size - 1]);
     list->offsets[list->size++] = cursor;

From 20e626ada5038e1ec0de00b1a8629318284337c1 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 22:35:55 -0400
Subject: [PATCH 14/28] Lex simple integer values as we are lexing

---
 include/prism/parser.h | 24 ++++++++---
 src/prism.c            | 92 ++++++++++++++++++++++++++++++++----------
 2 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index caa08538c6..60306a9974 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -793,12 +793,26 @@ struct pm_parser {
     pm_line_offset_list_t line_offsets;
 
     /**
-     * We want to add a flag to integer nodes that indicates their base. We only
-     * want to parse these once, but we don't have space on the token itself to
-     * communicate this information. So we store it here and pass it through
-     * when we find tokens that we need it for.
+     * State communicated from the lexer to the parser for integer tokens.
      */
-    pm_node_flags_t integer_base;
+    struct {
+        /**
+         * A flag indicating the base of the integer (binary, octal, decimal,
+         * hexadecimal). Set during lexing and read during node creation.
+         */
+        pm_node_flags_t base;
+
+        /**
+         * When lexing a decimal integer that fits in a uint32_t, we compute
+         * the value during lexing to avoid re-scanning the digits during
+         * parsing. If lexed is true, this holds the result and
+         * pm_integer_parse can be skipped.
+         */
+        uint32_t value;
+
+        /** Whether value holds a valid pre-computed integer. */
+        bool lexed;
+    } integer;
 
     /**
      * This string is used to pass information from the lexer to the parser. It
diff --git a/src/prism.c b/src/prism.c
index c17397c7b7..6a73adb2c4 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -4710,17 +4710,24 @@ pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token
         ((pm_integer_t) { 0 })
     );
 
-    pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
-    switch (base) {
-        case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
-        case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
-        case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
-        case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
-        default: assert(false && "unreachable"); break;
+    if (parser->integer.lexed) {
+        // The value was already computed during lexing.
+        node->value.value = parser->integer.value;
+        parser->integer.lexed = false;
+    } else {
+        pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+        switch (base) {
+            case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+            case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+            case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+            case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+            default: assert(false && "unreachable"); break;
+        }
+
+        pm_integer_parse(&node->value, integer_base, token->start, token->end);
+        pm_integer_arena_move(parser->arena, &node->value);
     }
 
-    pm_integer_parse(&node->value, integer_base, token->start, token->end);
-    pm_integer_arena_move(parser->arena, &node->value);
     return node;
 }
 
@@ -8112,7 +8119,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
                 break;
 
             // 0o1111 is an octal number
@@ -8126,7 +8133,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                 break;
 
             // 01111 is an octal number
@@ -8140,7 +8147,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
             case '6':
             case '7':
                 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                 break;
 
             // 0x1111 is a hexadecimal number
@@ -8154,7 +8161,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
                     pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
                 }
 
-                parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
+                parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
                 break;
 
             // 0.xxx is a float
@@ -8172,11 +8179,53 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
         }
     } else {
         // If it didn't start with a 0, then we'll lex as far as we can into a
-        // decimal number.
-        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+        // decimal number. We compute the integer value inline to avoid
+        // re-scanning the digits later in pm_integer_parse.
+        {
+            const uint8_t *cursor = parser->current.end;
+            const uint8_t *end = parser->end;
+            uint64_t value = (uint64_t) (cursor[-1] - '0');
+
+            bool has_underscore = false;
+            bool prev_underscore = false;
+            const uint8_t *invalid = NULL;
+
+            while (cursor < end) {
+                uint8_t c = *cursor;
+                if (c >= '0' && c <= '9') {
+                    if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
+                    prev_underscore = false;
+                    cursor++;
+                } else if (c == '_') {
+                    has_underscore = true;
+                    if (prev_underscore && invalid == NULL) invalid = cursor;
+                    prev_underscore = true;
+                    cursor++;
+                } else {
+                    break;
+                }
+            }
+
+            if (has_underscore) {
+                if (prev_underscore && invalid == NULL) invalid = cursor - 1;
+                pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
+            }
+
+            if (value <= UINT32_MAX) {
+                parser->integer.value = (uint32_t) value;
+                parser->integer.lexed = true;
+            }
+
+            parser->current.end = cursor;
+        }
 
         // Afterward, we'll lex as far as we can into an optional float suffix.
         type = lex_optional_float_suffix(parser, seen_e);
+
+        // If it turned out to be a float, the cached integer value is invalid.
+        if (type != PM_TOKEN_INTEGER) {
+            parser->integer.lexed = false;
+        }
     }
 
     // At this point we have a completed number, but we want to provide the user
@@ -8195,7 +8244,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
 static pm_token_type_t
 lex_numeric(pm_parser_t *parser) {
     pm_token_type_t type = PM_TOKEN_INTEGER;
-    parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+    parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+    parser->integer.lexed = false;
 
     if (parser->current.end < parser->end) {
         bool seen_e = false;
@@ -18302,22 +18352,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
             return node;
         }
         case PM_TOKEN_INTEGER: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
             return UP(pm_integer_node_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_IMAGINARY: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
             return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_RATIONAL: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
             return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
         }
         case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
-            pm_node_flags_t base = parser->integer_base;
+            pm_node_flags_t base = parser->integer.base;
             parser_lex(parser);
             return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
         }
@@ -22154,7 +22204,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
         .filepath = { 0 },
         .constant_pool = { 0 },
         .line_offsets = { 0 },
-        .integer_base = 0,
+        .integer = { 0 },
         .current_string = PM_STRING_EMPTY,
         .start_line = 1,
         .explicit_encoding = NULL,

From 2a1dc7930e19d6e2621c7a986ce33b7cd485e7b8 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 9 Mar 2026 23:01:04 -0400
Subject: [PATCH 15/28] Only dispatch to lex_optional_float_suffix when it is
 possible

---
 src/prism.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 6a73adb2c4..561149764c 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -8220,11 +8220,20 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
         }
 
         // Afterward, we'll lex as far as we can into an optional float suffix.
-        type = lex_optional_float_suffix(parser, seen_e);
+        // Guard the function call: the vast majority of decimal numbers are
+        // plain integers, so avoid the call when the next byte cannot start a
+        // float suffix.
+        {
+            uint8_t next = peek(parser);
+            if (next == '.' || next == 'e' || next == 'E') {
+                type = lex_optional_float_suffix(parser, seen_e);
 
-        // If it turned out to be a float, the cached integer value is invalid.
-        if (type != PM_TOKEN_INTEGER) {
-            parser->integer.lexed = false;
+                // If it turned out to be a float, the cached integer value is
+                // invalid.
+                if (type != PM_TOKEN_INTEGER) {
+                    parser->integer.lexed = false;
+                }
+            }
         }
     }
 

From a52c2bd2c0a8b410851deac6bb212c7364ead9d8 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 04:19:53 +0000
Subject: [PATCH 16/28] Optimize constant pool hash for short strings

---
 src/util/pm_constant_pool.c | 63 ++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c
index c8c27a9618..4822130073 100644
--- a/src/util/pm_constant_pool.c
+++ b/src/util/pm_constant_pool.c
@@ -84,37 +84,48 @@ pm_constant_pool_hash(const uint8_t *start, size_t length) {
     static const uint64_t secret = 0x517cc1b727220a95ULL;
     uint64_t hash = (uint64_t) length;
 
-    const uint8_t *ptr = start;
-    size_t remaining = length;
-
-    while (remaining >= 8) {
+    if (length <= 8) {
+        // Short strings: read first and last 4 bytes (overlapping for len < 8).
+        // This covers the majority of Ruby identifiers with a single multiply.
+        if (length >= 4) {
+            uint32_t a, b;
+            memcpy(&a, start, 4);
+            memcpy(&b, start + length - 4, 4);
+            hash ^= (uint64_t) a | ((uint64_t) b << 32);
+        } else if (length > 0) {
+            hash ^= (uint64_t) start[0] | ((uint64_t) start[length >> 1] << 8) | ((uint64_t) start[length - 1] << 16);
+        }
+        hash *= secret;
+    } else if (length <= 16) {
+        // Medium strings: read first and last 8 bytes (overlapping).
+        // Two multiplies instead of the up to four the old loop needed here.
         uint64_t word;
-        memcpy(&word, ptr, 8);
+        memcpy(&word, start, 8);
         hash ^= word;
         hash *= secret;
-        ptr += 8;
-        remaining -= 8;
-    }
-
-    if (remaining >= 4) {
-        uint32_t word;
-        memcpy(&word, ptr, 4);
-        hash ^= (uint64_t) word;
-        hash *= secret;
-        ptr += 4;
-        remaining -= 4;
-    }
-
-    if (remaining >= 2) {
-        hash ^= (uint64_t) ptr[0] | ((uint64_t) ptr[1] << 8);
+        memcpy(&word, start + length - 8, 8);
+        hash ^= word;
         hash *= secret;
-        ptr += 2;
-        remaining -= 2;
-    }
+    } else {
+        const uint8_t *ptr = start;
+        size_t remaining = length;
+
+        while (remaining >= 8) {
+            uint64_t word;
+            memcpy(&word, ptr, 8);
+            hash ^= word;
+            hash *= secret;
+            ptr += 8;
+            remaining -= 8;
+        }
 
-    if (remaining >= 1) {
-        hash ^= (uint64_t) ptr[0];
-        hash *= secret;
+        if (remaining > 0) {
+            // Read the last 8 bytes (overlapping with already-processed data).
+            uint64_t word;
+            memcpy(&word, start + length - 8, 8);
+            hash ^= word;
+            hash *= secret;
+        }
     }
 
     hash ^= hash >> 32;

From dcb2e8c924026133e93d8e0910736c8272d4dd29 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 09:26:11 -0400
Subject: [PATCH 17/28] Include string in constant pool entry to avoid chasing
 pointer

---
 include/prism/util/pm_constant_pool.h |  9 +++++++
 src/prism.c                           | 37 ++++++++++-----------------
 src/util/pm_constant_pool.c           | 14 +++++-----
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h
index fa74ee7b39..c527343273 100644
--- a/include/prism/util/pm_constant_pool.h
+++ b/include/prism/util/pm_constant_pool.h
@@ -113,6 +113,15 @@ typedef struct {
 
     /** The hash of the bucket. */
     uint32_t hash;
+
+    /**
+     * A pointer to the start of the string, stored directly in the bucket to
+     * avoid a pointer chase to the constants array during probing.
+     */
+    const uint8_t *start;
+
+    /** The length of the string. */
+    size_t length;
 } pm_constant_pool_bucket_t;
 
 /** A constant in the pool which effectively stores a string. */
diff --git a/src/prism.c b/src/prism.c
index 561149764c..738231a2ef 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -22233,34 +22233,25 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
         .warn_mismatched_indentation = true
     };
 
-    // Pre-size the arenas based on input size to reduce the number of block
-    // allocations (and the kernel page zeroing they trigger). The ratios were
-    // measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
-    // The reserve call is a no-op when the capacity is at or below the default
-    // arena block size, so small inputs don't waste an extra allocation.
+    /* Pre-size the arenas based on input size to reduce the number of block
+     * allocations (and the kernel page zeroing they trigger). The empirically
+     * measured ratios (AST ~3.3x input, metadata ~1.1x input) are rounded up to
+     * 4x and 1.25x. The reserve call is a no-op when the capacity is at or below
+     * the default arena block size, so small inputs don't waste an allocation. */
     if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
     if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
 
-    // Initialize the constant pool. We're going to completely guess as to the
-    // number of constants that we'll need based on the size of the input. The
-    // ratio we chose here is actually less arbitrary than you might think.
-    //
-    // We took ~50K Ruby files and measured the size of the file versus the
-    // number of constants that were found in those files. Then we found the
-    // average and standard deviation of the ratios of constants/bytesize. Then
-    // we added 1.34 standard deviations to the average to get a ratio that
-    // would fit 75% of the files (for a two-tailed distribution). This works
-    // because there was about a 0.77 correlation and the distribution was
-    // roughly normal.
-    //
-    // This ratio will need to change if we add more constants to the constant
-    // pool for another node type.
-    uint32_t constant_size = ((uint32_t) size) / 95;
+    /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
+     * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
+     * We use 120 as a balance between over-allocation waste and resize
+     * frequency. Resizes are cheap with arena allocation, so we lean toward
+     * under-estimating. */
+    uint32_t constant_size = ((uint32_t) size) / 120;
     pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
 
-    // Initialize the newline list. Similar to the constant pool, we're going to
-    // guess at the number of newlines that we'll need based on the size of the
-    // input.
+    /* Initialize the line offset list. Similar to the constant pool, we are
+     * going to estimate the number of newlines that we will need based on the
+     * size of the input. */
     size_t newline_size = size / 22;
     pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
 
diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c
index 4822130073..74e2a12524 100644
--- a/src/util/pm_constant_pool.c
+++ b/src/util/pm_constant_pool.c
@@ -239,8 +239,7 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
     pm_constant_pool_bucket_t *bucket;
 
     while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
-        pm_constant_t *constant = &pool->constants[bucket->id - 1];
-        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+        if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
             return bucket->id;
         }
 
@@ -270,9 +269,7 @@ pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8
         // If there is a collision, then we need to check if the content is the
         // same as the content we are trying to insert. If it is, then we can
         // return the id of the existing constant.
-        pm_constant_t *constant = &pool->constants[bucket->id - 1];
-
-        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+        if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
             // Since we have found a match, we need to check if this is
             // attempting to insert a shared or an owned constant. We want to
             // prefer shared constants since they don't require allocations.
@@ -280,8 +277,9 @@ pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8
                 // If we're attempting to insert a shared constant and the
                 // existing constant is owned, then we can replace it with the
                 // shared constant to prefer non-owned references.
-                constant->start = start;
+                bucket->start = start;
                 bucket->type = (unsigned int) (type & 0x3);
+                pool->constants[bucket->id - 1].start = start;
             }
 
             return bucket->id;
@@ -298,7 +296,9 @@ pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8
     *bucket = (pm_constant_pool_bucket_t) {
         .id = (unsigned int) (id & 0x3fffffff),
         .type = (unsigned int) (type & 0x3),
-        .hash = hash
+        .hash = hash,
+        .start = start,
+        .length = length
     };
 
     pool->constants[id - 1] = (pm_constant_t) {

From c464b298aa44de699a66041d10a344d807be7f84 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 13:26:18 -0400
Subject: [PATCH 18/28] SIMD/SWAR for strpbrk

---
 include/prism/defines.h |  12 ++
 src/prism.c             |  59 ++--------
 src/util/pm_strpbrk.c   | 235 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 237 insertions(+), 69 deletions(-)

diff --git a/include/prism/defines.h b/include/prism/defines.h
index 017f0b86e0..0c131dbaed 100644
--- a/include/prism/defines.h
+++ b/include/prism/defines.h
@@ -276,6 +276,18 @@
     #define PRISM_UNLIKELY(x) (x)
 #endif
 
+/**
+ * Platform detection for SIMD / fast-path implementations. At most one of
+ * these macros is defined, selecting the best available vectorization strategy.
+ */
+#if (defined(__aarch64__) && defined(__ARM_NEON)) || defined(_M_ARM64)
+    #define PRISM_HAS_NEON
+#elif (defined(__x86_64__) && defined(__SSSE3__)) || defined(_M_X64)
+    #define PRISM_HAS_SSSE3
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    #define PRISM_HAS_SWAR
+#endif
+
 /**
  * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning
  * to find the first non-matching byte in a word.
diff --git a/src/prism.c b/src/prism.c
index 738231a2ef..61a0417b4c 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -1783,16 +1783,14 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
  * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8)
  * with a byte-at-a-time loop.
  *
- * Up to four optimized implementations are selected at compile time, with a
+ * One of three optimized implementations is selected at compile time, with a
  * no-op fallback for unsupported platforms:
  *   1. NEON — processes 16 bytes per iteration on aarch64.
- *   2. SSE2 — processes 16 bytes per iteration on x86-64.
- *   3. WASM SIMD — processes 16 bytes per iteration on WebAssembly.
- *   4. SWAR — little-endian fallback, processes 8 bytes per iteration.
- *   5. No-op — returns 0; the caller's byte-at-a-time loop handles everything.
+ *   2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ *   3. SWAR — little-endian fallback, processes 8 bytes per iteration.
  */
 
-#if defined(__aarch64__) && defined(__ARM_NEON)
+#if defined(PRISM_HAS_NEON)
 #include <arm_neon.h>
 
 static inline size_t
@@ -1844,8 +1842,8 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
     return (size_t) (cursor - start);
 }
 
-#elif defined(__x86_64__) && defined(__SSE2__)
-#include <emmintrin.h>
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
 
 static inline size_t
 scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
@@ -1886,54 +1884,11 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
     return (size_t) (cursor - start);
 }
 
-#elif defined(__wasm_simd128__)
-#include <wasm_simd128.h>
-
-static inline size_t
-scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
-    const uint8_t *cursor = start;
-
-    while (cursor + 16 <= end) {
-        v128_t v = wasm_v128_load(cursor);
-
-        // Range checks via subtract-and-unsigned-compare: (v - lo) < count
-        // is true iff v is in [lo, lo + count). One subtract + one compare
-        // per range instead of two comparisons + AND.
-
-        // Fold case: OR with 0x20 maps A-Z to a-z.
-        v128_t lowered = wasm_v128_or(v, wasm_u8x16_splat(0x20));
-        v128_t letter = wasm_u8x16_lt(
-            wasm_i8x16_sub(lowered, wasm_u8x16_splat(0x61)),
-            wasm_u8x16_splat(0x1A));
-
-        v128_t digit = wasm_u8x16_lt(
-            wasm_i8x16_sub(v, wasm_u8x16_splat(0x30)),
-            wasm_u8x16_splat(0x0A));
-
-        v128_t underscore = wasm_i8x16_eq(v, wasm_u8x16_splat(0x5F));
-
-        v128_t ident = wasm_v128_or(wasm_v128_or(letter, digit), underscore);
-
-        // Fast path: if all 16 bytes are identifier chars, advance.
-        if (wasm_i8x16_all_true(ident)) {
-            cursor += 16;
-            continue;
-        }
-
-        // Extract bitmask only on the exit path to find the first non-match.
-        uint32_t mask = wasm_i8x16_bitmask(ident);
-        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
-        return (size_t) (cursor - start);
-    }
-
-    return (size_t) (cursor - start);
-}
-
 // The SWAR path uses pm_ctzll to find the first non-matching byte within a
 // word, which only yields the correct byte index on little-endian targets.
 // We gate on a positive little-endian check so that unknown-endianness
 // platforms safely fall through to the no-op fallback.
-#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#elif defined(PRISM_HAS_SWAR)
 
 /**
  * Portable SWAR fallback — processes 8 bytes per iteration.
diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
index ddd6ef0ead..b1e4c9c6de 100644
--- a/src/util/pm_strpbrk.c
+++ b/src/util/pm_strpbrk.c
@@ -29,13 +29,214 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l
     parser->explicit_encoding = parser->encoding;
 }
 
+/**
+ * Scan forward through ASCII bytes looking for a byte that is in the given
+ * charset. Returns true if a match was found, storing its offset in *index.
+ * Returns false if no match was found, storing the number of ASCII bytes
+ * consumed in *index (so the caller can skip past them).
+ *
+ * All charset characters must be ASCII (< 0x80). The scanner stops at non-ASCII
+ * bytes, returning control to the caller's encoding-aware loop.
+ *
+ * One of three optimized implementations is selected at compile time, with a
+ * no-op fallback for unsupported platforms:
+ *   1. NEON — processes 16 bytes per iteration on aarch64.
+ *   2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ *   3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+static inline bool
+scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    // Build nibble-based lookup tables from the charset. All breakpoint
+    // characters are ASCII (< 0x80), so they fit within high nibbles 0-7.
+    //
+    // For each charset byte c, we set bit (1 << (c >> 4)) in low_lut[c & 0xF].
+    // high_lut[h] = (1 << h) for each high nibble h present in the charset.
+    // A source byte s matches iff (low_lut[s & 0xF] & high_lut[s >> 4]) != 0.
+    uint8_t low_arr[16] = { 0 };
+    uint8_t high_arr[16] = { 0 };
+    uint64_t table[4] = { 0 };
+
+    for (const uint8_t *c = charset; *c != '\0'; c++) {
+        low_arr[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
+        high_arr[*c >> 4] = (uint8_t) (1 << (*c >> 4));
+        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
+    }
+
+    uint8x16_t low_lut = vld1q_u8(low_arr);
+    uint8x16_t high_lut = vld1q_u8(high_arr);
+    uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+    uint8x16_t mask_80 = vdupq_n_u8(0x80);
+
+    size_t idx = 0;
+
+    while (idx + 16 <= maximum) {
+        uint8x16_t v = vld1q_u8(source + idx);
+
+        // If any byte has the high bit set, we have non-ASCII data.
+        // Return to let the caller's encoding-aware loop handle it.
+        if (vmaxvq_u8(vandq_u8(v, mask_80)) != 0) break;
+
+        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+        uint8x16_t matched = vtstq_u8(lo_class, hi_class);
+
+        if (vmaxvq_u8(matched) == 0) {
+            idx += 16;
+            continue;
+        }
+
+        // Find the position of the first matching byte.
+        uint64_t lo64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 0);
+        if (lo64 != 0) {
+            *index = idx + pm_ctzll(lo64) / 8;
+            return true;
+        }
+        uint64_t hi64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 1);
+        *index = idx + 8 + pm_ctzll(hi64) / 8;
+        return true;
+    }
+
+    // Scalar tail: the final < 16 bytes, or the ASCII run before a non-ASCII byte.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+static inline bool
+scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    // Build nibble-based lookup tables and bitmap table in a single pass.
+    uint8_t low_arr[16] = { 0 };
+    uint8_t high_arr[16] = { 0 };
+    uint64_t table[4] = { 0 };
+
+    for (const uint8_t *c = charset; *c != '\0'; c++) {
+        low_arr[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
+        high_arr[*c >> 4] = (uint8_t) (1 << (*c >> 4));
+        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
+    }
+
+    __m128i low_lut = _mm_loadu_si128((const __m128i *) low_arr);
+    __m128i high_lut = _mm_loadu_si128((const __m128i *) high_arr);
+    __m128i mask_0f = _mm_set1_epi8(0x0F);
+
+    size_t idx = 0;
+
+    while (idx + 16 <= maximum) {
+        __m128i v = _mm_loadu_si128((const __m128i *) (source + idx));
+
+        // If any byte has the high bit set, stop.
+        if (_mm_movemask_epi8(v) != 0) break;
+
+        // Nibble-based classification using pshufb (SSSE3), same as NEON
+        // vqtbl1q_u8. A byte matches iff (low_lut[lo_nib] & high_lut[hi_nib]) != 0.
+        __m128i lo_class = _mm_shuffle_epi8(low_lut, _mm_and_si128(v, mask_0f));
+        __m128i hi_class = _mm_shuffle_epi8(high_lut, _mm_and_si128(_mm_srli_epi16(v, 4), mask_0f));
+        __m128i matched = _mm_and_si128(lo_class, hi_class);
+
+        // Check if any byte matched.
+        int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(matched, _mm_setzero_si128()));
+
+        if (mask == 0xFFFF) {
+            // All bytes were zero — no match in this chunk.
+            idx += 16;
+            continue;
+        }
+
+        // Find the first matching byte (first non-zero in matched).
+        *index = idx + pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return true;
+    }
+
+    // Scalar tail.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#elif defined(PRISM_HAS_SWAR)
+
+static inline bool
+scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    // Build a 256-bit lookup table (one bit per possible byte value).
+    uint64_t table[4] = { 0 };
+    for (const uint8_t *c = charset; *c != '\0'; c++) {
+        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
+    }
+
+    static const uint64_t highs = 0x8080808080808080ULL;
+    size_t idx = 0;
+
+    while (idx + 8 <= maximum) {
+        uint64_t word;
+        memcpy(&word, source + idx, 8);
+
+        // Bail on any non-ASCII byte.
+        if (word & highs) break;
+
+        // Check each byte against the charset table.
+        for (size_t j = 0; j < 8; j++) {
+            uint8_t byte = source[idx + j];
+            if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+                *index = idx + j;
+                return true;
+            }
+        }
+
+        idx += 8;
+    }
+
+    // Scalar tail.
+    while (idx < maximum && source[idx] < 0x80) {
+        uint8_t byte = source[idx];
+        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            *index = idx;
+            return true;
+        }
+        idx++;
+    }
+
+    *index = idx;
+    return false;
+}
+
+#else
+
+static inline bool
+scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) {
+    *index = 0;
+    return false;
+}
+
+#endif
+
 /**
  * This is the default path.
  */
 static inline const uint8_t *
-pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
     while (index < maximum) {
         if (strchr((const char *) charset, source[index]) != NULL) {
             return source + index;
@@ -73,9 +274,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
  * This is the path when the encoding is ASCII-8BIT.
  */
 static inline const uint8_t *
-pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
-
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
     while (index < maximum) {
         if (strchr((const char *) charset, source[index]) != NULL) {
             return source + index;
@@ -92,8 +291,7 @@ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t
  * This is the slow path that does care about the encoding.
  */
 static inline const uint8_t *
-pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
     const pm_encoding_t *encoding = parser->encoding;
 
     while (index < maximum) {
@@ -135,8 +333,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
  * the encoding only supports single-byte characters.
  */
 static inline const uint8_t *
-pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
-    size_t index = 0;
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
     const pm_encoding_t *encoding = parser->encoding;
 
     while (index < maximum) {
@@ -192,15 +389,19 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
  */
 const uint8_t *
 pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
-    if (length <= 0) {
-        return NULL;
-    } else if (!parser->encoding_changed) {
-        return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
+    if (length <= 0) return NULL;
+
+    size_t maximum = (size_t) length;
+    size_t index = 0;
+    if (scan_strpbrk_ascii(source, maximum, charset, &index)) return source + index;
+
+    if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, index, maximum, validate);
     } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
-        return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
+        return pm_strpbrk_ascii_8bit(parser, source, charset, index, maximum, validate);
     } else if (parser->encoding->multibyte) {
-        return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
+        return pm_strpbrk_multi_byte(parser, source, charset, index, maximum, validate);
     } else {
-        return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
+        return pm_strpbrk_single_byte(parser, source, charset, index, maximum, validate);
     }
 }

From 559f24fae05e399056cf88fd2f498691e0b0f117 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 15:08:59 -0400
Subject: [PATCH 19/28] Fix a bug where we removed the \r warning

---
 src/prism.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 61a0417b4c..9556161342 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9908,12 +9908,17 @@ parser_lex(pm_parser_t *parser) {
             // stores back to parser->current.end.
             bool chomping = true;
             while (parser->current.end < parser->end && chomping) {
-                if (pm_char_is_inline_whitespace(*parser->current.end)) {
-                    const uint8_t *scan = parser->current.end + 1;
-                    while (scan < parser->end && pm_char_is_inline_whitespace(*scan)) scan++;
-                    parser->current.end = scan;
-                    space_seen = true;
-                    continue;
+                {
+                    static const uint8_t inline_whitespace[256] = {
+                        [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
+                    };
+                    const uint8_t *scan = parser->current.end;
+                    while (scan < parser->end && inline_whitespace[*scan]) scan++;
+                    if (scan > parser->current.end) {
+                        parser->current.end = scan;
+                        space_seen = true;
+                        continue;
+                    }
                 }
 
                 switch (*parser->current.end) {

From fc0ec4c9f487f37b6dfa32331b927bfbc66c26e2 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 20:06:11 -0400
Subject: [PATCH 20/28] Use a bloom filter to quickly reject local lookups

---
 include/prism/parser.h | 7 +++++++
 src/prism.c            | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index 60306a9974..b7fe1a3c97 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -556,6 +556,13 @@ typedef struct pm_locals {
     /** The capacity of the local variables set. */
     uint32_t capacity;
 
+    /**
+     * A bloom filter over constant IDs stored in this set. Used to quickly
+     * reject lookups for names that are definitely not present, avoiding the
+     * cost of a linear scan or hash probe.
+     */
+    uint32_t bloom;
+
     /** The nullable allocated memory for the local variables in the set. */
     pm_local_t *locals;
 } pm_locals_t;
diff --git a/src/prism.c b/src/prism.c
index 9556161342..dd56c71c64 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -855,6 +855,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint
         pm_locals_resize(locals);
     }
 
+    locals->bloom |= (1u << (name & 31));
+
     if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
         for (uint32_t index = 0; index < locals->capacity; index++) {
             pm_local_t *local = &locals->locals[index];
@@ -907,6 +909,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint
  */
 static uint32_t
 pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+    if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
+
     if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
         for (uint32_t index = 0; index < locals->size; index++) {
             pm_local_t *local = &locals->locals[index];

From 46656b2fd5999ba6302b933394b10d59d3b15077 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 10 Mar 2026 22:18:26 -0400
Subject: [PATCH 21/28] Cache strpbrk lookup tables

---
 include/prism/parser.h | 21 +++++++++
 src/util/pm_strpbrk.c  | 96 +++++++++++++++++++++++-------------------
 2 files changed, 74 insertions(+), 43 deletions(-)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index b7fe1a3c97..b68d56b564 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -962,6 +962,27 @@ struct pm_parser {
      * toggled with a magic comment.
      */
     bool warn_mismatched_indentation;
+
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
+    /**
+     * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding
+     * the nibble-based tables on every call when the charset hasn't changed
+     * (which is the common case during string/regex/list lexing).
+     */
+    struct {
+        /** The cached charset (null-terminated, max 11 chars + NUL). */
+        uint8_t charset[12];
+
+        /** Nibble-based low lookup table for SIMD matching. */
+        uint8_t low_lut[16];
+
+        /** Nibble-based high lookup table for SIMD matching. */
+        uint8_t high_lut[16];
+
+        /** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */
+        uint64_t table[4];
+    } strpbrk_cache;
+#endif
 };
 
 #endif
diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
index b1e4c9c6de..f9b5bc85eb 100644
--- a/src/util/pm_strpbrk.c
+++ b/src/util/pm_strpbrk.c
@@ -45,29 +45,52 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l
  *   3. SWAR — little-endian fallback, processes 8 bytes per iteration.
  */
 
-#if defined(PRISM_HAS_NEON)
-#include <arm_neon.h>
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
 
-static inline bool
-scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
-    // Build nibble-based lookup tables from the charset. All breakpoint
-    // characters are ASCII (< 0x80), so they fit within high nibbles 0-7.
-    //
-    // For each charset byte c, we set bit (1 << (c >> 4)) in low_lut[c & 0xF].
-    // high_lut[h] = (1 << h) for each high nibble h present in the charset.
-    // A source byte s matches iff (low_lut[s & 0xF] & high_lut[s >> 4]) != 0.
-    uint8_t low_arr[16] = { 0 };
-    uint8_t high_arr[16] = { 0 };
-    uint64_t table[4] = { 0 };
+/**
+ * Update the cached strpbrk lookup tables if the charset has changed. The
+ * parser caches the last charset's precomputed tables so that repeated calls
+ * with the same breakpoints (the common case during string/regex/list lexing)
+ * skip table construction entirely.
+ *
+ * Builds three structures:
+ *   - low_lut/high_lut: nibble-based lookup tables for SIMD matching (NEON/SSSE3)
+ *   - table: 256-bit bitmap for scalar fallback matching (all platforms)
+ */
+static inline void
+pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) {
+    // The cache key is the full 12-byte charset buffer. Since it is always
+    // NUL-padded, a fixed-size comparison covers both content and length.
+    if (memcmp(parser->strpbrk_cache.charset, charset, sizeof(parser->strpbrk_cache.charset)) == 0) return;
+
+    memset(parser->strpbrk_cache.low_lut, 0, sizeof(parser->strpbrk_cache.low_lut));
+    memset(parser->strpbrk_cache.high_lut, 0, sizeof(parser->strpbrk_cache.high_lut));
+    memset(parser->strpbrk_cache.table, 0, sizeof(parser->strpbrk_cache.table));
 
+    size_t charset_len = 0;
     for (const uint8_t *c = charset; *c != '\0'; c++) {
-        low_arr[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
-        high_arr[*c >> 4] = (uint8_t) (1 << (*c >> 4));
-        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
+        parser->strpbrk_cache.low_lut[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
+        parser->strpbrk_cache.high_lut[*c >> 4] = (uint8_t) (1 << (*c >> 4));
+        parser->strpbrk_cache.table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
+        charset_len++;
     }
 
-    uint8x16_t low_lut = vld1q_u8(low_arr);
-    uint8x16_t high_lut = vld1q_u8(high_arr);
+    // Store the new charset key, NUL-padded to the full buffer size.
+    memcpy(parser->strpbrk_cache.charset, charset, charset_len + 1);
+    memset(parser->strpbrk_cache.charset + charset_len + 1, 0, sizeof(parser->strpbrk_cache.charset) - charset_len - 1);
+}
+
+#endif
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+static inline bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
+
+    uint8x16_t low_lut = vld1q_u8(parser->strpbrk_cache.low_lut);
+    uint8x16_t high_lut = vld1q_u8(parser->strpbrk_cache.high_lut);
     uint8x16_t mask_0f = vdupq_n_u8(0x0F);
     uint8x16_t mask_80 = vdupq_n_u8(0x80);
 
@@ -103,7 +126,7 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
     // Scalar tail for remaining < 16 ASCII bytes.
     while (idx < maximum && source[idx] < 0x80) {
         uint8_t byte = source[idx];
-        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
             *index = idx;
             return true;
         }
@@ -118,20 +141,11 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
 #include <tmmintrin.h>
 
 static inline bool
-scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
-    // Build nibble-based lookup tables and bitmap table in a single pass.
-    uint8_t low_arr[16] = { 0 };
-    uint8_t high_arr[16] = { 0 };
-    uint64_t table[4] = { 0 };
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
 
-    for (const uint8_t *c = charset; *c != '\0'; c++) {
-        low_arr[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
-        high_arr[*c >> 4] = (uint8_t) (1 << (*c >> 4));
-        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
-    }
-
-    __m128i low_lut = _mm_loadu_si128((const __m128i *) low_arr);
-    __m128i high_lut = _mm_loadu_si128((const __m128i *) high_arr);
+    __m128i low_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.low_lut);
+    __m128i high_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.high_lut);
     __m128i mask_0f = _mm_set1_epi8(0x0F);
 
     size_t idx = 0;
@@ -165,7 +179,7 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
     // Scalar tail.
     while (idx < maximum && source[idx] < 0x80) {
         uint8_t byte = source[idx];
-        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
             *index = idx;
             return true;
         }
@@ -179,12 +193,8 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
 #elif defined(PRISM_HAS_SWAR)
 
 static inline bool
-scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
-    // Build a 256-bit lookup table (one bit per ASCII value).
-    uint64_t table[4] = { 0 };
-    for (const uint8_t *c = charset; *c != '\0'; c++) {
-        table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F);
-    }
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
 
     static const uint64_t highs = 0x8080808080808080ULL;
     size_t idx = 0;
@@ -199,7 +209,7 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
         // Check each byte against the charset table.
         for (size_t j = 0; j < 8; j++) {
             uint8_t byte = source[idx + j];
-            if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+            if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
                 *index = idx + j;
                 return true;
             }
@@ -211,7 +221,7 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
     // Scalar tail.
     while (idx < maximum && source[idx] < 0x80) {
         uint8_t byte = source[idx];
-        if (table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+        if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
             *index = idx;
             return true;
         }
@@ -225,7 +235,7 @@ scan_strpbrk_ascii(const uint8_t *source, size_t maximum, const uint8_t *charset
 #else
 
 static inline bool
-scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) {
+scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) {
     *index = 0;
     return false;
 }
@@ -393,7 +403,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
 
     size_t maximum = (size_t) length;
     size_t index = 0;
-    if (scan_strpbrk_ascii(source, maximum, charset, &index)) return source + index;
+    if (scan_strpbrk_ascii(parser, source, maximum, charset, &index)) return source + index;
 
     if (!parser->encoding_changed) {
         return pm_strpbrk_utf8(parser, source, charset, index, maximum, validate);

From b2658d2262f13774c4beb9feffd7b3c4f7aadecc Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 16 Mar 2026 23:16:41 -0400
Subject: [PATCH 22/28] Fix up rebase errors

---
 src/prism.c              | 10 +++++-----
 src/regexp.c             |  5 +++--
 src/util/pm_char.c       |  8 --------
 src/util/pm_strpbrk.c    |  8 ++++++++
 templates/src/node.c.erb |  5 +++--
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index dd56c71c64..d98f82d8fe 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -8851,7 +8851,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_
     }
 
     if (width == 1) {
-        if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+        if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
         escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
     } else if (width > 1) {
         // Valid multibyte character.  Just ignore escape.
@@ -9168,7 +9168,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
-                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -9227,7 +9227,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
-                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -9281,7 +9281,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
-                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
                     return;
@@ -9289,7 +9289,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
         }
         case '\r': {
             if (peek_offset(parser, 1) == '\n') {
-                pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
+                pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
                 parser->current.end += 2;
                 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
                 return;
diff --git a/src/regexp.c b/src/regexp.c
index f864e187c9..df8bb69b21 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -128,7 +128,7 @@ pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const ui
         loc_length = (uint32_t) (parser->node_end - parser->node_start);
     }
 
-    pm_diagnostic_list_append_format(&pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message);
+    pm_diagnostic_list_append_format(&pm->metadata_arena, &pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message);
 }
 
 /**
@@ -146,7 +146,7 @@ pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const ui
             loc_start__ = (uint32_t) ((parser_)->node_start - pm__->start); \
             loc_length__ = (uint32_t) ((parser_)->node_end - (parser_)->node_start); \
         } \
-        pm_diagnostic_list_append_format(&pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \
+        pm_diagnostic_list_append_format(&pm__->metadata_arena, &pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \
     } while (0)
 
 /**
@@ -1397,6 +1397,7 @@ pm_regexp_format_for_error(pm_buffer_t *buffer, const pm_encoding_t *encoding, c
  */
 #define PM_REGEXP_ENCODING_ERROR(parser, diag_id, ...) \
     pm_diagnostic_list_append_format( \
+        &(parser)->parser->metadata_arena, \
         &(parser)->parser->error_list, \
         (uint32_t) ((parser)->node_start - (parser)->parser->start), \
         (uint32_t) ((parser)->node_end - (parser)->node_start), \
diff --git a/src/util/pm_char.c b/src/util/pm_char.c
index fc41b90601..ac283af356 100644
--- a/src/util/pm_char.c
+++ b/src/util/pm_char.c
@@ -107,14 +107,6 @@ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
     return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
 }
 
-/**
- * Returns true if the given character matches the given kind.
- */
-static inline bool
-pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
-    return (pm_byte_table[b] & kind) != 0;
-}
-
 
 /**
  * Scan through the string and return the number of characters at the start of
diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
index f9b5bc85eb..496739c9f8 100644
--- a/src/util/pm_strpbrk.c
+++ b/src/util/pm_strpbrk.c
@@ -67,6 +67,14 @@ pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) {
     memset(parser->strpbrk_cache.high_lut, 0, sizeof(parser->strpbrk_cache.high_lut));
     memset(parser->strpbrk_cache.table, 0, sizeof(parser->strpbrk_cache.table));
 
+    // Always include NUL in the tables. The slow path uses strchr, which
+    // always matches NUL (it finds the C string terminator), so NUL is
+    // effectively always a breakpoint. Replicating that here lets the fast
+    // scanner handle NUL at full speed instead of bailing to the slow path.
+    parser->strpbrk_cache.low_lut[0x00] |= (uint8_t) (1 << 0);
+    parser->strpbrk_cache.high_lut[0x00] = (uint8_t) (1 << 0);
+    parser->strpbrk_cache.table[0] |= (uint64_t) 1;
+
     size_t charset_len = 0;
     for (const uint8_t *c = charset; *c != '\0'; c++) {
         parser->strpbrk_cache.low_lut[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4));
diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb
index df59545129..93ea275a54 100644
--- a/templates/src/node.c.erb
+++ b/templates/src/node.c.erb
@@ -39,10 +39,11 @@ pm_node_list_grow(pm_arena_t *arena, pm_node_list_t *list, size_t size) {
 }
 
 /**
- * Append a new node onto the end of the node list.
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly - use pm_node_list_append instead.
  */
 void
-pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
     pm_node_list_grow(arena, list, 1);
     list->nodes[list->size++] = node;
 }

From 5fe0448219e9756832b9447111a452620003b99c Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 17 Mar 2026 06:48:50 -0400
Subject: [PATCH 23/28] More correctly detect SIMD on MSVC

---
 include/prism/defines.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/prism/defines.h b/include/prism/defines.h
index 0c131dbaed..d666582b17 100644
--- a/include/prism/defines.h
+++ b/include/prism/defines.h
@@ -280,9 +280,9 @@
  * Platform detection for SIMD / fast-path implementations. At most one of
  * these macros is defined, selecting the best available vectorization strategy.
  */
-#if (defined(__aarch64__) && defined(__ARM_NEON)) || defined(_M_ARM64)
+#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64))
     #define PRISM_HAS_NEON
-#elif (defined(__x86_64__) && defined(__SSSE3__)) || defined(_M_X64)
+#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64))
     #define PRISM_HAS_SSSE3
 #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     #define PRISM_HAS_SWAR

From f5ae7b73eea485959347ebb73c92e7a40d3b8aa8 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 17 Mar 2026 07:18:05 -0400
Subject: [PATCH 24/28] Ensure allocations to the constant pool are through the
 arena

---
 src/prism.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index d98f82d8fe..783e624947 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -20717,11 +20717,9 @@ parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *c
         start = parser->start + PM_NODE_START(call->receiver);
         end = parser->start + PM_NODE_END(call->receiver);
 
-        void *memory = xmalloc(length);
-        if (memory == NULL) abort();
-
+        uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
         memcpy(memory, source, length);
-        name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+        name = pm_parser_constant_id_owned(parser, memory, length);
     }
 
     // Add this name to the list of constants if it is valid, not duplicated,
@@ -22267,11 +22265,9 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
                 const uint8_t *source = pm_string_source(local);
                 size_t length = pm_string_length(local);
 
-                void *allocated = xmalloc(length);
-                if (allocated == NULL) continue;
-
+                uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
                 memcpy(allocated, source, length);
-                pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
+                pm_parser_local_add_owned(parser, allocated, length);
             }
         }
     }

From 1cabef7946746d9468891c8827d10754bbeee281 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 18 Mar 2026 08:40:25 +0100
Subject: [PATCH 25/28] Fix ASAN reading off end of strpbrk cache

https://github.com/ruby/ruby/commit/968b999fe25f77ea556b5e962c4781e38a7e6863

Co-Authored-By: Kevin Newton <kddnewton@gmail.com>
---
 include/prism/parser.h | 17 ++++++++++++-----
 src/prism.c            | 11 +++++++----
 src/util/pm_strpbrk.c  |  5 +++--
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index b68d56b564..8187d8685a 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -107,6 +107,13 @@ typedef struct {
  * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
  * are found as part of a string.
  */
+/**
+ * The size of the breakpoints and strpbrk cache charset buffers. All
+ * breakpoint arrays and the strpbrk cache charset must share this size so
+ * that memcmp can safely compare the full buffer without overreading.
+ */
+#define PM_STRPBRK_CACHE_SIZE 16
+
 typedef struct pm_lex_mode {
     /** The type of this lex mode. */
     enum {
@@ -169,7 +176,7 @@ typedef struct pm_lex_mode {
              * This is the character set that should be used to delimit the
              * tokens within the list.
              */
-            uint8_t breakpoints[11];
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
         } list;
 
         struct {
@@ -191,7 +198,7 @@ typedef struct pm_lex_mode {
              * This is the character set that should be used to delimit the
              * tokens within the regular expression.
              */
-            uint8_t breakpoints[7];
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
         } regexp;
 
         struct {
@@ -224,7 +231,7 @@ typedef struct pm_lex_mode {
              * This is the character set that should be used to delimit the
              * tokens within the string.
              */
-            uint8_t breakpoints[7];
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
         } string;
 
         struct {
@@ -970,8 +977,8 @@ struct pm_parser {
      * (which is the common case during string/regex/list lexing).
      */
     struct {
-        /** The cached charset (null-terminated, max 11 chars + NUL). */
-        uint8_t charset[12];
+        /** The cached charset (null-terminated, NUL-padded). */
+        uint8_t charset[PM_STRPBRK_CACHE_SIZE];
 
         /** Nibble-based low lookup table for SIMD matching. */
         uint8_t low_lut[16];
diff --git a/src/prism.c b/src/prism.c
index 783e624947..d0bd2e1973 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -149,7 +149,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
     // These are the places where we need to split up the content of the list.
     // We'll use strpbrk to find the first of these characters.
     uint8_t *breakpoints = lex_mode.as.list.breakpoints;
-    memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
     size_t index = 7;
 
     // Now we'll add the terminator to the list of breakpoints. If the
@@ -201,7 +202,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
     // regular expression. We'll use strpbrk to find the first of these
     // characters.
     uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
-    memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
     size_t index = 4;
 
     // First we'll add the terminator.
@@ -237,7 +239,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
     // These are the places where we need to split up the content of the
     // string. We'll use strpbrk to find the first of these characters.
     uint8_t *breakpoints = lex_mode.as.string.breakpoints;
-    memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+    memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+    memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
     size_t index = 3;
 
     // Now add in the terminator. If the terminator is not already a NULL byte,
@@ -12052,7 +12055,7 @@ parser_lex(pm_parser_t *parser) {
             // Otherwise we'll be parsing string content. These are the places
             // where we need to split up the content of the heredoc. We'll use
             // strpbrk to find the first of these characters.
-            uint8_t breakpoints[] = "\r\n\\#";
+            uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
 
             pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
             if (quote == PM_HEREDOC_QUOTE_SINGLE) {
diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
index 496739c9f8..fdd2ab4567 100644
--- a/src/util/pm_strpbrk.c
+++ b/src/util/pm_strpbrk.c
@@ -59,8 +59,9 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l
  */
 static inline void
 pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) {
-    // The cache key is the full 12-byte charset buffer. Since it is always
-    // NUL-padded, a fixed-size comparison covers both content and length.
+    // The cache key is the full charset buffer (PM_STRPBRK_CACHE_SIZE bytes).
+    // Since it is always NUL-padded, a fixed-size comparison covers both
+    // content and length.
     if (memcmp(parser->strpbrk_cache.charset, charset, sizeof(parser->strpbrk_cache.charset)) == 0) return;
 
     memset(parser->strpbrk_cache.low_lut, 0, sizeof(parser->strpbrk_cache.low_lut));

From 898241c76c4aee13fca0800b9c2b017533521531 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 18 Mar 2026 08:42:15 +0100
Subject: [PATCH 26/28] Do not use GCC-specific syntax for lookup tables

https://github.com/ruby/ruby/commit/5026acfb6433f531a5cd24e904857a8d54b4473c

Co-Authored-By: Kevin Newton <kddnewton@gmail.com>
---
 src/prism.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index d0bd2e1973..7b6482b090 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -1809,14 +1809,16 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
     // contains the OR of bits for all high nibbles that have an
     // identifier character at that low nibble position. A byte is an
     // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
-    const uint8x16_t low_lut = (uint8x16_t) {
+    static const uint8_t low_lut_data[16] = {
         0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
         0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
     };
-    const uint8x16_t high_lut = (uint8x16_t) {
+    static const uint8_t high_lut_data[16] = {
         0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     };
+    const uint8x16_t low_lut = vld1q_u8(low_lut_data);
+    const uint8x16_t high_lut = vld1q_u8(high_lut_data);
     const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
 
     while (cursor + 16 <= end) {

From ec2cf2e26160a2419838320f1de859265ecd4ae3 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 18 Mar 2026 08:51:15 +0100
Subject: [PATCH 27/28] Fix infinite loop in parser_lex_magic_comment

https://github.com/ruby/ruby/commit/ec3162cafc601cdb18af0032a23f3798d4551dea

Co-Authored-By: Kevin Newton <kddnewton@gmail.com>
---
 src/prism.c                      | 2 +-
 test/prism/magic_comment_test.rb | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/prism.c b/src/prism.c
index 7b6482b090..dc7cbef2d4 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -7567,7 +7567,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
     cursor = start;
     while (cursor < end) {
         if (indicator) {
-            cursor += pm_strspn_whitespace(cursor, end - cursor);
+            while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
         }
 
         const uint8_t *key_start = cursor;
diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb
index ccfe5a5d0a..7985bae568 100644
--- a/test/prism/magic_comment_test.rb
+++ b/test/prism/magic_comment_test.rb
@@ -69,6 +69,10 @@ def test_emacs_multiple
       assert_magic_encoding(Encoding::US_ASCII, "# -*- foo: bar; encoding: ascii -*-")
     end
 
+    def test_emacs_missing_delimiter
+      assert_magic_encoding(Encoding::US_ASCII, '# -*- \1; encoding: ascii -*-')
+    end
+
     def test_coding_whitespace
       assert_magic_encoding(Encoding::ASCII_8BIT, "# coding \t \r  \v   :     \t \v    \r   ascii-8bit")
     end

From eab6f33ba502ba36a18de010f253e8f3e1b1a065 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 18 Mar 2026 08:57:21 +0100
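Subject: [PATCH 28/28] Fix C coverage by moving stuff slightly around

Move the documentation comment for pm_lex_mode so that it sits directly
above the typedef it describes, after the PM_STRPBRK_CACHE_SIZE
definition, instead of being separated from it by that definition.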

---
 include/prism/parser.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/prism/parser.h b/include/prism/parser.h
index 8187d8685a..66df791244 100644
--- a/include/prism/parser.h
+++ b/include/prism/parser.h
@@ -100,13 +100,6 @@ typedef struct {
     pm_heredoc_indent_t indent;
 } pm_heredoc_lex_mode_t;
 
-/**
- * When lexing Ruby source, the lexer has a small amount of state to tell which
- * kind of token it is currently lexing. For example, when we find the start of
- * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
- * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
- * are found as part of a string.
- */
 /**
  * The size of the breakpoints and strpbrk cache charset buffers. All
  * breakpoint arrays and the strpbrk cache charset must share this size so
@@ -114,6 +107,13 @@ typedef struct {
  */
 #define PM_STRPBRK_CACHE_SIZE 16
 
+/**
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
 typedef struct pm_lex_mode {
     /** The type of this lex mode. */
     enum {