From 5a9e57703eb8c912e7c2dd9027d875773183f014 Mon Sep 17 00:00:00 2001
From: Pokey Rule <755842+pokey@users.noreply.github.com>
Date: Mon, 4 Jul 2022 15:08:38 +0100
Subject: [PATCH 1/3] Tweaks to text-based item PR

---
 .../modifiers/ItemStage/ItemStage.ts          | 10 +--
 .../modifiers/ItemStage/tokenizeRange.ts      | 86 +++++++++++--------
 2 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/src/processTargets/modifiers/ItemStage/ItemStage.ts b/src/processTargets/modifiers/ItemStage/ItemStage.ts
index 7c295ddc1d..b87650d84b 100644
--- a/src/processTargets/modifiers/ItemStage/ItemStage.ts
+++ b/src/processTargets/modifiers/ItemStage/ItemStage.ts
@@ -104,15 +104,15 @@ function getItemInfosForIterationScope(
   target: Target
 ) {
   const { range, boundary } = getIterationScope(context, target);
-  return rangeToItemInfos(target.editor, range, boundary);
+  return getItemsInRange(target.editor, range, boundary);
 }
 
-function rangeToItemInfos(
+function getItemsInRange(
   editor: TextEditor,
-  collectionRange: Range,
-  collectionBoundary?: [Range, Range]
+  interior: Range,
+  boundary?: [Range, Range]
 ): ItemInfo[] {
-  const tokens = tokenizeRange(editor, collectionRange, collectionBoundary);
+  const tokens = tokenizeRange(editor, interior, boundary);
   const itemInfos: ItemInfo[] = [];
 
   tokens.forEach((token, i) => {
diff --git a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
index 04a94f6f57..de6cf37215 100644
--- a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
+++ b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
@@ -2,15 +2,24 @@ import { Range, TextEditor } from "vscode";
 
 export function tokenizeRange(
   editor: TextEditor,
-  collectionRange: Range,
-  collectionBoundary?: [Range, Range]
+  interior: Range,
+  boundary?: [Range, Range]
 ) {
   const { document } = editor;
-  const text = document.getText(collectionRange);
-  const lexemes = text.split(/([,(){}<>[\]"'`]|\\"|\\'|\\`)/g).filter(Boolean);
+  const text = document.getText(interior);
+  /**
+   * The interior range tokenized into delimited regions, including the delimiters themselves.  For example:
+   * `"foo(hello), bar, whatever"` =>
+   * `["foo", "(", "hello", ")", ",", " bar", ",", " whatever"]`
+   */
+  const lexemes = text
+    // NB: Both the delimiters and the text between them are included because we
+    // use a capture group in this split regex
+    .split(/([,(){}<>[\]"'`]|\\"|\\'|\\`)/g)
+    .filter((lexeme) => lexeme.length > 0);
   const joinedLexemes = joinLexemesBySkippingMatchingPairs(lexemes);
   const tokens: Token[] = [];
-  let offset = document.offsetAt(collectionRange.start);
+  let offset = document.offsetAt(interior.start);
 
   joinedLexemes.forEach((lexeme) => {
     // Whitespace found. Just skip
@@ -20,7 +29,7 @@ export function tokenizeRange(
     }
 
     // Separator delimiter found.
-    if (lexeme === delimiter) {
+    if (lexeme === separator) {
       tokens.push({
         type: "delimiter",
         range: new Range(
@@ -45,11 +54,11 @@ export function tokenizeRange(
     offset += lexeme.length;
   });
 
-  if (collectionBoundary != null) {
+  if (boundary != null) {
     return [
-      { type: "boundary", range: collectionBoundary[0] },
+      { type: "boundary", range: boundary[0] },
       ...tokens,
-      { type: "boundary", range: collectionBoundary[1] },
+      { type: "boundary", range: boundary[1] },
     ];
   }
 
@@ -58,48 +67,51 @@ export function tokenizeRange(
 
 export function joinLexemesBySkippingMatchingPairs(lexemes: string[]) {
   const result: string[] = [];
-  let delimiterCount = 0;
+  /**
+   * The number of left delimiters minus right delimiters we've seen.  If the
+   * balance is 0, we're at the top level of the collection, so separators are
+   * relevant.  Otherwise we ignore separators because they're nested.
+   */
+  let delimiterBalance = 0;
+  /** The opening delimiter that started the top-level pair we're tracking */
   let openingDelimiter: string | null = null;
+  /** The closing delimiter we're currently looking for */
   let closingDelimiter: string | null = null;
   let startIndex: number = -1;
 
   lexemes.forEach((lexeme, index) => {
-    // We are waiting for a closing delimiter
-    if (delimiterCount > 0) {
-      // Closing delimiter found
-      if (closingDelimiter === lexeme) {
-        --delimiterCount;
+    if (delimiterBalance > 0) {
+      // We are waiting for a closing delimiter
+
+      if (lexeme === closingDelimiter) {
+        // Closing delimiter found
+        --delimiterBalance;
+      } else if (lexeme === openingDelimiter) {
+        // Additional opening delimiter found
+        ++delimiterBalance;
       }
-      // Additional opening delimiter found
-      else if (openingDelimiter === lexeme) {
-        ++delimiterCount;
-      }
-    }
-
-    // Starting delimiter found
-    else if (delimiters[lexeme] != null) {
+    } else if (leftToRightMap[lexeme] != null) {
+      // Starting delimiter found
       openingDelimiter = lexeme;
-      closingDelimiter = delimiters[lexeme];
-      delimiterCount = 1;
-      // This is the first lexeme to be joined
+      closingDelimiter = leftToRightMap[lexeme];
+      delimiterBalance = 1;
       if (startIndex < 0) {
+        // This is the first lexeme to be joined
         startIndex = index;
       }
-    }
-
-    // This is the first lexeme to be joined
-    else if (startIndex < 0) {
+    } else if (startIndex < 0) {
+      // This is the first lexeme to be joined
       startIndex = index;
     }
 
-    const isDelimiter = lexeme === delimiter && delimiterCount === 0;
+    const isSeparator = lexeme === separator && delimiterBalance === 0;
 
-    // This is the last lexeme to be joined
-    if (isDelimiter || index === lexemes.length - 1) {
-      const endIndex = isDelimiter ? index : index + 1;
+    if (isSeparator || index === lexemes.length - 1) {
+      // This is the last lexeme to be joined
+      const endIndex = isSeparator ? index : index + 1;
       result.push(lexemes.slice(startIndex, endIndex).join(""));
       startIndex = -1;
-      if (isDelimiter) {
+      if (isSeparator) {
         result.push(lexeme);
       }
     }
@@ -108,11 +120,11 @@ export function joinLexemesBySkippingMatchingPairs(lexemes: string[]) {
   return result;
 }
 
-const delimiter = ",";
+const separator = ",";
 
 // Mapping between opening and closing delimiters
 /* eslint-disable @typescript-eslint/naming-convention */
-const delimiters: { [key: string]: string } = {
+const leftToRightMap: { [key: string]: string } = {
   "(": ")",
   "{": "}",
   "<": ">",

From a03f1721c0a55ce535f567b318a3f43ac88d94fc Mon Sep 17 00:00:00 2001
From: Pokey Rule <755842+pokey@users.noreply.github.com>
Date: Mon, 4 Jul 2022 15:45:24 +0100
Subject: [PATCH 2/3] More tweaks and tests

---
 .../modifiers/ItemStage/ItemStage.ts          | 12 +++++---
 .../modifiers/ItemStage/tokenizeRange.ts      | 15 ++++++----
 .../recorded/itemTextual/clearItem10.yml      | 26 ++++++++++++++++
 .../recorded/itemTextual/clearItem11.yml      | 26 ++++++++++++++++
 .../recorded/itemTextual/clearItem9.yml       | 26 ++++++++++++++++
 .../recorded/itemTextual/clearItemDrip.yml    | 30 +++++++++++++++++++
 6 files changed, 125 insertions(+), 10 deletions(-)
 create mode 100644 src/test/suite/fixtures/recorded/itemTextual/clearItem10.yml
 create mode 100644 src/test/suite/fixtures/recorded/itemTextual/clearItem11.yml
 create mode 100644 src/test/suite/fixtures/recorded/itemTextual/clearItem9.yml
 create mode 100644 src/test/suite/fixtures/recorded/itemTextual/clearItemDrip.yml

diff --git a/src/processTargets/modifiers/ItemStage/ItemStage.ts b/src/processTargets/modifiers/ItemStage/ItemStage.ts
index b87650d84b..992adf2875 100644
--- a/src/processTargets/modifiers/ItemStage/ItemStage.ts
+++ b/src/processTargets/modifiers/ItemStage/ItemStage.ts
@@ -119,6 +119,7 @@ function getItemsInRange(
     if (token.type === "delimiter" || token.type === "boundary") {
       return;
     }
+
     const leadingDelimiterRange = (() => {
       if (tokens[i - 2]?.type === "item") {
         return new Range(tokens[i - 2].range.end, token.range.start);
@@ -128,6 +129,7 @@ function getItemsInRange(
       }
       return undefined;
     })();
+
     const trailingDelimiterRange = (() => {
       if (tokens[i + 2]?.type === "item") {
         return new Range(token.range.end, tokens[i + 2].range.start);
@@ -137,24 +139,26 @@ function getItemsInRange(
       }
       return undefined;
     })();
+
     // Leading boundary is excluded and leading delimiter is included
-    const leadingMatchStart =
+    const domainStart =
       tokens[i - 1]?.type === "boundary"
         ? tokens[i - 1].range.end
         : tokens[i - 1]?.type === "delimiter"
         ? tokens[i - 1].range.start
         : token.range.start;
+
     // Trailing boundary and delimiter is excluded
-    const trailingMatchEnd =
+    const domainEnd =
       tokens[i + 1]?.type === "boundary" || tokens[i + 1]?.type === "delimiter"
         ? tokens[i + 1].range.start
         : token.range.end;
-    const matchRange = new Range(leadingMatchStart, trailingMatchEnd);
+
     itemInfos.push({
       contentRange: token.range,
       leadingDelimiterRange,
       trailingDelimiterRange,
-      domain: matchRange,
+      domain: new Range(domainStart, domainEnd),
     });
   });
 
diff --git a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
index de6cf37215..b221635dd6 100644
--- a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
+++ b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
@@ -90,16 +90,18 @@ export function joinLexemesBySkippingMatchingPairs(lexemes: string[]) {
         // Additional opening delimiter found
         ++delimiterBalance;
       }
-    } else if (leftToRightMap[lexeme] != null) {
+
+      return;
+    }
+
+    if (leftToRightMap[lexeme] != null) {
       // Starting delimiter found
       openingDelimiter = lexeme;
       closingDelimiter = leftToRightMap[lexeme];
       delimiterBalance = 1;
-      if (startIndex < 0) {
-        // This is the first lexeme to be joined
-        startIndex = index;
-      }
-    } else if (startIndex < 0) {
+    }
+
+    if (startIndex < 0) {
       // This is the first lexeme to be joined
       startIndex = index;
     }
@@ -112,6 +114,7 @@ export function joinLexemesBySkippingMatchingPairs(lexemes: string[]) {
       result.push(lexemes.slice(startIndex, endIndex).join(""));
       startIndex = -1;
       if (isSeparator) {
+        // Add the separator itself
         result.push(lexeme);
       }
     }
diff --git a/src/test/suite/fixtures/recorded/itemTextual/clearItem10.yml b/src/test/suite/fixtures/recorded/itemTextual/clearItem10.yml
new file mode 100644
index 0000000000..1ee4b4228f
--- /dev/null
+++ b/src/test/suite/fixtures/recorded/itemTextual/clearItem10.yml
@@ -0,0 +1,26 @@
+languageId: typescript
+command:
+  spokenForm: clear item
+  version: 2
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: collectionItem}
+  usePrePhraseSnapshot: false
+  action: {name: clearAndSetSelection}
+initialState:
+  documentContents: foo(hello, world)
+  selections:
+    - anchor: {line: 0, character: 10}
+      active: {line: 0, character: 10}
+  marks: {}
+finalState:
+  documentContents: foo(hello, )
+  selections:
+    - anchor: {line: 0, character: 11}
+      active: {line: 0, character: 11}
+  thatMark:
+    - anchor: {line: 0, character: 11}
+      active: {line: 0, character: 11}
+fullTargets: [{type: primitive, mark: {type: cursor}, modifiers: [{type: containingScope, scopeType: {type: collectionItem}}]}]
diff --git a/src/test/suite/fixtures/recorded/itemTextual/clearItem11.yml b/src/test/suite/fixtures/recorded/itemTextual/clearItem11.yml
new file mode 100644
index 0000000000..9b4e5e253d
--- /dev/null
+++ b/src/test/suite/fixtures/recorded/itemTextual/clearItem11.yml
@@ -0,0 +1,26 @@
+languageId: typescript
+command:
+  spokenForm: clear item
+  version: 2
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: collectionItem}
+  usePrePhraseSnapshot: false
+  action: {name: clearAndSetSelection}
+initialState:
+  documentContents: foo(hello, world)
+  selections:
+    - anchor: {line: 0, character: 7}
+      active: {line: 0, character: 13}
+  marks: {}
+finalState:
+  documentContents: foo()
+  selections:
+    - anchor: {line: 0, character: 4}
+      active: {line: 0, character: 4}
+  thatMark:
+    - anchor: {line: 0, character: 11}
+      active: {line: 0, character: 11}
+fullTargets: [{type: primitive, mark: {type: cursor}, modifiers: [{type: containingScope, scopeType: {type: collectionItem}}]}]
diff --git a/src/test/suite/fixtures/recorded/itemTextual/clearItem9.yml b/src/test/suite/fixtures/recorded/itemTextual/clearItem9.yml
new file mode 100644
index 0000000000..c6197799f0
--- /dev/null
+++ b/src/test/suite/fixtures/recorded/itemTextual/clearItem9.yml
@@ -0,0 +1,26 @@
+languageId: typescript
+command:
+  spokenForm: clear item
+  version: 2
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: collectionItem}
+  usePrePhraseSnapshot: true
+  action: {name: clearAndSetSelection}
+initialState:
+  documentContents: foo(hello, world)
+  selections:
+    - anchor: {line: 0, character: 9}
+      active: {line: 0, character: 9}
+  marks: {}
+finalState:
+  documentContents: foo(, world)
+  selections:
+    - anchor: {line: 0, character: 4}
+      active: {line: 0, character: 4}
+  thatMark:
+    - anchor: {line: 0, character: 4}
+      active: {line: 0, character: 4}
+fullTargets: [{type: primitive, mark: {type: cursor}, modifiers: [{type: containingScope, scopeType: {type: collectionItem}}]}]
diff --git a/src/test/suite/fixtures/recorded/itemTextual/clearItemDrip.yml b/src/test/suite/fixtures/recorded/itemTextual/clearItemDrip.yml
new file mode 100644
index 0000000000..1a8972fcc6
--- /dev/null
+++ b/src/test/suite/fixtures/recorded/itemTextual/clearItemDrip.yml
@@ -0,0 +1,30 @@
+languageId: typescript
+command:
+  spokenForm: clear item drip
+  version: 2
+  targets:
+    - type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: collectionItem}
+      mark: {type: decoratedSymbol, symbolColor: default, character: ','}
+  usePrePhraseSnapshot: true
+  action: {name: clearAndSetSelection}
+initialState:
+  documentContents: foo(hello, world)
+  selections:
+    - anchor: {line: 0, character: 13}
+      active: {line: 0, character: 13}
+  marks:
+    default.,:
+      start: {line: 0, character: 9}
+      end: {line: 0, character: 10}
+finalState:
+  documentContents: foo()
+  selections:
+    - anchor: {line: 0, character: 4}
+      active: {line: 0, character: 4}
+  thatMark:
+    - anchor: {line: 0, character: 11}
+      active: {line: 0, character: 11}
+fullTargets: [{type: primitive, mark: {type: decoratedSymbol, symbolColor: default, character: ','}, modifiers: [{type: containingScope, scopeType: {type: collectionItem}}]}]

From d34aba5c73d044131af20d5c3092ddc5b66afe76 Mon Sep 17 00:00:00 2001
From: Pokey Rule <755842+pokey@users.noreply.github.com>
Date: Mon, 4 Jul 2022 15:55:07 +0100
Subject: [PATCH 3/3] More doc string

---
 src/processTargets/modifiers/ItemStage/tokenizeRange.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
index b221635dd6..31c24d58ba 100644
--- a/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
+++ b/src/processTargets/modifiers/ItemStage/tokenizeRange.ts
@@ -77,6 +77,10 @@ export function joinLexemesBySkippingMatchingPairs(lexemes: string[]) {
   let openingDelimiter: string | null = null;
   /** The closing delimiter we're currently looking for */
   let closingDelimiter: string | null = null;
+  /**
+   * The index in {@link lexemes} of the first lexeme in the current token we're
+   * merging.
+   */
   let startIndex: number = -1;
 
   lexemes.forEach((lexeme, index) => {