From 8d775f432fbd105123e6281f8d86992776e2c227 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 10:44:18 +0800 Subject: [PATCH 01/20] feat(cli): add `files` subcommand for file manipulation --- src/vectorcode/cli_utils.py | 30 ++++++++++++++++++++ src/vectorcode/main.py | 4 +++ src/vectorcode/subcommands/__init__.py | 2 ++ src/vectorcode/subcommands/files/__init__.py | 22 ++++++++++++++ src/vectorcode/subcommands/files/ls.py | 30 ++++++++++++++++++++ src/vectorcode/subcommands/files/rm.py | 25 ++++++++++++++++ 6 files changed, 113 insertions(+) create mode 100644 src/vectorcode/subcommands/files/__init__.py create mode 100644 src/vectorcode/subcommands/files/ls.py create mode 100644 src/vectorcode/subcommands/files/rm.py diff --git a/src/vectorcode/cli_utils.py b/src/vectorcode/cli_utils.py index e49b4b24..b89fa9cc 100644 --- a/src/vectorcode/cli_utils.py +++ b/src/vectorcode/cli_utils.py @@ -66,6 +66,12 @@ class CliAction(Enum): clean = "clean" prompts = "prompts" chunks = "chunks" + files = "files" + + +class FilesAction(StrEnum): + ls = "ls" + rm = "rm" @dataclass @@ -104,6 +110,8 @@ class Config: encoding: str = "utf8" hooks: bool = False prompt_categories: Optional[list[str]] = None + files_action: Optional[FilesAction] = None + rm_paths: list[str] = field(default_factory=list) @classmethod async def import_from(cls, config_dict: dict[str, Any]) -> "Config": @@ -374,6 +382,23 @@ def get_cli_parser(): chunks_parser.add_argument( "file_paths", nargs="*", help="Paths to files to be chunked." ).complete = shtab.FILE # type:ignore + + files_parser = subparsers.add_parser( + "files", parents=[shared_parser], help="Manipulate files from a collection." + ) + files_subparser = files_parser.add_subparsers( + dest="files_action", required=True, title="Collecton file operations" + ) + files_subparser.add_parser("ls", help="List files in the collection.") + files_rm_parser = files_subparser.add_parser( + "rm", help="Remove files in the collection." 
+ ) + files_rm_parser.add_argument( + "rm_paths", + nargs="+", + default=None, + help="Files to be removed from the collection.", + ) return main_parser @@ -418,6 +443,11 @@ async def parse_cli_args(args: Optional[Sequence[str]] = None): configs_items["encoding"] = main_args.encoding case "prompts": configs_items["prompt_categories"] = main_args.prompt_categories + case "files": + configs_items["files_action"] = main_args.files_action + match main_args.files_action: + case FilesAction.rm: + configs_items["rm_paths"] = main_args.rm_paths return Config(**configs_items) diff --git a/src/vectorcode/main.py b/src/vectorcode/main.py index 70cc1aba..d56b90f6 100644 --- a/src/vectorcode/main.py +++ b/src/vectorcode/main.py @@ -96,6 +96,10 @@ async def async_main(): from vectorcode.subcommands import clean return_val = await clean(final_configs) + case CliAction.files: + from vectorcode.subcommands import files + + return_val = await files(final_configs) except Exception: return_val = 1 logger.error(traceback.format_exc()) diff --git a/src/vectorcode/subcommands/__init__.py b/src/vectorcode/subcommands/__init__.py index f9f07b14..1bbb4c69 100644 --- a/src/vectorcode/subcommands/__init__.py +++ b/src/vectorcode/subcommands/__init__.py @@ -2,6 +2,7 @@ from vectorcode.subcommands.chunks import chunks from vectorcode.subcommands.clean import clean from vectorcode.subcommands.drop import drop +from vectorcode.subcommands.files import files from vectorcode.subcommands.init import init from vectorcode.subcommands.ls import ls from vectorcode.subcommands.prompt import prompts @@ -14,6 +15,7 @@ "chunks", "clean", "drop", + "files", "init", "ls", "prompts", diff --git a/src/vectorcode/subcommands/files/__init__.py b/src/vectorcode/subcommands/files/__init__.py new file mode 100644 index 00000000..44ec44c1 --- /dev/null +++ b/src/vectorcode/subcommands/files/__init__.py @@ -0,0 +1,22 @@ +import logging + +from vectorcode.cli_utils import Config, FilesAction + +logger = 
logging.getLogger(name=__name__) + + +async def files(configs: Config) -> int: + match configs.files_action: + case FilesAction.ls: + from vectorcode.subcommands.files import ls + + return await ls.ls(configs) + case FilesAction.rm: + from vectorcode.subcommands.files import rm + + return await rm.rm(configs) + case _: + logger.error( + f"Unsupported subcommand for `vectorcode files`: {configs.files_action}" + ) + return 1 diff --git a/src/vectorcode/subcommands/files/ls.py b/src/vectorcode/subcommands/files/ls.py new file mode 100644 index 00000000..a8e095a2 --- /dev/null +++ b/src/vectorcode/subcommands/files/ls.py @@ -0,0 +1,30 @@ +import json +import logging + +from chromadb.api.types import IncludeEnum + +from vectorcode.cli_utils import Config +from vectorcode.common import ClientManager, get_collection + +logger = logging.getLogger(name=__name__) + + +async def ls(configs: Config) -> int: + async with ClientManager().get_client(configs=configs) as client: + try: + collection = await get_collection(client, configs, False) + except ValueError: + logger.error(f"There's no existing collection at {configs.project_root}.") + return 1 + meta = (await collection.get(include=[IncludeEnum.metadatas]))["metadatas"] + if meta is None: + logger.warning("Failed to fetch metadatas from the database.") + return 0 + paths: list[str] = list(set(str(m.get("path")) for m in meta)) + paths.sort() + if configs.pipe: + print(json.dumps(list(paths))) + else: + for p in paths: + print(p) + return 0 diff --git a/src/vectorcode/subcommands/files/rm.py b/src/vectorcode/subcommands/files/rm.py new file mode 100644 index 00000000..44330084 --- /dev/null +++ b/src/vectorcode/subcommands/files/rm.py @@ -0,0 +1,25 @@ +import logging +import os +from typing import cast + +from chromadb.types import Where + +from vectorcode.cli_utils import Config, expand_path +from vectorcode.common import ClientManager, get_collection + +logger = logging.getLogger(name=__name__) + + +async def rm(configs: Config) 
-> int: + async with ClientManager().get_client(configs=configs) as client: + try: + collection = await get_collection(client, configs, False) + except ValueError: + logger.error(f"There's no existing collection at {configs.project_root}.") + return 1 + paths = list( + str(expand_path(p, True)) for p in configs.rm_paths if os.path.isfile(p) + ) + await collection.delete(where=cast(Where, {"path": {"$in": paths}})) + print(f"Removed {len(paths)} file(s).") + return 0 From d91b038d52ecce20f45bb70a9cca16303f00f5c1 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 10:47:02 +0800 Subject: [PATCH 02/20] docs(cli): mark `files` subcommand feature as completed --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index e36fd2cd..a75180a3 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,7 @@ This project follows an adapted semantic versioning: - [x] implement some sort of project-root anchors (such as `.git` or a custom `.vectorcode.json`) that enhances automatic project-root detection. **Implemented project-level `.vectorcode/` and `.git` as root anchor** -- [ ] ability to view and delete files in a collection (atm you can only `drop` - and `vectorise` again); +- [x] ability to view and delete files in a collection; - [x] joint search (kinda, using codecompanion.nvim/MCP); - [x] Nix support (unofficial packages [here](https://search.nixos.org/packages?channel=unstable&from=0&size=50&sort=relevance&type=packages&query=vectorcode)); - [ ] Query rewriting (#124). 
From d20188584e698426563ecaaedb7a7238470517c0 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:06:21 +0800 Subject: [PATCH 03/20] tests(cli): fix test coverage for existing codebase --- tests/test_cli_utils.py | 14 ++++++++++++++ tests/test_main.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/test_cli_utils.py b/tests/test_cli_utils.py index 655ef19f..d93bcb8f 100644 --- a/tests/test_cli_utils.py +++ b/tests/test_cli_utils.py @@ -11,6 +11,7 @@ from vectorcode.cli_utils import ( CliAction, Config, + FilesAction, LockManager, PromptCategory, QueryInclude, @@ -511,6 +512,19 @@ async def test_parse_cli_args_chunks(): assert config.chunk_size == Config().chunk_size +@pytest.mark.asyncio +async def test_parse_cli_args_files(): + with patch("sys.argv", ["vectorcode", "files", "ls"]): + config = await parse_cli_args() + assert config.action == CliAction.files + assert config.files_action == FilesAction.ls + with patch("sys.argv", ["vectorcode", "files", "rm", "foo.txt"]): + config = await parse_cli_args() + assert config.action == CliAction.files + assert config.files_action == FilesAction.rm + assert config.rm_paths == ["foo.txt"] + + @pytest.mark.asyncio async def test_config_import_from_hnsw(): with tempfile.TemporaryDirectory() as temp_dir: diff --git a/tests/test_main.py b/tests/test_main.py index c9f9b718..285427c9 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,7 +3,7 @@ import pytest from vectorcode import __version__ -from vectorcode.cli_utils import CliAction +from vectorcode.cli_utils import CliAction, Config from vectorcode.main import async_main @@ -245,6 +245,18 @@ async def test_async_main_cli_action_ls(monkeypatch): mock_ls.assert_called_once_with(mock_final_configs) +@pytest.mark.asyncio +async def test_async_main_cli_action_files(monkeypatch): + cli_args = Config(action=CliAction.files) + mock_files = AsyncMock(return_value=0) + monkeypatch.setattr("vectorcode.subcommands.files", 
mock_files) + monkeypatch.setattr( + "vectorcode.main.parse_cli_args", AsyncMock(return_value=cli_args) + ) + assert await async_main() == 0 + mock_files.assert_called_once() + + @pytest.mark.asyncio async def test_async_main_cli_action_update(monkeypatch): mock_cli_args = MagicMock( From c6c52cb394489717ad4f639453a4918231dd5473 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:23:49 +0800 Subject: [PATCH 04/20] test(cli): test for `files ls` --- tests/subcommands/files/test_ls.py | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tests/subcommands/files/test_ls.py diff --git a/tests/subcommands/files/test_ls.py b/tests/subcommands/files/test_ls.py new file mode 100644 index 00000000..1a0ba239 --- /dev/null +++ b/tests/subcommands/files/test_ls.py @@ -0,0 +1,65 @@ +import json +from unittest.mock import AsyncMock, patch + +import pytest +from chromadb.api.models.AsyncCollection import AsyncCollection + +from vectorcode.cli_utils import CliAction, Config, FilesAction +from vectorcode.subcommands.files.ls import ls + + +@pytest.fixture +def client(): + return AsyncMock() + + +@pytest.fixture +def collection(): + col = AsyncMock(spec=AsyncCollection) + col.get.return_value = { + "ids": ["id1", "id2", "id3"], + "distances": [0.1, 0.2, 0.3], + "metadatas": [ + {"path": "file1.py", "start": 1, "end": 1}, + {"path": "file2.py", "start": 1, "end": 1}, + {"path": "file3.py", "start": 1, "end": 1}, + ], + "documents": [ + "content1", + "content2", + "content3", + ], + } + return col + + +@pytest.mark.asyncio +async def test_ls(client, collection, capsys): + with ( + patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, + patch( + "vectorcode.subcommands.files.ls.get_collection", return_value=collection + ), + patch("vectorcode.common.try_server", return_value=True), + ): + MockClientManager.return_value._create_client.return_value = client + await ls(Config(action=CliAction.files, 
files_action=FilesAction.ls)) + out = capsys.readouterr().out + assert "file1.py" in out + assert "file2.py" in out + assert "file3.py" in out + + +@pytest.mark.asyncio +async def test_ls_piped(client, collection, capsys): + with ( + patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, + patch( + "vectorcode.subcommands.files.ls.get_collection", return_value=collection + ), + patch("vectorcode.common.try_server", return_value=True), + ): + MockClientManager.return_value._create_client.return_value = client + await ls(Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True)) + out = capsys.readouterr().out + assert json.dumps(["file1.py", "file2.py", "file3.py"]).strip() == out.strip() From 261cad338ee5edc7cd332f1101f8226e4737ed8a Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:40:13 +0800 Subject: [PATCH 05/20] tests(cli): add tests for `files rm` --- src/vectorcode/subcommands/files/ls.py | 2 +- .../files/{test_ls.py => test_files_ls.py} | 33 ++++++++ tests/subcommands/files/test_files_rm.py | 75 +++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) rename tests/subcommands/files/{test_ls.py => test_files_ls.py} (64%) create mode 100644 tests/subcommands/files/test_files_rm.py diff --git a/src/vectorcode/subcommands/files/ls.py b/src/vectorcode/subcommands/files/ls.py index a8e095a2..6b5f36d0 100644 --- a/src/vectorcode/subcommands/files/ls.py +++ b/src/vectorcode/subcommands/files/ls.py @@ -16,7 +16,7 @@ async def ls(configs: Config) -> int: except ValueError: logger.error(f"There's no existing collection at {configs.project_root}.") return 1 - meta = (await collection.get(include=[IncludeEnum.metadatas]))["metadatas"] + meta = (await collection.get(include=[IncludeEnum.metadatas])).get("metadatas") if meta is None: logger.warning("Failed to fetch metadatas from the database.") return 0 diff --git a/tests/subcommands/files/test_ls.py b/tests/subcommands/files/test_files_ls.py similarity index 
64% rename from tests/subcommands/files/test_ls.py rename to tests/subcommands/files/test_files_ls.py index 1a0ba239..fc51caa6 100644 --- a/tests/subcommands/files/test_ls.py +++ b/tests/subcommands/files/test_files_ls.py @@ -63,3 +63,36 @@ async def test_ls_piped(client, collection, capsys): await ls(Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True)) out = capsys.readouterr().out assert json.dumps(["file1.py", "file2.py", "file3.py"]).strip() == out.strip() + + +@pytest.mark.asyncio +async def test_ls_no_collection(client, collection, capsys): + with ( + patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, + patch("vectorcode.subcommands.files.ls.get_collection", side_effect=ValueError), + ): + MockClientManager.return_value._create_client.return_value = client + assert ( + await ls( + Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True) + ) + != 0 + ) + + +@pytest.mark.asyncio +async def test_ls_empty_collection(client, capsys): + mock_collection = AsyncMock(spec=AsyncCollection) + mock_collection.get.return_value = {} + with ( + patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, + patch( + "vectorcode.subcommands.files.ls.get_collection", + return_value=mock_collection, + ), + patch("vectorcode.common.try_server", return_value=True), + ): + MockClientManager.return_value._create_client.return_value = client + assert ( + await ls(Config(action=CliAction.files, files_action=FilesAction.ls)) == 0 + ) diff --git a/tests/subcommands/files/test_files_rm.py b/tests/subcommands/files/test_files_rm.py new file mode 100644 index 00000000..36d7026d --- /dev/null +++ b/tests/subcommands/files/test_files_rm.py @@ -0,0 +1,75 @@ +from unittest.mock import AsyncMock, patch + +import pytest +from chromadb.api.models.AsyncCollection import AsyncCollection + +from vectorcode.cli_utils import CliAction, Config, FilesAction +from vectorcode.subcommands.files.rm import rm + + +@pytest.fixture 
+def client(): + return AsyncMock() + + +@pytest.fixture +def collection(): + col = AsyncMock(spec=AsyncCollection) + col.get.return_value = { + "ids": ["id1", "id2", "id3"], + "distances": [0.1, 0.2, 0.3], + "metadatas": [ + {"path": "file1.py", "start": 1, "end": 1}, + {"path": "file2.py", "start": 1, "end": 1}, + {"path": "file3.py", "start": 1, "end": 1}, + ], + "documents": [ + "content1", + "content2", + "content3", + ], + } + return col + + +@pytest.mark.asyncio +async def test_rm(client, collection, capsys): + with ( + patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, + patch( + "vectorcode.subcommands.files.rm.get_collection", return_value=collection + ), + patch("vectorcode.common.try_server", return_value=True), + patch("os.path.isfile", return_value=True), + patch( + "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x + ), + ): + MockClientManager.return_value._create_client.return_value = client + config = Config( + action=CliAction.files, + files_action=FilesAction.rm, + rm_paths=["file1.py"], + ) + await rm(config) + collection.delete.assert_called_with(where={"path": {"$in": ["file1.py"]}}) + + +@pytest.mark.asyncio +async def test_rm_no_collection(client, collection, capsys): + with ( + patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, + patch("vectorcode.subcommands.files.rm.get_collection", side_effect=ValueError), + ): + MockClientManager.return_value._create_client.return_value = client + assert ( + await rm( + Config( + action=CliAction.files, + files_action=FilesAction.rm, + pipe=True, + rm_paths=["file1.py"], + ) + ) + != 0 + ) From d6fe67832d8c3b89a226d1034b7058a1f1d078ed Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:48:40 +0800 Subject: [PATCH 06/20] tests(cli): add tests for `files` subcommand --- tests/subcommands/files/test_files.py | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 
tests/subcommands/files/test_files.py diff --git a/tests/subcommands/files/test_files.py b/tests/subcommands/files/test_files.py new file mode 100644 index 00000000..d99e1e5a --- /dev/null +++ b/tests/subcommands/files/test_files.py @@ -0,0 +1,30 @@ +from unittest.mock import AsyncMock, patch + +import pytest + +from vectorcode.cli_utils import CliAction, Config, FilesAction +from vectorcode.subcommands.files import files + + +@pytest.mark.asyncio +async def test_files(): + with patch( + "vectorcode.subcommands.files.ls.ls", return_value=AsyncMock() + ) as mock_ls: + config = Config(action=CliAction.files, files_action=FilesAction.ls) + await files(config) + mock_ls.assert_called_with(config) + with patch( + "vectorcode.subcommands.files.rm.rm", return_value=AsyncMock() + ) as mock_rm: + config = Config(action=CliAction.files, files_action=FilesAction.rm) + await files(config) + mock_rm.assert_called_with(config) + + +@pytest.mark.asyncio +async def test_files_invalid_actions(): + with patch("vectorcode.subcommands.files.logger") as mock_logger: + config = Config(action=CliAction.files, files_action="foobar") + assert await files(config) != 0 + mock_logger.error.assert_called_once() From d92d4070d1e2def43545e745d8ccb6443f07896f Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:55:45 +0800 Subject: [PATCH 07/20] feat(cli): make sure `files` commands honor `--project_root` --- src/vectorcode/cli_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vectorcode/cli_utils.py b/src/vectorcode/cli_utils.py index b89fa9cc..00e9dc48 100644 --- a/src/vectorcode/cli_utils.py +++ b/src/vectorcode/cli_utils.py @@ -389,9 +389,11 @@ def get_cli_parser(): files_subparser = files_parser.add_subparsers( dest="files_action", required=True, title="Collecton file operations" ) - files_subparser.add_parser("ls", help="List files in the collection.") + files_subparser.add_parser( + "ls", parents=[shared_parser], help="List files in the 
collection." + ) files_rm_parser = files_subparser.add_parser( - "rm", help="Remove files in the collection." + "rm", parents=[shared_parser], help="Remove files in the collection." ) files_rm_parser.add_argument( "rm_paths", From 82e87606175ecdc8697c6a9502eeaaaa16e10407 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 11:58:58 +0800 Subject: [PATCH 08/20] docs(cli): document `files` subcommand in cli.md --- docs/cli.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/cli.md b/docs/cli.md index f6dc17c2..72f03103 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -24,6 +24,7 @@ * [Removing a Collection](#removing-a-collection) * [Checking Project Setup](#checking-project-setup) * [Cleaning up](#cleaning-up) + * [Inspecting and Manipulating Files in an Indexed Project](#inspecting-and-manipulating-files-in-an-indexed-project) * [Debugging and Diagnosing](#debugging-and-diagnosing) * [Shell Completion](#shell-completion) * [Hardware Acceleration](#hardware-acceleration) @@ -507,6 +508,14 @@ some_message` and then getting an empty results. For empty collections and collections for removed projects, you can use the `vectorcode clean` command to remove them at once. +### Inspecting and Manipulating Files in an Indexed Project + +`vectorcode files ls` prints a list of files that are indexed in the project. +`vectorcode files rm file1 file2` removes the embeddings that belong to the +specified files from the project. + +Both commands will honor the `--project_root` flag. 
+ ### Debugging and Diagnosing When something doesn't work as expected, you can enable logging by setting the From 4621f5b47001102d6f502361a8fd24d8edce7086 Mon Sep 17 00:00:00 2001 From: Davidyz Date: Mon, 30 Jun 2025 03:59:39 +0000 Subject: [PATCH 09/20] Auto generate docs --- doc/VectorCode-cli.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index 92aa5117..bc2dcc64 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -35,6 +35,7 @@ Table of Contents *VectorCode-cli-table-of-contents* - |VectorCode-cli-removing-a-collection| - |VectorCode-cli-checking-project-setup| - |VectorCode-cli-cleaning-up| + - |VectorCode-cli-inspecting-and-manupulating-files-in-an-indexed-project| - |VectorCode-cli-debugging-and-diagnosing| - |VectorCode-cli-shell-completion| - |VectorCode-cli-hardware-acceleration| @@ -556,6 +557,15 @@ For empty collections and collections for removed projects, you can use the `vectorcode clean` command to remove them at once. +INSPECTING AND MANUPULATING FILES IN AN INDEXED PROJECT ~ + +`vectorcode files ls` prints a list of files that are indexed in the project. +`vectorcode files rm file1 file2` removes the embeddings that belong to the +specified files from the project. + +Both commands will honor the `--project_root` flag. 
+ + DEBUGGING AND DIAGNOSING ~ When something doesn’t work as expected, you can enable logging by setting From b3803b110b3a70f42e8a61e089d821dea47084cc Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 20:03:45 +0800 Subject: [PATCH 10/20] feat(cli): remove empty collection after removing files --- src/vectorcode/subcommands/files/rm.py | 5 +++++ tests/subcommands/files/test_files_rm.py | 28 ++++++++++++++++++++++++ tests/test_common.py | 7 +++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/vectorcode/subcommands/files/rm.py b/src/vectorcode/subcommands/files/rm.py index 44330084..89fd392d 100644 --- a/src/vectorcode/subcommands/files/rm.py +++ b/src/vectorcode/subcommands/files/rm.py @@ -22,4 +22,9 @@ async def rm(configs: Config) -> int: ) await collection.delete(where=cast(Where, {"path": {"$in": paths}})) print(f"Removed {len(paths)} file(s).") + if await collection.count() == 0: + logger.warning( + f"The collection at {configs.project_root} is now empty and will be removed." 
+ ) + await client.delete_collection(collection.name) return 0 diff --git a/tests/subcommands/files/test_files_rm.py b/tests/subcommands/files/test_files_rm.py index 36d7026d..69a62bcf 100644 --- a/tests/subcommands/files/test_files_rm.py +++ b/tests/subcommands/files/test_files_rm.py @@ -29,6 +29,7 @@ def collection(): "content3", ], } + col.name = "test_collection" return col @@ -55,6 +56,33 @@ async def test_rm(client, collection, capsys): collection.delete.assert_called_with(where={"path": {"$in": ["file1.py"]}}) +@pytest.mark.asyncio +async def test_rm_empty_collection(client, collection, capsys): + with ( + # patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, + patch( + "vectorcode.subcommands.files.rm.get_collection", return_value=collection + ), + patch("vectorcode.common.try_server", return_value=True), + patch("os.path.isfile", return_value=True), + patch( + "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x + ), + ): + from vectorcode.subcommands.files.rm import ClientManager + + ClientManager()._create_client = AsyncMock(return_value=client) + config = Config( + action=CliAction.files, + files_action=FilesAction.rm, + rm_paths=["file1.py"], + ) + collection.count = AsyncMock(return_value=0) + client.delete_collection = AsyncMock() + await rm(config) + client.delete_collection.assert_called_once_with(collection.name) + + @pytest.mark.asyncio async def test_rm_no_collection(client, collection, capsys): with ( diff --git a/tests/test_common.py b/tests/test_common.py index 40b51fd6..f4543ca7 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -509,6 +509,7 @@ async def test_wait_for_server_timeout(): @pytest.mark.asyncio async def test_client_manager_get_client(): + ClientManager().clear() config = Config( db_url="https://test_host:1234", db_path="test_db", project_root="test_proj" ) @@ -529,13 +530,12 @@ async def test_client_manager_get_client(): patch("chromadb.AsyncHttpClient") as 
MockAsyncHttpClient, patch("vectorcode.common.try_server", return_value=True), ): - mock_client = MagicMock(spec=AsyncClientAPI) + mock_client = MagicMock(spec=AsyncClientAPI, parent=AsyncClientAPI) MockAsyncHttpClient.return_value = mock_client async with ( - ClientManager().get_client(config) as client, + ClientManager().get_client(config), ): - assert isinstance(client, AsyncClientAPI) MockAsyncHttpClient.assert_called() assert ( MockAsyncHttpClient.call_args.kwargs["settings"].chroma_server_host @@ -560,7 +560,6 @@ async def test_client_manager_get_client(): ClientManager().get_client(config1) as client1, ClientManager().get_client(config1_alt) as client1_alt, ): - assert isinstance(client1, AsyncClientAPI) MockAsyncHttpClient.assert_called() assert ( MockAsyncHttpClient.call_args.kwargs["settings"].chroma_server_host From f2e69727b20ca4eb7e136c68d4abfc8586eb1435 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Mon, 30 Jun 2025 21:00:09 +0800 Subject: [PATCH 11/20] tests(cli): fix broken tests due to wrong mocking --- tests/subcommands/files/test_files_rm.py | 8 +- tests/test_common.py | 2 +- tests/test_mcp.py | 95 +++++++++++------------- 3 files changed, 49 insertions(+), 56 deletions(-) diff --git a/tests/subcommands/files/test_files_rm.py b/tests/subcommands/files/test_files_rm.py index 69a62bcf..f5bcf670 100644 --- a/tests/subcommands/files/test_files_rm.py +++ b/tests/subcommands/files/test_files_rm.py @@ -59,7 +59,6 @@ async def test_rm(client, collection, capsys): @pytest.mark.asyncio async def test_rm_empty_collection(client, collection, capsys): with ( - # patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, patch( "vectorcode.subcommands.files.rm.get_collection", return_value=collection ), @@ -68,10 +67,11 @@ async def test_rm_empty_collection(client, collection, capsys): patch( "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x ), + patch( + "vectorcode.subcommands.files.rm.ClientManager._create_client", + 
return_value=client, + ), ): - from vectorcode.subcommands.files.rm import ClientManager - - ClientManager()._create_client = AsyncMock(return_value=client) config = Config( action=CliAction.files, files_action=FilesAction.rm, diff --git a/tests/test_common.py b/tests/test_common.py index f4543ca7..c0dbdc5f 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -595,11 +595,11 @@ async def _start_server(cfg): tempfile.TemporaryDirectory() as temp_dir, patch("vectorcode.common.start_server", side_effect=_start_server), patch("vectorcode.common.try_server", side_effect=_try_server), + patch("vectorcode.common.ClientManager._create_client"), ): db_path = os.path.join(temp_dir, "db") os.makedirs(db_path, exist_ok=True) - ClientManager._create_client = AsyncMock() async with ClientManager().get_client( Config( db_url="http://test_host:8001", diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 43b9eac9..2057c589 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -19,15 +19,14 @@ @pytest.mark.asyncio async def test_list_collections_success(): + mock_client = AsyncMock() with ( patch("vectorcode.mcp_main.get_collections") as mock_get_collections, patch("vectorcode.common.try_server", return_value=True), + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), ): - from vectorcode.mcp_main import ClientManager - - mock_client = AsyncMock() - ClientManager._create_client = AsyncMock(return_value=mock_client) - mock_collection1 = AsyncMock() mock_collection1.metadata = {"path": "path1"} mock_collection2 = AsyncMock() @@ -45,15 +44,14 @@ async def async_generator(): @pytest.mark.asyncio async def test_list_collections_no_metadata(): + mock_client = AsyncMock() with ( patch("vectorcode.mcp_main.get_collections") as mock_get_collections, patch("vectorcode.common.try_server", return_value=True), + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), ): - from vectorcode.mcp_main import 
ClientManager - - mock_client = AsyncMock() - ClientManager._create_client = AsyncMock(return_value=mock_client) - mock_collection1 = AsyncMock() mock_collection1.metadata = {"path": "path1"} mock_collection2 = AsyncMock() @@ -86,11 +84,15 @@ async def test_query_tool_invalid_project_root(): @pytest.mark.asyncio async def test_query_tool_success(): + mock_client = AsyncMock() with ( tempfile.TemporaryDirectory() as temp_dir, patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config") as mock_get_project_config, patch("vectorcode.mcp_main.get_collection") as mock_get_collection, + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), patch( "vectorcode.subcommands.query.get_query_result_files" ) as mock_get_query_result_files, @@ -100,15 +102,11 @@ async def test_query_tool_success(): patch("os.path.relpath", return_value="rel/path.py"), patch("vectorcode.cli_utils.load_config_file") as mock_load_config_file, ): - from vectorcode.mcp_main import ClientManager - mock_config = Config( chunk_size=100, overlap_ratio=0.1, reranker=None, project_root=temp_dir ) mock_load_config_file.return_value = mock_config mock_get_project_config.return_value = mock_config - mock_client = AsyncMock() - ClientManager._create_client = AsyncMock(return_value=mock_client) # Mock the collection's query method to return a valid QueryResult mock_collection = AsyncMock() @@ -141,15 +139,12 @@ async def test_query_tool_collection_access_failure(): with ( patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config"), - patch("vectorcode.mcp_main.get_collection"), # Still mock get_collection + patch("vectorcode.mcp_main.get_collection"), + patch( + "vectorcode.mcp_main.ClientManager._create_client", + side_effect=Exception("Failed to connect"), + ), ): - from vectorcode.mcp_main import ClientManager - - async def failing_get_client(*args, **kwargs): - raise Exception("Failed to connect") - - 
ClientManager._create_client = AsyncMock(side_effect=failing_get_client) - with pytest.raises(McpError) as exc_info: await query_tool( n_query=2, query_messages=["keyword1"], project_root="/valid/path" @@ -164,16 +159,15 @@ async def failing_get_client(*args, **kwargs): @pytest.mark.asyncio async def test_query_tool_no_collection(): + mock_client = AsyncMock() with ( patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config"), + patch("vectorcode.mcp_main.get_collection") as mock_get_collection, patch( - "vectorcode.mcp_main.get_collection" - ) as mock_get_collection, # Still mock get_collection - patch("vectorcode.common.ClientManager") as MockClientManager, + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), ): - mock_client = AsyncMock() - MockClientManager.return_value._create_client.return_value = mock_client mock_get_collection.return_value = None with pytest.raises(McpError) as exc_info: @@ -203,25 +197,24 @@ async def test_vectorise_files_success(): file_path = f"{temp_dir}/test_file.py" with open(file_path, "w") as f: f.write("def func(): pass") + mock_client = AsyncMock() with ( patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config") as mock_get_project_config, patch("vectorcode.mcp_main.get_collection") as mock_get_collection, + patch( + "vectorcode.mcp_main.ClientManager._create_client", + return_value=mock_client, + ), patch("vectorcode.subcommands.vectorise.chunked_add"), patch( "vectorcode.subcommands.vectorise.hash_file", return_value="test_hash" ), patch("vectorcode.common.try_server", return_value=True), ): - from vectorcode.mcp_main import ClientManager - mock_config = Config(project_root=temp_dir) mock_get_project_config.return_value = mock_config - mock_client = AsyncMock() - - # Ensure ClientManager's internal client creation method returns our mock. 
- ClientManager._create_client = AsyncMock(return_value=mock_client) mock_collection = AsyncMock() mock_collection.get.return_value = {"ids": [], "metadatas": []} @@ -241,13 +234,12 @@ async def test_vectorise_files_collection_access_failure(): with ( patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config"), - patch("vectorcode.common.ClientManager"), # Patch ClientManager class + patch( + "vectorcode.mcp_main.ClientManager._create_client", + side_effect=Exception("Client error"), + ), patch("vectorcode.mcp_main.get_collection"), ): - from vectorcode.mcp_main import ClientManager - - ClientManager._create_client = AsyncMock(side_effect=Exception("Client error")) - with pytest.raises(McpError) as exc_info: await vectorise_files(paths=["file.py"], project_root="/valid/path") @@ -280,10 +272,15 @@ def mock_open_side_effect(filename, *args, **kwargs): # For other files that might be opened, return a generic mock return MagicMock() + mock_client = AsyncMock() with ( patch("os.path.isdir", return_value=True), patch("vectorcode.mcp_main.get_project_config") as mock_get_project_config, patch("vectorcode.mcp_main.get_collection") as mock_get_collection, + patch( + "vectorcode.mcp_main.ClientManager._create_client", + return_value=mock_client, + ), patch("vectorcode.subcommands.vectorise.chunked_add") as mock_chunked_add, patch( "vectorcode.subcommands.vectorise.hash_file", return_value="test_hash" @@ -297,12 +294,8 @@ def mock_open_side_effect(filename, *args, **kwargs): ), patch("vectorcode.common.try_server", return_value=True), ): - from vectorcode.mcp_main import ClientManager - mock_config = Config(project_root=temp_dir) mock_get_project_config.return_value = mock_config - mock_client = AsyncMock() - ClientManager._create_client = AsyncMock(return_value=mock_client) mock_collection = AsyncMock() mock_collection.get.return_value = {"ids": [], "metadatas": []} @@ -321,23 +314,22 @@ def mock_open_side_effect(filename, *args, **kwargs): 
@pytest.mark.asyncio async def test_mcp_server(): + mock_client = AsyncMock() with ( patch( "vectorcode.mcp_main.find_project_config_dir" ) as mock_find_project_config_dir, patch("vectorcode.mcp_main.load_config_file") as mock_load_config_file, - # patch("vectorcode.mcp_main.get_client") as mock_get_client, # Removed patch("vectorcode.mcp_main.get_collection") as mock_get_collection, patch("mcp.server.fastmcp.FastMCP.add_tool") as mock_add_tool, patch("vectorcode.common.try_server", return_value=True), + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), ): - from vectorcode.mcp_main import ClientManager - mock_find_project_config_dir.return_value = "/path/to/config" mock_load_config_file.return_value = Config(project_root="/path/to/project") - mock_client = AsyncMock() - ClientManager._create_client = AsyncMock(return_value=mock_client) mock_collection = AsyncMock() mock_get_collection.return_value = mock_collection @@ -348,6 +340,7 @@ async def test_mcp_server(): @pytest.mark.asyncio async def test_mcp_server_ls_on_start(): + mock_client = AsyncMock() mock_collection = AsyncMock() with ( @@ -361,15 +354,15 @@ async def test_mcp_server_ls_on_start(): ) as mock_get_collections, patch("mcp.server.fastmcp.FastMCP.add_tool") as mock_add_tool, patch("vectorcode.common.try_server", return_value=True), + patch( + "vectorcode.mcp_main.ClientManager._create_client", return_value=mock_client + ), ): - from vectorcode.mcp_main import ClientManager, mcp_config + from vectorcode.mcp_main import mcp_config mcp_config.ls_on_start = True mock_find_project_config_dir.return_value = "/path/to/config" mock_load_config_file.return_value = Config(project_root="/path/to/project") - mock_client = AsyncMock() - - ClientManager._create_client = AsyncMock(return_value=mock_client) mock_collection.metadata = {"path": "/path/to/project"} mock_get_collection.return_value = mock_collection From 70103077c7acbca127a8c15fa398d97c219edb0b Mon Sep 17 00:00:00 
2001 From: Zhe Yu Date: Tue, 1 Jul 2025 12:22:48 +0800 Subject: [PATCH 12/20] refactor(cli): refactor `files ls` to reuse file listing logic --- src/vectorcode/subcommands/files/ls.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/vectorcode/subcommands/files/ls.py b/src/vectorcode/subcommands/files/ls.py index 6b5f36d0..cce58324 100644 --- a/src/vectorcode/subcommands/files/ls.py +++ b/src/vectorcode/subcommands/files/ls.py @@ -1,6 +1,7 @@ import json import logging +from chromadb.api.models.AsyncCollection import AsyncCollection from chromadb.api.types import IncludeEnum from vectorcode.cli_utils import Config @@ -9,6 +10,16 @@ logger = logging.getLogger(name=__name__) +async def list_files(collection: AsyncCollection) -> list[str]: + meta = (await collection.get(include=[IncludeEnum.metadatas])).get("metadatas") + if meta is None: + logger.warning("Failed to fetch metadatas from the database.") + return [] + paths: list[str] = list(set(str(m.get("path")) for m in meta)) + paths.sort() + return paths + + async def ls(configs: Config) -> int: async with ClientManager().get_client(configs=configs) as client: try: @@ -16,12 +27,7 @@ async def ls(configs: Config) -> int: except ValueError: logger.error(f"There's no existing collection at {configs.project_root}.") return 1 - meta = (await collection.get(include=[IncludeEnum.metadatas])).get("metadatas") - if meta is None: - logger.warning("Failed to fetch metadatas from the database.") - return 0 - paths: list[str] = list(set(str(m.get("path")) for m in meta)) - paths.sort() + paths = await list_files(collection) if configs.pipe: print(json.dumps(list(paths))) else: From 4fa068ab96938bc97ce17b5f2a05da9ffc61edec Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 12:33:18 +0800 Subject: [PATCH 13/20] feat(cli): add `files` subcommand to the LSP server --- src/vectorcode/common.py | 14 +-- src/vectorcode/lsp_main.py | 50 +++++++++- src/vectorcode/subcommands/files/ls.py 
| 17 +--- tests/test_lsp.py | 127 ++++++++++++++++++++++++- 4 files changed, 184 insertions(+), 24 deletions(-) diff --git a/src/vectorcode/common.py b/src/vectorcode/common.py index a297c25f..c5f7cee4 100644 --- a/src/vectorcode/common.py +++ b/src/vectorcode/common.py @@ -229,13 +229,15 @@ def verify_ef(collection: AsyncCollection, configs: Config): async def list_collection_files(collection: AsyncCollection) -> list[str]: - return list( - set( - str(c.get("path", None)) - for c in (await collection.get(include=[IncludeEnum.metadatas])).get( - "metadatas" + return sorted( + list( + set( + str(c.get("path", None)) + for c in (await collection.get(include=[IncludeEnum.metadatas])).get( + "metadatas" + ) + or [] ) - or [] ) ) diff --git a/src/vectorcode/lsp_main.py b/src/vectorcode/lsp_main.py index bd78854a..71756641 100644 --- a/src/vectorcode/lsp_main.py +++ b/src/vectorcode/lsp_main.py @@ -6,8 +6,10 @@ import time import traceback import uuid +from typing import cast import shtab +from chromadb.types import Where from vectorcode.subcommands.vectorise import ( VectoriseStats, @@ -32,17 +34,21 @@ file=sys.stderr, ) sys.exit(1) +from chromadb.errors import InvalidCollectionException + from vectorcode import __version__ from vectorcode.cli_utils import ( CliAction, + FilesAction, cleanup_path, config_logging, expand_globs, + expand_path, find_project_root, get_project_config, parse_cli_args, ) -from vectorcode.common import ClientManager, get_collection +from vectorcode.common import ClientManager, get_collection, list_collection_files from vectorcode.subcommands.ls import get_collection_list from vectorcode.subcommands.query import build_query_results @@ -105,7 +111,11 @@ async def execute_command(ls: LanguageServer, args: list[str]): async with ClientManager().get_client(final_configs) as client: progress_token = str(uuid.uuid4()) - if final_configs.action in {CliAction.vectorise, CliAction.query}: + if final_configs.action in { + CliAction.vectorise, + 
CliAction.query, + CliAction.files, + }: collection = await get_collection( client=client, configs=final_configs, @@ -222,6 +232,42 @@ async def execute_command(ls: LanguageServer, args: list[str]): ), ) return stats.to_dict() + case CliAction.files: + if collection is None: # pragma: nocover + raise InvalidCollectionException( + f"Failed to find the corresponding collection for {final_configs.project_root}" + ) + match final_configs.files_action: + case FilesAction.ls: + return await list_collection_files(collection) + case FilesAction.rm: + to_be_removed = list( + str(expand_path(p, True)) + for p in final_configs.rm_paths + if os.path.isfile(p) + ) + if len(to_be_removed) == 0: + return + ls.progress.begin( + progress_token, + types.WorkDoneProgressBegin( + title="VectorCode", + message=f"Removing {len(to_be_removed)} file(s).", + ), + ) + await collection.delete( + where=cast( + Where, + {"path": {"$in": to_be_removed}}, + ) + ) + ls.progress.begin( + progress_token, + types.WorkDoneProgressBegin( + title="VectorCode", + message="Removal finished.", + ), + ) case _ as c: # pragma: nocover error_message = f"Unsupported vectorcode subcommand: {str(c)}" logger.error( diff --git a/src/vectorcode/subcommands/files/ls.py b/src/vectorcode/subcommands/files/ls.py index cce58324..6dffd3d7 100644 --- a/src/vectorcode/subcommands/files/ls.py +++ b/src/vectorcode/subcommands/files/ls.py @@ -1,25 +1,12 @@ import json import logging -from chromadb.api.models.AsyncCollection import AsyncCollection -from chromadb.api.types import IncludeEnum - from vectorcode.cli_utils import Config -from vectorcode.common import ClientManager, get_collection +from vectorcode.common import ClientManager, get_collection, list_collection_files logger = logging.getLogger(name=__name__) -async def list_files(collection: AsyncCollection) -> list[str]: - meta = (await collection.get(include=[IncludeEnum.metadatas])).get("metadatas") - if meta is None: - logger.warning("Failed to fetch metadatas from 
the database.") - return [] - paths: list[str] = list(set(str(m.get("path")) for m in meta)) - paths.sort() - return paths - - async def ls(configs: Config) -> int: async with ClientManager().get_client(configs=configs) as client: try: @@ -27,7 +14,7 @@ async def ls(configs: Config) -> int: except ValueError: logger.error(f"There's no existing collection at {configs.project_root}.") return 1 - paths = await list_files(collection) + paths = await list_collection_files(collection) if configs.pipe: print(json.dumps(list(paths))) else: diff --git a/tests/test_lsp.py b/tests/test_lsp.py index 18f999ff..55e90056 100644 --- a/tests/test_lsp.py +++ b/tests/test_lsp.py @@ -1,3 +1,4 @@ +import os from contextlib import asynccontextmanager from unittest.mock import AsyncMock, MagicMock, patch @@ -6,7 +7,7 @@ from pygls.server import LanguageServer from vectorcode import __version__ -from vectorcode.cli_utils import CliAction, Config, QueryInclude +from vectorcode.cli_utils import CliAction, Config, FilesAction, QueryInclude from vectorcode.lsp_main import ( execute_command, lsp_start, @@ -374,3 +375,127 @@ async def test_execute_command_no_default_project_root( with pytest.raises((AssertionError, JsonRpcInternalError)): await execute_command(mock_language_server, ["query", "test"]) DEFAULT_PROJECT_ROOT = None # Reset the global variable + + +@pytest.mark.asyncio +async def test_execute_command_files_ls(mock_language_server, mock_config: Config): + mock_config.action = CliAction.files + mock_config.files_action = FilesAction.ls + mock_config.project_root = "/test/project" + + dummy_files = ["/test/project/file1.py", "/test/project/file2.txt"] + mock_client = AsyncMock() + mock_collection = AsyncMock() + + with ( + patch( + "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock + ) as mock_parse_cli_args, + patch( + "vectorcode.lsp_main.ClientManager._create_client", return_value=mock_client + ), + patch("vectorcode.common.try_server", return_value=True), + 
patch("vectorcode.lsp_main.get_collection", return_value=mock_collection), + patch( + "vectorcode.lsp_main.list_collection_files", return_value=dummy_files + ) as mock_list_collection_files, + ): + mock_parse_cli_args.return_value = mock_config + + mock_config.merge_from = AsyncMock(return_value=mock_config) + + result = await execute_command(mock_language_server, ["files", "ls"]) + + assert result == dummy_files + mock_language_server.progress.create_async.assert_called_once() + + mock_list_collection_files.assert_called_once_with(mock_collection) + # For 'ls' action, progress.begin/end are not explicitly called in the lsp_main, + # but create_async is called before the match statement. + mock_language_server.progress.begin.assert_not_called() + mock_language_server.progress.end.assert_not_called() + + +@pytest.mark.asyncio +async def test_execute_command_files_rm(mock_language_server, mock_config: Config): + mock_config.action = CliAction.files + mock_config.files_action = FilesAction.rm + mock_config.project_root = "/test/project" + mock_config.rm_paths = ["file_to_remove.py", "another_file.txt"] + + expanded_paths = [ + "/test/project/file_to_remove.py", + "/test/project/another_file.txt", + ] + mock_client = AsyncMock() + mock_collection = AsyncMock() + + with ( + patch( + "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock + ) as mock_parse_cli_args, + patch( + "vectorcode.lsp_main.ClientManager._create_client", return_value=mock_client + ), + patch("vectorcode.common.try_server", return_value=True), + patch("vectorcode.lsp_main.get_collection", return_value=mock_collection), + patch( + "os.path.isfile", + side_effect=lambda x: x in expanded_paths or x in mock_config.rm_paths, + ), + patch( + "vectorcode.lsp_main.expand_path", + side_effect=lambda p, *args: os.path.join(mock_config.project_root, p), + ), + ): + mock_parse_cli_args.return_value = mock_config + + mock_config.merge_from = AsyncMock(return_value=mock_config) + + await execute_command( + 
mock_language_server, + ["files", "rm", "file_to_remove.py", "another_file.txt"], + ) + + mock_collection.delete.assert_called_once_with( + where={"path": {"$in": expanded_paths}} + ) + + +@pytest.mark.asyncio +async def test_execute_command_files_rm_no_files_to_remove( + mock_language_server, mock_config: Config +): + mock_config.action = CliAction.files + mock_config.files_action = FilesAction.rm + mock_config.project_root = "/test/project" + mock_config.rm_paths = ["non_existent_file.py"] + + mock_client = AsyncMock() + mock_collection = AsyncMock() + + with ( + patch( + "vectorcode.lsp_main.parse_cli_args", new_callable=AsyncMock + ) as mock_parse_cli_args, + patch( + "vectorcode.lsp_main.ClientManager._create_client", return_value=mock_client + ), + patch("vectorcode.common.try_server", return_value=True), + patch("vectorcode.lsp_main.get_collection", return_value=mock_collection), + patch("os.path.isfile", return_value=False), + patch( + "vectorcode.lsp_main.expand_path", + side_effect=lambda p, *args: os.path.join(mock_config.project_root, p), + ), + ): + mock_parse_cli_args.return_value = mock_config + + mock_config.merge_from = AsyncMock(return_value=mock_config) + + result = await execute_command( + mock_language_server, ["files", "rm", "non_existent_file.py"] + ) + + assert result is None + mock_collection.delete.assert_not_called() From 985bbdbbb32da7574ba52d439da507872bcfe314 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 14:21:12 +0800 Subject: [PATCH 14/20] feat(nvim): add `files_ls` tool to CodeCompanion --- .../_extensions/vectorcode/init.lua | 22 +++-- .../codecompanion/files_ls_tool.lua | 86 +++++++++++++++++++ lua/vectorcode/types.lua | 4 + 3 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 lua/vectorcode/integrations/codecompanion/files_ls_tool.lua diff --git a/lua/codecompanion/_extensions/vectorcode/init.lua b/lua/codecompanion/_extensions/vectorcode/init.lua index a7eda209..336161cf 100644 --- 
a/lua/codecompanion/_extensions/vectorcode/init.lua +++ b/lua/codecompanion/_extensions/vectorcode/init.lua @@ -1,6 +1,6 @@ ---@module "codecompanion" ----@alias sub_cmd "ls"|"query"|"vectorise" +---@alias sub_cmd "ls"|"query"|"vectorise"|"files_ls" ---@class VectorCode.CodeCompanion.ExtensionOpts --- A table where the keys are the subcommand name (`ls`, `query`, `vectorise`) @@ -17,15 +17,24 @@ local use_lsp = vc_config.get_user_config().async_backend == "lsp" ---@type VectorCode.CodeCompanion.ExtensionOpts|{} local default_extension_opts = { tool_opts = { - ls = { use_lsp = use_lsp, requires_approval = false }, - query = { use_lsp = use_lsp, requires_approval = false }, - vectorise = { use_lsp = use_lsp, requires_approval = true }, + ls = { use_lsp = use_lsp, requires_approval = false, include_in_toolbox = true }, + query = { use_lsp = use_lsp, requires_approval = false, include_in_toolbox = true }, + vectorise = { + use_lsp = use_lsp, + requires_approval = true, + include_in_toolbox = true, + }, + files_ls = { + use_lsp = use_lsp, + requires_approval = false, + include_in_toolbox = false, + }, }, tool_group = { enabled = true, collapse = true, extras = {} }, } ---@type sub_cmd[] -local valid_tools = { "ls", "query", "vectorise" } +local valid_tools = { "ls", "query", "vectorise", "files_ls" } ---@type CodeCompanion.Extension local M = { @@ -65,6 +74,9 @@ local M = { if opts.tool_group.enabled then local included_tools = vim .iter(valid_tools) + :filter(function(cmd_name) + return opts.tool_opts[cmd_name].include_in_toolbox + end) :map(function(s) return "vectorcode_" .. 
s end) diff --git a/lua/vectorcode/integrations/codecompanion/files_ls_tool.lua b/lua/vectorcode/integrations/codecompanion/files_ls_tool.lua new file mode 100644 index 00000000..0d956c2d --- /dev/null +++ b/lua/vectorcode/integrations/codecompanion/files_ls_tool.lua @@ -0,0 +1,86 @@ +---@module "codecompanion" + +local cc_common = require("vectorcode.integrations.codecompanion.common") + +---@param opts VectorCode.CodeCompanion.FilesLsToolOpts +---@return CodeCompanion.Agent.Tool +return function(opts) + local job_runner = + require("vectorcode.integrations.codecompanion.common").initialise_runner( + opts.use_lsp + ) + local tool_name = "vectorcode_files_ls" + ---@type CodeCompanion.Agent.Tool|{} + return { + name = tool_name, + cmds = { + ---@param agent CodeCompanion.Agent + ---@param action {project_root: string} + ---@return nil|{ status: string, data: string } + function(agent, action, _, cb) + local args = { "files", "ls", "--pipe" } + if action ~= nil then + action.project_root = action.project_root + or vim.fs.root(0, { ".vectorcode", ".git" }) + if action.project_root ~= nil then + action.project_root = vim.fs.normalize(action.project_root) + local stat = vim.uv.fs_stat(action.project_root) + if stat and stat.type == "directory" then + vim.list_extend(args, { "--project_root", action.project_root }) + end + end + end + job_runner.run_async(args, function(result, error) + if vim.islist(result) and #result > 0 then + cb({ status = "success", data = result }) + else + if type(error) == "table" then + error = cc_common.flatten_table_to_string(error) + end + cb({ + status = "error", + data = error, + }) + end + end, agent.chat.bufnr) + end, + }, + schema = { + type = "function", + ["function"] = { + name = tool_name, + description = "Retrieve a list of files that have been added to the database for a given project.", + parameters = { + type = "object", + properties = { + project_root = { + type = "string", + description = "The project for which the indexed 
files will be listed. Leave this empty for the current project.", + }, + }, + }, + }, + }, + output = { + ---@param agent CodeCompanion.Agent + ---@param stdout string[][] + success = function(_, agent, _, stdout) + stdout = stdout[1] + local user_message + for i, col in ipairs(stdout) do + if i == 1 then + user_message = + string.format("**VectorCode `files_ls` Tool**: Found %d files.", #stdout) + else + user_message = "" + end + agent.chat:add_tool_output( + agent.tool, + string.format("%s", col), + user_message + ) + end + end, + }, + } +end diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 01d62264..585e7c9b 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -76,9 +76,13 @@ --- Whether to use the LSP backend. Default: `false` ---@field use_lsp boolean? ---@field requires_approval boolean? +--- Whether this tool should be included in `vectorcode_toolbox` +---@field include_in_toolbox boolean? ---@class VectorCode.CodeCompanion.LsToolOpts: VectorCode.CodeCompanion.ToolOpts +---@class VectorCode.CodeCompanion.FilesLsToolOpts: VectorCode.CodeCompanion.ToolOpts + ---@class VectorCode.CodeCompanion.QueryToolOpts: VectorCode.CodeCompanion.ToolOpts --- Maximum number of results provided to the LLM. --- You may set this to a table to configure different values for document/chunk mode. 
From 24428e86dee30e7845a3b4fc0c08bff0790b6880 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 15:57:27 +0800 Subject: [PATCH 15/20] fix(cli): improve `files rm` feedback and fix progress handling --- src/vectorcode/lsp_main.py | 5 ++--- src/vectorcode/subcommands/files/rm.py | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vectorcode/lsp_main.py b/src/vectorcode/lsp_main.py index 71756641..e2f7385e 100644 --- a/src/vectorcode/lsp_main.py +++ b/src/vectorcode/lsp_main.py @@ -261,10 +261,9 @@ async def execute_command(ls: LanguageServer, args: list[str]): {"path": {"$in": to_be_removed}}, ) ) - ls.progress.begin( + ls.progress.end( progress_token, - types.WorkDoneProgressBegin( - title="VectorCode", + types.WorkDoneProgressEnd( message="Removal finished.", ), ) diff --git a/src/vectorcode/subcommands/files/rm.py b/src/vectorcode/subcommands/files/rm.py index 89fd392d..1d2e9fb3 100644 --- a/src/vectorcode/subcommands/files/rm.py +++ b/src/vectorcode/subcommands/files/rm.py @@ -21,7 +21,8 @@ async def rm(configs: Config) -> int: str(expand_path(p, True)) for p in configs.rm_paths if os.path.isfile(p) ) await collection.delete(where=cast(Where, {"path": {"$in": paths}})) - print(f"Removed {len(paths)} file(s).") + if not configs.pipe: + print(f"Removed {len(paths)} file(s).") if await collection.count() == 0: logger.warning( f"The collection at {configs.project_root} is now empty and will be removed." 
From c78170f5e07a25234af743a72091b763d2e8ef1f Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 15:59:28 +0800 Subject: [PATCH 16/20] feat(nvim): add `files_rm` tool to CodeCompanion --- .../_extensions/vectorcode/init.lua | 9 +- .../codecompanion/files_rm_tool.lua | 112 ++++++++++++++++++ .../integrations/codecompanion/init.lua | 2 +- lua/vectorcode/types.lua | 2 + 4 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 lua/vectorcode/integrations/codecompanion/files_rm_tool.lua diff --git a/lua/codecompanion/_extensions/vectorcode/init.lua b/lua/codecompanion/_extensions/vectorcode/init.lua index 336161cf..497e2ee7 100644 --- a/lua/codecompanion/_extensions/vectorcode/init.lua +++ b/lua/codecompanion/_extensions/vectorcode/init.lua @@ -1,6 +1,6 @@ ---@module "codecompanion" ----@alias sub_cmd "ls"|"query"|"vectorise"|"files_ls" +---@alias sub_cmd "ls"|"query"|"vectorise"|"files_ls"|"files_rm" ---@class VectorCode.CodeCompanion.ExtensionOpts --- A table where the keys are the subcommand name (`ls`, `query`, `vectorise`) @@ -29,12 +29,17 @@ local default_extension_opts = { requires_approval = false, include_in_toolbox = false, }, + files_rm = { + use_lsp = use_lsp, + requires_approval = true, + include_in_toolbox = false, + }, }, tool_group = { enabled = true, collapse = true, extras = {} }, } ---@type sub_cmd[] -local valid_tools = { "ls", "query", "vectorise", "files_ls" } +local valid_tools = { "ls", "query", "vectorise", "files_ls", "files_rm" } ---@type CodeCompanion.Extension local M = { diff --git a/lua/vectorcode/integrations/codecompanion/files_rm_tool.lua b/lua/vectorcode/integrations/codecompanion/files_rm_tool.lua new file mode 100644 index 00000000..f723ea8c --- /dev/null +++ b/lua/vectorcode/integrations/codecompanion/files_rm_tool.lua @@ -0,0 +1,112 @@ +---@module "codecompanion" + +local cc_common = require("vectorcode.integrations.codecompanion.common") + +---@alias FilesRmArgs { paths: string[], project_root: string } + 
+---@param opts VectorCode.CodeCompanion.FilesRmToolOpts +---@return CodeCompanion.Agent.Tool +return function(opts) + local tool_name = "vectorcode_files_rm" + local job_runner = cc_common.initialise_runner(opts.use_lsp) + + ---@type CodeCompanion.Agent.Tool|{} + return { + name = tool_name, + schema = { + type = "function", + ["function"] = { + name = tool_name, + description = "Remove files from the VectorCode database. The files will remain in the file system.", + parameters = { + type = "object", + properties = { + paths = { + type = "array", + items = { type = "string" }, + description = "Paths to the files to be removed from the database.", + }, + project_root = { + type = "string", + description = "The project that the files belong to. Either use a path from the `vectorcode_ls` tool, or leave empty to use the current git project. If the user did not specify a path, use empty string for this parameter.", + }, + }, + required = { "paths", "project_root" }, + additionalProperties = false, + }, + strict = true, + }, + }, + cmds = { + ---@param agent CodeCompanion.Agent + ---@param action VectoriseToolArgs + ---@return nil|{ status: string, data: string } + function(agent, action, _, cb) + local args = { "files", "rm", "--pipe" } + local project_root = vim.fs.abspath(vim.fs.normalize(action.project_root or "")) + if project_root ~= "" then + local stat = vim.uv.fs_stat(project_root) + if stat and stat.type == "directory" then + vim.list_extend(args, { "--project_root", project_root }) + else + return { status = "error", data = "Invalid path " .. project_root } + end + else + project_root = vim.fs.root(".", { ".vectorcode", ".git" }) or "" + if project_root == "" then + return { + status = "error", + data = "Please specify a project root. 
You may use the `vectorcode_ls` tool to find a list of existing projects.", + } + end + end + if project_root ~= "" then + action.project_root = project_root + end + vim.list_extend( + args, + vim + .iter(action.paths) + :filter( + ---@param item string + function(item) + local stat = vim.uv.fs_stat(item) + if stat and stat.type == "file" then + return true + else + return false + end + end + ) + :totable() + ) + job_runner.run_async( + args, + ---@param result VectoriseResult + function(result, error, code, _) + if result then + cb({ status = "success", data = result }) + else + cb({ status = "error", data = { error = error, code = code } }) + end + end, + agent.chat.bufnr + ) + end, + }, + output = { + ---@param self CodeCompanion.Agent.Tool + prompt = function(self, _) + return string.format( + "Remove %d files from VectorCode database?", + #self.args.paths + ) + end, + ---@param self CodeCompanion.Agent.Tool + ---@param agent CodeCompanion.Agent + success = function(self, agent, _, _) + agent.chat:add_tool_output(self, "**VectorCode `files_rm` tool**: successful.") + end, + }, + } +end diff --git a/lua/vectorcode/integrations/codecompanion/init.lua b/lua/vectorcode/integrations/codecompanion/init.lua index 2c52d69f..08ce26e8 100644 --- a/lua/vectorcode/integrations/codecompanion/init.lua +++ b/lua/vectorcode/integrations/codecompanion/init.lua @@ -35,7 +35,7 @@ return { } end), - ---@param subcommand "ls"|"query"|"vectorise" + ---@param subcommand sub_cmd ---@param opts VectorCode.CodeCompanion.ToolOpts ---@return CodeCompanion.Agent.Tool make_tool = function(subcommand, opts) diff --git a/lua/vectorcode/types.lua b/lua/vectorcode/types.lua index 585e7c9b..6e71483f 100644 --- a/lua/vectorcode/types.lua +++ b/lua/vectorcode/types.lua @@ -83,6 +83,8 @@ ---@class VectorCode.CodeCompanion.FilesLsToolOpts: VectorCode.CodeCompanion.ToolOpts +---@class VectorCode.CodeCompanion.FilesRmToolOpts: VectorCode.CodeCompanion.ToolOpts + ---@class 
VectorCode.CodeCompanion.QueryToolOpts: VectorCode.CodeCompanion.ToolOpts --- Maximum number of results provided to the LLM. --- You may set this to a table to configure different values for document/chunk mode. From 717df2287438f2b86ce3d9b0aa89ee4a992a61c5 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 16:02:22 +0800 Subject: [PATCH 17/20] docs(cli): document pipe mode of `files ls` subcommand in cli.md --- docs/cli.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/cli.md b/docs/cli.md index 72f03103..47c57143 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -33,6 +33,7 @@ * [`vectorcode query`](#vectorcode-query) * [`vectorcode vectorise`](#vectorcode-vectorise) * [`vectorcode ls`](#vectorcode-ls) + * [`vectorcode files ls`](#vectorcode-files-ls) * [LSP Mode](#lsp-mode) * [MCP Server](#mcp-server) * [Writing Prompts](#writing-prompts) @@ -656,6 +657,10 @@ A JSON array of collection information of the following format will be printed: - `"size"`: number of chunks stored in the database; - `"num_files"`: number of files that have been vectorised in the project. +#### `vectorcode files ls` + +A JSON array of strings (the absolute paths to the files in the collection). 
+ ### LSP Mode There's an experimental implementation of VectorCode CLI, which accepts requests From da38f95d7f95fc0d6f5d59b85a48caf5060d39af Mon Sep 17 00:00:00 2001 From: Davidyz Date: Tue, 1 Jul 2025 08:03:19 +0000 Subject: [PATCH 18/20] Auto generate docs --- doc/VectorCode-cli.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index bc2dcc64..0925c90f 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -44,6 +44,7 @@ Table of Contents *VectorCode-cli-table-of-contents* - |VectorCode-cli-`vectorcode-query`| - |VectorCode-cli-`vectorcode-vectorise`| - |VectorCode-cli-`vectorcode-ls`| + - |VectorCode-cli-`vectorcode-files-ls`| - |VectorCode-cli-lsp-mode| - |VectorCode-cli-mcp-server| - |VectorCode-cli-writing-prompts| @@ -728,6 +729,11 @@ A JSON array of collection information of the following format will be printed: - `"num_files"`number of files that have been vectorised in the project. +VECTORCODE FILES LS + +A JSON array of strings (the absolute paths to the files in the collection). + + LSP MODE ~ There’s an experimental implementation of VectorCode CLI, which accepts From 91439ecbfa00b8423df8cc375ec28f18979df4c6 Mon Sep 17 00:00:00 2001 From: Zhe Yu Date: Tue, 1 Jul 2025 16:10:01 +0800 Subject: [PATCH 19/20] docs(cli): formatting --- docs/cli.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index 47c57143..c7d48a4d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -511,8 +511,8 @@ For empty collections and collections for removed projects, you can use the ### Inspecting and Manupulating Files in an Indexed Project -`vectorcode files ls` prints a list of files that are indexed in the project. -`vectorcode files rm file1 file2` removes the embeddings that belong to the +- `vectorcode files ls` prints a list of files that are indexed in the project. 
+- `vectorcode files rm file1 file2` removes the embeddings that belong to the specified files from the project. Both commands will honor the `--project_root` flag. From 36baeed5776e5360650b618346d58573e2aa59f3 Mon Sep 17 00:00:00 2001 From: Davidyz Date: Tue, 1 Jul 2025 08:11:12 +0000 Subject: [PATCH 20/20] Auto generate docs --- doc/VectorCode-cli.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt index 0925c90f..633b0199 100644 --- a/doc/VectorCode-cli.txt +++ b/doc/VectorCode-cli.txt @@ -560,9 +560,9 @@ For empty collections and collections for removed projects, you can use the INSPECTING AND MANUPULATING FILES IN AN INDEXED PROJECT ~ -`vectorcode files ls` prints a list of files that are indexed in the project. -`vectorcode files rm file1 file2` removes the embeddings that belong to the -specified files from the project. +- `vectorcode files ls` prints a list of files that are indexed in the project. +- `vectorcode files rm file1 file2` removes the embeddings that belong to the + specified files from the project. Both commands will honor the `--project_root` flag.