fstasearch/tests.py at main · crushndent/fstasearch · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import unittest
import os
import shutil
import tempfile
import time
from pathlib import Path
import database
import config
from indexer import Indexer

class TestFstaSearch(unittest.TestCase):
    def setUp(self):
        # Create a temporary directory
        self.test_dir = tempfile.mkdtemp()

        # Create some dummy files
        os.makedirs(os.path.join(self.test_dir, "subdir"))
        Path(os.path.join(self.test_dir, "file1.txt")).touch()
        Path(os.path.join(self.test_dir, "file2.py")).touch()
        Path(os.path.join(self.test_dir, "subdir", "file3.jpg")).touch()

        # Hidden file
        Path(os.path.join(self.test_dir, ".hidden")).touch()

        # Setup Test Database (In-Memory)
        # We need to monkeypatch the global 'db' object in database module
        self.original_db = database.db
        database.db = database.DatabaseManager(":memory:")

    def tearDown(self):
        # Restore DB
        database.db.close()
        database.db = self.original_db
        shutil.rmtree(self.test_dir)

    def test_indexer_scan_persistence(self):
        """Test that scanning updates the DB and new Indexer loads from it."""
        indexer = Indexer([self.test_dir])
        indexer.scan()

        # Check in-memory verification
        self.assertEqual(len(indexer.files), 3)
        self.assertEqual(len(indexer.directories), 2)

        # Check Database Persistence
        db_files, db_dirs = database.db.get_index()
        self.assertEqual(len(db_files), 3)
        self.assertEqual(len(db_dirs), 2)

        # Create NEW Indexer - should load from DB immediately
        indexer2 = Indexer([self.test_dir])
        # Note: indexer2.scan() is NOT called
        self.assertEqual(len(indexer2.files), 3)
        self.assertTrue(any("file1.txt" in f for f in indexer2.files))

    def test_indexer_exclude(self):
        # Exclude the subdir
        subdir = os.path.join(self.test_dir, "subdir")
        indexer = Indexer([self.test_dir], exclude_dirs=[subdir])
        indexer.scan()

        self.assertEqual(len(indexer.files), 2)
        self.assertEqual(len(indexer.directories), 1)
        self.assertFalse(any("file3.jpg" in f for f in indexer.files))

    def test_indexer_search(self):
        indexer = Indexer([self.test_dir])
        indexer.scan()

        results = indexer.search("file1")
        self.assertEqual(len(results), 1)
        self.assertTrue("file1.txt" in results[0])

    def test_search_prioritization_and_collapsing(self):
        """Test directory collapsing and prioritization."""
        Path(os.path.join(self.test_dir, "subdir_file.txt")).touch()
        subdir = os.path.join(self.test_dir, "subdir")
        deepfile = os.path.join(subdir, "deepfile.txt")
        Path(deepfile).touch()

        indexer = Indexer([self.test_dir])
        indexer.scan()

        # 1. Search for 'sub' - should match 'subdir' folder
        results = indexer.search("sub")
        self.assertTrue(any(r.endswith("subdir") for r in results))
        self.assertTrue(any(r.endswith("subdir_file.txt") for r in results))
        self.assertFalse(any("deepfile.txt" in r for r in results))

        # 2. Collapsing Test
        # Create alpha/beta/gamma
        os.makedirs(os.path.join(self.test_dir, "alpha", "beta", "gamma"), exist_ok=True)
        indexer.scan()

        # Search for 'alpha' -> should return .../alpha
        results_alpha = indexer.search("alpha")
        alpha_matches = [r for r in results_alpha if r.endswith(os.sep + "alpha")]
        self.assertGreaterEqual(len(alpha_matches), 1, f"Expected match ending in 'alpha', got {results_alpha}")

        # Ensure deep paths are not included
        # If we found .../alpha, we should NOT find .../alpha/beta
        self.assertFalse(any(r.endswith(os.path.join("alpha", "beta")) for r in results_alpha))

        # Search for 'beta' -> should return .../alpha/beta
        results_beta = indexer.search("beta")
        self.assertTrue(any(r.endswith(os.path.join("alpha", "beta")) for r in results_beta))

    def test_config_persistence(self):
        # Test saving to DB via config module
        conf = config.load_config()
        conf["display_tooltips"] = False
        config.save_config(conf)

        # Reload
        conf2 = config.load_config()
        self.assertFalse(conf2["display_tooltips"])

    def test_deep_path_collapsing(self):
        """Test specific use case: deep_a/deep_b/deep_c..."""
        # Create deep structure
        deep_path = os.path.join(self.test_dir, "deep_a", "deep_b", "deep_c", "deep_d", "deep_e", "deep_f")
        os.makedirs(deep_path)

        indexer = Indexer([self.test_dir])
        indexer.scan()

        # 1. Search 'deep_d'
        results_d = indexer.search("deep_d")
        matches_d = [r for r in results_d if r.endswith(os.path.join("deep_a", "deep_b", "deep_c", "deep_d"))]
        self.assertTrue(matches_d, f"Expected path ending in .../deep_d, got {results_d}")
        self.assertFalse(any("deep_e" in r.split(os.sep)[-1] for r in results_d))

        # 2. Search 'deep_b'
        results_b = indexer.search("deep_b")
        matches_b = [r for r in results_b if r.endswith(os.path.join("deep_a", "deep_b"))]
        self.assertTrue(matches_b, f"Expected path ending in .../deep_b, got {results_b}")

        # 3. Search 'deep_f'
        results_f = indexer.search("deep_f")
        matches_f = [r for r in results_f if r.endswith("deep_f")]
        self.assertTrue(matches_f)

    def test_results_ordering_and_deduplication(self):
        """
        Verify:
        1. Directories list BEFORE files.
        2. Strict deduplication (if parent matches, child string not shown as separate result).
        3. Multiple separate directory branches are shown.
        """
        # Setup structure
        # Matches "target"

        # Branch 1: root/target_dir
        os.makedirs(os.path.join(self.test_dir, "target_dir"))

        # Branch 2: root/target_dir/nested_target_dir (should be collapsed into target_dir)
        os.makedirs(os.path.join(self.test_dir, "target_dir", "nested_target_dir"))

        # Branch 3: root/other/target_dir
        os.makedirs(os.path.join(self.test_dir, "other", "target_dir"))

        # File 1: root/target_file.txt
        Path(os.path.join(self.test_dir, "target_file.txt")).touch()

        # File 2: root/other/target_file_2.txt
        Path(os.path.join(self.test_dir, "other", "target_file_2.txt")).touch()

        indexer = Indexer([self.test_dir])
        indexer.scan()

        results = indexer.search("target")

        # Analyze results
        dirs_found = [r for r in results if os.path.isdir(r)]
        files_found = [r for r in results if os.path.isfile(r)]

        # 1. Total count check
        # We expect:
        # - target_dir
        # - other/target_dir
        # - target_file.txt
        # - other/target_file_2.txt
        # nested_target_dir should NOT be a separate result because its parent 'target_dir' matched.
        self.assertEqual(len(results), 4, f"Expected 4 results, got {len(results)}: {results}")

        # 2. Ordering check (All dirs before all files)
        # The first len(dirs_found) items should be directories
        self.assertEqual(results[:len(dirs_found)], dirs_found)
        # The rest should be files
        self.assertEqual(results[len(dirs_found):], files_found)

        # 3. Deduplication check
        # 'nested_target_dir' should NOT be in results explicitly (it's covered by target_dir)
        nested_path = os.path.join(self.test_dir, "target_dir", "nested_target_dir")
        self.assertNotIn(nested_path, results)

        # 4. Content check
        self.assertTrue(any(r.endswith("target_dir") for r in dirs_found))
        self.assertTrue(any(r.endswith(os.path.join("other", "target_dir")) for r in dirs_found))

    def test_symlink_following(self):
        """Test that symlinked directories are followed."""
        # Create a real directory with a file
        real_dir = os.path.join(self.test_dir, "real_dir")
        os.makedirs(real_dir)
        Path(os.path.join(real_dir, "real_file.txt")).touch()

        # Create a symlink to it
        link_dir = os.path.join(self.test_dir, "link_dir")
        os.symlink(real_dir, link_dir)

        indexer = Indexer([self.test_dir])
        indexer.scan()

        # We expect to find 'real_file.txt' inside 'link_dir' (as well as real_dir)
        # Our unique-path logic in Indexer means we might track visited REAL paths.
        # If we visit real_dir first, we add it.
        # If we visit link_dir, it points to real_dir. If we track visited REAL paths, we might SKIP it?
        # Let's check the logic:
        # "if real_path in seen_paths: del dirs[i]"
        # So it will likely only index ONE of them if they point to the same place, to avoid duplicates?
        # OR it depends on traversal order.
        # BUT the user said "scan soft links as well".
        # If I have project A and symlink project A_link -> A. I probably want to search A_link/file too?
        # The current logic I implemented: "seen_paths.add(real_path)".
        # This effectively de-duplicates physically identical directories.
        # This prevents the loop, but also prevents indexing the SAME content under multiple names.
        # This is a reasonable interpretation of "scan soft links" (don't ignore them),
        # but also "infinite loop" prevention usually implies strict graph traversal.

        # Let's verify what happens. One of them should be there.
        # If I search for "real_file", I should find at least one.

        results = indexer.search("real_file.txt")
        self.assertTrue(len(results) >= 1, f"Expected to find real_file.txt, got {results}")

    def test_symlink_loop_detection(self):
        """Test that a recursive symlink doesn't cause infinite loop."""
        # Create dir A
        dir_a = os.path.join(self.test_dir, "loop_a")
        os.makedirs(dir_a)

        # Create dir B inside A
        dir_b = os.path.join(dir_a, "loop_b")
        os.makedirs(dir_b)

        # Symlink B -> A inside B ( .. loop .. )
        # link structure: loop_a/loop_b/link_to_a -> loop_a
        link_path = os.path.join(dir_b, "link_to_a")
        os.symlink(dir_a, link_path)

        # Run scan with timeout to ensure we don't hang
        import threading
        indexer = Indexer([self.test_dir])

        # Run in a separate thread we can join (or just trust unit test timeout?)
        # Logic is: if it loops, os.walk might go forever.
        # Our logic should prevent adding 'link_to_a' to the dirs list because it points to 'loop_a' which is a real path we've seen.

        start = time.time()
        indexer.scan()
        duration = time.time() - start

        self.assertLess(duration, 5.0, "Scan took too long, possible infinite loop")

        # Verify we found the distinct folders
        # matches: .../loop_a
        # matches: .../loop_a/loop_b
        # matches: .../loop_a/loop_b/link_to_a  <-- Should NOT process children of this if it loops

        # Search for something that would be deep if it looped?
        pass

if __name__ == '__main__':
    unittest.main()