-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtext-search.py
More file actions
145 lines (122 loc) · 5.24 KB
/
text-search.py
File metadata and controls
145 lines (122 loc) · 5.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3
"""
Recursive Text Search — Search for text patterns across files with context lines.
Usage:
python text-search.py <pattern> <directory>
python text-search.py <pattern> <directory> --context 3
python text-search.py <regex> <directory> --ignore-case --ext py
Options:
--context N Lines of context before/after each match (default: 2)
--ignore-case Case-insensitive search
--ext EXT File extension filter (e.g. py, txt, md — repeatable)
--exclude PAT Exclude files whose path contains PAT (plain substring match, repeatable)
--no-binary Skip binary files
--max-size MB Skip files larger than MB (default: 10)
--help Show this help message and exit
"""
import os
import sys
import re
import argparse
from pathlib import Path
def search_file(filepath: Path, pattern: re.Pattern, context: int,
                max_size_mb: float) -> list[dict]:
    """
    Search a single file for lines matching ``pattern``.

    Args:
        filepath: File to read. It is decoded as UTF-8 and undecodable
            bytes are replaced rather than raising.
        pattern: Compiled regex, tested against every line with
            ``pattern.search``.
        context: Number of neighbouring lines to capture on each side of a
            match. Negative values are treated as 0.
        max_size_mb: Files larger than this many MiB are not read.

    Returns:
        One dict per matching line with the keys ``line_number`` (1-based),
        ``line``, ``context_before`` and ``context_after``; trailing line
        endings are stripped from every returned line.
        If the file is too large or cannot be read, a one-element list
        ``[{"error": <message>}]`` is returned instead.
    """
    # A negative context would make the slice bounds below negative, and
    # Python would then interpret them as offsets from the END of the file,
    # yielding unrelated lines as "context".
    context = max(context, 0)

    try:
        size_mb = filepath.stat().st_size / (1024 * 1024)
        if size_mb > max_size_mb:
            return [{"error": f"Skipped (>{max_size_mb}MB): {size_mb:.1f}MB"}]
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except PermissionError:
        return [{"error": "Permission denied"}]
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort a whole
        # directory scan, so the failure is reported to the caller as data.
        return [{"error": str(e)}]

    results = []
    for i, line in enumerate(lines):
        if not pattern.search(line):
            continue
        # Clamp the context window to the file boundaries.
        start = max(0, i - context)
        end = min(len(lines), i + context + 1)
        results.append({
            "line_number": i + 1,
            "line": line.rstrip("\n\r"),
            "context_before": [prev.rstrip("\n\r") for prev in lines[start:i]],
            "context_after": [nxt.rstrip("\n\r") for nxt in lines[i + 1:end]],
        })
    return results
def main():
    """
    Command-line entry point.

    Parses the arguments, walks the directory tree below ``directory`` and
    prints every matching line together with its context lines, followed by
    a summary of the number of matches and matching files.

    Exits with status 1 when ``directory`` is not a directory or when
    ``pattern`` is not a valid regular expression.
    """
    parser = argparse.ArgumentParser(
        description="Recursively search for text patterns in files.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            " python text-search.py 'TODO' ./src\n"
            " python text-search.py 'def \\w+' ./ --ext py --context 3\n"
            " python text-search.py 'error' /var/log --ignore-case --context 5\n"
            " python text-search.py 'import os' . --ext py --exclude venv\n"
        ),
    )
    parser.add_argument("pattern", help="Search pattern (regex)")
    parser.add_argument("directory", help="Root directory to search")
    parser.add_argument("--context", type=int, default=2, help="Context lines (default: 2)")
    parser.add_argument("--ignore-case", action="store_true", help="Case-insensitive search")
    parser.add_argument("--ext", action="append", default=[], help="File extension filter (repeatable)")
    parser.add_argument("--exclude", action="append", default=[], help="Exclude pattern (repeatable)")
    parser.add_argument("--no-binary", action="store_true", help="Skip binary files (not foolproof)")
    parser.add_argument("--max-size", type=float, default=10.0, help="Max file size in MB (default: 10)")
    args = parser.parse_args()

    root = Path(args.directory)
    if not root.is_dir():
        print(f"Error: '{args.directory}' is not a valid directory.")
        sys.exit(1)

    flags = re.IGNORECASE if args.ignore_case else 0
    try:
        pattern = re.compile(args.pattern, flags)
    except re.error as e:
        print(f"Error: Invalid regex pattern: {e}")
        sys.exit(1)

    # Normalize extensions to ".ext" in lower case. File suffixes are
    # lower-cased before the comparison below, so an upper-case filter such
    # as "--ext PY" would otherwise never match anything.
    exts = {f".{e.strip('.').lower()}" for e in args.ext} if args.ext else None

    # A negative context is meaningless and would corrupt the slice bounds
    # used to collect context lines; treat it as "no context".
    context = max(args.context, 0)

    total_matches = 0
    total_files = 0
    print(f"Searching for '{args.pattern}' in {root} ...\n")

    for filepath in root.rglob("*"):
        if not filepath.is_file():
            continue

        # Exclude patterns are plain substrings of the path, not globs.
        path_text = str(filepath)
        if any(pat in path_text for pat in args.exclude):
            continue

        # Extension filter
        if exts is not None and filepath.suffix.lower() not in exts:
            continue

        # Rough binary check: a NUL byte in the first 1 KiB marks the file
        # as binary. Files that cannot be opened are skipped as well.
        if args.no_binary:
            try:
                with open(filepath, "rb") as f:
                    chunk = f.read(1024)
            except OSError:
                continue
            if b"\0" in chunk:
                continue

        matches = search_file(filepath, pattern, context, args.max_size)
        if not matches:
            continue

        # search_file reports a skipped/unreadable file as a single
        # {"error": ...} entry instead of match dicts.
        if "error" in matches[0]:
            print(f" ! {filepath}: {matches[0]['error']}")
            continue

        total_files += 1
        print(f"── {filepath} ──")
        for m in matches:
            print(f" {m['line_number']:>6} | {m['line']}")
            for cl in m["context_before"]:
                print(f" | {cl} [before]")
            for cl in m["context_after"]:
                print(f" | {cl} [after]")
            print()
            total_matches += 1

    print(f"\nResults: {total_matches} matches in {total_files} files")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()