|
17 | 17 |
|
18 | 18 | import pytz |
19 | 19 |
|
20 | | -from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Results, mime, traverse, file_mtime |
| 20 | +from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Results, mime, traverse, file_mtime, echain, logger |
21 | 21 | from ..config import use_cores |
22 | 22 | from ..py37 import nullcontext |
23 | 23 |
|
@@ -91,12 +91,10 @@ def wrapped(path: Path): |
91 | 91 | # ugh. keeping yield in the try section is not ideal, but need this because of lazy yield semantics |
92 | 92 | yield from it |
93 | 93 | except ModuleNotFoundError as me: |
94 | | - # TODO ugh. need to figure out how to attach cause/traceback |
95 | 94 | logger = get_logger() |
96 | 95 | logger.exception(me) |
97 | | - logger.warn('while indexing "%s": %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name) |
| 96 | + logger.warn('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name) |
98 | 97 | yield me |
99 | | - |
100 | 98 | fallback_active[ex] = True |
101 | 99 | do_fallback = True |
102 | 100 | if do_fallback: |
@@ -276,26 +274,35 @@ def _index_file(pp: Path, opts: Options) -> Results: |
276 | 274 | yield from _index(path=uncomp, opts=opts) |
277 | 275 | return |
278 | 276 |
|
| 277 | + ex = RuntimeError(f'While indexing {pp}') |
| 278 | + |
279 | 279 | ip, pm = by_path(pp) |
280 | 280 | if ip is None: |
281 | 281 | # TODO use warning (with mime/ext as key?) |
282 | 282 | # TODO only log once? # hmm.. |
283 | 283 | msg = f'No extractor for suffix {suf}, mime {pm}' |
284 | 284 | warnings.warn(msg) |
285 | | - yield RuntimeError(msg + f', path {pp}') |
| 285 | + yield echain(ex, RuntimeError(msg)) |
286 | 286 | return |
287 | 287 |
|
288 | 288 | logger.debug('indexing via %s: %s', ip.__name__, pp) |
289 | | - indexer: Union[Urls, Results] = ip(pp) # type: ignore |
290 | | - # TODO careful, filter out obviously not plaintext? maybe mime could help here?? |
| 289 | + |
| 290 | + def indexer() -> Union[Urls, Results]: |
| 291 | + # eh, annoying.. need to make more generic.. |
| 292 | + idx = ip(pp) # type: ignore |
| 293 | + try: |
| 294 | + yield from idx |
| 295 | + except Exception as e: |
| 296 | + yield e |
291 | 297 |
|
292 | 298 | root = opts.root |
293 | 299 | fallback_dt = file_mtime(pp) |
294 | 300 | fallback_loc = Loc.file(pp) |
295 | 301 | replacer = opts.replacer |
296 | | - for r in indexer: |
| 302 | + for r in indexer(): |
297 | 303 | if isinstance(r, Exception): |
298 | | - yield r |
| 304 | + # indexers can rely on this method setting the error context |
| 305 | + yield echain(ex, r) |
299 | 306 | continue |
300 | 307 | if isinstance(r, EUrl): |
301 | 308 | v = Visit( |
|
0 commit comments