Skip to content

Commit 4fb9cbd

Browse files
committed
FEAT(index): add a --sources CLI option to limit which sources are run;
each argument can be either a Source.name or the Source's positional index.
1 parent e44a2bd commit 4fb9cbd

File tree

1 file changed

+57
-11
lines changed

1 file changed

+57
-11
lines changed

src/promnesia/__main__.py

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,19 @@
1919
from .extract import extract_visits, make_filter
2020

2121

22-
def iter_all_visits() -> Iterator[Res[DbVisit]]:
22+
def _decide_indexers(
    indexers: Iterable["Source"], sources_subset: Iterable[str]
) -> Iterable["Source"]:
    """Pick the configured sources selected by ``sources_subset``.

    Each entry of ``sources_subset`` is either a source name (``str``) or a
    0-based position (``int``) into ``indexers``.  Despite the ``Iterable[str]``
    annotation, the ``--sources`` CLI option converts purely numeric arguments
    to ``int`` before they get here.

    :param indexers: all configured sources, in configuration order.
    :param sources_subset: names and/or positions of the sources to keep.
    :returns: a list with the selected sources in configuration order, each one
        at most once.  Positions in the result no longer match positions in
        ``indexers``, so callers must not filter the result by position again.
    :raises ValueError: if any entry matches neither a source name nor a valid
        position; the message lists the offending entries in request order.
    """
    # Materialise both arguments: they are traversed more than once and
    # ``indexers`` is addressed by position, which a bare iterable can't do.
    configured = list(indexers)
    requested = list(sources_subset)

    # Entries without a ``name`` (the indexing loop expects that some entries
    # may be Exception objects) are skipped here, so they can only be selected
    # by position.  On duplicate names the last one wins.
    position_by_name = {
        src.name: pos for pos, src in enumerate(configured) if hasattr(src, "name")
    }

    selected = set()
    unknown = []
    for item in requested:
        if isinstance(item, str):
            pos = position_by_name.get(item)
        elif isinstance(item, int) and 0 <= item < len(configured):
            pos = item
        else:
            pos = None

        if pos is not None:
            selected.add(pos)
        elif item not in unknown:
            unknown.append(item)

    if unknown:
        # TODO: raise special exception on bad-sources and handle it politely in CLI.
        raise ValueError(f"Unknown source(s): {', '.join(str(i) for i in unknown)}")

    # Select by position rather than hashing the sources themselves: this keeps
    # configuration order and works for unhashable source objects.
    return [src for pos, src in enumerate(configured) if pos in selected]
32+
33+
34+
def iter_all_visits(sources_subset: Iterable[str]=()) -> Iterator[Res[DbVisit]]:
2335
cfg = config.get()
2436
output_dir = cfg.output_dir
2537
# not sure if belongs here??
@@ -29,8 +41,16 @@ def iter_all_visits() -> Iterator[Res[DbVisit]]:
2941

3042
hook = cfg.hook
3143

32-
indexers = cfg.sources
33-
for idx in indexers:
44+
indexers = list(cfg.sources)
45+
46+
if sources_subset:
47+
indexers = _decide_indexers(indexers, sources_subset)
48+
logger.info("sources to extract: %s", ", ".join(i.name for i in indexers))
49+
50+
for i, idx in enumerate(indexers):
51+
if sources_subset and idx.name not in sources_subset and i not in sources_subset:
52+
continue
53+
3454
if isinstance(idx, Exception):
3555
yield idx
3656
continue
@@ -46,11 +66,11 @@ def iter_all_visits() -> Iterator[Res[DbVisit]]:
4666
yield e
4767

4868

49-
def _do_index(dry: bool=False) -> Iterable[Exception]:
69+
def _do_index(dry: bool=False, sources_subset: Iterable[str]=()) -> Iterable[Exception]:
5070
# also keep & return errors for further display
5171
errors: List[Exception] = []
5272
def it() -> Iterable[Res[DbVisit]]:
53-
for v in iter_all_visits():
73+
for v in iter_all_visits(sources_subset):
5474
if isinstance(v, Exception):
5575
errors.append(v)
5676
yield v
@@ -68,10 +88,10 @@ def it() -> Iterable[Res[DbVisit]]:
6888
return errors
6989

7090

71-
def do_index(config_file: Path, dry: bool=False) -> None:
91+
def do_index(config_file: Path, dry: bool=False, sources_subset: Iterable[str]=()) -> None:
7292
config.load_from(config_file) # meh.. should be cleaner
7393
try:
74-
errors = list(_do_index(dry=dry))
94+
errors = list(_do_index(dry=dry, sources_subset=sources_subset))
7595
finally:
7696
config.reset()
7797
if len(errors) > 0:
@@ -104,7 +124,14 @@ def inner(*args, **kwargs):
104124
return res
105125

106126

107-
def do_demo(*, index_as: str, params: Sequence[str], port: Optional[str], config_file: Optional[Path], name: str='demo') -> None:
127+
def do_demo(*
128+
index_as: str,
129+
params: Sequence[str],
130+
port: Optional[str],
131+
config_file: Optional[Path],
132+
name: str='demo',
133+
sources_subset: Iterable[str]=(),
134+
) -> None:
108135
from pprint import pprint
109136
with TemporaryDirectory() as tdir:
110137
outdir = Path(tdir)
@@ -120,7 +147,7 @@ def do_demo(*, index_as: str, params: Sequence[str], port: Optional[str], config
120147
)
121148
config.instance = cfg
122149

123-
errors = list(_do_index())
150+
errors = list(_do_index(sources_subset=sources_subset))
124151
if len(errors) > 0:
125152
logger.error('%d errors during indexing (see logs above for backtraces)', len(errors))
126153
for e in errors:
@@ -245,6 +272,14 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
245272
logger.info('You should see the database path and version above!')
246273

247274

275+
def _parse_ordinal_or_name(s: str):
    """Convert one ``--sources`` argument into a position or a name.

    Used as an argparse ``type=`` callable.

    :param s: the raw command-line token.
    :returns: ``int(s)`` when the token parses as an integer (a 0-indexed
        source position), otherwise ``s`` unchanged (a source name).
    """
    # Return directly instead of rebinding ``s``: the parameter is annotated
    # as ``str``, so assigning an ``int`` to it is a type error.
    try:
        return int(s)
    except ValueError:
        return s
281+
282+
248283
def main() -> None:
249284
# TODO longer, literate description?
250285

@@ -256,6 +291,8 @@ def main() -> None:
256291
ep.add_argument('--dry', action='store_true', help="Dry run, won't touch the database, only print the results out")
257292
# TODO use some way to override or provide config only via cmdline?
258293
ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it")
294+
ep.add_argument('--sources', required=False, action="extend", nargs="+", type=_parse_ordinal_or_name,
295+
help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources")
259296

260297
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
261298
server.setup_parser(sp)
@@ -276,6 +313,8 @@ def main() -> None:
276313
default='guess',
277314
help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
278315
)
316+
ap.add_argument('--sources', required=False, action="extend", nargs="+", type=_parse_ordinal_or_name,
317+
help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources")
279318
ap.add_argument('params', nargs='*', help='Optional extra params for the indexer')
280319

281320
isp = subp.add_parser('install-server', help='Install server as a systemd service (for autostart)', formatter_class=F)
@@ -315,13 +354,20 @@ def main() -> None:
315354

316355
with get_tmpdir() as tdir: # TODO??
317356
if args.mode == 'index':
318-
do_index(config_file=args.config, dry=args.dry)
357+
do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources)
319358
elif args.mode == 'serve':
320359
server.run(args)
321360
elif args.mode == 'demo':
322361
# TODO not sure if 'as' is that useful
323362
# something like Telegram/Takeout is too hard to setup to justify adhoc mode like this?
324-
do_demo(index_as=getattr(args, 'as'), params=args.params, port=args.port, config_file=args.config, name=args.name)
363+
do_demo(
364+
index_as=getattr(args, 'as'),
365+
params=args.params,
366+
port=args.port,
367+
config_file=args.config,
368+
name=args.name,
369+
sources_subset=args.sources,
370+
)
325371
elif args.mode == 'install-server': # todo rename to 'autostart' or something?
326372
install_server.install(args)
327373
elif args.mode == 'config':

0 commit comments

Comments
 (0)