1919from .extract import extract_visits , make_filter
2020
2121
22- def iter_all_visits () -> Iterator [Res [DbVisit ]]:
22+ def _decide_indexers (
23+ indexers : Iterable [Source ], sources_subset : Iterable [str ]
24+ ) -> Iterable [Source ]:
25+ unknown = set (sources_subset ) - set (range (len (indexers ))) - {i .name for i in indexers }
26+ if unknown :
27+ # TODO: raise special exception on bad-sources and handle it politely in CLI.
28+ raise ValueError (f"Unknown source(s): { ', ' .join (str (i ) for i in unknown )} " )
29+ # Substitute positions with names, to facilitate debugging.
30+ named_indexers = {i .name : i for i in indexers }
31+ return {named_indexers [i ] if isinstance (i , str ) else indexers [i ] for i in sources_subset }
32+
33+
34+ def iter_all_visits (sources_subset : Iterable [str ]= ()) -> Iterator [Res [DbVisit ]]:
2335 cfg = config .get ()
2436 output_dir = cfg .output_dir
2537 # not sure if belongs here??
@@ -29,8 +41,16 @@ def iter_all_visits() -> Iterator[Res[DbVisit]]:
2941
3042 hook = cfg .hook
3143
32- indexers = cfg .sources
33- for idx in indexers :
44+ indexers = list (cfg .sources )
45+
46+ if sources_subset :
47+ indexers = _decide_indexers (indexers , sources_subset )
48+ logger .info ("sources to extract: %s" , ", " .join (i .name for i in indexers ))
49+
50+ for i , idx in enumerate (indexers ):
51+ if sources_subset and idx .name not in sources_subset and i not in sources_subset :
52+ continue
53+
3454 if isinstance (idx , Exception ):
3555 yield idx
3656 continue
@@ -46,11 +66,11 @@ def iter_all_visits() -> Iterator[Res[DbVisit]]:
4666 yield e
4767
4868
49- def _do_index (dry : bool = False ) -> Iterable [Exception ]:
69+ def _do_index (dry : bool = False , sources_subset : Iterable [ str ] = () ) -> Iterable [Exception ]:
5070 # also keep & return errors for further display
5171 errors : List [Exception ] = []
5272 def it () -> Iterable [Res [DbVisit ]]:
53- for v in iter_all_visits ():
73+ for v in iter_all_visits (sources_subset ):
5474 if isinstance (v , Exception ):
5575 errors .append (v )
5676 yield v
@@ -68,10 +88,10 @@ def it() -> Iterable[Res[DbVisit]]:
6888 return errors
6989
7090
71- def do_index (config_file : Path , dry : bool = False ) -> None :
91+ def do_index (config_file : Path , dry : bool = False , sources_subset : Iterable [ str ] = () ) -> None :
7292 config .load_from (config_file ) # meh.. should be cleaner
7393 try :
74- errors = list (_do_index (dry = dry ))
94+ errors = list (_do_index (dry = dry , sources_subset = sources_subset ))
7595 finally :
7696 config .reset ()
7797 if len (errors ) > 0 :
@@ -104,7 +124,14 @@ def inner(*args, **kwargs):
104124 return res
105125
106126
107- def do_demo (* , index_as : str , params : Sequence [str ], port : Optional [str ], config_file : Optional [Path ], name : str = 'demo' ) -> None :
127+ def do_demo (*
128+ index_as : str ,
129+ params : Sequence [str ],
130+ port : Optional [str ],
131+ config_file : Optional [Path ],
132+ name : str = 'demo' ,
133+ sources_subset : Iterable [str ]= (),
134+ ) -> None :
108135 from pprint import pprint
109136 with TemporaryDirectory () as tdir :
110137 outdir = Path (tdir )
@@ -120,7 +147,7 @@ def do_demo(*, index_as: str, params: Sequence[str], port: Optional[str], config
120147 )
121148 config .instance = cfg
122149
123- errors = list (_do_index ())
150+ errors = list (_do_index (sources_subset = sources_subset ))
124151 if len (errors ) > 0 :
125152 logger .error ('%d errors during indexing (see logs above for backtraces)' , len (errors ))
126153 for e in errors :
@@ -245,6 +272,14 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
245272 logger .info ('You should see the database path and version above!' )
246273
247274
275+ def _parse_ordinal_or_name (s : str ):
276+ try :
277+ s = int (s )
278+ except ValueError :
279+ pass
280+ return s
281+
282+
248283def main () -> None :
249284 # TODO longer, literate description?
250285
@@ -256,6 +291,8 @@ def main() -> None:
256291 ep .add_argument ('--dry' , action = 'store_true' , help = "Dry run, won't touch the database, only print the results out" )
257292 # TODO use some way to override or provide config only via cmdline?
258293 ep .add_argument ('--intermediate' , required = False , help = "Used for development, you don't need it" )
294+ ep .add_argument ('--sources' , required = False , action = "extend" , nargs = "+" , type = _parse_ordinal_or_name ,
295+ help = "Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources" )
259296
260297 sp = subp .add_parser ('serve' , help = 'Serve a link database' , formatter_class = F ) # type: ignore
261298 server .setup_parser (sp )
@@ -276,6 +313,8 @@ def main() -> None:
276313 default = 'guess' ,
277314 help = 'Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)' ,
278315 )
316+ ap .add_argument ('--sources' , required = False , action = "extend" , nargs = "+" , type = _parse_ordinal_or_name ,
317+ help = "Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources" )
279318 ap .add_argument ('params' , nargs = '*' , help = 'Optional extra params for the indexer' )
280319
281320 isp = subp .add_parser ('install-server' , help = 'Install server as a systemd service (for autostart)' , formatter_class = F )
@@ -315,13 +354,20 @@ def main() -> None:
315354
316355 with get_tmpdir () as tdir : # TODO??
317356 if args .mode == 'index' :
318- do_index (config_file = args .config , dry = args .dry )
357+ do_index (config_file = args .config , dry = args .dry , sources_subset = args . sources )
319358 elif args .mode == 'serve' :
320359 server .run (args )
321360 elif args .mode == 'demo' :
322361 # TODO not sure if 'as' is that useful
323362 # something like Telegram/Takeout is too hard to setup to justify adhoc mode like this?
324- do_demo (index_as = getattr (args , 'as' ), params = args .params , port = args .port , config_file = args .config , name = args .name )
363+ do_demo (
364+ index_as = getattr (args , 'as' ),
365+ params = args .params ,
366+ port = args .port ,
367+ config_file = args .config ,
368+ name = args .name ,
369+ sources_subset = args .sources ,
370+ )
325371 elif args .mode == 'install-server' : # todo rename to 'autostart' or something?
326372 install_server .install (args )
327373 elif args .mode == 'config' :
0 commit comments