22import os
33import logging
44import inspect
5+ import os
56import sys
67from typing import List , Tuple , Optional , Dict , Sequence , Iterable , Iterator
78from pathlib import Path
@@ -66,7 +67,7 @@ def iter_all_visits(sources_subset: Iterable[str]=()) -> Iterator[Res[DbVisit]]:
6667 yield e
6768
6869
69- def _do_index (dry : bool = False , sources_subset : Iterable [str ]= ()) -> Iterable [Exception ]:
70+ def _do_index (dry : bool = False , sources_subset : Iterable [str ]= (), overwrite_db = False ) -> Iterable [Exception ]:
7071 # also keep & return errors for further display
7172 errors : List [Exception ] = []
7273 def it () -> Iterable [Res [DbVisit ]]:
@@ -81,17 +82,23 @@ def it() -> Iterable[Res[DbVisit]]:
8182 for v in res :
8283 print (v )
8384 else :
84- dump_errors = visits_to_sqlite (it ())
85+ dump_errors = visits_to_sqlite (it (), overwrite_db )
8586 for e in dump_errors :
8687 logger .exception (e )
8788 errors .append (e )
8889 return errors
8990
9091
91- def do_index (config_file : Path , dry : bool = False , sources_subset : Iterable [str ]= ()) -> None :
92+ def do_index (
93+ config_file : Path ,
94+ dry : bool = False ,
95+ sources_subset : Iterable [str ]= (),
96+ overwrite : bool = None ,
97+ overwrite_db = False ,
98+ ) -> None :
9299 config .load_from (config_file ) # meh.. should be cleaner
93100 try :
94- errors = list (_do_index (dry = dry , sources_subset = sources_subset ))
101+ errors = list (_do_index (dry = dry , sources_subset = sources_subset , overwrite_db = overwrite_db ))
95102 finally :
96103 config .reset ()
97104 if len (errors ) > 0 :
@@ -131,6 +138,7 @@ def do_demo(*
131138 config_file : Optional [Path ],
132139 name : str = 'demo' ,
133140 sources_subset : Iterable [str ]= (),
141+ overwrite_db : bool = False ,
134142 ) -> None :
135143 from pprint import pprint
136144 with TemporaryDirectory () as tdir :
@@ -147,7 +155,7 @@ def do_demo(*
147155 )
148156 config .instance = cfg
149157
150- errors = list (_do_index (sources_subset = sources_subset ))
158+ errors = list (_do_index (sources_subset = sources_subset , overwrite_db = overwrite_db ))
151159 if len (errors ) > 0 :
152160 logger .error ('%d errors during indexing (see logs above for backtraces)' , len (errors ))
153161 for e in errors :
@@ -293,6 +301,22 @@ def main() -> None:
293301 ep .add_argument ('--intermediate' , required = False , help = "Used for development, you don't need it" )
294302 ep .add_argument ('--sources' , required = False , action = "extend" , nargs = "+" , type = _parse_ordinal_or_name ,
295303 help = "Subset of source(s) to run (name or 0-indexed position); use `promnesia --dry` to view sources" )
304+ overwrite = ep .add_mutually_exclusive_group ()
305+ overwrite .add_argument (
306+ '--update' ,
307+ required = False ,
308+ action = "store_const" ,
309+ const = True ,
310+ dest = "overwrite_db" ,
311+ help =
312+ "Keep existing visits in db and merge new ones collected."
313+ " If neither is given, --update assumed when --sources given (the default)"
314+ ", unless PROMNESIA_INDEX_POLICY=(update|overwrite) env-var defined"
315+ ", which takes precedence."
316+ " Conflicts with --overwrite.%(default)0.0s"
317+ )
318+ overwrite .add_argument ('--overwrite' , required = False , action = "store_const" , const = False , dest = "overwrite_db" ,
319+ help = "The opposite of --update: recreate db with newly indexed visits%(default)0.0s" )
296320
297321 sp = subp .add_parser ('serve' , help = 'Serve a link database' , formatter_class = F ) # type: ignore
298322 server .setup_parser (sp )
@@ -348,13 +372,33 @@ def main() -> None:
348372 p .print_help (sys .stderr )
349373 sys .exit (1 )
350374
375+ overwrite_policy_var = os .environ .get ("PROMNESIA_INDEX_POLICY" )
376+ if overwrite_policy_var :
377+ overwrite_policy_var = overwrite_policy_var .lower ()
378+ if overwrite_policy_var not in ("update" , "overwrite" ):
379+ print (
380+ f"Invalid value for PROMNESIA_INDEX_POLICY env-var: { overwrite_policy_var } "
381+ "\n Must be one of (update | overwrite)." ,
382+ file = sys .stderr )
383+ sys .exit (2 )
384+ args .overwrite_db = overwrite_policy_var == "overwrite"
385+ if args .overwrite_db is None :
386+ args .overwrite_db = not bool (args .sources )
387+
388+ logger .info ("CLI args: %s" , args )
389+
351390 # TODO maybe, it's better for server to compute intermediate representation?
352391 # the only downside is storage. dunno.
353392 # worst case -- could use database?
354393
355394 with get_tmpdir () as tdir : # TODO??
356395 if args .mode == 'index' :
357- do_index (config_file = args .config , dry = args .dry , sources_subset = args .sources )
396+ do_index (
397+ config_file = args .config ,
398+ dry = args .dry ,
399+ sources_subset = args .sources ,
400+ overwrite_db = args .overwrite_db ,
401+ )
358402 elif args .mode == 'serve' :
359403 server .run (args )
360404 elif args .mode == 'demo' :
@@ -367,6 +411,7 @@ def main() -> None:
367411 config_file = args .config ,
368412 name = args .name ,
369413 sources_subset = args .sources ,
414+ overwrite_db = args .overwrite_db ,
370415 )
371416 elif args .mode == 'install-server' : # todo rename to 'autostart' or something?
372417 install_server .install (args )
0 commit comments