Skip to content

Commit ef468ad

Browse files
committed
FEAT: update db by default, add --overwrite option ...
As suggested in #20: - Drop the PROMNESIA_INDEX_POLICY env-var. - CLI options follow the 2nd case explained in #211, chosen for simplicity. - Function defaults are false, as suggested in #20 (comment). - Both index & demo updated. - Env-var now checks that its value is one of (update|overwrite). - All update/overwrite decision logic moved to __main__.
1 parent 548e853 commit ef468ad

File tree

3 files changed

+46
-25
lines changed

3 files changed

+46
-25
lines changed

doc/GUIDE.org

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ Also see [[https://github.com/karlicoss/promnesia/issues/172][issues/172]].
110110

111111
** partial update
112112

113-
(experimental) Set env variable =PROMNESIA_INDEX_POLICY=update=.
113+
Only index sources given in =promnesia index --sources SOURCE [SOURCE] ...=
114+
(or all sources, if no =--sources= given), unless =--overwrite= is given,
115+
in which case all existing visits are removed from db prior to indexing.
114116

115117
** exclude files from =auto= indexer
116118

src/promnesia/__main__.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import argparse
2-
import os
32
import logging
43
import inspect
54
import sys
@@ -63,7 +62,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
6362
logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))
6463

6564

66-
def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=()) -> Iterable[Exception]:
65+
def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db=False) -> Iterable[Exception]:
6766
# also keep & return errors for further display
6867
errors: List[Exception] = []
6968
def it() -> Iterable[Res[DbVisit]]:
@@ -78,7 +77,7 @@ def it() -> Iterable[Res[DbVisit]]:
7877
for v in res:
7978
print(v)
8079
else:
81-
dump_errors = visits_to_sqlite(it())
80+
dump_errors = visits_to_sqlite(it(), overwrite_db=overwrite_db)
8281
for e in dump_errors:
8382
logger.exception(e)
8483
errors.append(e)
@@ -90,10 +89,11 @@ def do_index(
9089
dry: bool=False,
9190
sources_subset: Iterable[Union[str, int]]=(),
9291
overwrite: bool=None,
92+
overwrite_db=False,
9393
) -> None:
9494
config.load_from(config_file) # meh.. should be cleaner
9595
try:
96-
errors = list(_do_index(dry=dry, sources_subset=sources_subset))
96+
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
9797
finally:
9898
config.reset()
9999
if len(errors) > 0:
@@ -134,6 +134,7 @@ def do_demo(
134134
config_file: Optional[Path],
135135
name: str='demo',
136136
sources_subset: Iterable[Union[str, int]]=(),
137+
overwrite_db: bool=False,
137138
) -> None:
138139
from pprint import pprint
139140
with TemporaryDirectory() as tdir:
@@ -150,7 +151,7 @@ def do_demo(
150151
)
151152
config.instance = cfg
152153

153-
errors = list(_do_index(sources_subset=sources_subset))
154+
errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db))
154155
if len(errors) > 0:
155156
logger.error('%d errors during indexing (see logs above for backtraces)', len(errors))
156157
for e in errors:
@@ -301,8 +302,14 @@ def main() -> None:
301302
nargs="+",
302303
type=_ordinal_or_name,
303304
metavar="SOURCE",
304-
help="Source names (or their 0-indexed position) to index."
305-
" If missing, db is recreated empty and all sources are indexed.",
305+
help="Source names (or their 0-indexed position) to index.",
306+
)
307+
ep.add_argument(
308+
'--overwrite',
309+
required=False,
310+
action="store_true",
311+
help="Empty db before populating it with newly indexed visits."
312+
" If interrupted, db is left untouched."
306313
)
307314

308315
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
@@ -324,8 +331,22 @@ def main() -> None:
324331
default='guess',
325332
help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
326333
)
327-
ap.add_argument('--sources', required=False, action="extend", nargs="+", type=_ordinal_or_name,
328-
help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources")
334+
ap.add_argument(
335+
'--sources',
336+
required=False,
337+
action="extend",
338+
nargs="+",
339+
type=_ordinal_or_name,
340+
metavar="SOURCE",
341+
help="Source names (or their 0-indexed position) to index.",
342+
)
343+
ap.add_argument(
344+
'--overwrite',
345+
required=False,
346+
action="store_true",
347+
help="Empty db before populating it with newly indexed visits."
348+
" If interrupted, db is left untouched."
349+
)
329350
ap.add_argument('params', nargs='*', help='Optional extra params for the indexer')
330351

331352
isp = subp.add_parser('install-server', help='Install server as a systemd service (for autostart)', formatter_class=F)
@@ -359,13 +380,20 @@ def main() -> None:
359380
p.print_help(sys.stderr)
360381
sys.exit(1)
361382

383+
logger.info("CLI args: %s", args)
384+
362385
# TODO maybe, it's better for server to compute intermediate represetnation?
363386
# the only downside is storage. dunno.
364387
# worst case -- could use database?
365388

366389
with get_tmpdir() as tdir: # TODO??
367390
if args.mode == 'index':
368-
do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources)
391+
do_index(
392+
config_file=args.config,
393+
dry=args.dry,
394+
sources_subset=args.sources,
395+
overwrite_db=args.overwrite,
396+
)
369397
elif args.mode == 'serve':
370398
server.run(args)
371399
elif args.mode == 'demo':
@@ -378,6 +406,7 @@ def main() -> None:
378406
config_file=args.config,
379407
name=args.name,
380408
sources_subset=args.sources,
409+
overwrite_db=args.overwrite,
381410
)
382411
elif args.mode == 'install-server': # todo rename to 'autostart' or something?
383412
install_server.install(args)

src/promnesia/dump.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import os
21
from pathlib import Path
32
import shutil
43
from typing import Dict, List, Tuple, Set, Iterable
@@ -14,14 +13,6 @@
1413
from . import config
1514

1615

17-
def update_policy_active() -> bool:
18-
# NOTE: experimental.. need to make it a proper cmdline argument later
19-
INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all')
20-
# if 'update' is passed, will run against the existing db and only tough the sources present in the current index run
21-
# not sue if a good name for this..
22-
return INDEX_POLICY == 'update'
23-
24-
2516
# NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
2617
# since as far as sql is concerned it should all be in the same transaction. only a guess
2718
# not sure it's the proper way to handle it
@@ -30,7 +21,7 @@ def update_policy_active() -> bool:
3021

3122

3223
# returns critical warnings
33-
def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]:
24+
def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
3425
logger = get_logger()
3526
db_path = config.get().db
3627

@@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]:
5849
yield ev
5950

6051
tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
61-
policy_update = update_policy_active()
62-
if not policy_update:
52+
if overwrite_db:
6353
engine = create_engine(f'sqlite:///{tpath}')
6454
else:
6555
engine = create_engine(f'sqlite:///{db_path}')
@@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]:
8272
# pylint: disable=no-value-for-parameter
8373
conn.execute(table.insert().values(bound))
8474

85-
if not policy_update:
75+
if overwrite_db:
8676
shutil.move(str(tpath), str(db_path))
8777

8878
errs = '' if errors == 0 else f', {errors} ERRORS'
8979
total = ok + errors
90-
what = 'updated' if policy_update else 'overwritten'
80+
what = 'overwritten' if overwrite_db else 'updated'
9181
logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs)
9282
res: List[Exception] = []
9383
if total == 0:

0 commit comments

Comments
 (0)