-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathsplit.py
More file actions
executable file
·43 lines (37 loc) · 1.66 KB
/
split.py
File metadata and controls
executable file
·43 lines (37 loc) · 1.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Command-line tool for splitting datasets.
"""
import superstyl.preproc.select as sel
def build_parser():
    """Build the argument parser of the dataset-splitting command-line tool.

    Returns:
        argparse.ArgumentParser: parser exposing the positional ``path`` and
        the ``-s``, ``-m``, ``-e``, ``--lang``, ``--nosplit`` and
        ``--split_ratio`` options.
    """
    # Kept as a local import, as in the original script, so that merely
    # importing this module does not pull in argparse.
    import argparse

    parser = argparse.ArgumentParser()
    # nargs="?" is what makes the default effective: without it argparse
    # treats a positional as required and silently ignores ``default``.
    parser.add_argument('path', action="store", nargs="?", help="path to feats csv file",
                        default="feats_tests.csv")
    # None (rather than False) as the "not provided" value keeps the option
    # consistently "a path or nothing"; it is only ever tested for truthiness.
    parser.add_argument('-s', action="store", help="optional path to already existing split file",
                        default=None)
    parser.add_argument('-m', action="store", help="path to metadata file", required=False)
    parser.add_argument('-e', action="store", help="path to excludes file", required=False)
    parser.add_argument('--lang', action="store",
                        help="analyse only file in this language (optional, for initial split only)",
                        required=False)
    parser.add_argument('--nosplit', action="store_true",
                        help="no split (do not provide split file)",
                        default=False)
    # "%%" is an escaped percent sign: argparse %-formats help strings.
    parser.add_argument('--split_ratio', action="store", type=float,
                        help="validation split ratio (default: 0.1 = 10%%)",
                        default=0.1)
    return parser


def main(argv=None):
    """Parse the command line and run the requested selection.

    When ``-s`` is given, the existing split file is applied to ``path`` via
    ``sel.apply_selection``. Otherwise ``sel.read_clean`` is called to create
    a new selection, saved to ``split_nosplit.json`` when ``--nosplit`` is set
    and to ``split.json`` otherwise.

    Args:
        argv: list of argument strings to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    if args.s:
        # Apply existing selection
        sel.apply_selection(path=args.path, presplit_path=args.s)
    else:
        # Create new selection (with or without split)
        sel.read_clean(
            path=args.path,
            metadata_path=args.m,
            excludes_path=args.e,
            savesplit="split_nosplit.json" if args.nosplit else "split.json",
            lang=args.lang,
            split=not args.nosplit,
            split_ratio=args.split_ratio,
        )


if __name__ == '__main__':
    main()