Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions load_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,6 @@
else:
print(".......unknown feature list format. Ignoring.......")

-elif args.config:
-# Load from new-style JSON config file
-config = Config.from_json(args.config)
-# Override paths if provided via CLI
-if args.s:
-config.corpus.paths = args.s

else:
if not args.s:
Expand Down
2 changes: 1 addition & 1 deletion superstyl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def validate(self) -> None:
raise ValueError("No paths specified for corpus loading.")

# Validate paths type
-if not isinstance(self.corpus.paths, list):
+if not isinstance(self.corpus.paths, (list, str)):
raise TypeError("Paths in config must be either a list or a glob pattern string.")

for feat_config in self.features:
Expand Down
7 changes: 3 additions & 4 deletions superstyl/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
import pandas
from typing import Optional, List, Tuple, Union

-from superstyl.config import Config, FeatureConfig, NormalizationConfig
+from superstyl.config import Config, FeatureConfig


def _load_single_feature(
myTexts: List[dict],
feat_config: FeatureConfig,
-norm_config: NormalizationConfig,
use_provided_feat_list: bool = False,
) -> Tuple[pandas.DataFrame, List]:
"""
Expand Down Expand Up @@ -183,7 +182,7 @@ def load_corpus(
if len(config.features) == 1:
feat_config = config.features[0]
feats_df, feat_list = _load_single_feature(
-myTexts, feat_config, config.normalization, use_provided_feat_list
+myTexts, feat_config, use_provided_feat_list
)
corpus = pandas.concat([metadata, feats_df], axis=1)
return corpus, feat_list
Expand All @@ -199,7 +198,7 @@ def load_corpus(
print(f".......processing {prefix}.......")

feats_df, feat_list = _load_single_feature(
-myTexts, feat_config, config.normalization, use_provided_feat_list
+myTexts, feat_config, use_provided_feat_list
)

# Prefix columns to avoid collisions
Expand Down
2 changes: 1 addition & 1 deletion superstyl/preproc/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def extract_tokens(path: str, config: Config=Config()) -> List[str]:
return nltk.tokenize.wordpunct_tokenize(text)

elif config.corpus.format == "tei":
-return LOADERS['tei'].extract_units(path, config.corpus.units, feats)
+return LOADERS['tei'].extract_units(path, config.sampling.units, feats)

elif config.sampling.units == "verses" and config.corpus.format == "txm":
return LOADERS['txm'].extract_units(path, config.sampling.units, feats)
Expand Down