Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# CI workflow "build": triggered by pushes to main and by pull requests.
# It installs the project's requirements (when the files exist) and runs
# tests/test_readers.py with pytest.
name: build

on:
push:
branches:
- main
pull_request:

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
# The job is repeated once for each Python version listed here.
python-version: [3.7, 3.8, 3.9]

steps:
- uses: actions/checkout@main
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 black pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- name: Test with pytest
run: |
python -m pytest -v tests/test_readers.py
3 changes: 2 additions & 1 deletion checksit/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def _check_file(self, file_content, template, mappings=None, extra_rules=None, s

if log_mode == "compact":
highest = "ERROR" if len(errors) > 0 else "NONE"
print(f"{highest} | {len(errors)} ", end="")
endstr = "" if len(errors) > 0 else "\n"
print(f"{highest} | {len(errors)} ", end=endstr)
err_string = " | ".join([err.replace("|", "__VERTICAL_BAR_REPLACED__") for err in errors])
if err_string:
print(f"| {err_string}")
Expand Down
23 changes: 21 additions & 2 deletions checksit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,28 @@ def check(file_path, mappings=None, rules=None, specs=None, ignore_attrs=None, i
@main.command()
@click.argument("log_files", nargs=-1, default=None)
@click.option("-d", "--log-directory", default=None)
@click.option("--show-files/--no-show-files", default=False)
@click.option("-x", "--exclude", default=None)
@click.option("-e", "--exclude-file", default=None)
@click.option("--verbose/--no-verbose", default=False)
def summary(log_files=None, log_directory=None, verbose=False):
return summarise(log_files, log_directory=log_directory, verbose=verbose)
def summary(log_files=None, log_directory=None, show_files=False,
            exclude=None, exclude_file=None,
            verbose=False):
    """Summarise the errors recorded in check log files.

    \f
    Everything after the form-feed marker above is hidden from the
    ``--help`` output that click builds from this docstring.

    Args:
        log_files: Log file paths given on the command line.
        log_directory: Directory to search when no log files are given.
        show_files: If true, the file paths are listed under each error.
        exclude: Exclusion patterns as a single string; it is converted to
            a list with ``string_to_list``.
        exclude_file: Path to a text file holding one exclusion pattern per
            line; blank lines are ignored.
        verbose: Passed through to ``summarise``.

    Returns:
        Whatever ``summarise`` returns.

    Raises:
        Exception: If ``exclude_file`` is given but is not an existing file.
    """
    exclude = string_to_list(exclude) if exclude else []

    if exclude_file:
        if not os.path.isfile(exclude_file):
            raise Exception("'--exclude-file' does not point to a valid file")

        with open(exclude_file) as exfile:
            # Strip every line before storing it: the raw lines end with a
            # newline, and a pattern carrying that newline can never be found
            # as a substring of an (already stripped) error message.
            stripped_lines = (raw_line.strip() for raw_line in exfile)
            exclude.extend(pattern for pattern in stripped_lines if pattern)

    return summarise(log_files, log_directory=log_directory, show_files=show_files,
                     exclude=exclude, verbose=verbose)


@main.command()
Expand Down
46 changes: 42 additions & 4 deletions checksit/readers/cdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _check_format(self):

min_chars = 10
if len(source) < min_chars:
self.fmt_errors.append(f"[FORMAT:global_attributes:source] Must be at least {min_chars} characters, not {source}")
self.fmt_errors.append(f"[FORMAT:global_attributes:source] Must be at least {min_chars} characters, not {source}")

def _get_sections(self, lines, split_patterns, start_at):
split_patterns = deque(split_patterns)
Expand Down Expand Up @@ -114,31 +114,69 @@ def _construct_variables(self, content):
var_id = None
current = None

# Set defaults for key and value so they can be sent to multiline parser even if not set
key = None
value = None

for line in content:
if re.match(f"^{vocabs_prefix}:[0-9a-zA-Z_-]+:variables:", line):
vocab_var_id = line.split(":")[3]
vocab_lookup = line.split(":", 1)[-1]
variables[vocab_var_id] = vocabs.lookup(vocab_lookup)
elif not var_id or not line.startswith(f"{var_id}:"):
elif not var_id or not line.startswith(f"{var_id}:") and last_line.strip()[-1] != ",":
# Add current collected variable to list if it exists
if current:
variables[var_id] = current.copy()

var_id, dtype, dimensions = self._parse_var_dtype_dims(line)
current = {"type": dtype, "dimension": ', '.join(dimensions)}
else:
key, value = [x.strip() for x in line.split(":", 1)[1].split("=", 1)]
# key, value = [x.strip() for x in line.split(":", 1)[1].split("=", 1)]
# Send last key and last value (from last iteration of loop) and line to get new value
key, value = self._parse_key_value_multiline_safe(line, key, value, variable_attr=True)
current[key] = self._safe_parse_value(value)

last_line = line
else:
variables[var_id] = current.copy()

return variables

def _parse_key_value_multiline_safe(self, line, last_key, last_value, variable_attr=False):
    """Parse one attribute line, tolerating continuation lines.

    A line containing "=" starts a new ``key = value`` pair. Any other line
    is treated as the continuation of the previous value (e.g. an array of
    strings spread over several lines) and is appended to it.

    Args:
        line: The line to parse.
        last_key: Key returned for the previous line, or None if there was none.
        last_value: Value returned for the previous line, or None if there
            was none.
        variable_attr: If True the line is a variable attribute and the text
            before the first ":" is discarded; otherwise it is a global
            attribute and only leading ":" characters are removed.

    Returns:
        A ``(key, value)`` tuple. For a continuation line the key is
        ``last_key`` and the value is ``last_value`` extended with the
        stripped line (without trailing ";").

    Raises:
        ValueError: If the line is a continuation but there is no previous
            value to extend.
    """
    if "=" in line:
        # A new (key, value) pair is found
        if variable_attr:
            # Variable attribute: drop everything up to the first ":"
            attr_text = line.split(":", 1)[1]
        else:
            # Global attribute: only the leading ":" needs removing
            attr_text = line.lstrip(":")

        key, value = [part.strip() for part in attr_text.split("=", 1)]
        return key, value

    # No "=": assume a continuation of the last value, keeping the same key
    if last_value is None:
        # Without this guard the concatenation below fails with an opaque
        # "NoneType + str" TypeError.
        raise ValueError(
            f"Cannot parse line as 'key = value' and there is no previous value to continue: {line!r}"
        )

    return last_key, last_value + " " + line.strip().rstrip(";")


def _ordered_dict(self, content):
    """Build a dictionary of parsed attribute values from attribute lines.

    Each line is handed to ``_parse_key_value_multiline_safe`` together with
    the key and value from the previous line, so that a value continued over
    several lines is accumulated under a single key.

    Args:
        content: Iterable of attribute lines.

    Returns:
        A dict mapping each key to ``self._safe_parse_value(value)``, in the
        order the keys were first encountered.
    """
    resp = {}
    key = None
    value = None

    for line in content:
        if self.verbose:
            print(f"WORKING ON LINE: {line}")

        # Send the last key and value (from the previous iteration) along
        # with the line. Splitting on "=" here directly would raise on a
        # continuation line, which has no "=".
        key, value = self._parse_key_value_multiline_safe(line, key, value)

        # This overwrites the previous entry, which is safe after a
        # continuation because the key is the same as last time.
        resp[key] = self._safe_parse_value(value)

    return resp
Expand Down
44 changes: 36 additions & 8 deletions checksit/summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import re
import glob
from collections import defaultdict, OrderedDict as OD

import pandas as pd


Expand All @@ -20,8 +22,18 @@ def get_max_column_count(files, sep):
return count


def summarise(log_files=None, log_directory=None, verbose=False):
def do_exclude(err, exclude_patterns):
    """Tell whether an error message should be excluded.

    Returns True if at least one entry of ``exclude_patterns`` occurs as a
    substring of ``err``, otherwise False (including when there are no
    patterns at all).
    """
    return any(pattern in err for pattern in exclude_patterns)


def summarise(log_files=None, log_directory=None, show_files=False,
exclude=None, verbose=False):
log_files = log_files or find_log_files(log_directory)
exclude_patterns = exclude or []

if len(log_files) == 0:
print("[ERROR] No log files found!")
Expand Down Expand Up @@ -58,16 +70,32 @@ def summarise(log_files=None, log_directory=None, verbose=False):
fatals = len(df[df["highest_error"].str.contains("FATAL")])
print(f"[INFO] Found {fatals} FATAL errors.")

all_errors = []
errors_by_type = defaultdict(list)

for err_col in err_cols:
all_errors.extend(list(set(
[f"{err} [found in {int(df[df[err_col] == err][err_col].value_counts())} file(s)]"
for err in df[err_col].unique() if err.strip()])))
for err in df[err_col].unique():
err = err.strip()
if not err or do_exclude(err, exclude_patterns): continue

filepaths = sorted(df[df[err_col] == err]["filepath"])
errors_by_type[err].extend(filepaths)

all_errors = OD()
for err in sorted(errors_by_type):
filepaths = errors_by_type[err]
all_errors[err] = sorted(filepaths)

all_errors = sorted(set(all_errors))
print(f"[INFO] {len(all_errors)} found. They are...")

for err in all_errors:
print(f"\t\t{err}")

filepaths = all_errors[err]
print(f"\t\t{err} [found in {len(filepaths)} file(s)]")

if show_files:
print("\n------- File paths --------\n")

for err in all_errors:
print(f"\t\t{err}")
for filepath in all_errors[err]:
print(f"\t\t\t{filepath}")

27 changes: 14 additions & 13 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
pip==19.2.3
bump2version==0.5.11
wheel==0.33.6
watchdog==0.9.0
flake8==3.7.8
tox==3.14.0
coverage==4.5.4
Sphinx==1.8.5
twine==1.14.0
pre-commit==2.8.2
Click==7.0
pytest==4.6.5
pytest-runner==5.1
pip
bump2version
wheel
watchdog
flake8
tox
coverage
Sphinx
twine
pre-commit
Click
pytest
pytest-runner
deepdiff
5 changes: 5 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os

# Path of the "testdata" directory located alongside this module.
TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "testdata")


23 changes: 23 additions & 0 deletions tests/sample-fixture/make-sample-fixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import glob
import json
import shelve

from checksit.readers.cdl import read as read_cdl


# Read every sample CDL file first, keyed by its bare file name, so that
# nothing is written to the shelf unless all files were read successfully.
d = {
    os.path.basename(cdl_path): read_cdl(cdl_path).to_dict()
    for cdl_path in glob.glob("sample-ncs/*.cdl")
}


# Store each parsed dictionary in the "sample-fixture" shelf under its file name.
with shelve.open("sample-fixture") as db:
    db.update(d)




Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
'rss_rcp85_land-cpm_uk_2.2km_01_day_20671201-20681130.cdl', (0, 3754)
'tasmax_rcp85_land-rcm_uk_12km_EC-EARTH_r12i1p1_HIRHAM5_day_19801201-19901130.cdl', (4096, 3789)
'cltAnom_rcp85_land-prob_uk_25km_sample_b8110_30y_mon_20091201-20991130.cdl', (8192, 3890)
'20190101-ESACCI-L4_FIRE-BA-SYN-fv1.0.cdl', (12288, 6368)
'summer_rainfall_2001.cdl', (18944, 2068)
'ESACCI-SEAICE-L3C-SITHICK-RA2_ENVISAT-SH50KMEASE2-201202-fv2.0.cdl', (21504, 7238)
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
'rss_rcp85_land-cpm_uk_2.2km_01_day_20671201-20681130.cdl', (0, 3754)
'tasmax_rcp85_land-rcm_uk_12km_EC-EARTH_r12i1p1_HIRHAM5_day_19801201-19901130.cdl', (4096, 3789)
'cltAnom_rcp85_land-prob_uk_25km_sample_b8110_30y_mon_20091201-20991130.cdl', (8192, 3890)
'20190101-ESACCI-L4_FIRE-BA-SYN-fv1.0.cdl', (12288, 6368)
'summer_rainfall_2001.cdl', (18944, 2068)
'ESACCI-SEAICE-L3C-SITHICK-RA2_ENVISAT-SH50KMEASE2-201202-fv2.0.cdl', (21504, 7238)
6 changes: 6 additions & 0 deletions tests/sample-fixture/sample-fixture.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
'rss_rcp85_land-cpm_uk_2.2km_01_day_20671201-20681130.cdl', (0, 3754)
'tasmax_rcp85_land-rcm_uk_12km_EC-EARTH_r12i1p1_HIRHAM5_day_19801201-19901130.cdl', (4096, 3789)
'cltAnom_rcp85_land-prob_uk_25km_sample_b8110_30y_mon_20091201-20991130.cdl', (8192, 3890)
'20190101-ESACCI-L4_FIRE-BA-SYN-fv1.0.cdl', (12288, 6168)
'summer_rainfall_2001.cdl', (18944, 2068)
'ESACCI-SEAICE-L3C-SITHICK-RA2_ENVISAT-SH50KMEASE2-201202-fv2.0.cdl', (21504, 7238)
Binary file added tests/sample-fixture/sample-fixture.dat
Binary file not shown.
6 changes: 6 additions & 0 deletions tests/sample-fixture/sample-fixture.dir
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
'rss_rcp85_land-cpm_uk_2.2km_01_day_20671201-20681130.cdl', (0, 3754)
'tasmax_rcp85_land-rcm_uk_12km_EC-EARTH_r12i1p1_HIRHAM5_day_19801201-19901130.cdl', (4096, 3789)
'cltAnom_rcp85_land-prob_uk_25km_sample_b8110_30y_mon_20091201-20991130.cdl', (8192, 3890)
'20190101-ESACCI-L4_FIRE-BA-SYN-fv1.0.cdl', (12288, 6168)
'summer_rainfall_2001.cdl', (18944, 2068)
'ESACCI-SEAICE-L3C-SITHICK-RA2_ENVISAT-SH50KMEASE2-201202-fv2.0.cdl', (21504, 7238)
Loading