Skip to content

Commit 228a2f0

Browse files
committed
Add more comments
1 parent df5a34f commit 228a2f0

File tree

5 files changed

+61 -1 lines changed

5 files changed

+61 -1 lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ cat file-to-parse | tabularize -
7373

7474
Tabularize operates at the byte level; however, it prints out data as JSON, which does not support bytes. As a result,
7575
it decodes the data before printing it to the terminal. You can customize the encoding and error resolution strategy
76-
using the `--encoding` and `errors` options:
76+
using the `--encoding` and `--errors` options:
7777

7878
```shell
7979
tabularize --encoding utf-8 --errors backslashreplace path-to-file

tabularize/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Utilities for parsing semi-structured text data
3+
"""
4+
15
from .parse import parse_headers, parse_body
26

37

tabularize/__main__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Enables the execution of the CLI by package name
3+
"""
4+
15
if __name__ == "__main__":
26
from . import main
37

tabularize/main.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Provides a command-line interface to the package
3+
"""
4+
15
import argparse
26
import json
37
import sys
@@ -12,6 +16,16 @@ def _parse_file(
1216
encoding: str = "utf-8",
1317
errors: str = "backslashreplace",
1418
) -> None:
19+
"""
20+
Parses `file` and prints output to standard output.
21+
22+
:param file: File to parse.
23+
:param force_headers: Iterable of header names to use as a heuristic.
24+
:param encoding: Encoding to use for decoding.
25+
:param errors: Error resolution strategy for decoding.
26+
:return: None.
27+
"""
28+
1529
header_line: bytes = file.readline()
1630
while not header_line.strip():
1731
header_line = file.readline()
@@ -40,6 +54,16 @@ def _process_file(
4054
encoding: str = "utf-8",
4155
errors: str = "backslashreplace",
4256
) -> None:
57+
"""
58+
Opens the appropriate stream and performs parsing.
59+
60+
:param file_path: Path to file to parse.
61+
:param force_headers: Iterable of header names to use as a heuristic.
62+
:param encoding: Encoding to use for decoding.
63+
:param errors: Error resolution strategy for decoding.
64+
:return: None.
65+
"""
66+
4367
if file_path == "-":
4468
if sys.stdin.isatty():
4569
raise RuntimeError("Terminal is attached - cannot process standard input")
@@ -58,6 +82,12 @@ def _process_file(
5882

5983

6084
def main() -> None:
85+
"""
86+
Parses inputs from the command-line and prints output to standard output.
87+
88+
:return: None.
89+
"""
90+
6191
parser = argparse.ArgumentParser()
6292
parser.add_argument(
6393
"--header",
@@ -84,6 +114,8 @@ def main() -> None:
84114

85115
headers: tuple[bytes, ...] = tuple(header.encode() for header in args.header)
86116
for file_path in args.files:
117+
# noinspection PyBroadException
118+
# pylint: disable=broad-exception-caught
87119
try:
88120
_process_file(
89121
file_path,

tabularize/parse.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Parsing utilities for semi-structured tabular text input
3+
"""
4+
15
from typing import TypeAlias, TYPE_CHECKING, Iterable
26

37
if TYPE_CHECKING:
@@ -8,6 +12,14 @@
812
def parse_headers(
913
data: "BytesType", force: Iterable["BytesType"] | None = None
1014
) -> tuple["Header", ...]:
15+
"""
16+
Parses a line of data to derive header names and positions.
17+
18+
:param data: Data to parse.
19+
:param force: Iterable of header names to use as a heuristic.
20+
:return: Tuple of headers, consisting of a name, start index, and end index.
21+
"""
22+
1123
extracted_headers: list["Header"] = []
1224

1325
header_start: int = 0
@@ -43,6 +55,14 @@ def parse_headers(
4355
def parse_body(
4456
headers: tuple["Header", ...], line: "BytesType"
4557
) -> dict[bytes, "BytesType"]:
58+
"""
59+
Parses a body line based on provided headers.
60+
61+
:param headers: Headers to map data to.
62+
:param line: Data to parse.
63+
:return: Dictionary of parsed data.
64+
"""
65+
4666
entry: dict[bytes, "BytesType"] = {}
4767

4868
start_offset: int | None = 0

0 commit comments

Comments
 (0)