Skip to content

Commit 4f29088

Browse files
committed
Parse newline, UTF-8, trailing comment, backslash
This adds support for:

* multiline values (i.e. containing newlines or escaped \n), fixes #89
* backslashes in values, fixes #112
* trailing comments, fixes #141
* UTF-8 in unquoted values, fixes #147

Parsing is no longer line-based. That's why `parse_line` was replaced by `parse_binding`. Thanks to the previous commit, users of `parse_stream` don't have to deal with this change.

This supersedes a previous pull request, #142, which would have added support for multiline values in `Dotenv.parse` but not in the CLI (`dotenv get` and `dotenv set`).

The key-value binding regular expression was inspired by https://github.com/bkeepers/dotenv/blob/d749366b6009126b115fb7b63e0509566365859a/lib/dotenv/parser.rb#L14-L30

Parsing of escapes was fixed thanks to https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python/24519338#24519338
1 parent 339ffe4 commit 4f29088

File tree

3 files changed

+140
-42
lines changed

3 files changed

+140
-42
lines changed

dotenv/main.py

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,44 +15,78 @@
1515

1616
from .compat import StringIO, PY2, WIN, text_type
1717

18-
__escape_decoder = codecs.getdecoder('unicode_escape')
1918
__posix_variable = re.compile(r'\$\{[^\}]*\}')
2019

21-
Binding = namedtuple('Binding', 'key value original')
20+
_binding = re.compile(
21+
r"""
22+
(
23+
\s* # leading whitespace
24+
(?:export\s+)? # export
25+
26+
( '[^']+' # single-quoted key
27+
| [^=\#\s]+ # or unquoted key
28+
)?
29+
30+
(?:
31+
(?:\s*=\s*) # equal sign
32+
33+
( '(?:\\'|[^'])*' # single-quoted value
34+
| "(?:\\"|[^"])*" # or double-quoted value
35+
| [^\#\r\n]* # or unquoted value
36+
)
37+
)?
2238
39+
\s* # trailing whitespace
40+
(?:\#[^\r\n]*)? # comment
41+
\s* # trailing whitespace
42+
(?:\r|\n|\r\n)? # newline
43+
)
44+
""",
45+
re.MULTILINE | re.VERBOSE,
46+
)
2347

24-
def decode_escaped(escaped):
25-
return __escape_decoder(escaped)[0]
48+
_escape_sequence = re.compile(r"\\[\\'\"abfnrtv]")
2649

2750

28-
def parse_line(line):
29-
line = line.strip()
51+
Binding = namedtuple('Binding', 'key value original')
52+
3053

31-
# Ignore lines with `#` or which doesn't have `=` in it.
32-
if not line or line.startswith('#') or '=' not in line:
33-
return None, None
54+
def decode_escapes(string):
55+
def decode_match(match):
56+
return codecs.decode(match.group(0), 'unicode-escape')
3457

35-
k, v = line.split('=', 1)
58+
return _escape_sequence.sub(decode_match, string)
3659

37-
if k.startswith('export '):
38-
(_, _, k) = k.partition('export ')
3960

40-
# Remove any leading and trailing spaces in key, value
41-
k, v = k.strip(), v.strip()
61+
def is_surrounded_by(string, char):
62+
return (
63+
len(string) > 1
64+
and string[0] == string[-1] == char
65+
)
4266

43-
if v:
44-
v = v.encode('unicode-escape').decode('ascii')
45-
quoted = v[0] == v[-1] in ['"', "'"]
46-
if quoted:
47-
v = decode_escaped(v[1:-1])
4867

49-
return k, v
68+
def parse_binding(string, position):
69+
match = _binding.match(string, position)
70+
(matched, key, value) = match.groups()
71+
if key is None or value is None:
72+
key = None
73+
value = None
74+
else:
75+
value_quoted = is_surrounded_by(value, "'") or is_surrounded_by(value, '"')
76+
if value_quoted:
77+
value = decode_escapes(value[1:-1])
78+
else:
79+
value = value.strip()
80+
return (Binding(key=key, value=value, original=matched), match.end())
5081

5182

5283
def parse_stream(stream):
53-
for line in stream:
54-
(key, value) = parse_line(line)
55-
yield Binding(key=key, value=value, original=line)
84+
string = stream.read()
85+
position = 0
86+
length = len(string)
87+
while position < length:
88+
(binding, position) = parse_binding(string, position)
89+
yield binding
5690

5791

5892
class DotEnv():

tests/test_cli.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def test_set_key(dotenv_file):
3939
with open(dotenv_file, 'r') as fp:
4040
assert 'HELLO="WORLD 2"\nfoo="bar"' == fp.read().strip()
4141

42+
success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, "HELLO", "WORLD\n3")
43+
44+
with open(dotenv_file, "r") as fp:
45+
assert 'HELLO="WORLD\n3"\nfoo="bar"' == fp.read().strip()
46+
4247

4348
def test_set_key_permission_error(dotenv_file):
4449
os.chmod(dotenv_file, 0o000)
@@ -71,6 +76,13 @@ def test_list_wo_file(cli):
7176
assert 'Invalid value for "-f"' in result.output
7277

7378

79+
def test_empty_value():
80+
with open(dotenv_path, "w") as f:
81+
f.write("TEST=")
82+
assert dotenv.get_key(dotenv_path, "TEST") == ""
83+
sh.rm(dotenv_path)
84+
85+
7486
def test_key_value_without_quotes():
7587
with open(dotenv_path, 'w') as f:
7688
f.write("TEST = value \n")
@@ -107,6 +119,25 @@ def test_value_with_special_characters():
107119
sh.rm(dotenv_path)
108120

109121

122+
def test_value_with_new_lines():
123+
with open(dotenv_path, 'w') as f:
124+
f.write('TEST="a\nb"')
125+
assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb"
126+
sh.rm(dotenv_path)
127+
128+
with open(dotenv_path, 'w') as f:
129+
f.write("TEST='a\nb'")
130+
assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb"
131+
sh.rm(dotenv_path)
132+
133+
134+
def test_value_after_comment():
135+
with open(dotenv_path, "w") as f:
136+
f.write("# comment\nTEST=a")
137+
assert dotenv.get_key(dotenv_path, "TEST") == "a"
138+
sh.rm(dotenv_path)
139+
140+
110141
def test_unset_ok(dotenv_file):
111142
with open(dotenv_file, "w") as f:
112143
f.write("a=b\nc=d")

tests/test_core.py

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sh
1010

1111
from dotenv import load_dotenv, find_dotenv, set_key, dotenv_values
12-
from dotenv.main import Binding, parse_line, parse_stream
12+
from dotenv.main import Binding, parse_stream
1313
from dotenv.compat import StringIO
1414
from IPython.terminal.embed import InteractiveShellEmbed
1515

@@ -24,34 +24,67 @@ def restore_os_environ():
2424
os.environ.update(environ)
2525

2626

27-
@pytest.mark.parametrize("test_input,expected", [
28-
("a=b", ("a", "b")),
29-
(" a = b ", ("a", "b")),
30-
("export a=b", ("a", "b")),
31-
(" export 'a'=b", ("'a'", "b")),
32-
(" export 'a'=b", ("'a'", "b")),
33-
("# a=b", (None, None)),
34-
("# a=b", (None, None)),
35-
("a=b space ", ('a', 'b space')),
36-
("a='b space '", ('a', 'b space ')),
37-
('a="b space "', ('a', 'b space ')),
38-
("export export_spam=1", ("export_spam", "1")),
39-
("export port=8000", ("port", "8000")),
40-
])
41-
def test_parse_line(test_input, expected):
42-
assert parse_line(test_input) == expected
43-
44-
4527
@pytest.mark.parametrize("test_input,expected", [
4628
("", []),
4729
("a=b", [Binding(key="a", value="b", original="a=b")]),
30+
("'a'=b", [Binding(key="'a'", value="b", original="'a'=b")]),
31+
("[=b", [Binding(key="[", value="b", original="[=b")]),
32+
(" a = b ", [Binding(key="a", value="b", original=" a = b ")]),
33+
("export a=b", [Binding(key="a", value="b", original="export a=b")]),
34+
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
35+
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
36+
("# a=b", [Binding(key=None, value=None, original="# a=b")]),
37+
('a=b # comment', [Binding(key="a", value="b", original="a=b # comment")]),
38+
("a=b space ", [Binding(key="a", value="b space", original="a=b space ")]),
39+
("a='b space '", [Binding(key="a", value="b space ", original="a='b space '")]),
40+
('a="b space "', [Binding(key="a", value="b space ", original='a="b space "')]),
41+
("export export_a=1", [Binding(key="export_a", value="1", original="export export_a=1")]),
42+
("export port=8000", [Binding(key="port", value="8000", original="export port=8000")]),
43+
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
44+
("a='b\nc'", [Binding(key="a", value="b\nc", original="a='b\nc'")]),
45+
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
46+
('a="b\\nc"', [Binding(key="a", value='b\nc', original='a="b\\nc"')]),
47+
('a="b\\"c"', [Binding(key="a", value='b"c', original='a="b\\"c"')]),
48+
("a='b\\'c'", [Binding(key="a", value="b'c", original="a='b\\'c'")]),
49+
("a=à", [Binding(key="a", value="à", original="a=à")]),
50+
('a="à"', [Binding(key="a", value="à", original='a="à"')]),
51+
('garbage', [Binding(key=None, value=None, original="garbage")]),
4852
(
4953
"a=b\nc=d",
5054
[
5155
Binding(key="a", value="b", original="a=b\n"),
5256
Binding(key="c", value="d", original="c=d"),
5357
],
5458
),
59+
(
60+
"a=b\r\nc=d",
61+
[
62+
Binding(key="a", value="b", original="a=b\r\n"),
63+
Binding(key="c", value="d", original="c=d"),
64+
],
65+
),
66+
(
67+
'a="\nb=c',
68+
[
69+
Binding(key="a", value='"', original='a="\n'),
70+
Binding(key="b", value='c', original="b=c"),
71+
]
72+
),
73+
(
74+
'# comment\na="b\nc"\nd=e\n',
75+
[
76+
Binding(key=None, value=None, original="# comment\n"),
77+
Binding(key="a", value="b\nc", original='a="b\nc"\n'),
78+
Binding(key="d", value="e", original="d=e\n"),
79+
],
80+
),
81+
(
82+
'garbage[%$#\na=b',
83+
[
84+
Binding(key=None, value=None, original="garbage[%$#\n"),
85+
Binding(key="a", value="b", original='a=b'),
86+
],
87+
),
5588
])
5689
def test_parse_stream(test_input, expected):
5790
result = parse_stream(StringIO(test_input))

0 commit comments

Comments
 (0)