Skip to content

Commit f275ede

Browse files
authored
Merge branch 'main' into pickle_93627
2 parents 1bf64c0 + 37135d2 commit f275ede

File tree

7 files changed

+187
-10
lines changed

7 files changed

+187
-10
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,11 @@ A ``TarInfo`` object has the following public data attributes:
740740
Name of the target file name, which is only present in :class:`TarInfo` objects
741741
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
742742

743+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
744+
that contains the link.
745+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
746+
the archive.
747+
743748

744749
.. attribute:: TarInfo.uid
745750
:type: int

Lib/tarfile.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,8 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
372372
self.zlib = zlib
373373
self.crc = zlib.crc32(b"")
374374
if mode == "r":
375-
self._init_read_gz()
376375
self.exception = zlib.error
376+
self._init_read_gz()
377377
else:
378378
self._init_write_gz(compresslevel)
379379

@@ -742,7 +742,7 @@ def __init__(self, tarinfo):
742742
class AbsoluteLinkError(FilterError):
743743
def __init__(self, tarinfo):
744744
self.tarinfo = tarinfo
745-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
746746

747747
class LinkOutsideDestinationError(FilterError):
748748
def __init__(self, tarinfo, path):
@@ -802,7 +802,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
802802
if member.islnk() or member.issym():
803803
if os.path.isabs(member.linkname):
804804
raise AbsoluteLinkError(member)
805-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805+
if member.issym():
806+
target_path = os.path.join(dest_path,
807+
os.path.dirname(name),
808+
member.linkname)
809+
else:
810+
target_path = os.path.join(dest_path,
811+
member.linkname)
812+
target_path = os.path.realpath(target_path)
806813
if os.path.commonpath([target_path, dest_path]) != dest_path:
807814
raise LinkOutsideDestinationError(member, target_path)
808815
return new_attrs

Lib/test/test_tarfile.py

Lines changed: 156 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,23 @@ class LzmaDetectReadTest(LzmaTest, DetectReadTest):
938938
pass
939939

940940

941+
class GzipBrokenHeaderCorrectException(GzipTest, unittest.TestCase):
942+
"""
943+
See: https://github.com/python/cpython/issues/107396
944+
"""
945+
def runTest(self):
946+
f = io.BytesIO(
947+
b'\x1f\x8b' # header
948+
b'\x08' # compression method
949+
b'\x04' # flags
950+
b'\0\0\0\0\0\0' # timestamp, compression data, OS ID
951+
b'\0\x01' # size
952+
b'\0\0\0\0\0' # corrupt data (zeros)
953+
)
954+
with self.assertRaises(tarfile.ReadError):
955+
tarfile.open(fileobj=f, mode='r|gz')
956+
957+
941958
class MemberReadTest(ReadTest, unittest.TestCase):
942959

943960
def _test_member(self, tarinfo, chksum=None, **kwargs):
@@ -3337,10 +3354,12 @@ def __exit__(self, *exc):
33373354
self.bio = None
33383355

33393356
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3340-
mode=None, **kwargs):
3357+
mode=None, size=None, **kwargs):
33413358
"""Add a member to the test archive. Call within `with`."""
33423359
name = str(name)
33433360
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3361+
if size is not None:
3362+
tarinfo.size = size
33443363
if mode:
33453364
tarinfo.mode = _filemode_to_int(mode)
33463365
if symlink_to is not None:
@@ -3416,7 +3435,8 @@ def check_context(self, tar, filter):
34163435
raise self.raised_exception
34173436
self.assertEqual(self.expected_paths, set())
34183437

3419-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3438+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3439+
size=None):
34203440
"""Check a single file. See check_context."""
34213441
if self.raised_exception:
34223442
raise self.raised_exception
@@ -3445,6 +3465,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
34453465
self.assertTrue(path.is_fifo())
34463466
else:
34473467
raise NotImplementedError(type)
3468+
if size is not None:
3469+
self.assertEqual(path.stat().st_size, size)
34483470
for parent in path.parents:
34493471
self.expected_paths.discard(parent)
34503472

@@ -3491,8 +3513,15 @@ def test_parent_symlink(self):
34913513
# Test interplaying symlinks
34923514
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
34933515
with ArchiveMaker() as arc:
3516+
3517+
# `current` links to `.` which is both:
3518+
# - the destination directory
3519+
# - `current` itself
34943520
arc.add('current', symlink_to='.')
3521+
3522+
# effectively points to ./../
34953523
arc.add('parent', symlink_to='current/..')
3524+
34963525
arc.add('parent/evil')
34973526

34983527
if os_helper.can_symlink():
@@ -3534,9 +3563,46 @@ def test_parent_symlink(self):
35343563
def test_parent_symlink2(self):
35353564
# Test interplaying symlinks
35363565
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3566+
3567+
# Posix and Windows have different pathname resolution:
3568+
# either symlink or a '..' component resolve first.
3569+
# Let's see which we are on.
3570+
if os_helper.can_symlink():
3571+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3572+
os.mkdir(testpath)
3573+
3574+
# testpath/current links to `.` which is all of:
3575+
# - `testpath`
3576+
# - `testpath/current`
3577+
# - `testpath/current/current`
3578+
# - etc.
3579+
os.symlink('.', os.path.join(testpath, 'current'))
3580+
3581+
# we'll test where `testpath/current/../file` ends up
3582+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3583+
pass
3584+
3585+
if os.path.exists(os.path.join(testpath, 'file')):
3586+
# Windows collapses 'current\..' to '.' first, leaving
3587+
# 'testpath\file'
3588+
dotdot_resolves_early = True
3589+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3590+
# Posix resolves 'current' to '.' first, leaving
3591+
# 'testpath/../file'
3592+
dotdot_resolves_early = False
3593+
else:
3594+
raise AssertionError('Could not determine link resolution')
3595+
35373596
with ArchiveMaker() as arc:
3597+
3598+
# `current` links to `.` which is both the destination directory
3599+
# and `current` itself
35383600
arc.add('current', symlink_to='.')
3601+
3602+
# `current/parent` is also available as `./parent`,
3603+
# and effectively points to `./../`
35393604
arc.add('current/parent', symlink_to='..')
3605+
35403606
arc.add('parent/evil')
35413607

35423608
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3550,6 +3616,7 @@ def test_parent_symlink2(self):
35503616

35513617
with self.check_context(arc.open(), 'tar'):
35523618
if os_helper.can_symlink():
3619+
# Fail when extracting a file outside destination
35533620
self.expect_exception(
35543621
tarfile.OutsideDestinationError,
35553622
"'parent/evil' would be extracted to "
@@ -3560,10 +3627,24 @@ def test_parent_symlink2(self):
35603627
self.expect_file('parent/evil')
35613628

35623629
with self.check_context(arc.open(), 'data'):
3563-
self.expect_exception(
3564-
tarfile.LinkOutsideDestinationError,
3565-
"""'current/parent' would link to ['"].*['"], """
3566-
+ "which is outside the destination")
3630+
if os_helper.can_symlink():
3631+
if dotdot_resolves_early:
3632+
# Fail when extracting a file outside destination
3633+
self.expect_exception(
3634+
tarfile.OutsideDestinationError,
3635+
"'parent/evil' would be extracted to "
3636+
+ """['"].*evil['"], which is outside """
3637+
+ "the destination")
3638+
else:
3639+
# Fail as soon as we have a symlink outside the destination
3640+
self.expect_exception(
3641+
tarfile.LinkOutsideDestinationError,
3642+
"'current/parent' would link to "
3643+
+ """['"].*outerdir['"], which is outside """
3644+
+ "the destination")
3645+
else:
3646+
self.expect_file('current/')
3647+
self.expect_file('parent/evil')
35673648

35683649
@symlink_test
35693650
def test_absolute_symlink(self):
@@ -3593,12 +3674,30 @@ def test_absolute_symlink(self):
35933674
with self.check_context(arc.open(), 'data'):
35943675
self.expect_exception(
35953676
tarfile.AbsoluteLinkError,
3596-
"'parent' is a symlink to an absolute path")
3677+
"'parent' is a link to an absolute path")
3678+
3679+
def test_absolute_hardlink(self):
3680+
# Test hardlink to an absolute path
3681+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3682+
with ArchiveMaker() as arc:
3683+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3684+
3685+
with self.check_context(arc.open(), 'fully_trusted'):
3686+
self.expect_exception(KeyError, ".*foo. not found")
3687+
3688+
with self.check_context(arc.open(), 'tar'):
3689+
self.expect_exception(KeyError, ".*foo. not found")
3690+
3691+
with self.check_context(arc.open(), 'data'):
3692+
self.expect_exception(
3693+
tarfile.AbsoluteLinkError,
3694+
"'parent' is a link to an absolute path")
35973695

35983696
@symlink_test
35993697
def test_sly_relative0(self):
36003698
# Inspired by 'relative0' in jwilk/traversal-archives
36013699
with ArchiveMaker() as arc:
3700+
# points to `../../tmp/moo`
36023701
arc.add('../moo', symlink_to='..//tmp/moo')
36033702

36043703
try:
@@ -3649,6 +3748,56 @@ def test_sly_relative2(self):
36493748
+ """['"].*moo['"], which is outside the """
36503749
+ "destination")
36513750

3751+
@symlink_test
3752+
def test_deep_symlink(self):
3753+
# Test that symlinks and hardlinks inside a directory
3754+
# point to the correct file (`target` of size 3).
3755+
# If links aren't supported we get a copy of the file.
3756+
with ArchiveMaker() as arc:
3757+
arc.add('targetdir/target', size=3)
3758+
# a hardlink's linkname is relative to the archive
3759+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3760+
'targetdir', 'target'))
3761+
# a symlink's linkname is relative to the link's directory
3762+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3763+
'..', 'targetdir', 'target'))
3764+
3765+
for filter in 'tar', 'data', 'fully_trusted':
3766+
with self.check_context(arc.open(), filter):
3767+
self.expect_file('targetdir/target', size=3)
3768+
self.expect_file('linkdir/hardlink', size=3)
3769+
if os_helper.can_symlink():
3770+
self.expect_file('linkdir/symlink', size=3,
3771+
symlink_to='../targetdir/target')
3772+
else:
3773+
self.expect_file('linkdir/symlink', size=3)
3774+
3775+
@symlink_test
3776+
def test_chains(self):
3777+
# Test chaining of symlinks/hardlinks.
3778+
# Symlinks are created before the files they point to.
3779+
with ArchiveMaker() as arc:
3780+
arc.add('linkdir/symlink', symlink_to='hardlink')
3781+
arc.add('symlink2', symlink_to=os.path.join(
3782+
'linkdir', 'hardlink2'))
3783+
arc.add('targetdir/target', size=3)
3784+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3785+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3786+
3787+
for filter in 'tar', 'data', 'fully_trusted':
3788+
with self.check_context(arc.open(), filter):
3789+
self.expect_file('targetdir/target', size=3)
3790+
self.expect_file('linkdir/hardlink', size=3)
3791+
self.expect_file('linkdir/hardlink2', size=3)
3792+
if os_helper.can_symlink():
3793+
self.expect_file('linkdir/symlink', size=3,
3794+
symlink_to='hardlink')
3795+
self.expect_file('symlink2', size=3,
3796+
symlink_to='linkdir/hardlink2')
3797+
else:
3798+
self.expect_file('linkdir/symlink', size=3)
3799+
self.expect_file('symlink2', size=3)
3800+
36523801
def test_modes(self):
36533802
# Test how file modes are extracted
36543803
# (Note that the modes are ignored on platforms without working chmod)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
C API functions :c:func:`PyErr_SetFromErrnoWithFilename`,
2+
:c:func:`PyErr_SetExcFromWindowsErrWithFilename` and
3+
:c:func:`PyErr_SetFromWindowsErrWithFilename` save now the error code before
4+
calling :c:func:`PyUnicode_DecodeFSDefault`.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tarfiles; Fixed use before assignment of self.exception for gzip decompression
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

Python/errors.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,10 +895,12 @@ PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename)
895895
{
896896
PyObject *name = NULL;
897897
if (filename) {
898+
int i = errno;
898899
name = PyUnicode_DecodeFSDefault(filename);
899900
if (name == NULL) {
900901
return NULL;
901902
}
903+
errno = i;
902904
}
903905
PyObject *result = PyErr_SetFromErrnoWithFilenameObjects(exc, name, NULL);
904906
Py_XDECREF(name);
@@ -998,6 +1000,9 @@ PyObject *PyErr_SetExcFromWindowsErrWithFilename(
9981000
{
9991001
PyObject *name = NULL;
10001002
if (filename) {
1003+
if ((DWORD)ierr == 0) {
1004+
ierr = (int)GetLastError();
1005+
}
10011006
name = PyUnicode_DecodeFSDefault(filename);
10021007
if (name == NULL) {
10031008
return NULL;
@@ -1028,6 +1033,9 @@ PyObject *PyErr_SetFromWindowsErrWithFilename(
10281033
{
10291034
PyObject *name = NULL;
10301035
if (filename) {
1036+
if ((DWORD)ierr == 0) {
1037+
ierr = (int)GetLastError();
1038+
}
10311039
name = PyUnicode_DecodeFSDefault(filename);
10321040
if (name == NULL) {
10331041
return NULL;

0 commit comments

Comments
 (0)