Skip to content

Commit 7e8cb85

Browse files
authored
Merge pull request Changaco#138 from Changaco/digests
2 parents e3b7d83 + b2feb75 commit 7e8cb85

File tree

3 files changed

+140
-9
lines changed

3 files changed

+140
-9
lines changed

libarchive/entry.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from contextlib import contextmanager
2-
from ctypes import create_string_buffer
2+
from ctypes import create_string_buffer, string_at
33
from enum import IntEnum
44
import math
55

66
from . import ffi
7+
from .exception import ArchiveError
78

89

910
class FileType(IntEnum):
@@ -86,6 +87,7 @@ def modify(self, header_codec=None, **attributes):
8687
rdev (int | Tuple[int, int]): device number, if the file is a device
8788
rdevmajor (int): major part of the device number
8889
rdevminor (int): minor part of the device number
90+
stored_digests (dict[str, bytes]): hashes of the file's contents
8991
"""
9092
if header_codec:
9193
self.header_codec = header_codec
@@ -433,6 +435,64 @@ def rdevminor(self):
433435
def rdevminor(self, value):
434436
ffi.entry_set_rdevminor(self._entry_p, value)
435437

438+
@property
def stored_digests(self):
    """The file's hashes stored in the archive.

    libarchive only supports reading and writing digests from and to 'mtree'
    files. Setting the digests requires at least version 3.8.0 of libarchive
    (released in May 2025). It also requires including the names of the
    digest algorithms in the string of options passed to the archive writer
    (e.g. `file_writer(archive_path, 'mtree', options='md5,rmd160,sha256')`).

    Reading this property returns a dict with one item per algorithm listed
    in `ffi.DIGEST_ALGORITHMS`, each value being the result of
    `get_stored_digest` for that algorithm. Assigning a mapping to it calls
    `set_stored_digest` once per item, in iteration order.
    """
    return {name: self.get_stored_digest(name) for name in ffi.DIGEST_ALGORITHMS}

@stored_digests.setter
def stored_digests(self, values):
    # Validation and error reporting are delegated to `set_stored_digest`,
    # so a failure on one item leaves the previous items already applied.
    for name, value in values.items():
        self.set_stored_digest(name, value)
454+
455+
def get_stored_digest(self, algorithm_name):
    """Return the digest stored in this entry for a single algorithm.

    Args:
        algorithm_name (str): a key of `ffi.DIGEST_ALGORITHMS`

    Returns:
        bytes: `bytes_length` bytes copied from the pointer returned by
        libarchive for this entry and algorithm

    Raises:
        KeyError: if `algorithm_name` isn't a key of `ffi.DIGEST_ALGORITHMS`
        NotImplementedError: if the loaded libarchive doesn't provide the
            function for reading digests, or if calling it raises
            `ArchiveError`
    """
    algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name]
    # Only the lookup of the optional binding is guarded, so that an
    # unrelated AttributeError (e.g. on `self._entry_p`) isn't misreported
    # as a missing libarchive feature.
    try:
        entry_digest = ffi.entry_digest
    except AttributeError:
        raise NotImplementedError(
            f"the libarchive being used (version {ffi.version_number()}, path "
            f"{ffi.libarchive_path}) doesn't support reading entry digests"
        ) from None
    try:
        ptr = entry_digest(self._entry_p, algorithm.libarchive_id)
    except ArchiveError:
        raise NotImplementedError(
            f"the libarchive being used (version {ffi.version_number()}, path "
            f"{ffi.libarchive_path}) doesn't support {algorithm_name} digests"
        ) from None
    # Copy the bytes out of libarchive's memory into a Python `bytes` object.
    return string_at(ptr, algorithm.bytes_length)
470+
471+
def set_stored_digest(self, algorithm_name, value):
    """Store a digest in this entry for a single algorithm.

    Args:
        algorithm_name (str): a key of `ffi.DIGEST_ALGORITHMS`
        value (bytes): the raw digest, exactly `bytes_length` bytes long

    Raises:
        KeyError: if `algorithm_name` isn't a key of `ffi.DIGEST_ALGORITHMS`
        ValueError: if `value` doesn't have the length expected for the
            algorithm
        NotImplementedError: if the loaded libarchive doesn't provide the
            function for writing digests, or if that function returns a
            negative code
    """
    algorithm = ffi.DIGEST_ALGORITHMS[algorithm_name]
    expected_length = algorithm.bytes_length
    # The length is checked before touching libarchive, so the buffer built
    # below always has exactly the size of the input.
    if len(value) != expected_length:
        raise ValueError(
            f"invalid input digest: expected {expected_length} bytes, "
            f"got {len(value)}"
        )
    # Only the lookup of the optional binding is guarded, so that an
    # unrelated AttributeError (e.g. on `self._entry_p`) isn't misreported
    # as a missing libarchive feature.
    try:
        entry_set_digest = ffi.entry_set_digest
    except AttributeError:
        raise NotImplementedError(
            f"the libarchive being used (version {ffi.version_number()}, path "
            f"{ffi.libarchive_path}) doesn't support writing entry digests"
        ) from None
    digest_buffer = (expected_length * ffi.c_ubyte).from_buffer_copy(value)
    retcode = entry_set_digest(
        self._entry_p,
        algorithm.libarchive_id,
        digest_buffer,
    )
    # NOTE(review): `entry_set_digest` is registered with `check_int` as its
    # result checker; if that checker raises on negative codes, this branch
    # is never reached. Confirm against `check_int`.
    if retcode < 0:
        raise NotImplementedError(
            f"the libarchive being used (version {ffi.version_number()}, path "
            f"{ffi.libarchive_path}) doesn't support {algorithm_name} digests"
        )
495+
436496

437497
class ConsumedArchiveEntry(ArchiveEntry):
438498

libarchive/ffi.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ctypes import (
22
c_char_p, c_int, c_uint, c_long, c_longlong, c_size_t, c_int64,
3-
c_void_p, c_wchar_p, CFUNCTYPE, POINTER,
3+
c_ubyte, c_void_p, c_wchar_p, CFUNCTYPE, POINTER,
44
)
55

66
try:
@@ -365,3 +365,42 @@ def get_write_filter_function(filter_name):
365365
f"the libarchive being used (version {version_number()}, "
366366
f"path {libarchive_path}) doesn't support encryption"
367367
)
368+
369+
370+
# archive entry digests (a.k.a. hashes)
371+
372+
class DigestAlgorithm:
    """Describe one digest algorithm that can be attached to an entry.

    Attributes:
        name (str): the algorithm's name, also used as its key in
            `DIGEST_ALGORITHMS`
        libarchive_id (int): the integer passed to the libarchive digest
            functions to select this algorithm
        bytes_length (int): the size in bytes of a digest of this kind
    """

    __slots__ = ('name', 'libarchive_id', 'bytes_length')

    def __init__(self, name, libarchive_id, bytes_length):
        self.name = name
        self.libarchive_id = libarchive_id
        self.bytes_length = bytes_length

    def __repr__(self):
        return (
            f"{type(self).__name__}({self.name!r}, "
            f"libarchive_id={self.libarchive_id!r}, "
            f"bytes_length={self.bytes_length!r})"
        )


# Lookup table of the known algorithms, keyed by name. The iteration order of
# this dict is the order in which digests are read by code looping over it.
DIGEST_ALGORITHMS = {
    'md5': DigestAlgorithm('md5', libarchive_id=1, bytes_length=16),
    'rmd160': DigestAlgorithm('rmd160', libarchive_id=2, bytes_length=20),
    'sha1': DigestAlgorithm('sha1', libarchive_id=3, bytes_length=20),
    'sha256': DigestAlgorithm('sha256', libarchive_id=4, bytes_length=32),
    'sha384': DigestAlgorithm('sha384', libarchive_id=5, bytes_length=48),
    'sha512': DigestAlgorithm('sha512', libarchive_id=6, bytes_length=64),
}
389+
390+
# Both digest functions are optional: when registering one of them raises
# AttributeError, the failure is only logged and the corresponding name is
# left undefined in this module, which callers detect with their own
# `except AttributeError`.
# NOTE(review): presumably `ffi()` raises AttributeError when the loaded
# shared library lacks the symbol; confirm against its definition.
try:
    ffi('entry_digest', [c_archive_entry_p, c_int], POINTER(c_ubyte), check_null)
except AttributeError:
    logger.info(
        f"the libarchive being used (version {version_number()}, "
        f"path {libarchive_path}) doesn't support reading entry digests"
    )

try:
    ffi('entry_set_digest',
        [c_archive_entry_p, c_int, POINTER(c_ubyte)],
        c_int, check_int)
except AttributeError:
    logger.info(
        f"the libarchive being used (version {version_number()}, "
        f"path {libarchive_path}) doesn't support modifying entry digests"
    )

tests/test_entry.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
3-
from codecs import open
41
import json
52
import locale
63
from os import environ, stat
@@ -9,14 +6,12 @@
96

107
import pytest
118

12-
from libarchive import ArchiveError, memory_reader, memory_writer
9+
from libarchive import ArchiveError, ffi, file_writer, memory_reader, memory_writer
1310
from libarchive.entry import ArchiveEntry, ConsumedArchiveEntry, PassedArchiveEntry
1411

1512
from . import data_dir, get_entries, get_tarinfos
1613

1714

18-
text_type = unicode if str is bytes else str # noqa: F821
19-
2015
locale.setlocale(locale.LC_ALL, '')
2116

2217
# needed for sane time stamp comparison
@@ -106,7 +101,7 @@ def check_entries(test_file, regen=False, ignore=''):
106101
# Normalize all unicode (can vary depending on the system)
107102
for d in (e1, e2):
108103
for key in d:
109-
if isinstance(d[key], text_type):
104+
if isinstance(d[key], str):
110105
d[key] = unicodedata.normalize('NFC', d[key])
111106
assert e1 == e2
112107

@@ -155,3 +150,40 @@ def test_non_ASCII_encoding_of_file_metadata():
155150
with memory_reader(buf, header_codec='cp037') as archive:
156151
entry = next(iter(archive))
157152
assert entry.pathname == file_name
153+
154+
155+
# Fake digests made of the byte 0x21 ('!') repeated to each algorithm's length.
fake_hashes = dict(
    md5=b'!' * 16,
    rmd160=b'!' * 20,
    sha1=b'!' * 20,
    sha256=b'!' * 32,
    sha384=b'!' * 48,
    sha512=b'!' * 64,
)
# The mtree text matching `fake_hashes`: each digest is the hexadecimal form
# of the fake bytes. The replacement fields use double quotes because reusing
# the enclosing quote character inside an f-string is a SyntaxError before
# Python 3.12.
mtree = (
    '#mtree\n'
    './empty.txt nlink=0 time=0.0 mode=664 gid=0 uid=0 type=file size=42 '
    f'md5digest={"21" * 16} rmd160digest={"21" * 20} sha1digest={"21" * 20} '
    f'sha256digest={"21" * 32} sha384digest={"21" * 48} sha512digest={"21" * 64}\n'
)
169+
170+
171+
def test_reading_entry_digests(tmpdir):
    """Check that the digests of an mtree entry are exposed as raw bytes.

    The archive is the in-memory `mtree` text; the first entry's
    `stored_digests` must be equal to `fake_hashes`.
    """
    with memory_reader(mtree.encode('ascii')) as archive:
        entry = next(iter(archive))
        assert entry.stored_digests == fake_hashes
175+
176+
177+
@pytest.mark.xfail(
    condition=ffi.version_number() < 3008000,
    reason="libarchive < 3.8",
)
def test_writing_entry_digests(tmpdir):
    """Check that digests set on an entry end up in the written mtree file.

    The test is expected to fail when the libarchive version number is below
    3008000.
    """
    archive_path = str(tmpdir / 'mtree')
    # The writer options are the comma-separated names of the algorithms.
    options = ','.join(fake_hashes.keys())
    with file_writer(archive_path, 'mtree', options=options) as archive:
        # Add an empty file, with fake hashes.
        archive.add_file_from_memory('empty.txt', 42, (), stored_digests=fake_hashes)
    with open(archive_path) as f:
        libarchive_mtree = f.read()
    assert libarchive_mtree == mtree

0 commit comments

Comments
 (0)