Skip to content

Commit f2db03e

Browse files
Merge pull request #1: borg2: repoindex improvements
Merged from original PR #6705 Original: borgbackup/borg#6705
2 parents 6ba644b + 98f4534 commit f2db03e

File tree

5 files changed

+269
-62
lines changed

5 files changed

+269
-62
lines changed

src/borg/hashindex.pyx

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,20 @@ assert UINT32_MAX == 2**32-1
7777
assert _MAX_VALUE % 2 == 1
7878

7979

80+
def hashindex_variant(fn):
    """Peek into an index file and find out what it is.

    Only the first 18 bytes (the hash index header) are read. The variant is
    derived from the 8-byte magic at the start of the header and, for
    ``BORG_IDX`` files, from the header bytes at offsets 16 and 17.

    :param fn: path of the index file to inspect
    :return: ``'k<key_size>_v<value_size>'`` for ``BORG_IDX`` files, or
             ``'k32_v16'`` for the ``12345678`` magic used by unit tests
    :raises ValueError: if the magic is unknown, or if a ``BORG_IDX`` file is
                        too short to contain the complete header
    """
    header_len = 18  # len(HashHeader)
    with open(fn, 'rb') as f:
        hh = f.read(header_len)
    # the file is closed here; everything below works on the bytes read
    magic = hh[0:8]
    if magic == b'BORG_IDX':
        if len(hh) < header_len:
            # without this check, hh[16] / hh[17] would raise an IndexError
            raise ValueError(f'truncated hashindex header: got {len(hh)} of {header_len} bytes')
        key_size = hh[16]
        value_size = hh[17]
        return f'k{key_size}_v{value_size}'
    if magic == b'12345678':  # used by unit tests
        return 'k32_v16'  # just return the current variant
    raise ValueError(f'unknown hashindex format, magic: {magic!r}')
92+
93+
8094
@cython.internal
8195
cdef class IndexBase:
8296
cdef HashIndex *index
@@ -196,8 +210,115 @@ cdef class FuseVersionsIndex(IndexBase):
196210
return hashindex_get(self.index, <unsigned char *>key) != NULL
197211

198212

213+
# Python-level view of one NSIndex value: (segment, offset, size).
NSIndexEntry = namedtuple('NSIndexEntry', 'segment offset size')
214+
215+
199216
cdef class NSIndex(IndexBase):
    """Hash index mapping a key to an NSIndexEntry plus a 32-bit flags word.

    Each value consists of four uint32 fields (16 bytes): segment, offset,
    size and flags. The fields are converted with _htole32() when written and
    with _le32toh() when read.
    """

    value_size = 16

    def __getitem__(self, key):
        """Return the NSIndexEntry stored for *key*; raise KeyError if *key* is not present."""
        assert len(key) == self.key_size
        data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
        if not data:
            raise KeyError(key)
        cdef uint32_t segment = _le32toh(data[0])
        assert segment <= _MAX_VALUE, "maximum number of segments reached"
        return NSIndexEntry(segment, _le32toh(data[1]), _le32toh(data[2]))

    def __setitem__(self, key, value):
        """Store value[0..2] (segment, offset, size) for *key*; the flags word is reset to 0."""
        assert len(key) == self.key_size
        cdef uint32_t[4] data
        cdef uint32_t segment = value[0]
        assert segment <= _MAX_VALUE, "maximum number of segments reached"
        data[0] = _htole32(segment)
        data[1] = _htole32(value[1])
        data[2] = _htole32(value[2])
        data[3] = 0  # init flags to all cleared
        if not hashindex_set(self.index, <unsigned char *>key, data):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
        """Return True if *key* is present in the index."""
        cdef uint32_t segment
        assert len(key) == self.key_size
        data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
        if data != NULL:
            segment = _le32toh(data[0])
            assert segment <= _MAX_VALUE, "maximum number of segments reached"
        return data != NULL

    def iteritems(self, marker=None, mask=0, value=0):
        """iterate over all items or optionally only over items having specific flag values

        :param marker: optional key; if given, the iterator is positioned at the
                       entry of that key before iteration starts
                       (NOTE(review): presumably iteration then resumes with the
                       entry following the marker - confirm against hashindex_next_key)
        :param mask: int, bit mask selecting the flag bits to compare
        :param value: int, required value of the selected flag bits
        :return: a NSKeyIterator
        :raises IndexError: if *marker* is given, but not present in the index
        """
        cdef const unsigned char *key
        assert isinstance(mask, int)
        assert isinstance(value, int)
        iter = NSKeyIterator(self.key_size, mask, value)
        iter.idx = self
        iter.index = self.index
        if marker:
            key = hashindex_get(self.index, <unsigned char *>marker)
            # Check the lookup result, not the marker: inside this branch the
            # marker is always truthy, and a NULL result must not be used for
            # the pointer arithmetic below.
            if key == NULL:
                raise IndexError
            # hashindex_get result minus key_size is stored as the iterator position
            iter.key = key - self.key_size
        return iter

    def flags(self, key, mask=0xFFFFFFFF, value=None):
        """query and optionally set flags

        :param key: key of the entry whose flags are queried / modified
        :param mask: int, bit mask selecting the flag bits to return (and to modify)
        :param value: if an int, the bits selected by *mask* are set to the
                      corresponding bits of *value*; otherwise flags are only queried
        :return: the previous flags value, restricted to the bits in *mask*
        :raises KeyError: if *key* is not present in the index
        """
        assert len(key) == self.key_size
        assert isinstance(mask, int)
        data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
        if not data:
            raise KeyError(key)
        flags = _le32toh(data[3])
        if isinstance(value, int):
            new_flags = flags & ~mask  # clear masked bits
            new_flags |= value & mask  # set value bits
            data[3] = _htole32(new_flags)
        return flags & mask  # always return previous flags value
278+
279+
280+
cdef class NSKeyIterator:
    """Iterator yielding (key, NSIndexEntry) pairs of an NSIndex, optionally filtered by flags.

    An entry is yielded if ``flags & mask == value``. The ``idx`` and ``index``
    attributes (and optionally ``key``) are assigned by NSIndex.iteritems()
    after construction.
    """
    cdef NSIndex idx
    cdef HashIndex *index
    cdef const unsigned char *key
    cdef int key_size
    cdef int exhausted
    # Unsigned 32-bit, like the flags word they are compared against; a signed
    # int cannot represent masks/values that have bit 31 set.
    cdef uint32_t flag_mask
    cdef uint32_t flag_value

    def __cinit__(self, key_size, mask, value):
        self.key = NULL
        self.key_size = key_size
        # note: mask and value both default to 0, so they will match all entries
        # Both are converted with _htole32() once here, so __next__ can compare
        # them against the raw (unconverted) flags word of each entry.
        self.flag_mask = _htole32(mask)
        self.flag_value = _htole32(value)
        self.exhausted = 0

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next (key, NSIndexEntry) pair whose flags match; raise StopIteration at the end."""
        cdef uint32_t *value
        if self.exhausted:
            raise StopIteration
        while True:
            self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
            if not self.key:
                self.exhausted = 1
                raise StopIteration
            # the value fields directly follow the key bytes
            value = <uint32_t *> (self.key + self.key_size)
            if value[3] & self.flag_mask == self.flag_value:
                # we found a matching entry!
                break

        cdef uint32_t segment = _le32toh(value[0])
        assert segment <= _MAX_VALUE, "maximum number of segments reached"
        return ((<char *>self.key)[:self.key_size],
                NSIndexEntry(segment, _le32toh(value[1]), _le32toh(value[2])))
318+
319+
320+
cdef class NSIndex1(IndexBase): # legacy borg 1.x
321+
201322
value_size = 8
202323

203324
def __getitem__(self, key):
@@ -230,7 +351,7 @@ cdef class NSIndex(IndexBase):
230351

231352
def iteritems(self, marker=None):
232353
cdef const unsigned char *key
233-
iter = NSKeyIterator(self.key_size)
354+
iter = NSKeyIterator1(self.key_size)
234355
iter.idx = self
235356
iter.index = self.index
236357
if marker:
@@ -241,8 +362,8 @@ cdef class NSIndex(IndexBase):
241362
return iter
242363

243364

244-
cdef class NSKeyIterator:
245-
cdef NSIndex idx
365+
cdef class NSKeyIterator1: # legacy borg 1.x
366+
cdef NSIndex1 idx
246367
cdef HashIndex *index
247368
cdef const unsigned char *key
248369
cdef int key_size

0 commit comments

Comments
 (0)