Skip to content

Commit 4bf1be7

Browse files
repository: set/query flags, iteration over flagged items (NSIndex)
Use this to query or set/clear flags in the "extra" word. Also: remove direct access to the "extra" word and adapt the tests accordingly.
1 parent 5fb4f94 commit 4bf1be7

File tree

5 files changed

+131
-38
lines changed

5 files changed

+131
-38
lines changed

src/borg/hashindex.pyx

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ cdef class FuseVersionsIndex(IndexBase):
210210
return hashindex_get(self.index, <unsigned char *>key) != NULL
211211

212212

213-
NSIndexEntry = namedtuple('NSIndexEntry', 'segment offset size extra')
213+
NSIndexEntry = namedtuple('NSIndexEntry', 'segment offset size')
214214

215215

216216
cdef class NSIndex(IndexBase):
@@ -224,7 +224,7 @@ cdef class NSIndex(IndexBase):
224224
raise KeyError(key)
225225
cdef uint32_t segment = _le32toh(data[0])
226226
assert segment <= _MAX_VALUE, "maximum number of segments reached"
227-
return NSIndexEntry(segment, _le32toh(data[1]), _le32toh(data[2]), _le32toh(data[3]))
227+
return NSIndexEntry(segment, _le32toh(data[1]), _le32toh(data[2]))
228228

229229
def __setitem__(self, key, value):
230230
assert len(key) == self.key_size
@@ -234,7 +234,7 @@ cdef class NSIndex(IndexBase):
234234
data[0] = _htole32(segment)
235235
data[1] = _htole32(value[1])
236236
data[2] = _htole32(value[2])
237-
data[3] = _htole32(value[3])
237+
data[3] = 0 # init flags to all cleared
238238
if not hashindex_set(self.index, <unsigned char *>key, data):
239239
raise Exception('hashindex_set failed')
240240

@@ -247,9 +247,10 @@ cdef class NSIndex(IndexBase):
247247
assert segment <= _MAX_VALUE, "maximum number of segments reached"
248248
return data != NULL
249249

250-
def iteritems(self, marker=None):
250+
def iteritems(self, marker=None, mask=None, value=None):
251+
"""iterate over all items or optionally only over items having specific flag values"""
251252
cdef const unsigned char *key
252-
iter = NSKeyIterator(self.key_size)
253+
iter = NSKeyIterator(self.key_size, mask, value)
253254
iter.idx = self
254255
iter.index = self.index
255256
if marker:
@@ -259,34 +260,62 @@ cdef class NSIndex(IndexBase):
259260
iter.key = key - self.key_size
260261
return iter
261262

263+
def flags(self, key, mask=0xFFFFFFFF, value=None):
264+
"""query and optionally set flags"""
265+
assert len(key) == self.key_size
266+
assert isinstance(mask, int)
267+
data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
268+
if not data:
269+
raise KeyError(key)
270+
flags = _le32toh(data[3])
271+
if isinstance(value, int):
272+
new_flags = flags & ~mask # clear masked bits
273+
new_flags |= value & mask # set value bits
274+
data[3] = _htole32(new_flags)
275+
return flags & mask # always return previous flags value
276+
262277

263278
cdef class NSKeyIterator:
264279
cdef NSIndex idx
265280
cdef HashIndex *index
266281
cdef const unsigned char *key
267282
cdef int key_size
268283
cdef int exhausted
284+
cdef int flag_mask
285+
cdef int flag_value
269286

270-
def __cinit__(self, key_size):
287+
def __cinit__(self, key_size, mask, value):
271288
self.key = NULL
272289
self.key_size = key_size
290+
if mask is not None and value is not None:
291+
self.flag_mask = _htole32(mask)
292+
self.flag_value = _htole32(value)
293+
else:
294+
self.flag_mask = 0
295+
self.flag_value = 0
273296
self.exhausted = 0
274297

275298
def __iter__(self):
276299
return self
277300

278301
def __next__(self):
302+
cdef uint32_t *value
279303
if self.exhausted:
280304
raise StopIteration
281-
self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
282-
if not self.key:
283-
self.exhausted = 1
284-
raise StopIteration
285-
cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
305+
while True:
306+
self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
307+
if not self.key:
308+
self.exhausted = 1
309+
raise StopIteration
310+
value = <uint32_t *> (self.key + self.key_size)
311+
if value[3] & self.flag_mask == self.flag_value:
312+
# we found a matching entry!
313+
break
314+
286315
cdef uint32_t segment = _le32toh(value[0])
287316
assert segment <= _MAX_VALUE, "maximum number of segments reached"
288317
return ((<char *>self.key)[:self.key_size],
289-
NSIndexEntry(segment, _le32toh(value[1]), _le32toh(value[2]), _le32toh(value[3])))
318+
NSIndexEntry(segment, _le32toh(value[1]), _le32toh(value[2])))
290319

291320

292321
cdef class NSIndex1(IndexBase): # legacy borg 1.x

src/borg/repository.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ def complete_xfer(intermediate=True):
821821
except LoggedIO.SegmentFull:
822822
complete_xfer()
823823
new_segment, offset = self.io.write_put(key, data)
824-
self.index[key] = NSIndexEntry(new_segment, offset, len(data), in_index.extra)
824+
self.index[key] = NSIndexEntry(new_segment, offset, len(data))
825825
segments.setdefault(new_segment, 0)
826826
segments[new_segment] += 1
827827
segments[segment] -= 1
@@ -937,7 +937,7 @@ def _update_index(self, segment, objects, report=None):
937937
self.segments[in_index.segment] -= 1
938938
except KeyError:
939939
pass
940-
self.index[key] = NSIndexEntry(segment, offset, size, 0)
940+
self.index[key] = NSIndexEntry(segment, offset, size)
941941
self.segments[segment] += 1
942942
self.storage_quota_use += header_size(tag) + size
943943
elif tag == TAG_DELETE:
@@ -1182,7 +1182,7 @@ def scan(self, limit=None, marker=None):
11821182
self.index = self.open_index(transaction_id)
11831183
at_start = marker is None
11841184
# smallest valid seg is <uint32> 0, smallest valid offs is <uint32> 8
1185-
start_segment, start_offset, _, _ = (0, 0, 0, 0) if at_start else self.index[marker]
1185+
start_segment, start_offset, _ = (0, 0, 0) if at_start else self.index[marker]
11861186
result = []
11871187
for segment, filename in self.io.segment_iterator(start_segment):
11881188
obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False, include_data=False)
@@ -1212,7 +1212,7 @@ def get(self, id):
12121212
if not self.index:
12131213
self.index = self.open_index(self.get_transaction_id())
12141214
try:
1215-
in_index = NSIndexEntry(*((self.index[id] + (None, None))[:4])) # legacy: no size/extra
1215+
in_index = NSIndexEntry(*((self.index[id] + (None, ))[:3])) # legacy: no size/extra
12161216
return self.io.read(in_index.segment, in_index.offset, id, expected_size=in_index.size)
12171217
except KeyError:
12181218
raise self.ObjectNotFound(id, self.path) from None
@@ -1243,7 +1243,7 @@ def put(self, id, data, wait=True):
12431243
self.storage_quota_use += header_size(TAG_PUT2) + len(data)
12441244
self.segments.setdefault(segment, 0)
12451245
self.segments[segment] += 1
1246-
self.index[id] = NSIndexEntry(segment, offset, len(data), 0)
1246+
self.index[id] = NSIndexEntry(segment, offset, len(data))
12471247
if self.storage_quota and self.storage_quota_use > self.storage_quota:
12481248
self.transaction_doomed = self.StorageQuotaExceeded(
12491249
format_file_size(self.storage_quota), format_file_size(self.storage_quota_use))

src/borg/selftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
ChunkerTestCase,
3434
]
3535

36-
SELFTEST_COUNT = 36
36+
SELFTEST_COUNT = 37
3737

3838

3939
class SelfTestResult(TestResult):

src/borg/testsuite/hashindex.py

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def _generic_test(self, cls, make_value, sha):
8787
del idx
8888

8989
def test_nsindex(self):
90-
self._generic_test(NSIndex, lambda x: (x, x, x, x),
91-
'c9fe5878800d2a0691b667c665a00d4a186e204e891076d6b109016940742bed')
90+
self._generic_test(NSIndex, lambda x: (x, x, x),
91+
'7d70671d0b7e9d2f51b2691ecf35184b9f8ecc1202cceb2748c905c8fc04c256')
9292

9393
def test_chunkindex(self):
9494
self._generic_test(ChunkIndex, lambda x: (x, x),
@@ -153,6 +153,70 @@ def test_chunkindex_summarize(self):
153153
assert chunks == 1 + 2 + 3
154154
assert unique_chunks == 3
155155

156+
def test_flags(self):
157+
idx = NSIndex()
158+
key = H(0)
159+
self.assert_raises(KeyError, idx.flags, key, 0)
160+
idx[key] = 0, 0, 0 # create entry
161+
# check bit 0 and 1, should be both 0 after entry creation
162+
self.assert_equal(idx.flags(key, mask=3), 0)
163+
# set bit 0
164+
idx.flags(key, mask=1, value=1)
165+
self.assert_equal(idx.flags(key, mask=1), 1)
166+
# set bit 1
167+
idx.flags(key, mask=2, value=2)
168+
self.assert_equal(idx.flags(key, mask=2), 2)
169+
# check both bit 0 and 1, both should be set
170+
self.assert_equal(idx.flags(key, mask=3), 3)
171+
# clear bit 1
172+
idx.flags(key, mask=2, value=0)
173+
self.assert_equal(idx.flags(key, mask=2), 0)
174+
# clear bit 0
175+
idx.flags(key, mask=1, value=0)
176+
self.assert_equal(idx.flags(key, mask=1), 0)
177+
# check both bit 0 and 1, both should be cleared
178+
self.assert_equal(idx.flags(key, mask=3), 0)
179+
180+
def test_flags_iteritems(self):
181+
idx = NSIndex()
182+
keys_flagged0 = {H(i) for i in (1, 2, 3, 42)}
183+
keys_flagged1 = {H(i) for i in (11, 12, 13, 142)}
184+
keys_flagged2 = {H(i) for i in (21, 22, 23, 242)}
185+
keys_flagged3 = {H(i) for i in (31, 32, 33, 342)}
186+
for key in keys_flagged0:
187+
idx[key] = 0, 0, 0 # create entry
188+
idx.flags(key, mask=3, value=0) # not really necessary, unflagged is default
189+
for key in keys_flagged1:
190+
idx[key] = 0, 0, 0 # create entry
191+
idx.flags(key, mask=3, value=1)
192+
for key in keys_flagged2:
193+
idx[key] = 0, 0, 0 # create entry
194+
idx.flags(key, mask=3, value=2)
195+
for key in keys_flagged3:
196+
idx[key] = 0, 0, 0 # create entry
197+
idx.flags(key, mask=3, value=3)
198+
# check if we can iterate over all items
199+
k_all = {k for k, v in idx.iteritems()}
200+
self.assert_equal(k_all, keys_flagged0 | keys_flagged1 | keys_flagged2 | keys_flagged3)
201+
# check if we can iterate over the flagged0 items
202+
k0 = {k for k, v in idx.iteritems(mask=3, value=0)}
203+
self.assert_equal(k0, keys_flagged0)
204+
# check if we can iterate over the flagged1 items
205+
k1 = {k for k, v in idx.iteritems(mask=3, value=1)}
206+
self.assert_equal(k1, keys_flagged1)
207+
# check if we can iterate over the flagged2 items
208+
k1 = {k for k, v in idx.iteritems(mask=3, value=2)}
209+
self.assert_equal(k1, keys_flagged2)
210+
# check if we can iterate over the flagged3 items
211+
k1 = {k for k, v in idx.iteritems(mask=3, value=3)}
212+
self.assert_equal(k1, keys_flagged3)
213+
# check if we can iterate over the flagged1 + flagged3 items
214+
k1 = {k for k, v in idx.iteritems(mask=1, value=1)}
215+
self.assert_equal(k1, keys_flagged1 | keys_flagged3)
216+
# check if we can iterate over the flagged0 + flagged2 items
217+
k1 = {k for k, v in idx.iteritems(mask=1, value=0)}
218+
self.assert_equal(k1, keys_flagged0 | keys_flagged2)
219+
156220

157221
class HashIndexExtraTestCase(BaseTestCase):
158222
"""These tests are separate because they should not become part of the selftest.
@@ -531,38 +595,38 @@ def test_bug_4829(self):
531595

532596
from struct import pack
533597

534-
def HH(w, x, y, z):
535-
# make some 32byte long thing that depends on w, x, y, z.
536-
# same w will mean a collision in the hashtable as bucket index is computed from
537-
# first 4 bytes. giving a specific w targets bucket index w.
538-
# x is to create different keys and does not go into the bucket index calculation.
539-
# so, same w + different x --> collision
540-
return pack('<IIIIIIII', w, x, y, z, 0, 0, 0, 0) # 8 * 4 == 32
598+
def HH(x, y, z):
599+
# make some 32byte long thing that depends on x, y, z.
600+
# same x will mean a collision in the hashtable as bucket index is computed from
601+
# first 4 bytes. giving a specific x targets bucket index x.
602+
# y is to create different keys and does not go into the bucket index calculation.
603+
# so, same x + different y --> collision
604+
return pack('<IIIIIIII', x, y, z, 0, 0, 0, 0, 0) # 8 * 4 == 32
541605

542606
idx = NSIndex()
543607

544608
# create lots of colliding entries
545-
for x in range(700): # stay below max load to not trigger resize
546-
idx[HH(0, x, 0, 0)] = (0, x, 0, 0)
609+
for y in range(700): # stay below max load to not trigger resize
610+
idx[HH(0, y, 0)] = (0, y, 0)
547611

548612
assert idx.size() == 1031 * 48 + 18 # 1031 buckets + header
549613

550614
# delete lots of the collisions, creating lots of tombstones
551-
for x in range(400): # stay above min load to not trigger resize
552-
del idx[HH(0, x, 0, 0)]
615+
for y in range(400): # stay above min load to not trigger resize
616+
del idx[HH(0, y, 0)]
553617

554618
# create lots of colliding entries, within the not yet used part of the hashtable
555-
for x in range(330): # stay below max load to not trigger resize
556-
# at x == 259 a resize will happen due to going beyond max EFFECTIVE load
619+
for y in range(330): # stay below max load to not trigger resize
620+
# at y == 259 a resize will happen due to going beyond max EFFECTIVE load
557621
# if the bug is present, that element will be inserted at the wrong place.
558622
# and because it will be at the wrong place, it can not be found again.
559-
idx[HH(600, x, 0, 0)] = 600, x, 0, 0
623+
idx[HH(600, y, 0)] = 600, y, 0
560624

561625
# now check if the hashtable contents are as expected:
562626

563-
assert [idx.get(HH(0, x, 0, 0)) for x in range(400, 700)] == [(0, x, 0, 0) for x in range(400, 700)]
627+
assert [idx.get(HH(0, y, 0)) for y in range(400, 700)] == [(0, y, 0) for y in range(400, 700)]
564628

565-
assert [HH(0, x, 0, 0) in idx for x in range(400)] == [False for x in range(400)] # deleted entries
629+
assert [HH(0, y, 0) in idx for y in range(400)] == [False for y in range(400)] # deleted entries
566630

567631
# this will fail at HH(600, 259) if the bug is present.
568-
assert [idx.get(HH(600, x, 0, 0)) for x in range(330)] == [(600, x, 0, 0) for x in range(330)]
632+
assert [idx.get(HH(600, y, 0)) for y in range(330)] == [(600, y, 0) for y in range(330)]

src/borg/testsuite/repository.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ def open_index(self):
714714

715715
def corrupt_object(self, id_):
716716
idx = self.open_index()
717-
segment, offset, _, _ = idx[H(id_)]
717+
segment, offset, _ = idx[H(id_)]
718718
with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd:
719719
fd.seek(offset)
720720
fd.write(b'BOOM')

0 commit comments

Comments
 (0)