Skip to content

Commit 8211435

Browse files
committed
removed the unused functions
1 parent 08d5b15 commit 8211435

File tree

4 files changed

+22
-331
lines changed

4 files changed

+22
-331
lines changed

examples/cid/cid_usage.py

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
from libp2p.bitswap.cid import (
1010
CODEC_DAG_PB,
1111
CODEC_RAW,
12-
analyze_cid_collection,
1312
compute_cid_v0,
1413
compute_cid_v1,
15-
detect_cid_encoding_format,
16-
recompute_cid_from_data,
14+
compute_cid_v1_obj,
15+
parse_cid_codec,
16+
parse_cid_version,
1717
verify_cid,
1818
)
1919

@@ -44,33 +44,17 @@ def main() -> None:
4444
("CIDv1 raw", cid_v1_raw),
4545
("CIDv1 dag-pb", cid_v1_dag_pb),
4646
]:
47-
info = detect_cid_encoding_format(cid)
48-
if "error" in info:
49-
logger.info("%s: %s", name, info["error"])
50-
else:
51-
logger.info(
52-
"%s: version=%s, codec=%s, encoding=%s, is_breaking=%s",
53-
name,
54-
info["version"],
55-
info["codec_name"],
56-
info["encoding"],
57-
info["is_breaking"],
58-
)
59-
60-
try:
61-
recomputed = recompute_cid_from_data(cid_v1_raw, data)
62-
logger.info("Recomputed CID matches original: %s", cid_v1_raw == recomputed)
63-
except ValueError as e:
64-
logger.info("Recompute error: %s", e)
65-
66-
collection = [cid_v0, cid_v1_raw, cid_v1_dag_pb]
67-
analysis = analyze_cid_collection(collection)
47+
logger.info(
48+
"%s: version=%s, codec=%s",
49+
name,
50+
parse_cid_version(cid),
51+
parse_cid_codec(cid),
52+
)
53+
54+
cid_obj = compute_cid_v1_obj(data, codec=CODEC_RAW)
55+
logger.info("CIDv1 object text form: %s", cid_obj)
6856
logger.info(
69-
"Collection: total=%s, backward_compatible=%s, breaking_change=%s, by_codec=%s",
70-
analysis["total"],
71-
analysis["backward_compatible"],
72-
analysis["breaking_change"],
73-
analysis["by_codec"],
57+
"CIDv1 object bytes == compute_cid_v1 bytes: %s", cid_obj.buffer == cid_v1_raw
7458
)
7559

7660

libp2p/bitswap/cid.py

Lines changed: 7 additions & 227 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
"""
22
CID (Content Identifier) utilities for Bitswap.
33
4-
This module provides simplified CID encoding/decoding for different Bitswap
5-
protocol versions.
6-
Note: This is a simplified implementation for demonstration. In production,
7-
use a proper CID library like py-cid or multiformats.
4+
This module provides py-cid-backed CID encoding/decoding helpers for Bitswap.
5+
Byte-returning functions are preserved for compatibility with existing callers,
6+
and object-returning variants are provided for new code paths.
87
98
====================================
109
IMPORTANT: Breaking Change in v1.0
@@ -23,7 +22,6 @@
2322
"""
2423

2524
import hashlib
26-
from typing import Any
2725

2826
from cid import CIDv0, CIDv1, V0Builder, V1Builder, from_string, make_cid
2927
from cid.prefix import Prefix
@@ -42,13 +40,6 @@
4240
CIDObject = CIDv0 | CIDv1
4341

4442

45-
def _compute_multihash_sha256(data: bytes) -> bytes:
46-
"""Compute multihash (SHA2-256) for data."""
47-
digest = hashlib.sha256(data).digest()
48-
# Multihash format: <hash-type><hash-length><hash-digest>
49-
return bytes([int(HASH_SHA256), len(digest)]) + digest
50-
51-
5243
def _normalise_codec(codec: Code | str | int) -> Code:
5344
"""Normalise codec input to a Code object with validation."""
5445
if isinstance(codec, Code):
@@ -59,41 +50,14 @@ def _normalise_codec(codec: Code | str | int) -> Code:
5950
raise ValueError(f"Unknown codec: {codec}")
6051
return Code.from_string(codec)
6152

62-
# Integer code path
53+
# Integer code path.
6354
normalised = Code(codec)
6455
# If the name is unknown, the code is not registered
6556
if normalised.name in ("<unknown>", "", None):
6657
raise ValueError(f"Unknown codec code: 0x{codec:x}")
6758
return normalised
6859

6960

70-
def _parse_varint(data: bytes, offset: int = 0) -> tuple[int, int] | None:
71-
"""
72-
Parse an unsigned varint from data starting at offset.
73-
74-
Returns:
75-
(value, length) on success, or None on failure.
76-
77-
"""
78-
value = 0
79-
shift = 0
80-
length = 0
81-
82-
# Varints for multicodec are at most 10 bytes.
83-
for i in range(offset, min(len(data), offset + 10)):
84-
byte = data[i]
85-
value |= (byte & 0x7F) << shift
86-
length += 1
87-
88-
if (byte & 0x80) == 0:
89-
# MSB clear => last byte of varint
90-
return value, length
91-
92-
shift += 7
93-
94-
return None
95-
96-
9761
def compute_cid_v0_obj(data: bytes) -> CIDv0:
9862
"""Compute a CIDv0 object for data."""
9963
return V0Builder().sum(data)
@@ -124,21 +88,9 @@ def compute_cid_v1_obj(data: bytes, codec: Code | str | int = CODEC_RAW) -> CIDv
12488

12589
def compute_cid_v1(data: bytes, codec: Code | str | int = CODEC_RAW) -> bytes:
12690
"""
127-
Compute a CIDv1 for data using py-cid builders.
128-
129-
CIDv1 format: <version><codec-varint><multihash>
130-
131-
.. note:: **Breaking Change - CIDv1 Encoding Format**
132-
This function now uses varint-encoded multicodec prefixes via `add_prefix()`.
133-
Previously, CIDv1 used a single-byte codec representation.
134-
135-
**Compatibility:**
136-
- Codecs < 128 (e.g., raw=0x55, dag-pb=0x70): Formats are **identical**
137-
(backward compatible, no migration needed).
138-
- Codecs >= 128: Formats **differ** (breaking change, requires migration).
91+
Compute a CIDv1 for data and return raw CID bytes.
13992
140-
See :func:`detect_cid_encoding_version` and :func:`migrate_legacy_cid`
141-
for migration utilities.
93+
This is the compatibility wrapper over :func:`compute_cid_v1_obj`.
14294
14395
Args:
14496
data: The data to hash
@@ -184,7 +136,7 @@ def get_cid_prefix(cid: bytes) -> bytes:
184136

185137
def reconstruct_cid_from_prefix_and_data(prefix: bytes, data: bytes) -> bytes:
186138
"""
187-
Reconstruct a CID from prefix and data using py-multihash v3 API.
139+
Reconstruct a CID from prefix and data using py-cid Prefix APIs.
188140
189141
Used when receiving v1.1.0+ Block messages with prefix.
190142
@@ -364,175 +316,3 @@ def parse_cid_codec(cid: bytes) -> str:
364316
return DAG_PB.name
365317

366318
return cid_obj.codec
367-
368-
369-
# ============================================================================
370-
# Migration and Version Encoding Detection Utilities
371-
# ============================================================================
372-
373-
374-
def detect_cid_encoding_format(cid: bytes) -> dict[str, Any]:
375-
"""
376-
Detect CID encoding format and codec details.
377-
378-
Returns:
379-
{
380-
'version': 0 or 1,
381-
'codec_value': int,
382-
'codec_name': str,
383-
'encoding': 'legacy' or 'varint',
384-
'needs_migration': bool,
385-
'is_breaking': bool
386-
}
387-
388-
"""
389-
from multicodec import Code
390-
391-
if len(cid) < 2:
392-
return {"version": None, "error": "CID too short"}
393-
394-
version = cid[0]
395-
396-
if version == 0x12: # CIDv0 (multihash only)
397-
return {
398-
"version": 0,
399-
"codec_value": 0x70, # dag-pb
400-
"codec_name": "dag-pb",
401-
"encoding": "legacy",
402-
"needs_migration": False,
403-
"is_breaking": False,
404-
}
405-
406-
if version != 0x01: # Not CIDv1
407-
return {"version": version, "error": "Unknown CID version"}
408-
409-
# Parse codec value from varint
410-
codec_value = 0
411-
shift = 0
412-
codec_length = 0
413-
414-
for i in range(1, min(len(cid), 11)): # Max varint is 10 bytes
415-
byte = cid[i]
416-
codec_value |= (byte & 0x7F) << shift
417-
shift += 7
418-
codec_length += 1
419-
420-
if (byte & 0x80) == 0: # Last byte
421-
break
422-
423-
# Get codec name
424-
try:
425-
codec = Code(codec_value)
426-
codec_name = str(codec)
427-
except Exception:
428-
codec_name = f"0x{codec_value:x}"
429-
430-
# Determine if this uses legacy or varint encoding
431-
# Legacy: single byte for all codecs
432-
# Varint: matches codec_value encoding
433-
is_breaking = codec_value >= 128
434-
435-
# For codecs < 128, legacy and varint are identical (both 1 byte)
436-
# For codecs ≥ 128, we can't definitively tell without the original data
437-
# But we assume varint if properly implemented
438-
encoding = "varint" if codec_length > 1 else "legacy-or-varint"
439-
440-
return {
441-
"version": 1,
442-
"codec_value": codec_value,
443-
"codec_name": codec_name,
444-
"codec_length": codec_length,
445-
"encoding": encoding,
446-
"needs_migration": False, # Can't migrate without data
447-
"is_breaking": is_breaking,
448-
}
449-
450-
451-
def recompute_cid_from_data(old_cid: bytes, data: bytes) -> bytes:
452-
"""
453-
Recompute CID with proper varint encoding.
454-
455-
Note: Original data is required because CIDs use cryptographic hashes
456-
(one-way functions that cannot be reversed).
457-
458-
Args:
459-
old_cid: Existing CID (used to extract codec)
460-
data: Original data that was hashed
461-
462-
Returns:
463-
New CID with proper varint-encoded codec
464-
465-
Raises:
466-
ValueError: If old_cid is invalid or doesn't match data
467-
468-
"""
469-
# Detect old CID format
470-
info = detect_cid_encoding_format(old_cid)
471-
472-
if info.get("error"):
473-
raise ValueError(f"Invalid CID: {info['error']}")
474-
475-
# First, ensure the provided data actually matches the original CID.
476-
# If this fails, the caller is not supplying the correct original data.
477-
if not verify_cid(old_cid, data):
478-
raise ValueError("Recomputed CID does not verify with provided data")
479-
480-
# Extract codec from the old CID encoding
481-
codec_value = info["codec_value"]
482-
483-
# Recompute with proper varint encoding
484-
new_cid = compute_cid_v1(data, codec=codec_value)
485-
486-
# Sanity check: new CID must also verify against the same data
487-
if not verify_cid(new_cid, data):
488-
raise ValueError("Recomputed CID does not verify with provided data")
489-
490-
return new_cid
491-
492-
493-
def analyze_cid_collection(cids: list[bytes]) -> dict[str, Any]:
494-
"""
495-
Analyze a collection of CIDs for migration impact.
496-
497-
Returns:
498-
{
499-
'total': int,
500-
'backward_compatible': int,
501-
'breaking_change': int,
502-
'by_codec': {codec_name: count},
503-
'breaking_cids': [bytes]
504-
}
505-
506-
"""
507-
results: dict[str, Any] = {
508-
"total": len(cids),
509-
"backward_compatible": 0,
510-
"breaking_change": 0,
511-
"by_codec": {},
512-
"breaking_cids": [],
513-
}
514-
515-
by_codec: dict[str, int] = {}
516-
breaking_cids: list[bytes] = []
517-
518-
for cid in cids:
519-
try:
520-
info = detect_cid_encoding_format(cid)
521-
522-
if info.get("error"):
523-
continue
524-
525-
codec_name = info["codec_name"]
526-
by_codec[codec_name] = by_codec.get(codec_name, 0) + 1
527-
528-
if info["is_breaking"]:
529-
results["breaking_change"] += 1
530-
breaking_cids.append(cid)
531-
else:
532-
results["backward_compatible"] += 1
533-
except Exception:
534-
continue
535-
536-
results["by_codec"] = by_codec
537-
results["breaking_cids"] = breaking_cids
538-
return results

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ dependencies = [
3737
"py-multibase>=2.0.0",
3838
"py-multihash>=3.0.0",
3939
"py-multicodec>=1.0.0",
40+
"py-cid>=0.5.0",
4041
"pynacl>=1.3.0",
4142
"rpcudp>=3.0.0",
4243
"trio-typing>=0.0.4",

0 commit comments

Comments
 (0)