Skip to content

Commit 2d46808

Browse files
authored
optimize memory for DSSE v0.0.1 processing (#2766)
This change reduces the memory footprint of DSSE v0.0.1 entries by:

- Extracting index keys (subjects and SLSA materials) during initial unmarshaling
- Clearing the large base64-encoded envelope and decoded payload early
- Avoiding redundant payload decoding and JSON unmarshaling in IndexKeys

These optimizations prevent out-of-memory errors when processing large attestations.

Signed-off-by: Hayden <8418760+Hayden-IO@users.noreply.github.com>
1 parent bd11cb9 commit 2d46808

File tree

2 files changed

+60
-54
lines changed

2 files changed

+60
-54
lines changed

pkg/types/dsse/v0.0.1/entry.go

Lines changed: 52 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,24 @@ func init() {
5757
type V001Entry struct {
5858
DSSEObj models.DSSEV001Schema
5959
env *dsse.Envelope
60+
61+
// memory optimization: extract and store these during Unmarshal
62+
// so we can clear the huge payload from memory
63+
extractedIndexKeys []string
64+
isInsertable bool
65+
}
66+
67+
type indexKeyExtract struct {
68+
Subject []struct {
69+
Digest map[string]string `json:"digest"`
70+
} `json:"subject"`
71+
Predicate json.RawMessage `json:"predicate"`
72+
}
73+
74+
type materialsExtract struct {
75+
Materials []struct {
76+
Digest map[string]string `json:"digest"`
77+
} `json:"materials"`
6078
}
6179

6280
func (v V001Entry) APIVersion() string {
@@ -108,58 +126,13 @@ func (v V001Entry) IndexKeys() ([]string, error) {
108126
return result, nil
109127
}
110128

111-
switch v.env.PayloadType {
112-
case in_toto.PayloadType:
113-
114-
if v.env.Payload == "" {
115-
log.Logger.Info("DSSEObj DSSE payload is empty")
116-
return result, nil
117-
}
118-
decodedPayload, err := v.env.DecodeB64Payload()
119-
if err != nil {
120-
return result, fmt.Errorf("could not decode envelope payload: %w", err)
121-
}
122-
statement, err := parseStatement(decodedPayload)
123-
if err != nil {
124-
return result, err
125-
}
126-
for _, s := range statement.Subject {
127-
for alg, ds := range s.Digest {
128-
result = append(result, alg+":"+ds)
129-
}
130-
}
131-
// Not all in-toto statements will contain a SLSA provenance predicate.
132-
// See https://github.com/in-toto/attestation/blob/main/spec/README.md#predicate
133-
// for other predicates.
134-
if predicate, err := parseSlsaPredicate(decodedPayload); err == nil {
135-
if predicate.Predicate.Materials != nil {
136-
for _, s := range predicate.Predicate.Materials {
137-
for alg, ds := range s.Digest {
138-
result = append(result, alg+":"+ds)
139-
}
140-
}
141-
}
142-
}
143-
default:
129+
if v.env.PayloadType == in_toto.PayloadType {
130+
result = append(result, v.extractedIndexKeys...)
131+
} else {
144132
log.Logger.Infof("Unknown DSSE envelope payloadType: %s", v.env.PayloadType)
145133
}
146-
return result, nil
147-
}
148-
149-
func parseStatement(p []byte) (*in_toto.Statement, error) {
150-
ps := in_toto.Statement{}
151-
if err := json.Unmarshal(p, &ps); err != nil {
152-
return nil, err
153-
}
154-
return &ps, nil
155-
}
156134

157-
func parseSlsaPredicate(p []byte) (*in_toto.ProvenanceStatement, error) {
158-
predicate := in_toto.ProvenanceStatement{}
159-
if err := json.Unmarshal(p, &predicate); err != nil {
160-
return nil, err
161-
}
162-
return &predicate, nil
135+
return result, nil
163136
}
164137

165138
// DecodeEntry performs direct decode into the provided output pointer
@@ -344,6 +317,28 @@ func (v *V001Entry) Unmarshal(pe models.ProposedEntry) error {
344317
return err
345318
}
346319

320+
// extraction of index keys - done here so we can clear the huge strings from memory
321+
if env.PayloadType == in_toto.PayloadType {
322+
var extract indexKeyExtract
323+
if err := json.Unmarshal(decodedPayload, &extract); err == nil {
324+
for _, s := range extract.Subject {
325+
for alg, ds := range s.Digest {
326+
v.extractedIndexKeys = append(v.extractedIndexKeys, alg+":"+ds)
327+
}
328+
}
329+
if extract.Predicate != nil {
330+
var materials materialsExtract
331+
if err := json.Unmarshal(extract.Predicate, &materials); err == nil {
332+
for _, m := range materials.Materials {
333+
for alg, ds := range m.Digest {
334+
v.extractedIndexKeys = append(v.extractedIndexKeys, alg+":"+ds)
335+
}
336+
}
337+
}
338+
}
339+
}
340+
}
341+
347342
payloadHash := sha256.Sum256(decodedPayload)
348343
dsseObj.PayloadHash = &models.DSSEV001SchemaPayloadHash{
349344
Algorithm: conv.Pointer(models.DSSEV001SchemaPayloadHashAlgorithmSha256),
@@ -359,6 +354,11 @@ func (v *V001Entry) Unmarshal(pe models.ProposedEntry) error {
359354
// we've gotten through all processing without error, now update the object we're unmarshalling into
360355
v.DSSEObj = *dsseObj
361356
v.env = env
357+
v.isInsertable = true
358+
359+
// memory optimization: clear huge strings/buffers
360+
v.env.Payload = ""
361+
v.DSSEObj.ProposedContent = nil
362362

363363
return nil
364364
}
@@ -533,6 +533,9 @@ func (v V001Entry) ArtifactHash() (string, error) {
533533
}
534534

535535
func (v V001Entry) Insertable() (bool, error) {
536+
if v.isInsertable {
537+
return true, nil
538+
}
536539
if v.DSSEObj.ProposedContent == nil {
537540
return false, errors.New("missing proposed content")
538541
}

pkg/types/dsse/v0.0.1/entry_test.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,7 @@ func TestV001Entry_Unmarshal(t *testing.T) {
370370
if !reflect.DeepEqual(got, want) {
371371
t.Errorf("V001Entry.IndexKeys() = %v, want %v", got, want)
372372
}
373-
payloadBytes, _ := v.env.DecodeB64Payload()
374-
payloadSha := sha256.Sum256(payloadBytes)
375-
payloadHash := hex.EncodeToString(payloadSha[:])
373+
payloadHash := hex.EncodeToString(h[:])
376374

377375
canonicalBytes, err := v.Canonicalize(context.Background())
378376
if err != nil {
@@ -524,11 +522,16 @@ func TestV001Entry_IndexKeys(t *testing.T) {
524522
keyHash := sha256.Sum256(*sig.Verifier)
525523
want = append(want, "sha256:"+hex.EncodeToString(keyHash[:]))
526524
}
527-
decodedPayload, _ := base64.StdEncoding.DecodeString(v.env.Payload)
525+
spec := pe.Spec.(*models.DSSEV001Schema)
526+
env := &dsse.Envelope{}
527+
if err := json.Unmarshal([]byte(*spec.ProposedContent.Envelope), env); err != nil {
528+
t.Error(err)
529+
}
530+
decodedPayload, _ := env.DecodeB64Payload()
528531
h := sha256.Sum256(decodedPayload)
529532
want = append(want, "sha256:"+hex.EncodeToString(h[:]))
530533

531-
envHashBytes := sha256.Sum256([]byte(*v.DSSEObj.ProposedContent.Envelope))
534+
envHashBytes := sha256.Sum256([]byte(*spec.ProposedContent.Envelope))
532535
envHash := hex.EncodeToString(envHashBytes[:])
533536

534537
hashkey := strings.ToLower(fmt.Sprintf("sha256:%s", envHash))

0 commit comments

Comments
 (0)