From 11d7a476a3b141f823544fc30571a726fbce7817 Mon Sep 17 00:00:00 2001 From: chlins Date: Wed, 19 Nov 2025 11:57:29 +0800 Subject: [PATCH] refactor(xattr): extract xattr operations to dedicated package Signed-off-by: chlins --- pkg/backend/build/builder.go | 164 +++++++++++++---------------------- pkg/backend/extract.go | 14 ++- pkg/backend/pull.go | 14 ++- pkg/codec/raw.go | 91 +++++++++++++++++++ pkg/xattr/xattr.go | 61 +++++++++++++ pkg/xattr/xattr_test.go | 65 ++++++++++++++ 6 files changed, 301 insertions(+), 108 deletions(-) create mode 100644 pkg/xattr/xattr.go create mode 100644 pkg/xattr/xattr_test.go diff --git a/pkg/backend/build/builder.go b/pkg/backend/build/builder.go index 6b7c0d25..d971589a 100644 --- a/pkg/backend/build/builder.go +++ b/pkg/backend/build/builder.go @@ -36,13 +36,13 @@ import ( spec "github.com/opencontainers/image-spec/specs-go" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" buildconfig "github.com/modelpack/modctl/pkg/backend/build/config" "github.com/modelpack/modctl/pkg/backend/build/hooks" "github.com/modelpack/modctl/pkg/backend/build/interceptor" pkgcodec "github.com/modelpack/modctl/pkg/codec" "github.com/modelpack/modctl/pkg/storage" + "github.com/modelpack/modctl/pkg/xattr" ) // OutputType defines the type of output to generate. @@ -288,73 +288,80 @@ func BuildModelConfig(modelConfig *buildconfig.Model, layers []ocispec.Descripto // computeDigestAndSize computes the digest and size for the encoded content, using xattrs if available. func computeDigestAndSize(mediaType, path, workDirPath string, info os.FileInfo, reader io.Reader, codec pkgcodec.Codec) (io.Reader, string, int64, error) { - var digest string - var size int64 - + // Try to retrieve valid digest from xattrs cache. if pkgcodec.IsRawMediaType(mediaType) { - // By default let's assume the mtime and size has changed. - mtimeChanged := true - sizeChanged := true - - if mtime, err := getXattr(path, xattrMtimeKey(mediaType)); err == nil { - if string(mtime) == fmt.Sprintf("%d", info.ModTime().UnixNano()) { - mtimeChanged = false - } + if digest, size, ok := retrieveCachedDigest(path, info); ok { + return reader, digest, size, nil } + } - if sizeBytes, err := getXattr(path, xattrSizeKey(mediaType)); err == nil { - if parsedSize, err := strconv.ParseInt(string(sizeBytes), 10, 64); err == nil { - if parsedSize == info.Size() { - sizeChanged = false - } - } - } + logrus.Infof("builder: calculating digest for file %s", path) - if !mtimeChanged && !sizeChanged { - // Check xattrs for cached digest and size. - if sha256, err := getXattr(path, xattrSha256Key(mediaType)); err == nil { - digest = string(sha256) - logrus.Infof("builder: retrieved sha256 hash from xattr for file %s [digest: %s]", path, digest) - } - - if sizeBytes, err := getXattr(path, xattrSizeKey(mediaType)); err == nil { - if parsedSize, err := strconv.ParseInt(string(sizeBytes), 10, 64); err == nil { - size = parsedSize - logrus.Infof("builder: retrieved size from xattr for file %s [size: %d]", path, size) - } - } - } + hash := sha256.New() + size, err := io.Copy(hash, reader) + if err != nil { + return reader, "", 0, fmt.Errorf("failed to copy content to hash: %w", err) } + digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) - // Compute digest and size if not retrieved from xattrs. - if digest == "" { - logrus.Infof("builder: calculating digest for file %s", path) - var err error - hash := sha256.New() - size, err = io.Copy(hash, reader) - if err != nil { - return reader, "", 0, fmt.Errorf("failed to copy content to hash: %w", err) - } - digest = fmt.Sprintf("sha256:%x", hash.Sum(nil)) - logrus.Infof("builder: calculated digest for file %s [digest: %s]", path, digest) + logrus.Infof("builder: calculated digest for file %s [digest: %s]", path, digest) - // Reset reader - reader, err = resetReader(reader, path, workDirPath, codec) - if err != nil { - return reader, "", 0, err - } + // Reset reader for subsequent use. + reader, err = resetReader(reader, path, workDirPath, codec) + if err != nil { + return reader, "", 0, err + } - // Store xattrs if raw media type. - if pkgcodec.IsRawMediaType(mediaType) { - setXattr(path, xattrMtimeKey(mediaType), fmt.Appendf([]byte{}, "%d", info.ModTime().UnixNano())) - setXattr(path, xattrSha256Key(mediaType), []byte(digest)) - setXattr(path, xattrSizeKey(mediaType), fmt.Appendf([]byte{}, "%d", size)) + // Update xattrs cache. + if pkgcodec.IsRawMediaType(mediaType) { + if err := updateCachedDigest(path, info.ModTime().UnixNano(), size, digest); err != nil { + logrus.Warnf("builder: failed to update xattrs for file %s: %s", path, err) } } return reader, digest, size, nil } +// retrieveCachedDigest checks if mtime and size match, then returns the cached digest. +func retrieveCachedDigest(path string, info os.FileInfo) (string, int64, bool) { + mtimeData, err := xattr.Get(path, xattr.MakeKey(xattr.KeyMtime)) + if err != nil || string(mtimeData) != strconv.FormatInt(info.ModTime().UnixNano(), 10) { + return "", 0, false + } + + sizeData, err := xattr.Get(path, xattr.MakeKey(xattr.KeySize)) + if err != nil { + return "", 0, false + } + cachedSize, err := strconv.ParseInt(string(sizeData), 10, 64) + if err != nil || cachedSize != info.Size() { + return "", 0, false + } + + digestData, err := xattr.Get(path, xattr.MakeKey(xattr.KeySha256)) + if err != nil { + return "", 0, false + } + + digest := string(digestData) + logrus.Infof("builder: retrieved from xattr cache for file %s [digest: %s]", path, digest) + return digest, cachedSize, true +} + +// updateCachedDigest writes mtime, size, and digest to xattrs. +func updateCachedDigest(path string, mtime, size int64, digest string) error { + if err := xattr.Set(path, xattr.MakeKey(xattr.KeyMtime), []byte(strconv.FormatInt(mtime, 10))); err != nil { + return err + } + if err := xattr.Set(path, xattr.MakeKey(xattr.KeySha256), []byte(digest)); err != nil { + return err + } + if err := xattr.Set(path, xattr.MakeKey(xattr.KeySize), []byte(strconv.FormatInt(size, 10))); err != nil { + return err + } + return nil +} + // resetReader resets the reader to the beginning or re-encodes if not seekable. func resetReader(reader io.Reader, path, workDirPath string, codec pkgcodec.Codec) (io.Reader, error) { if seeker, ok := reader.(io.ReadSeeker); ok { @@ -442,52 +449,3 @@ func getFileMetadata(path string) (modelspec.FileMetadata, error) { return metadata, nil } - -func xattrSha256Key(mediaType string) string { - // Uniformity between linux and mac platforms is simplified by adding the prefix 'user.', - // because the key may be unlimited under mac, - // but on linux, in some cases, the user can only manipulate the user space. - return fmt.Sprintf("user.%s.sha256", mediaType) -} - -func xattrSizeKey(mediaType string) string { - // Uniformity between linux and mac platforms is simplified by adding the prefix 'user.', - // because the key may be unlimited under mac, - // but on linux, in some cases, the user can only manipulate the user space. - return fmt.Sprintf("user.%s.size", mediaType) -} - -func xattrMtimeKey(mediaType string) string { - // Uniformity between linux and mac platforms is simplified by adding the prefix 'user.', - // because the key may be unlimited under mac, - // but on linux, in some cases, the user can only manipulate the user space. - return fmt.Sprintf("user.%s.mtime", mediaType) -} - -// getXattr retrieves an xattr value for a given key. -func getXattr(path, key string) ([]byte, error) { - var value []byte - sz, err := unix.Getxattr(path, key, value) - if err != nil { - logrus.Warnf("builder: failed to get xattr %s for file %s: %v", key, path, err) - return nil, err - } - - value = make([]byte, sz) - _, err = unix.Getxattr(path, key, value) - if err != nil { - logrus.Warnf("builder: failed to get xattr %s for file %s: %v", key, path, err) - return nil, err - } - - return value, nil -} - -// setXattr sets an xattr value for a given key. -func setXattr(path, key string, value []byte) { - if err := unix.Setxattr(path, key, value, 0); err != nil { - logrus.Warnf("builder: failed to set xattr %s for file %s: %v", key, path, err) - } else { - logrus.Infof("builder: set xattr %s for file %s: %s", key, path, string(value)) - } -} diff --git a/pkg/backend/extract.go b/pkg/backend/extract.go index 29cfc3d2..fc36cd51 100644 --- a/pkg/backend/extract.go +++ b/pkg/backend/extract.go @@ -20,6 +20,7 @@ import ( "bufio" "context" "encoding/json" + "errors" "fmt" "io" @@ -29,7 +30,7 @@ import ( "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" - "github.com/modelpack/modctl/pkg/codec" + pkgcodec "github.com/modelpack/modctl/pkg/codec" "github.com/modelpack/modctl/pkg/config" "github.com/modelpack/modctl/pkg/storage" ) @@ -89,6 +90,11 @@ func exportModelArtifact(ctx context.Context, store storage.Storage, manifest oc bufferedReader := bufio.NewReaderSize(reader, defaultBufferSize) if err := extractLayer(layer, cfg.Output, bufferedReader); err != nil { + if errors.Is(err, pkgcodec.ErrAlreadyUpToDate) { + logrus.Debugf("extract: skipped layer %s because target is up-to-date", layer.Digest.String()) + return nil + } + return fmt.Errorf("failed to extract layer %s: %w", layer.Digest.String(), err) } @@ -118,12 +124,16 @@ func extractLayer(desc ocispec.Descriptor, outputDir string, reader io.Reader) e } - codec, err := codec.New(codec.TypeFromMediaType(desc.MediaType)) + codec, err := pkgcodec.New(pkgcodec.TypeFromMediaType(desc.MediaType)) if err != nil { return fmt.Errorf("failed to create codec for media type %s: %w", desc.MediaType, err) } if err := codec.Decode(outputDir, filepath, reader, desc); err != nil { + if errors.Is(err, pkgcodec.ErrAlreadyUpToDate) { + return err + } + return fmt.Errorf("failed to decode the layer %s to output directory: %w", desc.Digest.String(), err) } diff --git a/pkg/backend/pull.go b/pkg/backend/pull.go index 7c94b755..008ba5a6 100644 --- a/pkg/backend/pull.go +++ b/pkg/backend/pull.go @@ -19,6 +19,7 @@ package backend import ( "context" "encoding/json" + "errors" "fmt" "io" @@ -30,6 +31,7 @@ import ( internalpb "github.com/modelpack/modctl/internal/pb" "github.com/modelpack/modctl/pkg/backend/remote" + "github.com/modelpack/modctl/pkg/codec" "github.com/modelpack/modctl/pkg/config" "github.com/modelpack/modctl/pkg/storage" ) @@ -255,9 +257,15 @@ func pullAndExtractFromRemote(ctx context.Context, pb *internalpb.ProgressBar, p reader = io.TeeReader(reader, hash) if err := extractLayer(desc, outputDir, reader); err != nil { - err = fmt.Errorf("failed to extract the blob %s to output directory: %w", desc.Digest.String(), err) - pb.Abort(desc.Digest.String(), err) - return err + if errors.Is(err, codec.ErrAlreadyUpToDate) { + logrus.Debugf("pull: skipped extracting blob %s because target is up-to-date", desc.Digest.String()) + pb.Complete(desc.Digest.String(), fmt.Sprintf("%s %s", internalpb.NormalizePrompt("Skipped blob"), desc.Digest.String())) + return nil + } + + wrapped := fmt.Errorf("failed to extract the blob %s to output directory: %w", desc.Digest.String(), err) + pb.Abort(desc.Digest.String(), wrapped) + return wrapped } // validate the digest of the blob. diff --git a/pkg/codec/raw.go b/pkg/codec/raw.go index 985c2f30..e16a85de 100644 --- a/pkg/codec/raw.go +++ b/pkg/codec/raw.go @@ -18,15 +18,24 @@ package codec import ( "encoding/json" + "errors" + "fmt" "io" "os" "path/filepath" + "strconv" legacymodelspec "github.com/dragonflyoss/model-spec/specs-go/v1" modelspec "github.com/modelpack/model-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/sirupsen/logrus" + + "github.com/modelpack/modctl/pkg/xattr" ) +// ErrAlreadyUpToDate is returned when the target output already matches the descriptor metadata. +var ErrAlreadyUpToDate = errors.New("codec: target already up-to-date") + // raw is a codec that for raw files. type raw struct{} @@ -45,6 +54,70 @@ func (r *raw) Encode(targetFilePath, workDirPath string) (io.Reader, error) { return os.Open(targetFilePath) } +// fileNeedsUpdate checks if the file exists and whether its size and digest match. +// Returns true if the file needs to be updated/written, false if it can be skipped. +func (r *raw) fileNeedsUpdate(fullPath string, desc ocispec.Descriptor) (bool, error) { + // Check if file exists. + info, err := os.Stat(fullPath) + if err != nil { + if os.IsNotExist(err) { + // File doesn't exist, needs to be written. + return true, nil + } + + // Other error occurred. + return true, err + } + + // File exists, check size first (quick check). + if info.Size() != desc.Size { + return true, nil + } + + // Check xattrs for stored size and digest. + sizeKey := xattr.MakeKey(xattr.KeySize) + storedSize, err := xattr.Get(fullPath, sizeKey) + if err != nil { + // xattr not found or error reading, needs update. + return true, nil + } + + digestKey := xattr.MakeKey(xattr.KeySha256) + storedDigest, err := xattr.Get(fullPath, digestKey) + if err != nil { + // xattr not found or error reading, needs update. + return true, nil + } + + // Compare stored values with descriptor. + expectedSize := strconv.FormatInt(desc.Size, 10) + expectedDigest := desc.Digest.String() + + if string(storedSize) == expectedSize && string(storedDigest) == expectedDigest { + // File is up-to-date, no need to write. + logrus.Debugf("file %s is up-to-date", fullPath) + return false, nil + } + + // Values don't match, needs update. + return true, nil +} + +// storeFileMetadata stores the size and digest in xattrs. +func (r *raw) storeFileMetadata(fullPath string, desc ocispec.Descriptor) error { + sizeKey := xattr.MakeKey(xattr.KeySize) + if err := xattr.Set(fullPath, sizeKey, []byte(strconv.FormatInt(desc.Size, 10))); err != nil { + return fmt.Errorf("failed to set size xattr: %w", err) + } + + digestKey := xattr.MakeKey(xattr.KeySha256) + if err := xattr.Set(fullPath, digestKey, []byte(desc.Digest.String())); err != nil { + return fmt.Errorf("failed to set digest xattr: %w", err) + } + + return nil +} + // Decode reads the input reader and decodes the data into the output path. func (r *raw) Decode(outputDir, filePath string, reader io.Reader, desc ocispec.Descriptor) error { fullPath := filepath.Join(outputDir, filePath) @@ -53,6 +126,18 @@ func (r *raw) Decode(outputDir, filePath string, reader io.Reader, desc ocispec. return err } + // Check if file needs update. + needsUpdate, err := r.fileNeedsUpdate(fullPath, desc) + if err != nil { + logrus.Errorf("failed to check whether the file %s needs to be updated: %s", fullPath, err) + needsUpdate = true + } + + if !needsUpdate { + return ErrAlreadyUpToDate + } + + // File needs to be written/updated. file, err := os.Create(fullPath) if err != nil { return err @@ -95,5 +180,11 @@ func (r *raw) Decode(outputDir, filePath string, reader io.Reader, desc ocispec. } } + // Store size and digest in xattrs after successful write. + // Ignore errors as xattrs might not be supported on all filesystems. + if err := r.storeFileMetadata(fullPath, desc); err != nil { + logrus.Errorf("failed to store file metadata of %s: %s", fullPath, err) + } + return nil } diff --git a/pkg/xattr/xattr.go b/pkg/xattr/xattr.go new file mode 100644 index 00000000..408f9414 --- /dev/null +++ b/pkg/xattr/xattr.go @@ -0,0 +1,61 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package xattr + +import ( + "strings" + + "golang.org/x/sys/unix" +) + +const ( + // Prefix for all xattr keys to ensure compatibility across platforms. + // Linux requires "user." prefix for user-space xattrs, while macOS allows any key. + Prefix = "user." + + // Common xattr keys. + KeySize = "modctl.size" + KeyMtime = "modctl.mtime" + KeySha256 = "modctl.sha256" +) + +// Get retrieves an xattr value for a given key. +func Get(path, key string) ([]byte, error) { + var value []byte + sz, err := unix.Getxattr(path, key, value) + if err != nil { + return nil, err + } + + value = make([]byte, sz) + _, err = unix.Getxattr(path, key, value) + if err != nil { + return nil, err + } + + return value, nil +} + +// Set sets an xattr value for a given key. +func Set(path, key string, value []byte) error { + return unix.Setxattr(path, key, value, 0) +} + +// MakeKey creates a fully-qualified xattr key with the user prefix. +func MakeKey(parts ...string) string { + return Prefix + strings.Join(parts, ".") +} diff --git a/pkg/xattr/xattr_test.go b/pkg/xattr/xattr_test.go new file mode 100644 index 00000000..ed37f01c --- /dev/null +++ b/pkg/xattr/xattr_test.go @@ -0,0 +1,65 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package xattr + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMakeKey(t *testing.T) { + assert.Equal(t, "user.modctl", MakeKey("modctl")) + assert.Equal(t, "user.modctl.size", MakeKey("modctl", "size")) + assert.Equal(t, "user.modctl.file.digest", MakeKey("modctl", "file", "digest")) +} + +func TestSetAndGet(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "test_file") + err := os.WriteFile(filePath, []byte("test"), 0644) + require.NoError(t, err) + + // Check if filesystem supports xattrs. + key := "user.test" + testValue := []byte("test value") + if err := Set(filePath, key, testValue); err != nil { + t.Skip("Filesystem does not support extended attributes") + } + + // Test set and get. + value := []byte("hello world") + err = Set(filePath, key, value) + require.NoError(t, err) + + retrieved, err := Get(filePath, key) + require.NoError(t, err) + assert.Equal(t, value, retrieved) +} + +func TestGetNonExistent(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "test_file") + err := os.WriteFile(filePath, []byte("test"), 0644) + require.NoError(t, err) + + _, err = Get(filePath, "user.nonexistent") + assert.Error(t, err) +}