From b7280f4aa965a34275759c940a282ae7a22c99ae Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Sun, 18 Dec 2022 23:43:30 +0900 Subject: [PATCH 01/15] Image2Image Encoder Encoder --- .../pipeline/AlphasCumprodCalculation.swift | 29 +++++ .../pipeline/CGImage+vImage.swift | 120 ++++++++++++++++++ swift/StableDiffusion/pipeline/Decoder.swift | 52 +------- swift/StableDiffusion/pipeline/Encoder.swift | 80 ++++++++++++ 4 files changed, 231 insertions(+), 50 deletions(-) create mode 100644 swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift create mode 100644 swift/StableDiffusion/pipeline/CGImage+vImage.swift create mode 100644 swift/StableDiffusion/pipeline/Encoder.swift diff --git a/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift b/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift new file mode 100644 index 00000000..350ba3a7 --- /dev/null +++ b/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift @@ -0,0 +1,29 @@ +// For licensing see accompanying LICENSE.md file. +// Copyright (C) 2022 Apple Inc. All Rights Reserved. + +import Foundation + +public struct AlphasCumprodCalculation { + public var sqrtAlphasCumprod: Float + public var sqrtOneMinusAlphasCumprod: Float + + public init( + sqrtAlphasCumprod: Float, + sqrtOneMinusAlphasCumprod: Float + ) { + self.sqrtAlphasCumprod = sqrtAlphasCumprod + self.sqrtOneMinusAlphasCumprod = sqrtOneMinusAlphasCumprod + } + + public init( + alphasCumprod: [Float], + timesteps: Int = 1_000, + steps: Int, + strength: Float + ) { + let tEnc = Int(strength * Float(steps)) + let initTimestep = timesteps - timesteps / steps * (steps - tEnc) + 1 + self.sqrtAlphasCumprod = alphasCumprod[initTimestep].squareRoot() + self.sqrtOneMinusAlphasCumprod = (1 - alphasCumprod[initTimestep]).squareRoot() + } +} diff --git a/swift/StableDiffusion/pipeline/CGImage+vImage.swift b/swift/StableDiffusion/pipeline/CGImage+vImage.swift new file mode 100644 index 00000000..809836e7 --- /dev/null +++ b/swift/StableDiffusion/pipeline/CGImage+vImage.swift @@ -0,0 +1,120 @@ +// For licensing see accompanying LICENSE.md file. +// Copyright (C) 2022 Apple Inc. All Rights Reserved. + +import Foundation +import Accelerate +import CoreML + +@available(iOS 16.0, macOS 13.0, *) +extension CGImage { + + typealias PixelBufferPFx1 = vImage.PixelBuffer + typealias PixelBufferP8x3 = vImage.PixelBuffer + typealias PixelBufferIFx3 = vImage.PixelBuffer + typealias PixelBufferI8x3 = vImage.PixelBuffer + + public enum ShapedArrayError: String, Swift.Error { + case wrongNumberOfChannels + case incorrectFormatsConvertingToShapedArray + case vImageConverterNotInitialized + } + + public static func fromShapedArray(_ array: MLShapedArray) throws -> CGImage { + + // array is [N,C,H,W], where C==3 + let channelCount = array.shape[1] + guard channelCount == 3 else { + throw ShapedArrayError.wrongNumberOfChannels + } + + let height = array.shape[2] + let width = array.shape[3] + + // Normalize each channel into a float between 0 and 1.0 + let floatChannels = (0.. [0.0 1.0] + cIn.multiply(by: 0.5, preBias: 1.0, postBias: 0.0, destination: cOut) + } + return cOut + } + + // Convert to interleaved and then to UInt8 + let floatImage = PixelBufferIFx3(planarBuffers: floatChannels) + let uint8Image = PixelBufferI8x3(width: width, height: height) + floatImage.convert(to:uint8Image) // maps [0.0 1.0] -> [0 255] and clips + + // Convert to uint8x3 to RGB CGImage (no alpha) + let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue) + let cgImage = uint8Image.makeCGImage(cgImageFormat: + .init(bitsPerComponent: 8, + bitsPerPixel: 3*8, + colorSpace: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: bitmapInfo)!)! + + return cgImage + } + + public var plannerRGBShapedArray: MLShapedArray { + get throws { + guard + var sourceFormat = vImage_CGImageFormat(cgImage: self), + var mediumFormat = vImage_CGImageFormat( + bitsPerComponent: 8 * MemoryLayout.size, + bitsPerPixel: 8 * MemoryLayout.size * 4, + colorSpace: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.first.rawValue)), + let width = vImagePixelCount(exactly: self.width), + let height = vImagePixelCount(exactly: self.height) + else { + throw ShapedArrayError.incorrectFormatsConvertingToShapedArray + } + + var sourceImageBuffer = try vImage_Buffer(cgImage: self) + + var mediumDesination = try vImage_Buffer(width: Int(width), height: Int(height), bitsPerPixel: mediumFormat.bitsPerPixel) + + let converter = vImageConverter_CreateWithCGImageFormat( + &sourceFormat, + &mediumFormat, + nil, + vImage_Flags(kvImagePrintDiagnosticsToConsole), + nil) + + guard let converter = converter?.takeRetainedValue() else { + throw ShapedArrayError.vImageConverterNotInitialized + } + + vImageConvert_AnyToAny(converter, &sourceImageBuffer, &mediumDesination, nil, vImage_Flags(kvImagePrintDiagnosticsToConsole)) + + var destinationA = try vImage_Buffer(width: Int(width), height: Int(height), bitsPerPixel: 8 * UInt32(MemoryLayout.size)) + var destinationR = try vImage_Buffer(width: Int(width), height: Int(height), bitsPerPixel: 8 * UInt32(MemoryLayout.size)) + var destinationG = try vImage_Buffer(width: Int(width), height: Int(height), bitsPerPixel: 8 * UInt32(MemoryLayout.size)) + var destinationB = try vImage_Buffer(width: Int(width), height: Int(height), bitsPerPixel: 8 * UInt32(MemoryLayout.size)) + + var minFloat: [Float] = [-1.0, -1.0, -1.0, -1.0] + var maxFloat: [Float] = [1.0, 1.0, 1.0, 1.0] + + vImageConvert_ARGB8888toPlanarF(&mediumDesination, &destinationA, &destinationR, &destinationG, &destinationB, &maxFloat, &minFloat, .zero) + + let redData = Data(bytes: destinationR.data, count: Int(width) * Int(height) * MemoryLayout.size) + let greenData = Data(bytes: destinationG.data, count: Int(width) * Int(height) * MemoryLayout.size) + let blueData = Data(bytes: destinationB.data, count: Int(width) * Int(height) * MemoryLayout.size) + + let imageData = redData + greenData + blueData + + let shapedArray = MLShapedArray(data: imageData, shape: [1, 3, 512, 512]) + + return shapedArray + } + } +} + diff --git a/swift/StableDiffusion/pipeline/Decoder.swift b/swift/StableDiffusion/pipeline/Decoder.swift index 04f04ba6..e9b2c704 100644 --- a/swift/StableDiffusion/pipeline/Decoder.swift +++ b/swift/StableDiffusion/pipeline/Decoder.swift @@ -3,7 +3,6 @@ import Foundation import CoreML -import Accelerate /// A decoder model which produces RGB images from latent samples @available(iOS 16.2, macOS 13.1, *) @@ -57,12 +56,11 @@ public struct Decoder: ResourceManaging { } // Transform the outputs to CGImages - let images: [CGImage] = (0..(output)) + return try CGImage.fromShapedArray(MLShapedArray(output)) } return images @@ -74,50 +72,4 @@ public struct Decoder: ResourceManaging { } } - typealias PixelBufferPFx1 = vImage.PixelBuffer - typealias PixelBufferP8x3 = vImage.PixelBuffer - typealias PixelBufferIFx3 = vImage.PixelBuffer - typealias PixelBufferI8x3 = vImage.PixelBuffer - - func toRGBCGImage(_ array: MLShapedArray) -> CGImage { - - // array is [N,C,H,W], where C==3 - let channelCount = array.shape[1] - assert(channelCount == 3, - "Decoding model output has \(channelCount) channels, expected 3") - let height = array.shape[2] - let width = array.shape[3] - - // Normalize each channel into a float between 0 and 1.0 - let floatChannels = (0.. [0.0 1.0] - cIn.multiply(by: 0.5, preBias: 1.0, postBias: 0.0, destination: cOut) - } - return cOut - } - - // Convert to interleaved and then to UInt8 - let floatImage = PixelBufferIFx3(planarBuffers: floatChannels) - let uint8Image = PixelBufferI8x3(width: width, height: height) - floatImage.convert(to:uint8Image) // maps [0.0 1.0] -> [0 255] and clips - - // Convert to uint8x3 to RGB CGImage (no alpha) - let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue) - let cgImage = uint8Image.makeCGImage(cgImageFormat: - .init(bitsPerComponent: 8, - bitsPerPixel: 3*8, - colorSpace: CGColorSpaceCreateDeviceRGB(), - bitmapInfo: bitmapInfo)!)! - - return cgImage - } } diff --git a/swift/StableDiffusion/pipeline/Encoder.swift b/swift/StableDiffusion/pipeline/Encoder.swift new file mode 100644 index 00000000..043865cb --- /dev/null +++ b/swift/StableDiffusion/pipeline/Encoder.swift @@ -0,0 +1,80 @@ +// For licensing see accompanying LICENSE.md file. +// Copyright (C) 2022 Apple Inc. All Rights Reserved. + +import Foundation +import CoreML + +@available(iOS 16.0, macOS 13.0, *) +/// Encoder, currently supports image2image +public struct Encoder { + + public enum Error: String, Swift.Error { + case latentOutputNotValid + case batchLatentOutputEmpty + } + + /// VAE encoder model + post math and adding noise from schedular + var model: MLModel + + /// Create decoder from Core ML model + /// + /// - Parameters + /// - model: Core ML model for VAE decoder + public init(model: MLModel) { + self.model = model + } + + /// Prediction queue + let queue = DispatchQueue(label: "encoder.predict") + + /// Batch encode latent samples into images + /// - Parameters: + /// - image: image used for image2image + /// - diagonalNoise: random noise for `DiagonalGaussianDistribution` operation + /// - noise: random noise for initial latent space based on strength argument + /// - alphasCumprodStep: calculations using the scheduler traditionally calculated in the pipeline in pyTorch Diffusers library. + /// - Returns: The encoded latent space as MLShapedArray + public func encode( + image: CGImage, + diagonalNoise: MLShapedArray, + noise: MLShapedArray, + alphasCumprodStep: AlphasCumprodCalculation + ) throws -> MLShapedArray { + let sample = try image.plannerRGBShapedArray + let sqrtAlphasCumprod = MLShapedArray(scalars: [alphasCumprodStep.sqrtAlphasCumprod], shape: [1, 1]) + let sqrtOneMinusAlphasCumprod = MLShapedArray(scalars: [alphasCumprodStep.sqrtOneMinusAlphasCumprod], shape: [1, 1]) + + let dict: [String: Any] = [ + "sample": MLMultiArray(sample), + "diagonalNoise": MLMultiArray(diagonalNoise), + "noise": MLMultiArray(noise), + "sqrtAlphasCumprod": MLMultiArray(sqrtAlphasCumprod), + "sqrtOneMinusAlphasCumprod": MLMultiArray(sqrtOneMinusAlphasCumprod), + ] + let featureProvider = try MLDictionaryFeatureProvider(dictionary: dict) + + let batch = MLArrayBatchProvider(array: [featureProvider]) + + // Batch predict with model + let results = try queue.sync { try model.predictions(fromBatch: batch) } + + let batchLatents: [MLShapedArray] = try (0.. Date: Mon, 19 Dec 2022 00:38:04 +0900 Subject: [PATCH 02/15] Scheduler and pipeline --- .../DPMSolverMultistepScheduler.swift | 6 +- .../StableDiffusion/pipeline/Scheduler.swift | 41 +++++++++++- .../StableDiffusionPipeline+Resources.swift | 13 ++++ .../pipeline/StableDiffusionPipeline.swift | 65 ++++++++++++++++++- 4 files changed, 120 insertions(+), 5 deletions(-) diff --git a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift index 1fbfff1c..3e8342ce 100644 --- a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift +++ b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift @@ -23,12 +23,16 @@ public final class DPMSolverMultistepScheduler: Scheduler { public let betas: [Float] public let alphas: [Float] public let alphasCumProd: [Float] - public let timeSteps: [Int] + private let timeSteps: [Int] public let alpha_t: [Float] public let sigma_t: [Float] public let lambda_t: [Float] + public var allTimeSteps: [Int] { + timeSteps + } + public let solverOrder = 2 private(set) var lowerOrderStepped = 0 diff --git a/swift/StableDiffusion/pipeline/Scheduler.swift b/swift/StableDiffusion/pipeline/Scheduler.swift index 0bd92840..d0ed12e3 100644 --- a/swift/StableDiffusion/pipeline/Scheduler.swift +++ b/swift/StableDiffusion/pipeline/Scheduler.swift @@ -10,9 +10,12 @@ public protocol Scheduler { /// Number of inference steps to be performed var inferenceStepCount: Int { get } + + /// Training diffusion time steps index by inference time step + var allTimeSteps: [Int] { get } /// Training diffusion time steps index by inference time step - var timeSteps: [Int] { get } + func calculateTimesteps(strength: Float?) -> [Int] /// Schedule of betas which controls the amount of noise added at each timestep var betas: [Float] { get } @@ -71,6 +74,35 @@ public extension Scheduler { } } +// MARK: - Image2Image + +@available(iOS 16.2, macOS 13.1, *) +public extension Scheduler { + + func calculateAlphasCumprod(strength: Float) -> AlphasCumprodCalculation { + AlphasCumprodCalculation( + alphasCumprod: alphasCumProd, + timesteps: trainStepCount, + steps: inferenceStepCount, + strength: strength) + } +} + +// MARK: - Timesteps + +@available(iOS 16.2, macOS 13.1, *) +public extension Scheduler { + + func calculateTimesteps(strength: Float?) -> [Int] { + guard let strength else { return allTimeSteps.reversed() } + let startStep = Int(Float(inferenceStepCount) * strength) + let acutalTimesteps = Array(allTimeSteps[0..] + let timestepStrength: Float? + + if let startingImage { + timestepStrength = strength + guard let encoder else { + throw Error.startingImageProvidedWithoutEncoder + } + let noiseTuples = generateImage2ImageLatentSamples(imageCount, stdev: 1, seed: seed) + latents = try noiseTuples.map({ + try encoder.encode( + image: startingImage, + diagonalNoise: $0.diagonal, + noise: $0.latentNoise, + alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: strength)) + }) + } else { + timestepStrength = nil + // Generate random latent samples from specified seed + latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed) + } // De-noising loop - for (step,t) in scheduler[0].timeSteps.enumerated() { + let timeSteps = scheduler[0].calculateTimesteps(strength: timestepStrength) + for (step,t) in timeSteps.enumerated() { // Expand the latents for classifier-free guidance // and input to the Unet noise prediction model @@ -215,6 +247,35 @@ public struct StableDiffusionPipeline: ResourceManaging { } return samples } + + + /// For image2image - + /// - Parameters: + /// - count: batch size + /// - stdev: 1 + /// - seed: seed provided + /// - diagonalAndLatentNoiseIsSame: Diffusions library does not seem to use the same noise for the `DiagonalGaussianDistribution` operation, + /// but I have seen implementations of pipelines where it is the same. + /// - Returns: An array of tuples of noise values with length of batch size. + func generateImage2ImageLatentSamples(_ count: Int, stdev: Float, seed: Int, diagonalAndLatentNoiseIsSame: Bool = false) -> [(diagonal: MLShapedArray, latentNoise: MLShapedArray)] { + var sampleShape = unet.latentSampleShape + sampleShape[0] = 1 + + var random = NumPyRandomSource(seed: UInt32(truncatingIfNeeded: seed)) + let samples = (0..( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev))) + return (noise, noise) + } else { + return (MLShapedArray( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev))), + MLShapedArray( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev)))) + } + } + return samples + } func toHiddenStates(_ embedding: MLShapedArray) -> MLShapedArray { // Unoptimized manual transpose [0, 2, None, 1] From d0a754c1a1d2de9071ad8c703ad89bc8c8eb7e7a Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 19 Dec 2022 01:23:31 +0900 Subject: [PATCH 03/15] fix scheduler --- swift/StableDiffusion/pipeline/Scheduler.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/StableDiffusion/pipeline/Scheduler.swift b/swift/StableDiffusion/pipeline/Scheduler.swift index d0ed12e3..cfaa71cf 100644 --- a/swift/StableDiffusion/pipeline/Scheduler.swift +++ b/swift/StableDiffusion/pipeline/Scheduler.swift @@ -179,7 +179,7 @@ public final class PNDMScheduler: Scheduler { timeSteps.append(contentsOf: forwardSteps.dropLast(1)) timeSteps.append(timeSteps.last!) timeSteps.append(forwardSteps.last!) - timeSteps.reverse() + // do no revers timeSteps, this is now done in `calculateTimesteps` function self.timeSteps = timeSteps self.counter = 0 From 19dc885259a37f86ae9166bd70a15e099bf9bc4c Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 19 Dec 2022 01:23:40 +0900 Subject: [PATCH 04/15] cli --- swift/StableDiffusionCLI/main.swift | 40 ++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index 7343cf2b..1b1c8573 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -32,6 +32,12 @@ struct StableDiffusionSample: ParsableCommand { ) ) var resourcePath: String = "./" + + @Option(help: "Path to starting image.") + var image: String = "none" + + @Option(help: "Strength for image2image.") + var strength: Float = 0.5 @Option(help: "Number of images to sample / generate") var imageCount: Int = 1 @@ -51,7 +57,7 @@ struct StableDiffusionSample: ParsableCommand { var outputPath: String = "./" @Option(help: "Random seed") - var seed: UInt32 = 93 + var seed: UInt32 = UInt32.random(in: 0...UInt32.max) @Option(help: "Controls the influence of the text prompt on sampling process (0=random images)") var guidanceScale: Float = 7.5 @@ -84,6 +90,32 @@ struct StableDiffusionSample: ParsableCommand { disableSafety: disableSafety, reduceMemory: reduceMemory) try pipeline.loadResources() + + let startingImage: CGImage? + if image != "none" { + let imageURL = URL(filePath: image) +// if FileManager.default.fileExists(atPath: imageURL.path()) { +// throw RunError.resources("Starting image not found \(imageURL)") +// } + do { + let imageData = try Data(contentsOf: imageURL) + guard + let imgDataProvider = CGDataProvider(data: imageData as CFData), + let loadedImage = CGImage( + pngDataProviderSource: imgDataProvider, + decode: nil, shouldInterpolate: false, + intent: CGColorRenderingIntent.defaultIntent) + else { + throw RunError.resources("Starting Image not available \(resourcePath)") + } + startingImage = loadedImage + } catch let error { + throw RunError.resources("Starting image not found \(imageURL), error: \(error)") + } + + } else { + startingImage = nil + } log("Sampling ...\n") let sampleTimer = SampleTimer() @@ -92,6 +124,8 @@ struct StableDiffusionSample: ParsableCommand { let images = try pipeline.generateImages( prompt: prompt, negativePrompt: negativePrompt, + startingImage: startingImage, + strength: strength, imageCount: imageCount, stepCount: stepCount, seed: seed, @@ -168,6 +202,10 @@ struct StableDiffusionSample: ParsableCommand { if imageCount != 1 { name += ".\(sample)" } + + if image != "none" { + name += ".str\(Int(strength * 100))" + } name += ".\(seed)" From 2dca60608c2ca5931fe31728b7cd1bcc3e98921a Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 19 Dec 2022 02:07:19 +0900 Subject: [PATCH 05/15] remove CLI comment --- swift/StableDiffusionCLI/main.swift | 3 --- 1 file changed, 3 deletions(-) diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index 1b1c8573..aaf44295 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -94,9 +94,6 @@ struct StableDiffusionSample: ParsableCommand { let startingImage: CGImage? if image != "none" { let imageURL = URL(filePath: image) -// if FileManager.default.fileExists(atPath: imageURL.path()) { -// throw RunError.resources("Starting image not found \(imageURL)") -// } do { let imageData = try Data(contentsOf: imageURL) guard From e7eb953d7b2280f161d1b87e1b592d80c7472857 Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 19 Dec 2022 02:10:00 +0900 Subject: [PATCH 06/15] disable dpm multistep solver with image2image --- .../pipeline/DPMSolverMultistepScheduler.swift | 11 ++++++----- swift/StableDiffusion/pipeline/Scheduler.swift | 12 ++++-------- .../pipeline/StableDiffusionPipeline.swift | 14 +++++++++++--- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift index 3e8342ce..97c8525f 100644 --- a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift +++ b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift @@ -23,16 +23,12 @@ public final class DPMSolverMultistepScheduler: Scheduler { public let betas: [Float] public let alphas: [Float] public let alphasCumProd: [Float] - private let timeSteps: [Int] + public let timeSteps: [Int] public let alpha_t: [Float] public let sigma_t: [Float] public let lambda_t: [Float] - public var allTimeSteps: [Int] { - timeSteps - } - public let solverOrder = 2 private(set) var lowerOrderStepped = 0 @@ -183,4 +179,9 @@ public final class DPMSolverMultistepScheduler: Scheduler { return prevSample } + + /// This scheduler does not support image2image strength value. + public func calculateTimesteps(strength: Float?) -> [Int] { + timeSteps + } } diff --git a/swift/StableDiffusion/pipeline/Scheduler.swift b/swift/StableDiffusion/pipeline/Scheduler.swift index cfaa71cf..d70a42df 100644 --- a/swift/StableDiffusion/pipeline/Scheduler.swift +++ b/swift/StableDiffusion/pipeline/Scheduler.swift @@ -12,7 +12,7 @@ public protocol Scheduler { var inferenceStepCount: Int { get } /// Training diffusion time steps index by inference time step - var allTimeSteps: [Int] { get } + var timeSteps: [Int] { get } /// Training diffusion time steps index by inference time step func calculateTimesteps(strength: Float?) -> [Int] @@ -94,9 +94,9 @@ public extension Scheduler { public extension Scheduler { func calculateTimesteps(strength: Float?) -> [Int] { - guard let strength else { return allTimeSteps.reversed() } + guard let strength else { return timeSteps.reversed() } let startStep = Int(Float(inferenceStepCount) * strength) - let acutalTimesteps = Array(allTimeSteps[0.. Bool = { _ in true } ) throws -> [CGImage?] { @@ -153,7 +154,7 @@ public struct StableDiffusionPipeline: ResourceManaging { /// Setup schedulers let scheduler: [Scheduler] = (0.. Date: Tue, 20 Dec 2022 06:31:29 +0900 Subject: [PATCH 07/15] clamp initial timestamp --- swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift | 2 +- swift/StableDiffusion/pipeline/Encoder.swift | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift b/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift index 350ba3a7..78756831 100644 --- a/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift +++ b/swift/StableDiffusion/pipeline/AlphasCumprodCalculation.swift @@ -22,7 +22,7 @@ public struct AlphasCumprodCalculation { strength: Float ) { let tEnc = Int(strength * Float(steps)) - let initTimestep = timesteps - timesteps / steps * (steps - tEnc) + 1 + let initTimestep = min(max(0, timesteps - timesteps / steps * (steps - tEnc) + 1), timesteps - 1) self.sqrtAlphasCumprod = alphasCumprod[initTimestep].squareRoot() self.sqrtOneMinusAlphasCumprod = (1 - alphasCumprod[initTimestep]).squareRoot() } diff --git a/swift/StableDiffusion/pipeline/Encoder.swift b/swift/StableDiffusion/pipeline/Encoder.swift index 043865cb..856e89a0 100644 --- a/swift/StableDiffusion/pipeline/Encoder.swift +++ b/swift/StableDiffusion/pipeline/Encoder.swift @@ -66,7 +66,6 @@ public struct Encoder { else { throw Error.latentOutputNotValid } - print("output.shape: \(output.shape)") return MLShapedArray(output) } From 73927da7f90760d51f77b6233ca81f05d038925f Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Wed, 28 Dec 2022 20:23:38 +0100 Subject: [PATCH 08/15] Store timesteps in reverse order for consistency. --- .../pipeline/DPMSolverMultistepScheduler.swift | 7 +------ swift/StableDiffusion/pipeline/Scheduler.swift | 11 +++++------ .../pipeline/StableDiffusionPipeline.swift | 7 ------- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift index 97c8525f..1c3f2616 100644 --- a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift +++ b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift @@ -178,10 +178,5 @@ public final class DPMSolverMultistepScheduler: Scheduler { } return prevSample - } - - /// This scheduler does not support image2image strength value. - public func calculateTimesteps(strength: Float?) -> [Int] { - timeSteps - } + } } diff --git a/swift/StableDiffusion/pipeline/Scheduler.swift b/swift/StableDiffusion/pipeline/Scheduler.swift index d70a42df..ee649f8e 100644 --- a/swift/StableDiffusion/pipeline/Scheduler.swift +++ b/swift/StableDiffusion/pipeline/Scheduler.swift @@ -92,12 +92,11 @@ public extension Scheduler { @available(iOS 16.2, macOS 13.1, *) public extension Scheduler { - func calculateTimesteps(strength: Float?) -> [Int] { - guard let strength else { return timeSteps.reversed() } - let startStep = Int(Float(inferenceStepCount) * strength) - let acutalTimesteps = Array(timeSteps[0.. Date: Wed, 28 Dec 2022 20:49:59 +0100 Subject: [PATCH 09/15] Report actual number of steps. --- swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift index 7e54b914..68d676df 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift @@ -219,7 +219,7 @@ public struct StableDiffusionPipeline: ResourceManaging { pipeline: self, prompt: prompt, step: step, - stepCount: stepCount, + stepCount: timeSteps.count, currentLatentSamples: latents, isSafetyEnabled: canSafetyCheck && !disableSafety ) From c199a4225d71c0d50c081f8da991574e21878d11 Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Thu, 5 Jan 2023 20:40:29 -0800 Subject: [PATCH 10/15] uint32 --- swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift index 68d676df..63eb5758 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift @@ -258,7 +258,7 @@ public struct StableDiffusionPipeline: ResourceManaging { /// - diagonalAndLatentNoiseIsSame: Diffusions library does not seem to use the same noise for the `DiagonalGaussianDistribution` operation, /// but I have seen implementations of pipelines where it is the same. /// - Returns: An array of tuples of noise values with length of batch size. - func generateImage2ImageLatentSamples(_ count: Int, stdev: Float, seed: Int, diagonalAndLatentNoiseIsSame: Bool = false) -> [(diagonal: MLShapedArray, latentNoise: MLShapedArray)] { + func generateImage2ImageLatentSamples(_ count: Int, stdev: Float, seed: UInt32, diagonalAndLatentNoiseIsSame: Bool = false) -> [(diagonal: MLShapedArray, latentNoise: MLShapedArray)] { var sampleShape = unet.latentSampleShape sampleShape[0] = 1 From c17c80ff8b49a638b3cc622b60f65cde30ee3843 Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 30 Jan 2023 22:36:34 -0800 Subject: [PATCH 11/15] PRComments --- .../DPMSolverMultistepScheduler.swift | 2 +- swift/StableDiffusion/pipeline/Encoder.swift | 41 +++++++--- .../StableDiffusion/pipeline/Scheduler.swift | 2 +- .../StableDiffusionPipeline+Resources.swift | 6 +- .../pipeline/StableDiffusionPipeline.swift | 82 +++++++++++++++---- swift/StableDiffusionCLI/main.swift | 4 +- 6 files changed, 101 insertions(+), 36 deletions(-) diff --git a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift index 1c3f2616..1fbfff1c 100644 --- a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift +++ b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift @@ -178,5 +178,5 @@ public final class DPMSolverMultistepScheduler: Scheduler { } return prevSample - } + } } diff --git a/swift/StableDiffusion/pipeline/Encoder.swift b/swift/StableDiffusion/pipeline/Encoder.swift index 856e89a0..e62f1236 100644 --- a/swift/StableDiffusion/pipeline/Encoder.swift +++ b/swift/StableDiffusion/pipeline/Encoder.swift @@ -4,9 +4,9 @@ import Foundation import CoreML -@available(iOS 16.0, macOS 13.0, *) +@available(iOS 16.0, macOS 13.1, *) /// Encoder, currently supports image2image -public struct Encoder { +public struct Encoder: ResourceManaging { public enum Error: String, Swift.Error { case latentOutputNotValid @@ -14,14 +14,26 @@ public struct Encoder { } /// VAE encoder model + post math and adding noise from schedular - var model: MLModel + var model: ManagedMLModel - /// Create decoder from Core ML model + /// Create encoder from Core ML model /// - /// - Parameters - /// - model: Core ML model for VAE decoder - public init(model: MLModel) { - self.model = model + /// - Parameters: + /// - url: Location of compiled VAE encoder Core ML model + /// - configuration: configuration to be used when the model is loaded + /// - Returns: An encoder that will lazily load its required resources when needed or requested + public init(modelAt url: URL, configuration: MLModelConfiguration) { + self.model = ManagedMLModel(modelAt: url, configuration: configuration) + } + + /// Ensure the model has been loaded into memory + public func loadResources() throws { + try model.loadResources() + } + + /// Unload the underlying model to free up memory + public func unloadResources() { + model.unloadResources() } /// Prediction queue @@ -46,17 +58,22 @@ public struct Encoder { let dict: [String: Any] = [ "sample": MLMultiArray(sample), - "diagonalNoise": MLMultiArray(diagonalNoise), + "diagonal_noise": MLMultiArray(diagonalNoise), "noise": MLMultiArray(noise), - "sqrtAlphasCumprod": MLMultiArray(sqrtAlphasCumprod), - "sqrtOneMinusAlphasCumprod": MLMultiArray(sqrtOneMinusAlphasCumprod), + "sqrt_alphas_cumprod": MLMultiArray(sqrtAlphasCumprod), + "sqrt_one_minus_alphas_cumprod": MLMultiArray(sqrtOneMinusAlphasCumprod), ] let featureProvider = try MLDictionaryFeatureProvider(dictionary: dict) let batch = MLArrayBatchProvider(array: [featureProvider]) // Batch predict with model - let results = try queue.sync { try model.predictions(fromBatch: batch) } + + let results = try queue.sync { + try model.perform { model in + try model.predictions(fromBatch: batch) + } + } let batchLatents: [MLShapedArray] = try (0.. Bool = { _ in true } ) throws -> [CGImage?] { + try generateImages(input: SampleInput( + prompt: prompt, + negativePrompt: negativePrompt, + startingImage: startingImage, + strength: strength, + imageCount: imageCount, + stepCount: stepCount, + seed: seed, + guidanceScale: guidanceScale, + disableSafety: disableSafety, + schedulerType: schedulerType), progressHandler: progressHandler) + } + + public func generateImages( + input: SampleInput, + progressHandler: (Progress) -> Bool = { _ in true } + ) throws -> [CGImage?] { // Encode the input prompt and negative prompt - let promptEmbedding = try textEncoder.encode(prompt) - let negativePromptEmbedding = try textEncoder.encode(negativePrompt) + let promptEmbedding = try textEncoder.encode(input.prompt) + let negativePromptEmbedding = try textEncoder.encode(input.negativePrompt) if reduceMemory { textEncoder.unloadResources() @@ -152,10 +199,10 @@ public struct StableDiffusionPipeline: ResourceManaging { let hiddenStates = toHiddenStates(concatEmbedding) /// Setup schedulers - let scheduler: [Scheduler] = (0..] let timestepStrength: Float? - if let startingImage { - timestepStrength = strength + if + let startingImage = input.startingImage, + input.mode == .imageToImage + { + timestepStrength = input.strength guard let encoder else { throw Error.startingImageProvidedWithoutEncoder } - let noiseTuples = generateImage2ImageLatentSamples(imageCount, stdev: 1, seed: seed) + let noiseTuples = generateImage2ImageLatentSamples(input.imageCount, stdev: 1, seed: input.seed) latents = try noiseTuples.map({ try encoder.encode( image: startingImage, diagonalNoise: $0.diagonal, noise: $0.latentNoise, - alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: strength)) + alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: input.strength)) }) } else { timestepStrength = nil // Generate random latent samples from specified seed - latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed) + latents = generateLatentSamples(input.imageCount, stdev: stdev, seed: input.seed) } // De-noising loop @@ -202,11 +252,11 @@ public struct StableDiffusionPipeline: ResourceManaging { hiddenStates: hiddenStates ) - noise = performGuidance(noise, guidanceScale) + noise = performGuidance(noise, input.guidanceScale) // Have the scheduler compute the previous (t-1) latent // sample given the predicted noise and current sample - for i in 0.. [MLShapedArray] { diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index aaf44295..c0ec63d2 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -34,7 +34,7 @@ struct StableDiffusionSample: ParsableCommand { var resourcePath: String = "./" @Option(help: "Path to starting image.") - var image: String = "none" + var image: String? = nil @Option(help: "Strength for image2image.") var strength: Float = 0.5 @@ -92,7 +92,7 @@ struct StableDiffusionSample: ParsableCommand { try pipeline.loadResources() let startingImage: CGImage? - if image != "none" { + if let image { let imageURL = URL(filePath: image) do { let imageData = try Data(contentsOf: imageURL) From d6647a4551f37f85acd79920d96f4fbf4961ad87 Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 30 Jan 2023 22:49:08 -0800 Subject: [PATCH 12/15] remove old initializer --- .../StableDiffusionPipeline+SampleInput.swift | 82 +++++++++++++++++++ .../pipeline/StableDiffusionPipeline.swift | 65 +-------------- swift/StableDiffusionCLI/main.swift | 19 +++-- 3 files changed, 93 insertions(+), 73 deletions(-) create mode 100644 swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift new file mode 100644 index 00000000..0efe0ec8 --- /dev/null +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift @@ -0,0 +1,82 @@ +// +// File.swift +// +// +// Created by Timothy Kautz on 1/30/23. +// + +import Foundation +import CoreGraphics + +@available(iOS 16.2, macOS 13.1, *) +extension StableDiffusionPipeline { + + /// Tyoe of processing that will be performed to generate an image + public enum Mode { + case textToImage + case imageToImage + // case inPainting + } + + /// Image generation configuration + public struct SampleInput: Hashable { + + /// Text prompt to guide sampling + public var prompt: String + /// Negative text prompt to guide sampling + public var negativePrompt: String = "" + /// Starting image for image2image or in-painting + public var startingImage: CGImage? = nil + //public var maskImage: CGImage? = nil + public var strength: Float = 1.0 + /// Number of images to generate + public var imageCount: Int = 1 + /// Number of inference steps to perform + public var stepCount: Int = 50 + /// Random seed which to start generation + public var seed: UInt32 = 0 + /// Controls the influence of the text prompt on sampling process (0=random images) + public var guidanceScale: Float = 7.5 + /// Safety checks are only performed if `self.canSafetyCheck && !disableSafety` + public var disableSafety: Bool = false + /// The type of Scheduler to use. + public var schedulerType: StableDiffusionScheduler = .pndmScheduler + + /// Given the configuration, what mode will be used for generation + public var mode: Mode { + guard startingImage != nil else { + return .textToImage + } + guard strength < 1.0 else { + return .textToImage + } + return .imageToImage + } + + public init( + prompt: String, + negativePrompt: String = "", + startingImage: CGImage? = nil, + strength: Float = 1.0, + imageCount: Int = 1, + stepCount: Int = 50, + seed: UInt32 = 0, + guidanceScale: Float = 7.5, + disableSafety: Bool = false, + schedulerType: StableDiffusionScheduler = .pndmScheduler + ) { + self.prompt = prompt + self.negativePrompt = negativePrompt + self.startingImage = startingImage + self.strength = strength + self.imageCount = imageCount + self.stepCount = stepCount + self.seed = seed + self.guidanceScale = guidanceScale + self.disableSafety = disableSafety + self.schedulerType = schedulerType + } + + } + +} diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift index 01578203..8a9c3507 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift @@ -25,36 +25,6 @@ public struct StableDiffusionPipeline: ResourceManaging { case startingImageProvidedWithoutEncoder } - public enum Mode { - case textToImage - case imageToImage - // case inPainting - } - - public struct SampleInput: Hashable { - public var prompt: String - public var negativePrompt: String = "" - public var startingImage: CGImage? = nil - //public var maskImage: CGImage? = nil - public var strength: Float = 1.0 - public var imageCount: Int = 1 - public var stepCount: Int = 50 - public var seed: UInt32 = 0 - public var guidanceScale: Float = 7.5 - public var disableSafety: Bool = false - public var schedulerType: StableDiffusionScheduler = .pndmScheduler - - public var mode: Mode { - guard startingImage != nil else { - return .textToImage - } - guard strength < 1.0 else { - return .textToImage - } - return .imageToImage - } - } - /// Model to generate embeddings for tokenized input text var textEncoder: TextEncoder @@ -137,45 +107,12 @@ public struct StableDiffusionPipeline: ResourceManaging { try safetyChecker?.prewarmResources() } - /// Text to image generation using stable diffusion - /// + /// Image generation using stable diffusion /// - Parameters: - /// - prompt: Text prompt to guide sampling - /// - negativePrompt: Negative text prompt to guide sampling - /// - stepCount: Number of inference steps to perform - /// - imageCount: Number of samples/images to generate for the input prompt - /// - seed: Random seed which - /// - guidanceScale: Controls the influence of the text prompt on sampling process (0=random images) /// - disableSafety: Safety checks are only performed if `self.canSafetyCheck && !disableSafety` /// - progressHandler: Callback to perform after each step, stops on receiving false response /// - Returns: An array of `imageCount` optional images. /// The images will be nil if safety checks were performed and found the result to be un-safe - public func generateImages( - prompt: String, - negativePrompt: String = "", - startingImage: CGImage? = nil, - strength: Float = 1.0, - imageCount: Int = 1, - stepCount: Int = 50, - seed: UInt32 = 0, - guidanceScale: Float = 7.5, - disableSafety: Bool = false, - scheduler schedulerType: StableDiffusionScheduler = .pndmScheduler, - progressHandler: (Progress) -> Bool = { _ in true } - ) throws -> [CGImage?] { - try generateImages(input: SampleInput( - prompt: prompt, - negativePrompt: negativePrompt, - startingImage: startingImage, - strength: strength, - imageCount: imageCount, - stepCount: stepCount, - seed: seed, - guidanceScale: guidanceScale, - disableSafety: disableSafety, - schedulerType: schedulerType), progressHandler: progressHandler) - } - public func generateImages( input: SampleInput, progressHandler: (Progress) -> Bool = { _ in true } diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index c0ec63d2..338891c6 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -119,15 +119,16 @@ struct StableDiffusionSample: ParsableCommand { sampleTimer.start() let images = try pipeline.generateImages( - prompt: prompt, - negativePrompt: negativePrompt, - startingImage: startingImage, - strength: strength, - imageCount: imageCount, - stepCount: stepCount, - seed: seed, - guidanceScale: guidanceScale, - scheduler: scheduler.stableDiffusionScheduler + input: StableDiffusionPipeline.SampleInput( + prompt: prompt, + negativePrompt: negativePrompt, + startingImage: startingImage, + strength: strength, + imageCount: imageCount, + stepCount: stepCount, + seed: seed, + guidanceScale: guidanceScale, + schedulerType: scheduler.stableDiffusionScheduler) ) { progress in sampleTimer.stop() handleProgress(progress,sampleTimer) From 088bdc1dc45aa027e69c279c21242ff872e17765 Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Mon, 30 Jan 2023 23:46:36 -0800 Subject: [PATCH 13/15] pr comments --- .../StableDiffusionPipeline+SampleInput.swift | 30 ++------------- .../pipeline/StableDiffusionPipeline.swift | 36 +++++++++--------- swift/StableDiffusionCLI/main.swift | 38 ++++++++++--------- 3 files changed, 42 insertions(+), 62 deletions(-) diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift index 0efe0ec8..8933557b 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift @@ -1,9 +1,5 @@ -// -// File.swift -// -// -// Created by Timothy Kautz on 1/30/23. -// +// For licensing see accompanying LICENSE.md file. +// Copyright (C) 2022 Apple Inc. All Rights Reserved. import Foundation import CoreGraphics @@ -19,7 +15,7 @@ extension StableDiffusionPipeline { } /// Image generation configuration - public struct SampleInput: Hashable { + public struct Configuration: Hashable { /// Text prompt to guide sampling public var prompt: String @@ -54,27 +50,9 @@ extension StableDiffusionPipeline { } public init( - prompt: String, - negativePrompt: String = "", - startingImage: CGImage? = nil, - strength: Float = 1.0, - imageCount: Int = 1, - stepCount: Int = 50, - seed: UInt32 = 0, - guidanceScale: Float = 7.5, - disableSafety: Bool = false, - schedulerType: StableDiffusionScheduler = .pndmScheduler + prompt: String ) { self.prompt = prompt - self.negativePrompt = negativePrompt - self.startingImage = startingImage - self.strength = strength - self.imageCount = imageCount - self.stepCount = stepCount - self.seed = seed - self.guidanceScale = guidanceScale - self.disableSafety = disableSafety - self.schedulerType = schedulerType } } diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift index 8a9c3507..f2792475 100644 --- a/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift +++ b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.swift @@ -114,13 +114,13 @@ public struct StableDiffusionPipeline: ResourceManaging { /// - Returns: An array of `imageCount` optional images. /// The images will be nil if safety checks were performed and found the result to be un-safe public func generateImages( - input: SampleInput, + configuration config: Configuration, progressHandler: (Progress) -> Bool = { _ in true } ) throws -> [CGImage?] { // Encode the input prompt and negative prompt - let promptEmbedding = try textEncoder.encode(input.prompt) - let negativePromptEmbedding = try textEncoder.encode(input.negativePrompt) + let promptEmbedding = try textEncoder.encode(config.prompt) + let negativePromptEmbedding = try textEncoder.encode(config.negativePrompt) if reduceMemory { textEncoder.unloadResources() @@ -136,10 +136,10 @@ public struct StableDiffusionPipeline: ResourceManaging { let hiddenStates = toHiddenStates(concatEmbedding) /// Setup schedulers - let scheduler: [Scheduler] = (0.. [MLShapedArray] { diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index 338891c6..6b460469 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -118,25 +118,27 @@ struct StableDiffusionSample: ParsableCommand { let sampleTimer = SampleTimer() sampleTimer.start() + var pipelineConfig = StableDiffusionPipeline.Configuration(prompt: prompt) + + pipelineConfig.negativePrompt = negativePrompt + pipelineConfig.startingImage = startingImage + pipelineConfig.strength = strength + pipelineConfig.imageCount = imageCount + pipelineConfig.stepCount = stepCount + pipelineConfig.seed = seed + pipelineConfig.guidanceScale = guidanceScale + pipelineConfig.schedulerType = scheduler.stableDiffusionScheduler + let images = try pipeline.generateImages( - input: StableDiffusionPipeline.SampleInput( - prompt: prompt, - negativePrompt: negativePrompt, - startingImage: startingImage, - strength: strength, - imageCount: imageCount, - stepCount: stepCount, - seed: seed, - guidanceScale: guidanceScale, - schedulerType: scheduler.stableDiffusionScheduler) - ) { progress in - sampleTimer.stop() - handleProgress(progress,sampleTimer) - if progress.stepCount != progress.step { - sampleTimer.start() - } - return true - } + configuration: pipelineConfig, + progressHandler: { progress in + sampleTimer.stop() + handleProgress(progress,sampleTimer) + if progress.stepCount != progress.step { + sampleTimer.start() + } + return true + }) _ = try saveImages(images, logNames: true) } From eafd72efd9372257a5778ead93304a74a0dd8f3e Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Thu, 9 Feb 2023 00:10:31 -0800 Subject: [PATCH 14/15] change name and add error handling also fix 512 hard coded --- .../pipeline/CGImage+vImage.swift | 2 +- swift/StableDiffusion/pipeline/Encoder.swift | 47 ++++++++++++++++--- ...ableDiffusionPipeline.Configuration.swift} | 0 3 files changed, 42 insertions(+), 7 deletions(-) rename swift/StableDiffusion/pipeline/{StableDiffusionPipeline+SampleInput.swift => StableDiffusionPipeline.Configuration.swift} (100%) diff --git a/swift/StableDiffusion/pipeline/CGImage+vImage.swift b/swift/StableDiffusion/pipeline/CGImage+vImage.swift index 809836e7..6910b207 100644 --- a/swift/StableDiffusion/pipeline/CGImage+vImage.swift +++ b/swift/StableDiffusion/pipeline/CGImage+vImage.swift @@ -111,7 +111,7 @@ extension CGImage { let imageData = redData + greenData + blueData - let shapedArray = MLShapedArray(data: imageData, shape: [1, 3, 512, 512]) + let shapedArray = MLShapedArray(data: imageData, shape: [1, 3, self.width, self.height]) return shapedArray } diff --git a/swift/StableDiffusion/pipeline/Encoder.swift b/swift/StableDiffusion/pipeline/Encoder.swift index e62f1236..b35f5a94 100644 --- a/swift/StableDiffusion/pipeline/Encoder.swift +++ b/swift/StableDiffusion/pipeline/Encoder.swift @@ -8,9 +8,19 @@ import CoreML /// Encoder, currently supports image2image public struct Encoder: ResourceManaging { + public enum FeatureName: String { + case sample = "sample" + case diagonalNoise = "diagonal_noise" + case noise = "noise" + case sqrtAlphasCumprod = "sqrt_alphas_cumprod" + case sqrtOneMinusAlphasCumprod = "sqrt_one_minus_alphas_cumprod" + } + public enum Error: String, Swift.Error { case latentOutputNotValid case batchLatentOutputEmpty + case sampleInputShapeNotCorrect + case noiseInputShapeNotCorrect } /// VAE encoder model + post math and adding noise from schedular @@ -57,11 +67,11 @@ public struct Encoder: ResourceManaging { let sqrtOneMinusAlphasCumprod = MLShapedArray(scalars: [alphasCumprodStep.sqrtOneMinusAlphasCumprod], shape: [1, 1]) let dict: [String: Any] = [ - "sample": MLMultiArray(sample), - "diagonal_noise": MLMultiArray(diagonalNoise), - "noise": MLMultiArray(noise), - "sqrt_alphas_cumprod": MLMultiArray(sqrtAlphasCumprod), - "sqrt_one_minus_alphas_cumprod": MLMultiArray(sqrtOneMinusAlphasCumprod), + FeatureName.sample.rawValue: MLMultiArray(sample), + FeatureName.diagonalNoise.rawValue: MLMultiArray(diagonalNoise), + FeatureName.noise.rawValue: MLMultiArray(noise), + FeatureName.sqrtAlphasCumprod.rawValue: MLMultiArray(sqrtAlphasCumprod), + FeatureName.sqrtOneMinusAlphasCumprod.rawValue: MLMultiArray(sqrtOneMinusAlphasCumprod), ] let featureProvider = try MLDictionaryFeatureProvider(dictionary: dict) @@ -71,7 +81,32 @@ public struct Encoder: ResourceManaging { let results = try queue.sync { try model.perform { model in - try model.predictions(fromBatch: batch) + if let feature = model.modelDescription.inputDescriptionsByName[FeatureName.sample.rawValue], + let shape = feature.multiArrayConstraint?.shape as? [Int] + { + guard sample.shape == shape else { + // TODO: Consider auto resizing and croping similar to how Vision or CoreML auto-generated Swift code can accomplish with `MLFeatureValue` + throw Error.sampleInputShapeNotCorrect + } + } + + if let feature = model.modelDescription.inputDescriptionsByName[FeatureName.noise.rawValue], + let shape = feature.multiArrayConstraint?.shape as? [Int] + { + guard noise.shape == shape else { + throw Error.noiseInputShapeNotCorrect + } + } + + if let feature = model.modelDescription.inputDescriptionsByName[FeatureName.diagonalNoise.rawValue], + let shape = feature.multiArrayConstraint?.shape as? [Int] + { + guard diagonalNoise.shape == shape else { + throw Error.noiseInputShapeNotCorrect + } + } + + return try model.predictions(fromBatch: batch) } } diff --git a/swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift b/swift/StableDiffusion/pipeline/StableDiffusionPipeline.Configuration.swift similarity index 100% rename from swift/StableDiffusion/pipeline/StableDiffusionPipeline+SampleInput.swift rename to swift/StableDiffusion/pipeline/StableDiffusionPipeline.Configuration.swift From 5bf9c71081003d72c96b9a48edb37ffaa42782cd Mon Sep 17 00:00:00 2001 From: Timothy Kautz Date: Thu, 9 Feb 2023 00:21:19 -0800 Subject: [PATCH 15/15] Add fix for Jpegs --- swift/StableDiffusionCLI/main.swift | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/swift/StableDiffusionCLI/main.swift b/swift/StableDiffusionCLI/main.swift index 6b460469..c5ae31a3 100644 --- a/swift/StableDiffusionCLI/main.swift +++ b/swift/StableDiffusionCLI/main.swift @@ -7,6 +7,7 @@ import CoreML import Foundation import StableDiffusion import UniformTypeIdentifiers +import Cocoa @available(iOS 16.2, macOS 13.1, *) struct StableDiffusionSample: ParsableCommand { @@ -97,11 +98,8 @@ struct StableDiffusionSample: ParsableCommand { do { let imageData = try Data(contentsOf: imageURL) guard - let imgDataProvider = CGDataProvider(data: imageData as CFData), - let loadedImage = CGImage( - pngDataProviderSource: imgDataProvider, - decode: nil, shouldInterpolate: false, - intent: CGColorRenderingIntent.defaultIntent) + let nsImage = NSImage(data: imageData), + let loadedImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else { throw RunError.resources("Starting Image not available \(resourcePath)") }