Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add @capacitor-mlkit/selfie-segmentation package #74

Merged
merged 16 commits
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,16 +1,64 @@
import Foundation
import MLKitVision
import UIKit

extension UIImage {
    /// Returns a copy of the image scaled to the requested dimensions.
    ///
    /// - A missing `width` or `height` is derived from the other value so the
    ///   aspect ratio is preserved; when both are `nil` the receiver is
    ///   returned unchanged.
    /// - The image is only ever scaled down: if the requested size is at
    ///   least as large as the current size in both dimensions, the receiver
    ///   is returned as-is.
    public func scaledImage(width: Int?, height: Int?) -> UIImage {
        let targetWidth: CGFloat
        let targetHeight: CGFloat

        switch (width, height) {
        case let (.some(w), .some(h)):
            targetWidth = CGFloat(w)
            targetHeight = CGFloat(h)
        case let (.some(w), nil):
            // Derive the height from the width scale factor.
            targetWidth = CGFloat(w)
            targetHeight = self.size.height * (targetWidth / self.size.width)
        case let (nil, .some(h)):
            // Derive the width from the height scale factor.
            targetHeight = CGFloat(h)
            targetWidth = self.size.width * (targetHeight / self.size.height)
        case (nil, nil):
            return self
        }

        let targetSize = CGSize(width: targetWidth, height: targetHeight)

        // Never upscale: keep the original when it is already small enough.
        if targetSize.width >= size.width && targetSize.height >= size.height {
            return self
        }

        UIGraphicsBeginImageContextWithOptions(targetSize, false, scale)
        defer { UIGraphicsEndImageContext() }
        draw(in: CGRect(origin: .zero, size: targetSize))
        return UIGraphicsGetImageFromCurrentImageContext() ?? self
    }
}

/// Options for a selfie-segmentation run: the (optionally scaled) input
/// image and the foreground-confidence threshold.
///
/// Fix for diff-merge corruption: the old `visionImage` property, the stale
/// `visionImage:` init parameter (which was missing its trailing comma), and
/// the brace-less `getVisionImage()` remnant are removed — `processImage`
/// builds its own `VisionImage` from `getImage()`.
@objc class ProcessImageOptions: NSObject {
    private var image: UIImage
    private var confidence: CGFloat

    /// - Parameters:
    ///   - image: Source image; scaled via `UIImage.scaledImage` before use.
    ///   - width: Optional target width in pixels; `nil` derives it from `height`.
    ///   - height: Optional target height in pixels; `nil` derives it from `width`.
    ///   - confidence: Mask threshold, expected in [0, 1].
    init(
        image: UIImage,
        width: Int?,
        height: Int?,
        confidence: CGFloat
    ) {
        self.image = image.scaledImage(width: width, height: height)
        self.confidence = confidence
    }

    func getImage() -> UIImage {
        return image
    }

    func getConfidence() -> CGFloat {
        return confidence
    }
}
Original file line number Diff line number Diff line change
@@ -1,47 +1,32 @@
import Foundation
import Capacitor
import MLKitVision
import MLKitSegmentationSelfie

/// Result of a segmentation run, wrapping the produced image.
///
/// Fix for diff-merge corruption: the old mask-array serialization code
/// (CVPixelBuffer iteration) and embedded review-UI text were interleaved
/// with the new PNG-file serialization; this is the coherent new-side class.
@objc class ProcessImageResult: NSObject {
    let image: UIImage

    init(image: UIImage) {
        self.image = image
    }

    /// Serializes the result for the JavaScript bridge.
    ///
    /// Writes the image as a PNG into the caches directory and returns its
    /// file URL under `path`; if writing fails, falls back to embedding the
    /// PNG inline as a base64 data URL.
    ///
    /// NOTE(review): `width`/`height` use `image.size`, which is in points —
    /// for images with scale > 1 the pixel dimensions differ; confirm which
    /// is intended by the JS API contract.
    func toJSObject() -> JSObject {
        var result = JSObject()

        if let data = image.pngData() {
            do {
                let path = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask)[0]
                let name = "photo-" + UUID().uuidString + ".png"
                let url = path.appendingPathComponent(name)
                try data.write(to: url)

                result["path"] = url.absoluteString
            } catch {
                // Could not persist the file; deliver the data inline instead.
                result["path"] = "data:image/png;base64," + data.base64EncodedString()
            }

            result["width"] = Int(image.size.width)
            result["height"] = Int(image.size.height)
        }

        return result
    }
}
186 changes: 168 additions & 18 deletions packages/selfie-segmentation/ios/Plugin/SelfieSegmentation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,191 @@ import MLKitSegmentationSelfie
self.plugin = plugin
}

/// Loads a `UIImage` from a file-URL string.
///
/// Fix for diff-merge corruption: the old `createVisionImageFromFilePath`
/// signature and its `VisionImage` wrapping were interleaved with the new
/// implementation; this is the coherent new-side method, flattened to
/// guard-style early exits.
///
/// - Parameter path: A URL string (e.g. `file:///...`).
/// - Returns: The decoded image, or `nil` when the string is not a valid
///   URL, the file does not exist, or the data cannot be decoded.
@objc func createImageFromFilePath(_ path: String) -> UIImage? {
    guard let url = URL.init(string: path) else {
        return nil
    }
    guard FileManager.default.fileExists(atPath: url.path) else {
        return nil
    }
    return UIImage.init(contentsOfFile: url.path)
}

/// Errors thrown by `processImage`.
enum ProcessError: Error {
// Conversion of the input `UIImage` to a `CVImageBuffer` failed.
case createImageBuffer
}

// Strong reference so the segmenter outlives the asynchronous `process`
// call; cleared in its completion handler (see `processImage`).
private var segmenter: Segmenter?

/// Runs single-image selfie segmentation on the options' image and returns
/// a new image with the background removed.
///
/// Fix for diff-merge corruption: the old synchronous `segmenter.results`
/// path and a stale `options.getVisionImage()` call were interleaved with
/// the new asynchronous `process` path; this is the coherent new-side
/// implementation.
///
/// NOTE(review): `shouldEnableRawSizeMask = true` may yield a mask whose
/// size differs from the input image, while `applySegmentationMask` asserts
/// matching sizes — confirm the two are consistent.
///
/// - Parameters:
///   - options: The scaled input image plus the confidence threshold.
///   - completion: Called exactly once with either a result or an error.
@objc func processImage(_ options: ProcessImageOptions, completion: @escaping (ProcessImageResult?, Error?) -> Void) {
    let image = options.getImage()
    let threshold = options.getConfidence()

    let visionImage = VisionImage.init(image: image)
    visionImage.orientation = image.imageOrientation

    let selfieSegmenterOptions: SelfieSegmenterOptions = SelfieSegmenterOptions()
    selfieSegmenterOptions.segmenterMode = .singleImage
    selfieSegmenterOptions.shouldEnableRawSizeMask = true

    // Stored in the property so the segmenter is not deallocated before its
    // asynchronous completion handler runs.
    segmenter = Segmenter.segmenter(
        options: selfieSegmenterOptions
    )

    segmenter?.process(visionImage) { mask, error in
        // Release the segmenter once the callback has fired.
        self.segmenter = nil

        guard error == nil, let mask = mask else {
            return completion(nil, error)
        }

        do {
            guard let imageBuffer = self.createImageBuffer(from: image) else {
                throw ProcessError.createImageBuffer
            }

            // Mutates the buffer in place: background pixels are cleared.
            self.applySegmentationMask(
                mask: mask, to: imageBuffer, threshold: threshold
            )

            let image = self.createImage(from: imageBuffer)
            let result = ProcessImageResult(image: image)

            completion(result, nil)
        } catch {
            completion(nil, error)
        }
    }
}

/// Renders a `UIImage` into a newly created 32BGRA `CVPixelBuffer`.
///
/// - Parameter image: Source image; must have a backing `cgImage`.
/// - Returns: A pixel buffer containing the drawn image, or `nil` when the
///   image has no `cgImage`, buffer allocation fails, or the CGContext
///   cannot be created.
func createImageBuffer(from image: UIImage) -> CVImageBuffer? {
guard let cgImage = image.cgImage else { return nil }
let width = cgImage.width
let height = cgImage.height

// Allocate a BGRA buffer matching the image's pixel dimensions.
var buffer: CVPixelBuffer?
CVPixelBufferCreate(
kCFAllocatorDefault,
width,
height,
kCVPixelFormatType_32BGRA,
nil,
&buffer)
guard let imageBuffer = buffer else { return nil }

// Lock the buffer so CoreGraphics can draw directly into its memory.
let flags = CVPixelBufferLockFlags(rawValue: 0)
CVPixelBufferLockBaseAddress(imageBuffer, flags)
let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer)
let colorSpace = CGColorSpaceCreateDeviceRGB()
// Use the buffer's own bytes-per-row: it may include row padding.
let bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer)
// premultipliedFirst + byteOrder32Little == BGRA in memory, matching the
// pixel format requested above.
let context = CGContext(
data: baseAddress,
width: width,
height: height,
bitsPerComponent: 8,
bytesPerRow: bytesPerRow,
space: colorSpace,
bitmapInfo: (CGImageAlphaInfo.premultipliedFirst.rawValue
| CGBitmapInfo.byteOrder32Little.rawValue))

if let context = context {
let rect = CGRect.init(x: 0, y: 0, width: width, height: height)
context.draw(cgImage, in: rect)
CVPixelBufferUnlockBaseAddress(imageBuffer, flags)
return imageBuffer
} else {
// Context creation failed; unlock before bailing out.
CVPixelBufferUnlockBaseAddress(imageBuffer, flags)
return nil
}
}

// func createSampleBuffer(with imageBuffer: CVImageBuffer) -> CMSampleBuffer? {
// var timingInfo = CMSampleTimingInfo()
//// guard CMSampleBufferGetSampleTimingInfo(sampleBuffer, at: 0, timingInfoOut: &timingInfo) == 0 else {
//// return nil
//// }
// var outputSampleBuffer: CMSampleBuffer?
// var newFormatDescription: CMFormatDescription?
// CMVideoFormatDescriptionCreateForImageBuffer(allocator: nil, imageBuffer: imageBuffer, formatDescriptionOut: &newFormatDescription)
// guard let formatDescription = newFormatDescription else {
// return nil
// }
// CMSampleBufferCreateReadyWithImageBuffer(allocator: nil, imageBuffer: imageBuffer, formatDescription: formatDescription, sampleTiming: &timingInfo, sampleBufferOut: &outputSampleBuffer)
// guard let buffer = outputSampleBuffer else {
// return nil
// }
// return buffer
// }

/// Renders a `CVImageBuffer` into a `UIImage` via Core Image.
///
/// NOTE(review): `createCGImage` returns an optional and is force-unwrapped
/// here — this traps if rendering fails; consider whether a fallback is
/// needed for callers.
func createImage(
from imageBuffer: CVImageBuffer
) -> UIImage {
let ciImage = CIImage(cvPixelBuffer: imageBuffer)
let context = CIContext(options: nil)
let cgImage = context.createCGImage(ciImage, from: ciImage.extent)!
return UIImage(cgImage: cgImage)
}

/// Applies the segmentation mask to `imageBuffer` in place: pixels whose
/// foreground confidence meets `threshold` are kept (channels scaled by the
/// confidence, alpha forced opaque); all other pixels become transparent
/// black.
///
/// Fixes: the closed ranges `0...(height - 1)` / `0...(width - 1)` trap when
/// a buffer dimension is 0 (half-open ranges skip the loop instead), and the
/// buffer unlocks now run via `defer` on every exit path.
///
/// - Parameters:
///   - mask: Per-pixel Float32 foreground confidences; its buffer size must
///     match `imageBuffer` (asserted below).
///   - imageBuffer: A 32BGRA pixel buffer, modified in place.
///   - threshold: Minimum confidence for a pixel to be kept.
func applySegmentationMask(
    mask: SegmentationMask, to imageBuffer: CVImageBuffer, threshold: CGFloat
) {
    let bgraBytesPerPixel = 4

    assert(
        CVPixelBufferGetPixelFormatType(imageBuffer) == kCVPixelFormatType_32BGRA,
        "Image buffer must have 32BGRA pixel format type")

    let width = CVPixelBufferGetWidth(mask.buffer)
    let height = CVPixelBufferGetHeight(mask.buffer)
    assert(CVPixelBufferGetWidth(imageBuffer) == width, "Width must match")
    assert(CVPixelBufferGetHeight(imageBuffer) == height, "Height must match")

    let writeFlags = CVPixelBufferLockFlags(rawValue: 0)
    CVPixelBufferLockBaseAddress(imageBuffer, writeFlags)
    CVPixelBufferLockBaseAddress(mask.buffer, CVPixelBufferLockFlags.readOnly)
    // Guarantee both buffers are unlocked however this function returns.
    defer {
        CVPixelBufferUnlockBaseAddress(imageBuffer, writeFlags)
        CVPixelBufferUnlockBaseAddress(mask.buffer, CVPixelBufferLockFlags.readOnly)
    }

    let maskBytesPerRow = CVPixelBufferGetBytesPerRow(mask.buffer)
    var maskAddress =
        CVPixelBufferGetBaseAddress(mask.buffer)!.bindMemory(
            to: Float32.self, capacity: maskBytesPerRow * height)

    let imageBytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer)
    var imageAddress = CVPixelBufferGetBaseAddress(imageBuffer)!.bindMemory(
        to: UInt8.self, capacity: imageBytesPerRow * height)

    for _ in 0..<height {
        for col in 0..<width {
            // BGRA layout: blue, green, red, alpha.
            let pixelOffset = col * bgraBytesPerPixel
            let blueOffset = pixelOffset
            let greenOffset = pixelOffset + 1
            let redOffset = pixelOffset + 2
            let alphaOffset = pixelOffset + 3

            // Foreground confidence of this pixel.
            let confidence: CGFloat = CGFloat(maskAddress[col])

            if confidence >= threshold {
                let red = CGFloat(imageAddress[redOffset])
                let green = CGFloat(imageAddress[greenOffset])
                let blue = CGFloat(imageAddress[blueOffset])

                // Scale the kept pixel by its confidence; force full alpha.
                imageAddress[redOffset] = UInt8(red * confidence)
                imageAddress[greenOffset] = UInt8(green * confidence)
                imageAddress[blueOffset] = UInt8(blue * confidence)
                imageAddress[alphaOffset] = UInt8(0xff)
            } else {
                // Background: transparent black.
                imageAddress[redOffset] = UInt8(0x00)
                imageAddress[greenOffset] = UInt8(0x00)
                imageAddress[blueOffset] = UInt8(0x00)
                imageAddress[alphaOffset] = UInt8(0x00)
            }
        }

        // Advance both pointers one row; bytes-per-row may include padding.
        imageAddress += imageBytesPerRow / MemoryLayout<UInt8>.size
        maskAddress += maskBytesPerRow / MemoryLayout<Float32>.size
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ import Capacitor
@objc(SelfieSegmentationPlugin)
public class SelfieSegmentationPlugin: CAPPlugin {
public let tag = "SelfieSegmentation"

public let errorPathMissing = "path must be provided."
public let errorLoadImageFailed = "image could not be loaded."

public let defaultConfidence: Float = 0.9

private var implementation: SelfieSegmentation?

override public func load() {
Expand All @@ -23,21 +26,30 @@ public class SelfieSegmentationPlugin: CAPPlugin {
return
}

guard let visionImage = implementation?.createVisionImageFromFilePath(path) else {
let width = call.getInt("width")
let height = call.getInt("height")

let confidence = call.getFloat("confidence", defaultConfidence)

guard let image = implementation?.createImageFromFilePath(path) else {
call.reject(errorLoadImageFailed)
return
}

let options = ProcessImageOptions(visionImage: visionImage)
let options = ProcessImageOptions(image: image,
width: width,
height: height,
confidence: CGFloat(confidence))

implementation?.processImage(options, completion: { result, error in
if let error = error {
CAPLog.print("[", self.tag, "] ", error)
call.reject(error.localizedDescription, nil, error)
return
}
if let result = result?.toJSObject() as? JSObject {
call.resolve(result)

if let result = result {
call.resolve(result.toJSObject())
}
})
}
Expand Down