From 43c8f8bee6611a90bbe466a803e81de1af62ebb0 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 21 May 2024 11:55:29 +0900 Subject: [PATCH 01/26] Add human feature. --- YOLO.xcodeproj/project.pbxproj | 4 + YOLO/Main.storyboard | 17 +- YOLO/Utilities/BoundingBoxView.swift | 36 ++- YOLO/Utilities/PostProcessing.swift | 137 ++++++++ YOLO/ViewController.swift | 464 +++++++++++++++++---------- 5 files changed, 474 insertions(+), 184 deletions(-) create mode 100644 YOLO/Utilities/PostProcessing.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index ddcf965..19a6758 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -21,6 +21,7 @@ 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -41,6 +42,7 @@ 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -60,6 +62,7 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, ); @@ -212,6 +215,7 @@ files = ( 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 048e9f6..c5daed8 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,9 +1,9 
@@ - + - + @@ -182,18 +182,30 @@ + + + + + + + + + + + + @@ -223,6 +235,7 @@ + diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index b506545..bfbe2fb 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -20,6 +20,9 @@ class BoundingBoxView { /// The layer that displays the label and confidence score for the detected object. let textLayer: CATextLayer + /// The layer that displays the inner text within the bounding box. + let innerTextLayer: CATextLayer + /// Initializes a new BoundingBoxView with configured shape and text layers. init() { shapeLayer = CAShapeLayer() @@ -33,22 +36,32 @@ class BoundingBoxView { textLayer.fontSize = 14 // Set font size for the label text textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels textLayer.alignmentMode = .center // Center-align the text within the layer + + innerTextLayer = CATextLayer() + innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + innerTextLayer.fontSize = 12 // Set font size for the inner text + innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text + innerTextLayer.alignmentMode = .left // Left-align the text within the layer + innerTextLayer.isWrapped = true // Wrap the text to fit within the layer } - /// Adds the bounding box and text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. + /// Adds the bounding box, text, and inner text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. func addToLayer(_ parent: CALayer) { parent.addSublayer(shapeLayer) parent.addSublayer(textLayer) + parent.addSublayer(innerTextLayer) } - /// Updates the bounding box and label to be visible with specified properties. + /// Updates the bounding box, label, and inner text to be visible with specified properties. /// - Parameters: /// - frame: The CGRect frame defining the bounding box's size and position. /// - label: The text label to display (e.g., object class and confidence). /// - color: The color of the bounding box stroke and label background. /// - alpha: The opacity level for the bounding box stroke and label background. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { + /// - innerTexts: The text to display inside the bounding box. 
+ func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { CATransaction.setDisableActions(true) // Disable implicit animations let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box @@ -69,11 +82,24 @@ class BoundingBoxView { let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + + if !innerTexts.isEmpty { + innerTextLayer.string = innerTexts // Set the inner text + innerTextLayer.backgroundColor = UIColor.red.withAlphaComponent(0.5).cgColor // No background color + innerTextLayer.isHidden = false // Make the inner text layer visible + innerTextLayer.foregroundColor = UIColor.white.cgColor // Set text color + innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) + // Set the inner text layer frame + } else { + innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty + } + } - /// Hides the bounding box and text layers. + /// Hides the bounding box, text, and inner text layers. func hide() { shapeLayer.isHidden = true textLayer.isHidden = true + innerTextLayer.isHidden = true } } diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift new file mode 100644 index 0000000..b29d2d1 --- /dev/null +++ b/YOLO/Utilities/PostProcessing.swift @@ -0,0 +1,137 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessing for Ultralytics YOLO App +// This feature is designed to post-process the output of a YOLOv8 model within the Ultralytics YOLO app to extract high-confidence objects. +// Output high confidence boxes and their corresponding feature values using Non max suppression. +// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import CoreML +import Vision + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0.. 
CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +// Human model's output [1,95,8400] to [(Box, Confidence, HumanFeatures)] + +func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { + let numAnchors = prediction.shape[2].intValue + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: 11) + for k in 0..<11 { + let key = (84 + k) * numAnchors + j + boxFeatures[k] = featurePointer[key] + } + + lock.sync { + boxes.append(boundingBox) + scores.append(confidence) + features.append(boxFeatures) + } + } + } + + let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) + var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() + + for idx in selectedIndices { + selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + } + + return selectedBoxesAndFeatures +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] + +struct HumanFeatures { + let weight: Float + let height: Float + let age: Int + let gender: String + let genderConfidence: Float + let race: String + let raceConfidence: Float + + init(features:[Float]) { + self.weight = features[0] + self.height = features[1] + self.age = Int(round(features[2])) + let genderCandidates = Array(features[3..<5]) + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let raceCandidates = Array(features[5...]) + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} + diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 025a3de..e11dfd9 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -23,6 +23,7 @@ class ViewController: UIViewController { @IBOutlet var videoPreview: UIView! @IBOutlet var View0: UIView! @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet weak var taskSegmentControl: UISegmentedControl! @IBOutlet var playButtonOutlet: UIBarButtonItem! @IBOutlet var pauseButtonOutlet: UIBarButtonItem! 
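// Aside on PostProcessHuman above: the model output is a [1, C, N] MLMultiArray
// (C values per candidate, N candidate anchors), so value r of anchor j sits at flat
// offset r * N + j. A minimal, self-contained sketch of that lookup (the helper name
// and shape comments are illustrative, not part of the patch; contiguous Float32 data
// is assumed, as in the code above):
import CoreML

func anchorColumn(_ output: MLMultiArray, anchor j: Int) -> [Float] {
    let rows = output.shape[1].intValue     // per-anchor values: 4 box + 1 confidence + extra features
    let anchors = output.shape[2].intValue  // number of candidate boxes, e.g. 8400
    let ptr = output.dataPointer.assumingMemoryBound(to: Float.self)
    return (0..<rows).map { r in ptr[r * anchors + j] }   // stride over rows to gather one anchor
}
// anchorColumn(prediction, anchor: 0) would return [x, y, w, h, conf, feature0, ...] for the first anchor.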
@IBOutlet var slider: UISlider! @@ -36,7 +37,7 @@ class ViewController: UIViewController { @IBOutlet weak var labelSliderConf: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - + let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) var session: AVCaptureSession! @@ -49,12 +50,12 @@ class ViewController: UIViewController { var t3 = CACurrentMediaTime() // FPS start var t4 = 0.0 // FPS dt smoothed // var cameraOutput: AVCapturePhotoOutput! - + // Developer mode let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings let save_detections = false // write every detection to detections.txt let save_frames = false // write every frame to frames.txt - + lazy var visionRequest: VNCoreMLRequest = { let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in @@ -64,54 +65,110 @@ class ViewController: UIViewController { request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop return request }() + + enum Task { + case detect + case human + } + + var task: Task = .detect + var confidenceThreshold:Float = 0.25 + var iouThreshold:Float = 0.4 override func viewDidLoad() { super.viewDidLoad() slider.value = 30 + taskSegmentControl.selectedSegmentIndex = 0 setLabels() setUpBoundingBoxViews() startVideo() // setModel() } - + @IBAction func vibrate(_ sender: Any) { selection.selectionChanged() } - + @IBAction func indexChanged(_ sender: Any) { selection.selectionChanged() activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! yolov8x(configuration: .init()).model - default: - break - } setModel() setUpBoundingBoxViews() activityIndicator.stopAnimating() } - + func setModel() { + + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } + case .human: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 1: + self.labelName.text = "YOLOv8s" + if #available(iOS 15.0, *) { + mlModel = try! yolov8s_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 2: + self.labelName.text = "YOLOv8m" + if #available(iOS 15.0, *) { + mlModel = try! 
yolov8m_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 3: + self.labelName.text = "YOLOv8l" + if #available(iOS 15.0, *) { + mlModel = try! yolov8l_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 4: + self.labelName.text = "YOLOv8x" + if #available(iOS 15.0, *) { + mlModel = try! yolov8x_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + default: + break + } + + } + DispatchQueue.global(qos: .userInitiated).async { [self] in + /// VNCoreMLModel detector = try! VNCoreMLModel(for: mlModel) detector.featureProvider = ThresholdProvider() - + /// VNCoreMLRequest let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) @@ -121,20 +178,43 @@ class ViewController: UIViewController { t2 = 0.0 // inference dt smoothed t3 = CACurrentMediaTime() // FPS start t4 = 0.0 // FPS dt smoothed + } } - + /// Update thresholds from slider values @IBAction func sliderChanged(_ sender: Any) { + self.confidenceThreshold = sliderConf.value + self.iouThreshold = sliderIoU.value let conf = Double(round(100 * sliderConf.value)) / 100 let iou = Double(round(100 * sliderIoU.value)) / 100 self.labelSliderConf.text = String(conf) + " Confidence Threshold" self.labelSliderIoU.text = String(iou) + " IoU Threshold" detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - + + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + switch sender.selectedSegmentIndex { + case 0: + if self.task != .detect { + self.task = .detect + self.setModel() + } + case 1: + if self.task != .human { + self.task = .human + for i in 0.. Double { let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) @@ -376,7 +484,7 @@ class ViewController: UIViewController { } return 0 } - + // Return RAM usage (GB) func memoryUsage() -> Double { var taskInfo = mach_task_basic_info() @@ -392,138 +500,140 @@ class ViewController: UIViewController { return 0 } } - - func show(predictions: [VNRecognizedObjectObservation]) { - let width = videoPreview.bounds.width // 375 pix - let height = videoPreview.bounds.height // 812 pix + + func show(predictions: [VNRecognizedObjectObservation], boxesAndValues: [(CGRect, Float, [Float])]) { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height var str = "" - - // ratio = videoPreview AR divided by sessionPreset AR + var ratio: CGFloat = 1.0 if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) // .photo + ratio = (height / width) / (4.0 / 3.0) } else { - ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. 
+ ratio = (height / width) / (16.0 / 9.0) } - - // date + let date = Date() let calendar = Calendar.current let hour = calendar.component(.hour, from: date) let minutes = calendar.component(.minute, from: date) let seconds = calendar.component(.second, from: date) let nanoseconds = calendar.component(.nanosecond, from: date) - let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - - self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .human: + resultCount = boxesAndValues.count + } + self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) - rect.size.height /= ratio - } - - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - - // Show the bounding box. - boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + displayRect.size.width *= ratio + } else { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: 0, y: 1-offset) + displayRect = displayRect.applying(transform) + } + displayRect.size.height /= ratio + } + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) + + boundingBoxViews[i].show(frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) + if developerMode { - // Write if save_detections { str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } - - // Action trigger upon detection - // if false { - // if (bestClass == "car") { // "cell phone", "car", "person" - // self.takePhoto(nil) - // // self.pauseButton(nil) - // sleep(2) - // } - // } } + } else { boundingBoxViews[i].hide() } } - - // Write - if developerMode { - if save_detections { - saveText(text: str, file: "detections.txt") // Write stats for each detection - } - if save_frames { - str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) - saveText(text: str, file: "frames.txt") // Write stats for each image - } - } - - // Debug - // print(str) - // print(UIDevice.current.identifierForVendor!) - // saveImage() } - + + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- let minimumZoom: CGFloat = 1.0 let maximumZoom: CGFloat = 10.0 var lastZoomFactor: CGFloat = 1.0 - + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { let device = videoCapture.captureDevice - + // Return zoom value between the minimum and maximum zoom values func minMaxZoom(_ factor: CGFloat) -> CGFloat { return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) } - + func update(scale factor: CGFloat) { do { try device.lockForConfiguration() @@ -535,7 +645,7 @@ class ViewController: UIViewController { print("\(error.localizedDescription)") } } - + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) switch pinch.state { case .began: fallthrough @@ -569,7 +679,7 @@ extension ViewController: AVCapturePhotoCaptureDelegate { let dataProvider = CGDataProvider(data: dataImage as CFData) let cgImageRef: CGImage! 
= CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) let image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: UIImage.Orientation.right) - + // Save to camera roll UIImageWriteToSavedPhotosAlbum(image, nil, nil, nil); } else { From 1c72a5285760bbba6f9c088784bc64003a1326ce Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 21 May 2024 12:17:38 +0900 Subject: [PATCH 02/26] Add human feature. --- YOLO.xcodeproj/project.pbxproj | 24 ++---------------------- YOLO/Info.plist | 2 +- YOLO/Utilities/BoundingBoxView.swift | 4 ++-- 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 19a6758..7dac285 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,11 +13,6 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; @@ -36,11 +31,6 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* 
yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -90,11 +80,6 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -213,14 +198,9 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, @@ -355,7 +335,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -383,7 +363,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index c36dbc0..0917317 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 24 + 29 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index bfbe2fb..dfb758e 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -85,9 +85,9 @@ class BoundingBoxView { if !innerTexts.isEmpty { innerTextLayer.string = innerTexts // Set the inner text - innerTextLayer.backgroundColor = UIColor.red.withAlphaComponent(0.5).cgColor // No background color + innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color innerTextLayer.isHidden = false // Make the inner text 
layer visible - innerTextLayer.foregroundColor = UIColor.white.cgColor // Set text color + innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) // Set the inner text layer frame } else { From 0cef67b72cdbf5c355e21486451720dc78d134a9 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Wed, 22 May 2024 21:33:37 +0900 Subject: [PATCH 03/26] Edit porstprocessing. --- YOLO/Utilities/PostProcessing.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift index b29d2d1..8ecdd42 100644 --- a/YOLO/Utilities/PostProcessing.swift +++ b/YOLO/Utilities/PostProcessing.swift @@ -34,7 +34,7 @@ func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [I return selectedIndices } -// Human model's output [1,95,8400] to [(Box, Confidence, HumanFeatures)] +// Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { let numAnchors = prediction.shape[2].intValue @@ -62,7 +62,7 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT var boxFeatures = [Float](repeating: 0, count: 11) for k in 0..<11 { - let key = (84 + k) * numAnchors + j + let key = (5 + k) * numAnchors + j boxFeatures[k] = featurePointer[key] } @@ -80,7 +80,7 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT for idx in selectedIndices { selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) } - + print(selectedBoxesAndFeatures) return selectedBoxesAndFeatures } From 09c644710e13a3b22ada2c3a6a4803a6b97fa5d5 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 4 Jun 2024 11:27:56 +0900 Subject: [PATCH 04/26] simple swift tracking --- YOLO.xcodeproj/project.pbxproj | 8 ++ YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 18 +++- YOLO/Utilities/HumanModel.swift | 155 ++++++++++++++++++++++++++++ YOLO/Utilities/PostProcessing.swift | 50 ++------- YOLO/Utilities/TrackingModel.swift | 126 ++++++++++++++++++++++ YOLO/ViewController.swift | 49 ++++++--- 7 files changed, 350 insertions(+), 58 deletions(-) create mode 100644 YOLO/Utilities/HumanModel.swift create mode 100644 YOLO/Utilities/TrackingModel.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 7dac285..bd01f8c 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -17,6 +17,8 @@ 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* 
BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -33,6 +35,8 @@ 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -52,6 +56,8 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */, + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */, 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, @@ -202,8 +208,10 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 0917317..8372868 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 29 + 34 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index c5daed8..e1ae774 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -192,13 +192,27 @@ + + + + + + + + + @@ -209,10 +223,12 @@ + + @@ -257,7 +273,7 @@ - + diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift new file mode 100644 index 0000000..0a167e7 --- /dev/null +++ b/YOLO/Utilities/HumanModel.swift @@ -0,0 +1,155 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App +// This struct is designed to turn the inference results of the YOLOv8-Human model into a manageable DataModel of human feature values ​​in the Ultralytics YOLO app. When in tracking mode, this struct averages the feature values ​​of a given individual across frames to a stable value. 
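// Aside: a minimal, self-contained sketch of the per-frame averaging idea described
// above (the type and values here are illustrative, not part of the patch). Each frame
// appends one raw reading; the reported value is the mean of the collected samples,
// which damps single-frame jitter in attributes such as age, height, or weight.
struct RunningEstimate {
    private(set) var samples: [Float] = []

    mutating func add(_ value: Float) {
        samples.append(value)            // one raw model output per frame
    }

    var stableValue: Float {             // mean of everything seen so far; 0 before any sample
        guard !samples.isEmpty else { return 0 }
        return samples.reduce(0, +) / Float(samples.count)
    }
}

var ageEstimate = RunningEstimate()
ageEstimate.add(30.0)
ageEstimate.add(32.0)
let displayedAge = Int(ageEstimate.stableValue.rounded())   // 31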
+// This struct automatically analyzes the boxes, scores, and feature values ​​provided to the update function to create a human model.// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import UIKit + +let updateFrequency: Int = 120 + +struct Person { + var index: Int + var box: CGRect = .zero + + var score: Float = 0 + var weight: Float = 0 + var height: Float = 0 + + var age: Int = 0 + + var gender: String = "female" + var genderConfidence: Float = 0 + var race: String = "asian" + var raceConfidence: Float = 0 + + var listCount: Int = 0 + var scoreRawList: [Float] = [] + var weightRawList: [Float] = [] + var heightRawList: [Float] = [] + var ageRawList: [Float] = [] + var maleRawList: [Float] = [] + var femaleRawList: [Float] = [] + var asianRawList: [Float] = [] + var whiteRawList: [Float] = [] + var middleEasternRawList: [Float] = [] + var indianRawList: [Float] = [] + var latinoRawList: [Float] = [] + var blackRawList: [Float] = [] + + var trackedBox: CGRect? + var color:UIColor + + var unDetectedCounter: Int = 0 + var stable = false + + init(index: Int) { + self.index = index + self.color = UIColor(red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } + + mutating func update(box:CGRect, score:Float, features:[Float]) { + self.box = box + + self.scoreRawList.append(score) + self.weightRawList.append(features[0]) + self.heightRawList.append(features[1]) + self.ageRawList.append(features[2]) + self.maleRawList.append(features[3]) + self.femaleRawList.append(features[4]) + self.asianRawList.append(features[5]) + self.whiteRawList.append(features[6]) + self.middleEasternRawList.append(features[7]) + self.indianRawList.append(features[8]) + self.latinoRawList.append(features[9]) + self.blackRawList.append(features[10]) + + if !stable || scoreRawList.count >= updateFrequency { + stable = true + calcurateFeatures() + } + + if scoreRawList.count >= updateFrequency { + scoreRawList.removeAll() + weightRawList.removeAll() + heightRawList.removeAll() + ageRawList.removeAll() + maleRawList.removeAll() + femaleRawList.removeAll() + asianRawList.removeAll() + whiteRawList.removeAll() + middleEasternRawList.removeAll() + indianRawList.removeAll() + latinoRawList.removeAll() + blackRawList.removeAll() + + } + + self.unDetectedCounter = 0 + } + + private mutating func calcurateFeatures() { + + self.score = average(of: scoreRawList) + self.weight = average(of: weightRawList) + self.height = average(of: heightRawList) + self.age = Int(round(average(of: ageRawList))) + let femaleAverage = average(of: femaleRawList) + let maleAverage = average(of: maleRawList) + let genderCandidates = [femaleAverage,maleAverage] + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let asianAverage = average(of: asianRawList) + let whiteAverage = average(of: whiteRawList) + let middleEasternAverage = average(of: middleEasternRawList) + let indianAverage = average(of: indianRawList) + let latinoAverage = average(of: latinoRawList) + let blackAverage = average(of: blackRawList) + + let raceCandidates = 
[asianAverage,whiteAverage,middleEasternAverage,indianAverage,latinoAverage,blackAverage] + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } + + func average(of numbers: [Float]) -> Float { + guard !numbers.isEmpty else { + return 0 + } + var sum: Float = 0 + for number in numbers { + sum += number + } + return sum / Float(numbers.count) + } + +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] + diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift index 8ecdd42..db25d12 100644 --- a/YOLO/Utilities/PostProcessing.swift +++ b/YOLO/Utilities/PostProcessing.swift @@ -84,49 +84,15 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT return selectedBoxesAndFeatures } -let genders = ["female", "male"] -let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] - -struct HumanFeatures { - let weight: Float - let height: Float - let age: Int - let gender: String - let genderConfidence: Float - let race: String - let raceConfidence: Float - - init(features:[Float]) { - self.weight = features[0] - self.height = features[1] - self.age = Int(round(features[2])) - let genderCandidates = Array(features[3..<5]) - var genderMaxIndex = 0 - var genderMaxValue = genderCandidates[0] - - for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { - if genderValue > genderMaxValue { - genderMaxValue = genderValue - genderMaxIndex = genderIndex + 1 - } - } - - self.gender = genders[genderMaxIndex] - self.genderConfidence = genderMaxValue - - let raceCandidates = Array(features[5...]) - var raceMaxIndex = 0 - var raceMaxValue = raceCandidates[0] - - for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { - if raceValue > raceMaxValue { - raceMaxValue = raceValue - raceMaxIndex = raceIndex + 1 - } - } - self.race = races[raceMaxIndex] - self.raceConfidence = raceMaxValue +func toPerson(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + var persons = [Person]() + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: -1) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + person.color = .red + persons.append(person) } + return persons } extension CGRect { diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift new file mode 100644 index 0000000..a4f5dc5 --- /dev/null +++ b/YOLO/Utilities/TrackingModel.swift @@ -0,0 +1,126 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App + +// This class is designed to track and identify the same person across frames using the inference results of the YOLOv8-Human model in the Ultralytics YOLO app. +// The tack function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. 
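// Aside: a minimal sketch of the frame-to-frame matching idea described above, in the
// spirit of track(boxesAndScoresAndFeatures:) below but simplified (the function names
// and the 0.5 threshold are illustrative). Each previously tracked box is matched to the
// current detection that covers the largest fraction of it; low-overlap pairs are left
// unmatched so they can age out or start new tracks.
import CoreGraphics

func coveredFraction(of tracked: CGRect, by detected: CGRect) -> CGFloat {
    let inter = tracked.intersection(detected)
    guard !inter.isNull, tracked.width > 0, tracked.height > 0 else { return 0 }
    return (inter.width * inter.height) / (tracked.width * tracked.height)
}

func matchTracks(previous: [CGRect], current: [CGRect], minOverlap: CGFloat = 0.5) -> [Int: Int] {
    var assignment: [Int: Int] = [:]                      // previous index -> current index
    for (pi, prev) in previous.enumerated() {
        var bestIndex = -1
        var bestFraction: CGFloat = 0
        for (ci, cur) in current.enumerated() where !assignment.values.contains(ci) {
            let f = coveredFraction(of: prev, by: cur)
            if f > bestFraction { bestFraction = f; bestIndex = ci }
        }
        if bestIndex >= 0, bestFraction >= minOverlap {
            assignment[pi] = bestIndex                    // same person carried into this frame
        }
    }
    return assignment
}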
+// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import Foundation +import Vision +import Accelerate + +class TrackingModel { + var persons = [Person]() + var personIndex:Int = 0 + var recent:[(CGRect, Float, [Float])] = [] + + func track(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + + if persons.isEmpty { + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: personIndex) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + personIndex += 1 + persons.append(person) + + } + return persons + } + + var unDetectedPersonIndexes:[Int] = [] + var usedDetectedIndex:Set = Set() + + for (pi, person) in persons.enumerated() { + var bestIOU:CGFloat = 0 + var bestIndex = 0 + + for (i, detected) in boxesAndScoresAndFeatures.enumerated() { + let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) + if IoU > bestIOU { + bestIOU = IoU + bestIndex = i + } + } + if bestIOU >= 50 { + let detectedPerson = boxesAndScoresAndFeatures[bestIndex] + persons[pi].update(box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) + usedDetectedIndex.insert(bestIndex) + } else { + unDetectedPersonIndexes.append(pi) + } + } + + let sortedIndices = unDetectedPersonIndexes.sorted(by: >) + for index in sortedIndices { + persons[index].unDetectedCounter += 1 + } + + for (index, det) in boxesAndScoresAndFeatures.enumerated() { + if !usedDetectedIndex.contains(index) { + var person = Person(index: personIndex) + person.update(box: det.0, score: det.1, features: det.2) + personIndex += 1 + persons.append(person) + } + } + + persons = removeOverlappingRects(persons: persons) + + var personsToShow: [Person] = [] + var removePersonIndexes: [Int] = [] + for (pindex, person) in persons.enumerated() { + if person.unDetectedCounter == 0 { + personsToShow.append(person) + } else if person.unDetectedCounter >= 15 { + removePersonIndexes.append(pindex) + } + } + let sortedRemoveIndices = removePersonIndexes.sorted(by: >) + for index in sortedRemoveIndices { + persons.remove(at: index) + } + + return personsToShow + + } +} + +func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { + let intersection = rect1.intersection(rect2) + + if intersection.isNull { + return 0.0 + } + + let intersectionArea = intersection.width * intersection.height + + let rect1Area = rect1.width * rect1.height + + let overlapPercentage = (intersectionArea / rect1Area) * 100 + + return overlapPercentage +} + +func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { + var filteredPersons = persons + var index = 0 + + while index < filteredPersons.count { + var shouldRemove = false + for j in (index + 1)..= threshold { + shouldRemove = true + break + } + } + if shouldRemove { + filteredPersons.remove(at: index) + } else { + index += 1 + } + } + + return filteredPersons +} diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 3307627..c848de5 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -74,6 +74,8 @@ class ViewController: UIViewController { var task: Task = .detect var confidenceThreshold:Float = 0.25 var iouThreshold:Float = 0.4 + var tracking = false + var tracker = TrackingModel() override func viewDidLoad() { super.viewDidLoad() @@ -212,6 +214,16 @@ class ViewController: UIViewController { } } + @IBAction func TrackingSwitch(_ sender: UISwitch) { + tracking.toggle() + if tracking { + sender.isOn = true + } else { + sender.isOn = false + } 
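        // Note: the toggle above mirrors the switch state back into `tracking`; as long as
        // the flag and the switch start out in sync, this is effectively `tracking = sender.isOn`.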
+ } + + @IBAction func takePhoto(_ sender: Any?) { let t0 = DispatchTime.now().uptimeNanoseconds @@ -393,9 +405,9 @@ class ViewController: UIViewController { case .detect: DispatchQueue.main.async { if let results = request.results as? [VNRecognizedObjectObservation] { - self.show(predictions: results, boxesAndValues: []) + self.show(predictions: results, persons: []) } else { - self.show(predictions: [], boxesAndValues: []) + self.show(predictions: [], persons: []) } // Measure FPS @@ -413,9 +425,15 @@ class ViewController: UIViewController { if let prediction = results.first?.featureValue.multiArrayValue { let pred = PostProcessHuman(prediction:prediction, confidenceThreshold: self.confidenceThreshold, iouThreshold: self.iouThreshold) - self.show(predictions: [], boxesAndValues: pred) + var persons:[Person] = [] + if !self.tracking { + persons = toPerson(boxesAndScoresAndFeatures: pred) + } else { + persons = self.tracker.track(boxesAndScoresAndFeatures: pred) + } + self.show(predictions: [], persons: persons) } else { - self.show(predictions: [], boxesAndValues: []) + self.show(predictions: [], persons: []) } if self.t1 < 10.0 { // valid dt self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time @@ -492,7 +510,7 @@ class ViewController: UIViewController { } } - func show(predictions: [VNRecognizedObjectObservation], boxesAndValues: [(CGRect, Float, [Float])]) { + func show(predictions: [VNRecognizedObjectObservation], persons: [Person]) { let width = videoPreview.bounds.width let height = videoPreview.bounds.height var str = "" @@ -518,7 +536,7 @@ class ViewController: UIViewController { case .detect: resultCount = predictions.count case .human: - resultCount = boxesAndValues.count + resultCount = persons.count } self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" for i in 0.. 
Date: Tue, 4 Jun 2024 15:16:31 +0900 Subject: [PATCH 05/26] hide a tracking toggle in detect mode --- YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 6 ++++-- YOLO/ViewController.swift | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 8372868..f671e09 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 34 + 40 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index e1ae774..2672b4e 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -192,13 +192,13 @@ - - + @@ -365,7 +365,6 @@ - diff --git a/YOLO/Utilities/PostProcessSegment.swift b/YOLO/Utilities/PostProcessSegment.swift index 417ecce..a7853fc 100644 --- a/YOLO/Utilities/PostProcessSegment.swift +++ b/YOLO/Utilities/PostProcessSegment.swift @@ -32,71 +32,77 @@ } } - func getBoundingBoxesAndMasks(feature: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Int, Float, MLMultiArray)] { - let numAnchors = feature.shape[2].intValue - let numFeatures = feature.shape[1].intValue - let boxFeatureLength = 4 - let maskConfidenceLength = 32 - let numClasses = numFeatures - boxFeatureLength - maskConfidenceLength - - var results = [(CGRect, Float, Int, MLMultiArray)]() - let featurePointer = feature.dataPointer.assumingMemoryBound(to: Float.self) - - let queue = DispatchQueue.global(qos: .userInitiated) - let resultsQueue = DispatchQueue(label: "resultsQueue", attributes: .concurrent) - - DispatchQueue.concurrentPerform(iterations: numAnchors) { j in - let baseOffset = j - let x = featurePointer[baseOffset] - let y = featurePointer[numAnchors + baseOffset] - let width = featurePointer[2 * numAnchors + baseOffset] - let height = featurePointer[3 * numAnchors + baseOffset] - - let boxWidth = CGFloat(width) - let boxHeight = CGFloat(height) - let boxX = CGFloat(x - width / 2) - let boxY = CGFloat(y - height / 2) - - let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) - - var classProbs = [Float](repeating: 0, count: numClasses) - classProbs.withUnsafeMutableBufferPointer { classProbsPointer in - vDSP_mtrans(featurePointer + 4 * numAnchors + baseOffset, numAnchors, classProbsPointer.baseAddress!, 1, 1, vDSP_Length(numClasses)) + func getBoundingBoxesAndMasks(feature: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Int, Float, MLMultiArray)] { + let numAnchors = feature.shape[2].intValue + let numFeatures = feature.shape[1].intValue + let boxFeatureLength = 4 + let maskConfidenceLength = 32 + let numClasses = numFeatures - boxFeatureLength - maskConfidenceLength + + var results = [(CGRect, Float, Int, MLMultiArray)]() + let featurePointer = feature.dataPointer.assumingMemoryBound(to: Float.self) + + let queue = DispatchQueue.global(qos: .userInitiated) + let resultsQueue = DispatchQueue(label: "resultsQueue", attributes: .concurrent) + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let baseOffset = j + let x = featurePointer[baseOffset] + let y = featurePointer[numAnchors + baseOffset] + let width = featurePointer[2 * numAnchors + baseOffset] + let height = featurePointer[3 * numAnchors + baseOffset] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var classProbs = 
[Float](repeating: 0, count: numClasses) + classProbs.withUnsafeMutableBufferPointer { classProbsPointer in + vDSP_mtrans(featurePointer + 4 * numAnchors + baseOffset, numAnchors, classProbsPointer.baseAddress!, 1, 1, vDSP_Length(numClasses)) + } + var maxClassValue: Float = 0 + var maxClassIndex: vDSP_Length = 0 + vDSP_maxvi(classProbs, 1, &maxClassValue, &maxClassIndex, vDSP_Length(numClasses)) + + if maxClassValue > confidenceThreshold { + let maskProbsPointer = featurePointer + (4 + numClasses) * numAnchors + baseOffset + let maskProbs = try! MLMultiArray(shape: [NSNumber(value: maskConfidenceLength)], dataType: .float32) + for i in 0.. confidenceThreshold { - let maskProbsPointer = featurePointer + (4 + numClasses) * numAnchors + baseOffset - let maskProbs = try! MLMultiArray(shape: [NSNumber(value: maskConfidenceLength)], dataType: .float32) - for i in 0.. 19 { + count = 0 + } if colors[label] == nil { // if key not in dict - colors[label] = UIColor(red: CGFloat.random(in: 0...1), - green: CGFloat.random(in: 0...1), - blue: CGFloat.random(in: 0...1), - alpha: 0.6) + colors[label] = color } } + count = 0 for (key,color) in colors { + let color = ultralyticsColorsolors[count] + count += 1 + if count > 19 { + count = 0 + } guard let colorForMask = color.toRGBComponents() else {fatalError()} - colorsForMask[key] = colorForMask + colorsForMask.append(colorForMask) } } From 502f85c3a0ee94f093653686f44b51dd309b0b97 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Wed, 17 Jul 2024 22:00:53 +0900 Subject: [PATCH 13/26] Fixed landscape mode layout --- YOLO.xcodeproj/project.pbxproj | 12 +-- YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 101 ++++++++++++++++++------ YOLO/Utilities/PostProcessSegment.swift | 6 +- YOLO/ViewController.swift | 18 ++++- 5 files changed, 104 insertions(+), 35 deletions(-) diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 023e0f9..cdad1cd 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -346,8 +346,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = MFN25KNUGJ; + CURRENT_PROJECT_VERSION = 2; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -356,7 +356,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; + MARKETING_VERSION = 8.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; @@ -374,8 +374,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = MFN25KNUGJ; + CURRENT_PROJECT_VERSION = 2; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -384,7 +384,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; + MARKETING_VERSION = 8.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 0cf5498..567c1a6 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 
+21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 361 + 382 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 27da3ac..6380d41 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,9 +1,10 @@ - + + @@ -14,17 +15,17 @@ - + - + - + @@ -52,7 +53,7 @@ - + - + - + @@ -106,7 +107,7 @@ - + @@ -118,7 +119,7 @@ - + @@ -139,7 +140,7 @@ - + @@ -179,7 +180,7 @@ - + @@ -190,19 +191,19 @@ + + + @@ -263,12 +299,15 @@ + + + @@ -278,11 +317,14 @@ + + + @@ -311,14 +353,14 @@ - + - + @@ -339,15 +381,19 @@ + + + + @@ -365,6 +411,9 @@ + + + diff --git a/YOLO/Utilities/PostProcessSegment.swift b/YOLO/Utilities/PostProcessSegment.swift index a7853fc..30da29a 100644 --- a/YOLO/Utilities/PostProcessSegment.swift +++ b/YOLO/Utilities/PostProcessSegment.swift @@ -28,7 +28,11 @@ offSet = height / ratio margin = (offSet - self.videoPreview.bounds.width) / 2 self.maskLayer.frame = CGRect(x:-margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) - + } else { + offSet = width / ratio + margin = (offSet - self.videoPreview.bounds.height) / 2 + self.maskLayer.frame = CGRect(x:0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) + } } diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index e9f747e..737a620 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -42,8 +42,12 @@ class ViewController: UIViewController { @IBOutlet weak var labelSliderConfLandScape: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! @IBOutlet weak var labelSliderIoULandScape: UILabel! + @IBOutlet weak var playButtonLandScape: UIButton! + @IBOutlet weak var pauseButtonLandScape: UIButton! + @IBOutlet weak var shareButtonLandScape: UIButton! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! + @IBOutlet weak var toolbar: UIToolbar! @IBOutlet weak var forcus: UIImageView! 
var maskLayer: CALayer = CALayer() let selection = UISelectionFeedbackGenerator() @@ -109,6 +113,10 @@ class ViewController: UIViewController { sliderConfLandScape.isHidden = false labelSliderIoULandScape.isHidden = false sliderIoULandScape.isHidden = false + toolbar.isHidden = true + playButtonLandScape.isHidden = false + pauseButtonLandScape.isHidden = false + shareButtonLandScape.isHidden = false } else { labelSliderConf.isHidden = false @@ -119,9 +127,17 @@ class ViewController: UIViewController { sliderConfLandScape.isHidden = true labelSliderIoULandScape.isHidden = true sliderIoULandScape.isHidden = true + toolbar.isHidden = false + playButtonLandScape.isHidden = true + pauseButtonLandScape.isHidden = true + shareButtonLandScape.isHidden = true + } self.videoCapture.previewLayer?.frame = CGRect(x: 0, y: 0, width: size.width, height: size.height) - + coordinator.animate(alongsideTransition: { context in + }, completion: { context in + self.setupMaskLayer() + }) } private func setUpOrientationChangeNotification() { From f976a35a8efe7abb0a460ad6ae5b87ee6dc1c7d1 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Wed, 7 Aug 2024 08:33:10 +0900 Subject: [PATCH 14/26] dev --- YOLO.xcodeproj/project.pbxproj | 8 +- YOLO/Info.plist | 2 +- YOLO/Utilities/PostProcessPose.swift | 155 ++++++++++++++ YOLO/Utilities/PostProcessSegment.swift | 3 +- YOLO/ViewController.swift | 267 ++++++++++++++---------- 5 files changed, 320 insertions(+), 115 deletions(-) create mode 100644 YOLO/Utilities/PostProcessPose.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index cdad1cd..f7591df 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -19,6 +19,7 @@ 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; + 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */; }; 73FE95F32C3500AC00C6C806 /* PostProcessSegment.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73FE95F22C3500AC00C6C806 /* PostProcessSegment.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -38,6 +39,7 @@ 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; + 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PostProcessPose.swift; sourceTree = ""; }; 73FE95F22C3500AC00C6C806 /* PostProcessSegment.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = 
PostProcessSegment.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; @@ -64,6 +66,7 @@ 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, + 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */, ); path = Utilities; sourceTree = ""; @@ -207,6 +210,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */, 73FE95F32C3500AC00C6C806 /* PostProcessSegment.swift in Sources */, 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, @@ -347,7 +351,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -375,7 +379,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 567c1a6..0d7e3f3 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 382 + 391 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift new file mode 100644 index 0000000..f5c22cb --- /dev/null +++ b/YOLO/Utilities/PostProcessPose.swift @@ -0,0 +1,155 @@ +import Foundation +import UIKit +import CoreML + +@available(iOS 15.0, *) +extension ViewController { + func PostProcessPose(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { + let numAnchors = prediction.shape[2].intValue + let featureCount = prediction.shape[1].intValue - 5 // of the 56 values, the 51 features remaining after excluding the box (4) and confidence (1) + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: featureCount) + for k in 0..
Date: Mon, 12 Aug 2024 14:50:20 +0900 Subject: [PATCH 15/26] add pose feature. --- YOLO.xcodeproj/project.pbxproj | 8 +- YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 66 ++-- YOLO/Utilities/Colors.swift | 77 +++++ YOLO/Utilities/PostProcessPose.swift | 194 ++++++----- YOLO/ViewController.swift | 319 ++++++++++-------- ...03\203\343\203\210 2024-08-09 9.53.27.png" | Bin 0 -> 153602 bytes 7 files changed, 400 insertions(+), 266 deletions(-) create mode 100644 YOLO/Utilities/Colors.swift create mode 100644 "YOLO/\343\202\271\343\202\257\343\203\252\343\203\274\343\203\263\343\202\267\343\203\247\343\203\203\343\203\210 2024-08-09 9.53.27.png" diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index f7591df..e2c2f89 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -17,6 +17,7 @@ 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; + 7333105F2C69CE95001D647B /* Colors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7333105E2C69CE95001D647B /* Colors.swift */; }; 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */; }; @@ -37,6 +38,7 @@ 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; + 7333105E2C69CE95001D647B /* Colors.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Colors.swift; sourceTree = ""; }; 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PostProcessPose.swift; sourceTree = ""; }; @@ -67,6 +69,7 @@ 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */, + 7333105E2C69CE95001D647B /* Colors.swift */, ); path = Utilities; sourceTree = ""; @@ -211,6 +214,7 @@ buildActionMask = 2147483647; files = ( 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */, + 7333105F2C69CE95001D647B /* Colors.swift in Sources */, 
73FE95F32C3500AC00C6C806 /* PostProcessSegment.swift in Sources */, 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, @@ -351,7 +355,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = MFN25KNUGJ; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -379,7 +383,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = MFN25KNUGJ; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 0d7e3f3..7af94ae 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 391 + 434 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 6380d41..5c1049e 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,6 +1,6 @@ - + @@ -15,17 +15,17 @@ - + - + - + @@ -53,7 +53,7 @@ - + - + - + @@ -107,7 +107,7 @@ - + @@ -119,7 +119,7 @@ - + @@ -140,7 +140,7 @@ - + @@ -180,30 +180,31 @@ - + + - - + - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - + - - - - - - - - - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - + + - + - + - - - + diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index c743e44..add81aa 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -20,9 +20,6 @@ class BoundingBoxView { /// The layer that displays the label and confidence score for the detected object. let textLayer: CATextLayer - /// The layer that displays the inner text within the bounding box. - let innerTextLayer: CATextLayer - /// Initializes a new BoundingBoxView with configured shape and text layers. init() { shapeLayer = CAShapeLayer() @@ -36,32 +33,22 @@ class BoundingBoxView { textLayer.fontSize = 14 // Set font size for the label text textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels textLayer.alignmentMode = .center // Center-align the text within the layer - - innerTextLayer = CATextLayer() - innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs - innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays - innerTextLayer.fontSize = 12 // Set font size for the inner text - innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text - innerTextLayer.alignmentMode = .left // Left-align the text within the layer - innerTextLayer.isWrapped = true // Wrap the text to fit within the layer } - /// Adds the bounding box, text, and inner text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. + /// Adds the bounding box and text layers to a specified parent layer. 
+ /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. func addToLayer(_ parent: CALayer) { parent.addSublayer(shapeLayer) parent.addSublayer(textLayer) - parent.addSublayer(innerTextLayer) } - /// Updates the bounding box, label, and inner text to be visible with specified properties. + /// Updates the bounding box and label to be visible with specified properties. /// - Parameters: /// - frame: The CGRect frame defining the bounding box's size and position. /// - label: The text label to display (e.g., object class and confidence). /// - color: The color of the bounding box stroke and label background. /// - alpha: The opacity level for the bounding box stroke and label background. - /// - innerTexts: The text to display inside the bounding box. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { + func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { CATransaction.setDisableActions(true) // Disable implicit animations let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box @@ -83,26 +70,11 @@ class BoundingBoxView { let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame - - if !innerTexts.isEmpty { - innerTextLayer.string = innerTexts // Set the inner text - innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color - innerTextLayer.isHidden = false // Make the inner text layer visible - innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color - innerTextLayer.frame = CGRect( - x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, - height: frame.height - 8) - // Set the inner text layer frame - } else { - innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty - } - } - /// Hides the bounding box, text, and inner text layers. + /// Hides the bounding box and text layers. func hide() { shapeLayer.isHidden = true textLayer.isHidden = true - innerTextLayer.isHidden = true } } diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift deleted file mode 100644 index 13778be..0000000 --- a/YOLO/Utilities/HumanModel.swift +++ /dev/null @@ -1,150 +0,0 @@ -// Ultralytics YOLO 🚀 - AGPL-3.0 License -// -// HumanModel for Ultralytics YOLO App -// This struct is designed to turn the inference results of the YOLOv8-Human model into a manageable DataModel of human feature values ​​in the Ultralytics YOLO app. When in tracking mode, this struct averages the feature values ​​of a given individual across frames to a stable value. -// This struct automatically analyzes the boxes, scores, and feature values ​​provided to the update function to create a human model.// Licensed under AGPL-3.0. 
For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license -// Access the source code: https://github.com/ultralytics/yolo-ios-app - -import Foundation -import UIKit - -let updateFrequency: Int = 120 - -struct Person { - var index: Int - var box: CGRect = .zero - - var score: Float = 0 - var weight: Float = 0 - var height: Float = 0 - - var age: Int = 0 - - var gender: String = "female" - var genderConfidence: Float = 0 - var race: String = "asian" - var raceConfidence: Float = 0 - - var listCount: Int = 0 - var scoreRawList: [Float] = [] - var weightRawList: [Float] = [] - var heightRawList: [Float] = [] - var ageRawList: [Float] = [] - var maleRawList: [Float] = [] - var femaleRawList: [Float] = [] - var asianRawList: [Float] = [] - var whiteRawList: [Float] = [] - var middleEasternRawList: [Float] = [] - var indianRawList: [Float] = [] - var latinoRawList: [Float] = [] - var blackRawList: [Float] = [] - - var trackedBox: CGRect? - var color: UIColor - - var unDetectedCounter: Int = 0 - var stable = false - - init(index: Int) { - self.index = index - self.color = UIColor( - red: CGFloat.random(in: 0...1), - green: CGFloat.random(in: 0...1), - blue: CGFloat.random(in: 0...1), - alpha: 0.6) - } - - mutating func update(box: CGRect, score: Float, features: [Float]) { - self.box = box - if scoreRawList.count >= updateFrequency { - scoreRawList.removeFirst() - weightRawList.removeFirst() - heightRawList.removeFirst() - ageRawList.removeFirst() - maleRawList.removeFirst() - femaleRawList.removeFirst() - asianRawList.removeFirst() - whiteRawList.removeFirst() - middleEasternRawList.removeFirst() - indianRawList.removeFirst() - latinoRawList.removeFirst() - blackRawList.removeFirst() - } - - self.scoreRawList.append(score) - self.weightRawList.append(features[0]) - self.heightRawList.append(features[1]) - self.ageRawList.append(features[2]) - self.femaleRawList.append(features[3]) - self.maleRawList.append(features[4]) - self.asianRawList.append(features[5]) - self.whiteRawList.append(features[6]) - self.middleEasternRawList.append(features[7]) - self.indianRawList.append(features[8]) - self.latinoRawList.append(features[9]) - self.blackRawList.append(features[10]) - calcurateFeatures() - - self.unDetectedCounter = 0 - } - - private mutating func calcurateFeatures() { - - self.score = average(of: scoreRawList) - self.weight = average(of: weightRawList) - self.height = average(of: heightRawList) - self.age = Int(round(average(of: ageRawList))) - let femaleAverage = average(of: femaleRawList) - let maleAverage = average(of: maleRawList) - let genderCandidates = [femaleAverage, maleAverage] - var genderMaxIndex = 0 - var genderMaxValue = genderCandidates[0] - - for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { - if genderValue > genderMaxValue { - genderMaxValue = genderValue - genderMaxIndex = genderIndex + 1 - } - } - - self.gender = genders[genderMaxIndex] - self.genderConfidence = genderMaxValue - - let asianAverage = average(of: asianRawList) - let whiteAverage = average(of: whiteRawList) - let middleEasternAverage = average(of: middleEasternRawList) - let indianAverage = average(of: indianRawList) - let latinoAverage = average(of: latinoRawList) - let blackAverage = average(of: blackRawList) - - let raceCandidates = [ - asianAverage, whiteAverage, middleEasternAverage, indianAverage, latinoAverage, blackAverage, - ] - var raceMaxIndex = 0 - var raceMaxValue = raceCandidates[0] - - for (raceIndex, raceValue) in 
raceCandidates.dropFirst().enumerated() { - if raceValue > raceMaxValue { - raceMaxValue = raceValue - raceMaxIndex = raceIndex + 1 - } - } - self.race = races[raceMaxIndex] - self.raceConfidence = raceMaxValue - } - - func average(of numbers: [Float]) -> Float { - guard !numbers.isEmpty else { - return 0 - } - var sum: Float = 0 - for number in numbers { - sum += number - } - return sum / Float(numbers.count) - } - -} - -let genders = ["female", "male"] -let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift index ad9f62b..009fe92 100644 --- a/YOLO/Utilities/PostProcessPose.swift +++ b/YOLO/Utilities/PostProcessPose.swift @@ -3,7 +3,43 @@ import Foundation import UIKit @available(iOS 15.0, *) + extension ViewController { + + func setupMaskLayer() { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height + + var ratio: CGFloat = 1.0 + if videoCapture.captureSession.sessionPreset == .photo { + ratio = (4.0 / 3.0) + } else { + ratio = (16.0 / 9.0) + } + + var offSet = CGFloat.zero + var margin = CGFloat.zero + if view.bounds.width < view.bounds.height { + offSet = height / ratio + margin = (offSet - self.videoPreview.bounds.width) / 2 + self.maskLayer.frame = CGRect( + x: -margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) + } else { + offSet = width / ratio + margin = (offSet - self.videoPreview.bounds.height) / 2 + self.maskLayer.frame = CGRect( + x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) + } + + } + + func removeAllMaskSubLayers() { + self.maskLayer.sublayers?.forEach { layer in + layer.removeFromSuperlayer() + } + self.maskLayer.sublayers = nil + } + func PostProcessPose(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { @@ -175,3 +211,32 @@ extension ViewController { } } + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} diff --git a/YOLO/Utilities/PostProcessSegment.swift b/YOLO/Utilities/PostProcessSegment.swift deleted file mode 100644 index 4ae8c45..0000000 --- a/YOLO/Utilities/PostProcessSegment.swift +++ /dev/null @@ -1,290 +0,0 @@ -// Ultralytics YOLO 🚀 - AGPL-3.0 License -// -// PostProcessSegment for Ultralytics YOLO App - -// These functions are designed to post-process inference results from the YOLOv8-Segment model in the Ultralytics YOLO app to display segment masks. 
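// A minimal standalone sketch of the mask assembly these helpers implement, assuming the
// YOLOv8-Segment layout used here: 32 mask coefficients per box and a 1x32x160x160 prototype
// tensor. An instance mask is the matrix product of the two, thresholded per pixel; the
// generateColoredMaskImage function below performs the same step and adds coloring and cropping.
// instanceMask is a hypothetical name used only for illustration.
import Accelerate
import CoreML

func instanceMask(coefficients: [Float], protos: MLMultiArray, threshold: Float = 0.5) -> [Bool] {
  let channels = protos.shape[1].intValue  // 32 mask channels; must equal coefficients.count
  let maskHeight = protos.shape[2].intValue  // e.g. 160
  let maskWidth = protos.shape[3].intValue  // e.g. 160
  let protoPointer = protos.dataPointer.assumingMemoryBound(to: Float.self)
  var flat = [Float](repeating: 0, count: maskHeight * maskWidth)
  // (1 x channels) x (channels x H*W) -> (1 x H*W)
  vDSP_mmul(coefficients, 1, protoPointer, 1, &flat, 1,
            vDSP_Length(1), vDSP_Length(maskHeight * maskWidth), vDSP_Length(channels))
  return flat.map { $0 > threshold }  // per-pixel foreground decision
}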
- -import Accelerate -import MetalPerformanceShaders -// Access the source code: https://github.com/ultralytics/yolo-ios-app -import UIKit -import Vision - -@available(iOS 15.0, *) -extension ViewController { - func setupMaskLayer() { - let width = videoPreview.bounds.width - let height = videoPreview.bounds.height - - var ratio: CGFloat = 1.0 - if videoCapture.captureSession.sessionPreset == .photo { - ratio = (4.0 / 3.0) - } else { - ratio = (16.0 / 9.0) - } - var offSet = CGFloat.zero - var margin = CGFloat.zero - if view.bounds.width < view.bounds.height { - offSet = height / ratio - margin = (offSet - self.videoPreview.bounds.width) / 2 - self.maskLayer.frame = CGRect( - x: -margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) - } else { - offSet = width / ratio - margin = (offSet - self.videoPreview.bounds.height) / 2 - self.maskLayer.frame = CGRect( - x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) - - } - } - - func getBoundingBoxesAndMasks( - feature: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float - ) -> [(CGRect, Int, Float, MLMultiArray)] { - let numAnchors = feature.shape[2].intValue - let numFeatures = feature.shape[1].intValue - let boxFeatureLength = 4 - let maskConfidenceLength = 32 - let numClasses = numFeatures - boxFeatureLength - maskConfidenceLength - - var results = [(CGRect, Float, Int, MLMultiArray)]() - let featurePointer = feature.dataPointer.assumingMemoryBound(to: Float.self) - - let queue = DispatchQueue.global(qos: .userInitiated) - let resultsQueue = DispatchQueue(label: "resultsQueue", attributes: .concurrent) - - DispatchQueue.concurrentPerform(iterations: numAnchors) { j in - let baseOffset = j - let x = featurePointer[baseOffset] - let y = featurePointer[numAnchors + baseOffset] - let width = featurePointer[2 * numAnchors + baseOffset] - let height = featurePointer[3 * numAnchors + baseOffset] - - let boxWidth = CGFloat(width) - let boxHeight = CGFloat(height) - let boxX = CGFloat(x - width / 2) - let boxY = CGFloat(y - height / 2) - - let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) - - var classProbs = [Float](repeating: 0, count: numClasses) - classProbs.withUnsafeMutableBufferPointer { classProbsPointer in - vDSP_mtrans( - featurePointer + 4 * numAnchors + baseOffset, numAnchors, classProbsPointer.baseAddress!, - 1, 1, vDSP_Length(numClasses)) - } - var maxClassValue: Float = 0 - var maxClassIndex: vDSP_Length = 0 - vDSP_maxvi(classProbs, 1, &maxClassValue, &maxClassIndex, vDSP_Length(numClasses)) - - if maxClassValue > confidenceThreshold { - let maskProbsPointer = featurePointer + (4 + numClasses) * numAnchors + baseOffset - let maskProbs = try! MLMultiArray( - shape: [NSNumber(value: maskConfidenceLength)], dataType: .float32) - for i in 0.. 
$1.0.size.width * $1.0.size.height - } - - var newLayers: [CALayer] = [] - - for (box, classIndex, conf, masksIn) in sortedObjects { - group.enter() - DispatchQueue.global(qos: .userInitiated).async { - defer { group.leave() } - if let maskImage = self.generateColoredMaskImage( - from: masksIn, protos: maskArray, in: self.maskLayer.bounds.size, colorIndex: classIndex, - boundingBox: box) - { - DispatchQueue.main.async { - let adjustedBox = self.adjustBox(box, toFitIn: self.maskLayer.bounds.size) - - let maskImageLayer = CALayer() - maskImageLayer.frame = adjustedBox - maskImageLayer.contents = maskImage - maskImageLayer.opacity = 0.5 - newLayers.append(maskImageLayer) - } - } - } - } - - group.notify(queue: .main) { - self.removeAllMaskSubLayers() - newLayers.forEach { self.maskLayer.addSublayer($0) } - - print("Processing Time: \(Date().timeIntervalSince(startTime)) seconds") - } - } - - func generateColoredMaskImage( - from masksIn: MLMultiArray, protos: MLMultiArray, in size: CGSize, colorIndex: Int, - boundingBox: CGRect - ) -> CGImage? { - let maskWidth = protos.shape[3].intValue - let maskHeight = protos.shape[2].intValue - let maskChannels = protos.shape[1].intValue - - guard protos.shape.count == 4, protos.shape[0].intValue == 1, masksIn.shape.count == 1, - masksIn.shape[0].intValue == maskChannels - else { - print("Invalid shapes for protos or masksIn") - return nil - } - - let masksPointer = masksIn.dataPointer.assumingMemoryBound(to: Float.self) - let protosPointer = protos.dataPointer.assumingMemoryBound(to: Float.self) - - let masksPointerOutput = UnsafeMutablePointer.allocate(capacity: maskHeight * maskWidth) - vDSP_mmul( - masksPointer, 1, protosPointer, 1, masksPointerOutput, 1, vDSP_Length(1), - vDSP_Length(maskHeight * maskWidth), vDSP_Length(maskChannels)) - - let threshold: Float = 0.5 - let maskColorIndex = colorIndex % 20 - let color = colorsForMask[colorIndex] - let red = UInt8(color.red) - let green = UInt8(color.green) - let blue = UInt8(color.blue) - - var maskPixels = [UInt8](repeating: 0, count: maskHeight * maskWidth * 4) - for y in 0.. 
threshold { - let pixelIndex = index * 4 - maskPixels[pixelIndex] = red - maskPixels[pixelIndex + 1] = green - maskPixels[pixelIndex + 2] = blue - maskPixels[pixelIndex + 3] = 255 - } - } - } - - let maskDataPointer = UnsafeMutablePointer.allocate(capacity: maskPixels.count) - maskDataPointer.initialize(from: maskPixels, count: maskPixels.count) - - let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue) - let colorSpace = CGColorSpaceCreateDeviceRGB() - - let maskDataProvider = CGDataProvider( - dataInfo: nil, data: maskDataPointer, size: maskPixels.count - ) { _, data, _ in - data.deallocate() - } - - guard - let maskCGImage = CGImage( - width: maskWidth, height: maskHeight, bitsPerComponent: 8, bitsPerPixel: 32, - bytesPerRow: maskWidth * 4, space: colorSpace, bitmapInfo: bitmapInfo, - provider: maskDataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - else { - masksPointerOutput.deallocate() - return nil - } - - let maskCIImage = CIImage(cgImage: maskCGImage) - let scaledCIImage = maskCIImage.transformed( - by: CGAffineTransform( - scaleX: size.width / CGFloat(maskWidth), y: size.height / CGFloat(maskHeight))) - let invertedY = size.height - (boundingBox.origin.y + boundingBox.height) * size.height / 640.0 - let cropRect = CGRect( - x: boundingBox.origin.x * size.width / 640.0, y: invertedY, - width: boundingBox.width * size.width / 640.0, - height: boundingBox.height * size.height / 640.0) - - let croppedCIImage = scaledCIImage.cropped(to: cropRect) - - let ciContext = CIContext() - guard let cgImage = ciContext.createCGImage(croppedCIImage, from: cropRect) else { - masksPointerOutput.deallocate() - return nil - } - - masksPointerOutput.deallocate() - - return cgImage - } - - func removeAllMaskSubLayers() { - self.maskLayer.sublayers?.forEach { layer in - layer.removeFromSuperlayer() - } - self.maskLayer.sublayers = nil - } - - func adjustBox(_ box: CGRect, toFitIn containerSize: CGSize) -> CGRect { - let xScale = containerSize.width / 640.0 - let yScale = containerSize.height / 640.0 - return CGRect( - x: box.origin.x * xScale, y: box.origin.y * yScale, width: box.size.width * xScale, - height: box.size.height * yScale) - } -} - -extension UIColor { - func toRGBComponents() -> (red: UInt8, green: UInt8, blue: UInt8)? { - var red: CGFloat = 0 - var green: CGFloat = 0 - var blue: CGFloat = 0 - var alpha: CGFloat = 0 - - let success = self.getRed(&red, green: &green, blue: &blue, alpha: &alpha) - - if success { - let redUInt8 = UInt8(red * 255.0) - let greenUInt8 = UInt8(green * 255.0) - let blueUInt8 = UInt8(blue * 255.0) - return (red: redUInt8, green: greenUInt8, blue: blueUInt8) - } else { - return nil - } - } -} diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift deleted file mode 100644 index d84e14a..0000000 --- a/YOLO/Utilities/PostProcessing.swift +++ /dev/null @@ -1,103 +0,0 @@ -// Ultralytics YOLO 🚀 - AGPL-3.0 License -// -// PostProcessing for Ultralytics YOLO App -// This feature is designed to post-process the output of a YOLOv8 model within the Ultralytics YOLO app to extract high-confidence objects. -// Output high confidence boxes and their corresponding feature values using Non max suppression. -// Licensed under AGPL-3.0. 
For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license -// Access the source code: https://github.com/ultralytics/yolo-ios-app - -import CoreML -import Foundation -import Vision - -func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { - let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } - var selectedIndices = [Int]() - var activeIndices = [Bool](repeating: true, count: boxes.count) - - for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { - activeIndices[otherIdx] = false - } - } - } - } - } - return selectedIndices -} - -// Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] - -func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) - -> [(CGRect, Float, [Float])] -{ - let numAnchors = prediction.shape[2].intValue - var boxes = [CGRect]() - var scores = [Float]() - var features = [[Float]]() - let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) - let lock = DispatchQueue(label: "com.example.lock") - - DispatchQueue.concurrentPerform(iterations: numAnchors) { j in - let confIndex = 4 * numAnchors + j - let confidence = featurePointer[confIndex] - if confidence > confidenceThreshold { - let x = featurePointer[j] - let y = featurePointer[numAnchors + j] - let width = featurePointer[2 * numAnchors + j] - let height = featurePointer[3 * numAnchors + j] - - let boxWidth = CGFloat(width) - let boxHeight = CGFloat(height) - let boxX = CGFloat(x - width / 2) - let boxY = CGFloat(y - height / 2) - - let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) - - var boxFeatures = [Float](repeating: 0, count: 11) - for k in 0..<11 { - let key = (5 + k) * numAnchors + j - boxFeatures[k] = featurePointer[key] - } - - lock.sync { - boxes.append(boundingBox) - scores.append(confidence) - features.append(boxFeatures) - } - } - } - - let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) - var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() - - for idx in selectedIndices { - selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) - } - print(selectedBoxesAndFeatures) - return selectedBoxesAndFeatures -} - -func toPerson(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { - var persons = [Person]() - for detectedHuman in boxesAndScoresAndFeatures { - var person = Person(index: -1) - person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) - person.color = .red - persons.append(person) - } - return persons -} - -extension CGRect { - var area: CGFloat { - return width * height - } -} diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift deleted file mode 100644 index 1beedfa..0000000 --- a/YOLO/Utilities/TrackingModel.swift +++ /dev/null @@ -1,128 +0,0 @@ -// Ultralytics YOLO 🚀 - AGPL-3.0 License -// -// HumanModel for Ultralytics YOLO App - -// This class is designed to track and identify the same person across frames using the inference results of the YOLOv8-Human model in the Ultralytics YOLO app. -// The tack function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. 
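// A minimal sketch of the overlap-based association described above, assuming the same 50%
// cutoff the track function below uses; matchByOverlap is a hypothetical name for illustration
// only. Each tracked box is paired with the current detection that covers the largest fraction
// of it, and pairs below the cutoff are treated as undetected.
import CoreGraphics

func matchByOverlap(previous: [CGRect], current: [CGRect], cutoff: CGFloat = 50) -> [Int?] {
  return previous.map { (tracked: CGRect) -> Int? in
    var bestIndex: Int? = nil
    var bestOverlap: CGFloat = 0
    for (i, candidate) in current.enumerated() {
      let intersection = tracked.intersection(candidate)
      guard !intersection.isNull, tracked.width > 0, tracked.height > 0 else { continue }
      // percentage of the tracked box covered by the detection, as in overlapPercentage below
      let overlap = (intersection.width * intersection.height) / (tracked.width * tracked.height) * 100
      if overlap > bestOverlap {
        bestOverlap = overlap
        bestIndex = i
      }
    }
    return bestOverlap >= cutoff ? bestIndex : nil
  }
}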
-// Access the source code: https://github.com/ultralytics/yolo-ios-app - -import Accelerate -import Foundation -import Vision - -class TrackingModel { - var persons = [Person]() - var personIndex: Int = 0 - var recent: [(CGRect, Float, [Float])] = [] - - func track(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { - - if persons.isEmpty { - for detectedHuman in boxesAndScoresAndFeatures { - var person = Person(index: personIndex) - person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) - personIndex += 1 - persons.append(person) - - } - return persons - } - - var unDetectedPersonIndexes: [Int] = [] - var usedDetectedIndex: Set = Set() - - for (pi, person) in persons.enumerated() { - var bestIOU: CGFloat = 0 - var bestIndex = 0 - - for (i, detected) in boxesAndScoresAndFeatures.enumerated() { - let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) - if IoU > bestIOU { - bestIOU = IoU - bestIndex = i - } - } - if bestIOU >= 50 { - let detectedPerson = boxesAndScoresAndFeatures[bestIndex] - persons[pi].update( - box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) - usedDetectedIndex.insert(bestIndex) - } else { - unDetectedPersonIndexes.append(pi) - } - } - - let sortedIndices = unDetectedPersonIndexes.sorted(by: >) - for index in sortedIndices { - persons[index].unDetectedCounter += 1 - } - - for (index, det) in boxesAndScoresAndFeatures.enumerated() { - if !usedDetectedIndex.contains(index) { - var person = Person(index: personIndex) - person.update(box: det.0, score: det.1, features: det.2) - personIndex += 1 - persons.append(person) - } - } - - persons = removeOverlappingRects(persons: persons) - - var personsToShow: [Person] = [] - var removePersonIndexes: [Int] = [] - for (pindex, person) in persons.enumerated() { - if person.unDetectedCounter == 0 { - personsToShow.append(person) - } else if person.unDetectedCounter >= 15 { - removePersonIndexes.append(pindex) - } - } - let sortedRemoveIndices = removePersonIndexes.sorted(by: >) - for index in sortedRemoveIndices { - persons.remove(at: index) - } - - return personsToShow - - } -} - -func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { - let intersection = rect1.intersection(rect2) - - if intersection.isNull { - return 0.0 - } - - let intersectionArea = intersection.width * intersection.height - - let rect1Area = rect1.width * rect1.height - - let overlapPercentage = (intersectionArea / rect1Area) * 100 - - return overlapPercentage -} - -func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { - var filteredPersons = persons - var index = 0 - - while index < filteredPersons.count { - var shouldRemove = false - for j in (index + 1)..= threshold { - shouldRemove = true - break - } - } - if shouldRemove { - filteredPersons.remove(at: index) - } else { - index += 1 - } - } - - return filteredPersons -} diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index d82c4b0..0285a55 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -17,7 +17,6 @@ import CoreMedia import UIKit import Vision -@available(iOS 15.0, *) var mlModel = try! yolov8m(configuration: .init()).model @available(iOS 15.0, *) @@ -25,9 +24,6 @@ class ViewController: UIViewController { @IBOutlet var videoPreview: UIView! @IBOutlet var View0: UIView! @IBOutlet var segmentedControl: UISegmentedControl! - @IBOutlet weak var taskSegmentControl: UISegmentedControl! - @IBOutlet weak var trackingLabel: UILabel! 
- @IBOutlet weak var trackingSwitch: UISwitch! @IBOutlet var playButtonOutlet: UIBarButtonItem! @IBOutlet var pauseButtonOutlet: UIBarButtonItem! @IBOutlet var slider: UISlider! @@ -44,14 +40,11 @@ class ViewController: UIViewController { @IBOutlet weak var labelSliderConfLandScape: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! @IBOutlet weak var labelSliderIoULandScape: UILabel! - @IBOutlet weak var playButtonLandScape: UIButton! - @IBOutlet weak var pauseButtonLandScape: UIButton! - @IBOutlet weak var shareButtonLandScape: UIButton! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - - @IBOutlet weak var toolbar: UIToolbar! @IBOutlet weak var forcus: UIImageView! + @IBOutlet weak var toolBar: UIToolbar! var maskLayer: CALayer = CALayer() + let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) var session: AVCaptureSession! @@ -84,23 +77,16 @@ class ViewController: UIViewController { enum Task { case detect - case human - case seg case pose } - - var task: Task = .pose + + var task: Task = .detect var confidenceThreshold: Float = 0.25 var iouThreshold: Float = 0.4 - var tracking = false - var tracker = TrackingModel() - - var overlayLayer: CAShapeLayer! override func viewDidLoad() { super.viewDidLoad() slider.value = 30 - taskSegmentControl.selectedSegmentIndex = 0 setLabels() setUpBoundingBoxViews() setUpOrientationChangeNotification() @@ -118,38 +104,35 @@ class ViewController: UIViewController { sliderConf.isHidden = true labelSliderIoU.isHidden = true sliderIoU.isHidden = true + toolBar.setBackgroundImage(UIImage(), forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(UIImage(), forToolbarPosition: .any) + labelSliderConfLandScape.isHidden = false sliderConfLandScape.isHidden = false labelSliderIoULandScape.isHidden = false sliderIoULandScape.isHidden = false - toolbar.isHidden = true - playButtonLandScape.isHidden = false - pauseButtonLandScape.isHidden = false - shareButtonLandScape.isHidden = false } else { labelSliderConf.isHidden = false sliderConf.isHidden = false labelSliderIoU.isHidden = false sliderIoU.isHidden = false + toolBar.setBackgroundImage(nil, forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(nil, forToolbarPosition: .any) + labelSliderConfLandScape.isHidden = true sliderConfLandScape.isHidden = true labelSliderIoULandScape.isHidden = true sliderIoULandScape.isHidden = true - toolbar.isHidden = false - playButtonLandScape.isHidden = true - pauseButtonLandScape.isHidden = true - shareButtonLandScape.isHidden = true - } self.videoCapture.previewLayer?.frame = CGRect( x: 0, y: 0, width: size.width, height: size.height) - coordinator.animate( - alongsideTransition: { context in - }, - completion: { context in - self.setupMaskLayer() - }) + coordinator.animate( + alongsideTransition: { context in + }, + completion: { context in + self.setupMaskLayer() + }) } private func setUpOrientationChangeNotification() { @@ -167,7 +150,6 @@ class ViewController: UIViewController { } @IBAction func indexChanged(_ sender: Any) { - self.removeAllMaskSubLayers() selection.selectionChanged() activityIndicator.startAnimating() setModel() @@ -176,8 +158,7 @@ class ViewController: UIViewController { } func setModel() { - - /// Switch model + /// Switch model switch task { case .detect: switch segmentedControl.selectedSegmentIndex { @@ -199,127 +180,30 @@ class ViewController: UIViewController { default: break } - case .human: - switch segmentedControl.selectedSegmentIndex { - case 0: - 
self.labelName.text = "YOLOv8n" - if #available(iOS 15.0, *) { - mlModel = try! yolov8n_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 1: - self.labelName.text = "YOLOv8s" - if #available(iOS 15.0, *) { - mlModel = try! yolov8s_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 2: - self.labelName.text = "YOLOv8m" - if #available(iOS 15.0, *) { - mlModel = try! yolov8m_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 3: - self.labelName.text = "YOLOv8l" - if #available(iOS 15.0, *) { - mlModel = try! yolov8l_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 4: - self.labelName.text = "YOLOv8x" - if #available(iOS 15.0, *) { - mlModel = try! yolov8x_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - - default: - break - } - case .seg: - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - if #available(iOS 15.0, *) { - mlModel = try! yolov8n_seg(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 1: - self.labelName.text = "YOLOv8s" - if #available(iOS 15.0, *) { - mlModel = try! yolov8s_seg(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 2: - self.labelName.text = "YOLOv8m" - if #available(iOS 15.0, *) { - mlModel = try! yolov8m_seg(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 3: - self.labelName.text = "YOLOv8l" - if #available(iOS 15.0, *) { - mlModel = try! yolov8l_seg(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 4: - self.labelName.text = "YOLOv8x" - if #available(iOS 15.0, *) { - mlModel = try! yolov8x_seg(configuration: .init()).model - } else { - // Fallback on earlier versions - } - default: break - } + case .pose: switch segmentedControl.selectedSegmentIndex { case 0: self.labelName.text = "YOLOv8n" - if #available(iOS 15.0, *) { - mlModel = try! yolov8n_pose(configuration: .init()).model - } else { - // Fallback on earlier versions - } + mlModel = try! yolov8n_pose(configuration: .init()).model case 1: self.labelName.text = "YOLOv8s" - if #available(iOS 15.0, *) { - mlModel = try! yolov8s_pose(configuration: .init()).model - } else { - // Fallback on earlier versions - } + mlModel = try! yolov8s_pose(configuration: .init()).model + case 2: self.labelName.text = "YOLOv8m" - if #available(iOS 15.0, *) { - mlModel = try! yolov8m_pose(configuration: .init()).model - } else { - // Fallback on earlier versions - } + mlModel = try! yolov8m_pose(configuration: .init()).model case 3: self.labelName.text = "YOLOv8l" - if #available(iOS 15.0, *) { - mlModel = try! yolov8l_pose(configuration: .init()).model - } else { - // Fallback on earlier versions - } + mlModel = try! yolov8l_pose(configuration: .init()).model case 4: self.labelName.text = "YOLOv8x" - if #available(iOS 15.0, *) { - mlModel = try! yolov8x_pose(configuration: .init()).model - } else { - // Fallback on earlier versions - } + mlModel = try! 
yolov8x_pose(configuration: .init()).model default: break } } + DispatchQueue.global(qos: .userInitiated).async { [self] in /// VNCoreMLModel @@ -331,7 +215,7 @@ class ViewController: UIViewController { model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) - }) + }) request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop visionRequest = request t2 = 0.0 // inference dt smoothed @@ -342,8 +226,6 @@ class ViewController: UIViewController { /// Update thresholds from slider values @IBAction func sliderChanged(_ sender: Any) { - self.confidenceThreshold = sliderConf.value - self.iouThreshold = sliderIoU.value let conf = Double(round(100 * sliderConf.value)) / 100 let iou = Double(round(100 * sliderIoU.value)) / 100 self.labelSliderConf.text = String(conf) + " Confidence Threshold" @@ -351,52 +233,30 @@ class ViewController: UIViewController { detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { - self.removeAllMaskSubLayers() + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + self.removeAllMaskSubLayers() - switch sender.selectedSegmentIndex { - case 0: - if self.task != .detect { - self.trackingLabel.isHidden = true - self.trackingSwitch.isHidden = true - self.task = .detect - self.setModel() - } - case 1: - if self.task != .human { - self.task = .human - for i in 0.. 19 { - count = 0 - } - guard let colorForMask = color.toRGBComponents() else { fatalError() } - colorsForMask.append(colorForMask) - } } - } func startVideo() { @@ -540,6 +387,7 @@ class ViewController: UIViewController { self.videoPreview.layer.addSublayer(previewLayer) self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } + self.setupMaskLayer() self.videoPreview.layer.addSublayer(self.maskLayer) @@ -547,12 +395,6 @@ class ViewController: UIViewController { for box in self.boundingBoxViews { box.addToLayer(self.videoPreview.layer) } - self.overlayLayer = CAShapeLayer() - self.overlayLayer.frame = self.view.bounds - self.overlayLayer.strokeColor = UIColor.red.cgColor - self.overlayLayer.lineWidth = 2.0 - self.overlayLayer.fillColor = UIColor.clear.cgColor - self.view.layer.addSublayer(self.overlayLayer) // Once everything is set up, we can start capturing live video. self.videoCapture.start() @@ -579,7 +421,6 @@ class ViewController: UIViewController { imageOrientation = .up case .unknown: imageOrientation = .up - default: imageOrientation = .up } @@ -604,11 +445,12 @@ class ViewController: UIViewController { func processObservations(for request: VNRequest, error: Error?) { switch task { case .detect: + DispatchQueue.main.async { if let results = request.results as? [VNRecognizedObjectObservation] { - self.show(predictions: results, persons: [], processedBoxAndMasks: [], boxes: []) + self.show(predictions: results, predsPose: []) } else { - self.show(predictions: [], persons: [], processedBoxAndMasks: [], boxes: []) + self.show(predictions: [], predsPose: []) } // Measure FPS @@ -619,74 +461,24 @@ class ViewController: UIViewController { self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms self.t3 = CACurrentMediaTime() } - case .human: - if let results = request.results as? 
[VNCoreMLFeatureValueObservation] { - DispatchQueue.main.async { - - if let prediction = results.first?.featureValue.multiArrayValue { - - let pred = PostProcessHuman( - prediction: prediction, confidenceThreshold: self.confidenceThreshold, - iouThreshold: self.iouThreshold) - var persons: [Person] = [] - if !self.tracking { - persons = toPerson(boxesAndScoresAndFeatures: pred) - } else { - persons = self.tracker.track(boxesAndScoresAndFeatures: pred) - } - self.show(predictions: [], persons: persons, processedBoxAndMasks: [], boxes: []) - } else { - self.show(predictions: [], persons: [], processedBoxAndMasks: [], boxes: []) - } - if self.t1 < 10.0 { // valid dt - self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time - } - self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS - self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms - self.t3 = CACurrentMediaTime() - } - } - case .seg: - if let results = request.results as? [VNCoreMLFeatureValueObservation] { - DispatchQueue.main.async { [self] in - guard results.count == 2 else { return } - let masks = results[0].featureValue.multiArrayValue - let pred = results[1].featureValue.multiArrayValue - let processed = getBoundingBoxesAndMasks( - feature: pred!, confidenceThreshold: 0.25, iouThreshold: 0.4) - - self.show(predictions: [], persons: [], processedBoxAndMasks: processed, boxes: []) - DispatchQueue.main.async { - let a = Date() - self.updateMaskAndBoxes(detectedObjects: processed, maskArray: masks!) - print(Date().timeIntervalSince(a)) - } - - if self.t1 < 10.0 { // valid dt - self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time - } - self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS - self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms - self.t3 = CACurrentMediaTime() - } - } + case .pose: if let results = request.results as? 
[VNCoreMLFeatureValueObservation] { DispatchQueue.main.async { [self] in if let prediction = results.first?.featureValue.multiArrayValue { - let pred = PostProcessPose( + let preds = PostProcessPose( prediction: prediction, confidenceThreshold: self.confidenceThreshold, iouThreshold: self.iouThreshold) var boxes = [(CGRect, Float)]() var kpts = [[Float]]() - for p in pred { - boxes.append((p.0, p.1)) - kpts.append(p.2) + for pred in preds { + boxes.append((pred.0, pred.1)) + kpts.append(pred.2) } - self.show(predictions: [], persons: [], processedBoxAndMasks: [], boxes: boxes) + self.show(predictions: [], predsPose: preds) self.maskLayer.sublayers?.forEach { $0.removeFromSuperlayer() } self.drawKeypoints( @@ -694,7 +486,7 @@ class ViewController: UIViewController { imageViewSize: maskLayer.bounds.size, originalImageSize: maskLayer.bounds.size) } else { - self.show(predictions: [], persons: [], processedBoxAndMasks: [], boxes: []) + self.show(predictions: [], predsPose: []) } if self.t1 < 10.0 { // valid dt self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time @@ -704,11 +496,7 @@ class ViewController: UIViewController { self.t3 = CACurrentMediaTime() } } - } - } - - func measureFPS() { } @@ -775,22 +563,20 @@ class ViewController: UIViewController { } } - func show( - predictions: [VNRecognizedObjectObservation], persons: [Person], - processedBoxAndMasks: [(CGRect, Int, Float, MLMultiArray)], boxes: [(CGRect, Float)] - ) { - let width = videoPreview.bounds.width - let height = videoPreview.bounds.height + func show(predictions: [VNRecognizedObjectObservation], predsPose: [(CGRect, Float, [Float])]) { + let width = videoPreview.bounds.width // 375 pix + let height = videoPreview.bounds.height // 812 pix var str = "" + // ratio = videoPreview AR divided by sessionPreset AR var ratio: CGFloat = 1.0 - if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) + ratio = (height / width) / (4.0 / 3.0) // .photo } else { - ratio = (height / width) / (16.0 / 9.0) + ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. 
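      // Illustrative arithmetic (assumed numbers, not from this patch): a 375 x 812 pt portrait
      // preview over a 16:9 capture buffer gives ratio = (812 / 375) / (16 / 9) ≈ 1.22, and the
      // box coordinates below are shifted and scaled by this ratio to compensate for the preview
      // layer cropping the wider capture buffer.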
} + // date let date = Date() let calendar = Calendar.current let hour = calendar.component(.hour, from: date) @@ -798,21 +584,20 @@ class ViewController: UIViewController { let seconds = calendar.component(.second, from: date) let nanoseconds = calendar.component(.nanosecond, from: date) let sec_day = - Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day var resultCount = 0 switch task { - case .detect: - resultCount = predictions.count - case .human: - resultCount = persons.count - case .seg: - resultCount = processedBoxAndMasks.count - case .pose: - resultCount = boxes.count + case .detect: + resultCount = predictions.count + case .pose: + resultCount = predsPose.count } - self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" + + self.labelSlider.text = + String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + for i in 0..= 1 { let offset = (1 - ratio) * (0.5 - displayRect.minX) if task == .detect { @@ -904,6 +663,7 @@ class ViewController: UIViewController { let transform = CGAffineTransform(translationX: offset, y: 0) displayRect = displayRect.applying(transform) } + displayRect.size.width *= ratio } else { if task == .detect { @@ -911,32 +671,53 @@ class ViewController: UIViewController { let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) displayRect = displayRect.applying(transform) - } else { + } else { let offset = (ratio - 1) * (0.5 - displayRect.minY) let transform = CGAffineTransform(translationX: 0, y: offset) displayRect = displayRect.applying(transform) - } + } ratio = (height / width) / (3.0 / 4.0) displayRect.size.height /= ratio } + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) boundingBoxViews[i].show( - frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) + frame: displayRect, label: label, color: boxColor, alpha: alpha) if developerMode { if save_detections { str += String( - format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } } - } else { - boundingBoxViews[i].hide() + } else { + boundingBoxViews[i].hide() } } + + + // Write + if developerMode { + if save_detections { + saveText(text: str, file: "detections.txt") // Write stats for each detection + } + if save_frames { + str = String( + format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", + sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, + self.t1 * 1000, self.t2 * 1000, 1 / self.t4) + saveText(text: str, file: "frames.txt") // Write stats for each image + } + } + + // Debug + // print(str) + // print(UIDevice.current.identifierForVendor!) 
+ // saveImage() } // Pinch to Zoom Start --------------------------------------------------------------------------------------------- @@ -976,9 +757,7 @@ class ViewController: UIViewController { self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) default: break } - } // Pinch to Zoom Start - - // ------------------------------------------------------------------------------------------ + } // Pinch to Zoom End -------------------------------------------------------------------------------------------- } // ViewController class End @available(iOS 15.0, *) @@ -988,8 +767,8 @@ extension ViewController: VideoCaptureDelegate { } } -// Programmatically save image @available(iOS 15.0, *) +// Programmatically save image extension ViewController: AVCapturePhotoCaptureDelegate { func photoOutput( _ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error? @@ -1002,7 +781,7 @@ extension ViewController: AVCapturePhotoCaptureDelegate { let cgImageRef: CGImage! = CGImage( jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - var orientation = UIImage.Orientation.right + var orientation = CGImagePropertyOrientation.right switch UIDevice.current.orientation { case .landscapeLeft: orientation = .up @@ -1011,14 +790,16 @@ extension ViewController: AVCapturePhotoCaptureDelegate { default: break } - var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: orientation) - + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: .right) + if let orientedCIImage = CIImage(image: image)?.oriented(orientation), + let cgImage = CIContext().createCGImage(orientedCIImage, from: orientedCIImage.extent) + { + image = UIImage(cgImage: cgImage) + } let imageView = UIImageView(image: image) imageView.contentMode = .scaleAspectFill imageView.frame = videoPreview.frame let imageLayer = imageView.layer - var sublayers = videoPreview.layer.sublayers ?? 
[] - let insertIndex = max(sublayers.count - 1, 0) videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) let bounds = UIScreen.main.bounds From ed7e45b659e7f4c759cbd18c5bfc86ab3bc0e456 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sat, 24 Aug 2024 11:54:21 +0900 Subject: [PATCH 18/26] add model reference --- YOLO.xcodeproj/project.pbxproj | 44 ++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 76ca3e5..1c6001a 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -17,6 +17,16 @@ 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; 7333105F2C69CE95001D647B /* Colors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7333105E2C69CE95001D647B /* Colors.swift */; }; + 737FDB1A2C798277009A6696 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB152C798273009A6696 /* yolov8x.mlpackage */; }; + 737FDB1B2C798277009A6696 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB162C798274009A6696 /* yolov8s.mlpackage */; }; + 737FDB1C2C798277009A6696 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB172C798274009A6696 /* yolov8n.mlpackage */; }; + 737FDB1D2C798277009A6696 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB182C798276009A6696 /* yolov8l.mlpackage */; }; + 737FDB1E2C798277009A6696 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB192C798277009A6696 /* yolov8m.mlpackage */; }; + 737FDB242C7982A5009A6696 /* yolov8n-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */; }; + 737FDB252C7982A5009A6696 /* yolov8m-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */; }; + 737FDB262C7982A5009A6696 /* yolov8l-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */; }; + 737FDB272C7982A5009A6696 /* yolov8s-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */; }; + 737FDB282C7982A5009A6696 /* yolov8x-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */; }; 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -34,6 +44,16 @@ 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 7333105E2C69CE95001D647B /* Colors.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Colors.swift; sourceTree = ""; }; + 737FDB152C798273009A6696 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType 
= folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; + 737FDB162C798274009A6696 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; + 737FDB172C798274009A6696 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; + 737FDB182C798276009A6696 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; + 737FDB192C798277009A6696 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; + 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8n-pose.mlpackage"; sourceTree = ""; }; + 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8m-pose.mlpackage"; sourceTree = ""; }; + 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8l-pose.mlpackage"; sourceTree = ""; }; + 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8s-pose.mlpackage"; sourceTree = ""; }; + 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8x-pose.mlpackage"; sourceTree = ""; }; 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PostProcessPose.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; @@ -83,6 +103,16 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( + 737FDB182C798276009A6696 /* yolov8l.mlpackage */, + 737FDB192C798277009A6696 /* yolov8m.mlpackage */, + 737FDB172C798274009A6696 /* yolov8n.mlpackage */, + 737FDB162C798274009A6696 /* yolov8s.mlpackage */, + 737FDB152C798273009A6696 /* yolov8x.mlpackage */, + 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */, + 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */, + 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */, + 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */, + 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */, ); path = Models; sourceTree = ""; @@ -202,12 +232,22 @@ buildActionMask = 2147483647; files = ( 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */, + 737FDB272C7982A5009A6696 /* yolov8s-pose.mlpackage in Sources */, + 737FDB1C2C798277009A6696 /* yolov8n.mlpackage in Sources */, 7333105F2C69CE95001D647B /* Colors.swift in Sources */, + 737FDB1D2C798277009A6696 /* yolov8l.mlpackage in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, + 737FDB1B2C798277009A6696 /* yolov8s.mlpackage in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, + 737FDB282C7982A5009A6696 /* yolov8x-pose.mlpackage in Sources */, + 737FDB242C7982A5009A6696 /* yolov8n-pose.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, 
+ 737FDB1A2C798277009A6696 /* yolov8x.mlpackage in Sources */, + 737FDB262C7982A5009A6696 /* yolov8l-pose.mlpackage in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, + 737FDB1E2C798277009A6696 /* yolov8m.mlpackage in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 737FDB252C7982A5009A6696 /* yolov8m-pose.mlpackage in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -339,7 +379,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = MFN25KNUGJ; + DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -367,7 +407,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 2; - DEVELOPMENT_TEAM = MFN25KNUGJ; + DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; From 2a7ce494fef7741a66dfb6d41ecabb072d7e67c7 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Sat, 24 Aug 2024 02:54:54 +0000 Subject: [PATCH 19/26] Auto-format by https://ultralytics.com/actions --- YOLO/Utilities/PostProcessPose.swift | 4 +- YOLO/ViewController.swift | 129 +++++++++++++-------------- 2 files changed, 66 insertions(+), 67 deletions(-) diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift index 009fe92..e410c64 100644 --- a/YOLO/Utilities/PostProcessPose.swift +++ b/YOLO/Utilities/PostProcessPose.swift @@ -16,7 +16,7 @@ extension ViewController { } else { ratio = (16.0 / 9.0) } - + var offSet = CGFloat.zero var margin = CGFloat.zero if view.bounds.width < view.bounds.height { @@ -30,7 +30,7 @@ extension ViewController { self.maskLayer.frame = CGRect( x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) } - + } func removeAllMaskSubLayers() { diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 0285a55..befe492 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -79,7 +79,7 @@ class ViewController: UIViewController { case detect case pose } - + var task: Task = .detect var confidenceThreshold: Float = 0.25 var iouThreshold: Float = 0.4 @@ -127,12 +127,12 @@ class ViewController: UIViewController { } self.videoCapture.previewLayer?.frame = CGRect( x: 0, y: 0, width: size.width, height: size.height) - coordinator.animate( - alongsideTransition: { context in - }, - completion: { context in - self.setupMaskLayer() - }) + coordinator.animate( + alongsideTransition: { context in + }, + completion: { context in + self.setupMaskLayer() + }) } private func setUpOrientationChangeNotification() { @@ -158,7 +158,7 @@ class ViewController: UIViewController { } func setModel() { - /// Switch model + /// Switch model switch task { case .detect: switch segmentedControl.selectedSegmentIndex { @@ -180,7 +180,7 @@ class ViewController: UIViewController { default: break } - + case .pose: switch segmentedControl.selectedSegmentIndex { case 0: @@ -189,7 +189,7 @@ class ViewController: UIViewController { case 1: self.labelName.text = "YOLOv8s" mlModel = try! yolov8s_pose(configuration: .init()).model - + case 2: self.labelName.text = "YOLOv8m" mlModel = try! 
yolov8m_pose(configuration: .init()).model @@ -215,7 +215,7 @@ class ViewController: UIViewController { model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) - }) + }) request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop visionRequest = request t2 = 0.0 // inference dt smoothed @@ -233,30 +233,30 @@ class ViewController: UIViewController { detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { - self.removeAllMaskSubLayers() + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + self.removeAllMaskSubLayers() - switch sender.selectedSegmentIndex { - case 0: - if self.task != .detect { - self.task = .detect - self.setModel() - } - case 1: - if self.task != .pose { - self.task = .pose - for i in 0..= 1 { let offset = (1 - ratio) * (0.5 - displayRect.minX) if task == .detect { @@ -663,7 +663,7 @@ class ViewController: UIViewController { let transform = CGAffineTransform(translationX: offset, y: 0) displayRect = displayRect.applying(transform) } - + displayRect.size.width *= ratio } else { if task == .detect { @@ -671,34 +671,33 @@ class ViewController: UIViewController { let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) displayRect = displayRect.applying(transform) - } else { + } else { let offset = (ratio - 1) * (0.5 - displayRect.minY) let transform = CGAffineTransform(translationX: 0, y: offset) displayRect = displayRect.applying(transform) - } + } ratio = (height / width) / (3.0 / 4.0) displayRect.size.height /= ratio } - + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) boundingBoxViews[i].show( - frame: displayRect, label: label, color: boxColor, alpha: alpha) + frame: displayRect, label: label, color: boxColor, alpha: alpha) if developerMode { if save_detections { str += String( - format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } } - } else { - boundingBoxViews[i].hide() + } else { + boundingBoxViews[i].hide() } } - // Write if developerMode { From 47d0d8ba9ad814d0f37df29959d1fb4e37be0a0f Mon Sep 17 00:00:00 2001 From: MLBoy_DaisukeMajima Date: Sat, 24 Aug 2024 12:03:07 +0900 Subject: [PATCH 20/26] Update README.md --- YOLO/Models/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/YOLO/Models/README.md b/YOLO/Models/README.md index b70f1e5..f9e662a 100644 --- a/YOLO/Models/README.md +++ b/YOLO/Models/README.md @@ -35,6 +35,7 @@ If you prefer to use specific model versions or need to customize the models, yo # Export all YOLOv8 models to CoreML INT8 for size in ("n", "s", "m", "l", "x"): # all YOLOv8 model sizes YOLO(f"yolov8{size}.pt").export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) + YOLO(f"yolov8{size}-pose.pt").export(format="coreml", int8=True, imgsz=[640, 384]) ``` 3. **Place Models in Project:** After exporting, locate the CoreML model files and place them in the `YOLO/Models` directory of your project. 
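For reference, the sketch below shows how an exported package of this kind is typically wired into Vision on the app side, mirroring the `setModel`/`setUpVision` flow in the ViewController patches above. It is a minimal example under stated assumptions, not the app's exact code: `makePoseRequest` and `onResults` are illustrative names, while `yolov8n_pose` is the Xcode-generated class for the bundled `yolov8n-pose.mlpackage` (swap in whichever size you exported).

```swift
import CoreML
import Vision

// Minimal sketch: load a bundled CoreML package and wrap it in a Vision request.
// `yolov8n_pose` is the Xcode-generated class for yolov8n-pose.mlpackage; adjust
// the class name to match the model size you exported and added to YOLO/Models.
func makePoseRequest(
  onResults: @escaping ([VNCoreMLFeatureValueObservation]) -> Void
) throws -> VNCoreMLRequest {
  let config = MLModelConfiguration()
  config.computeUnits = .all  // let CoreML choose CPU / GPU / Neural Engine
  let mlModel = try yolov8n_pose(configuration: config).model
  let visionModel = try VNCoreMLModel(for: mlModel)
  let request = VNCoreMLRequest(model: visionModel) { request, error in
    guard error == nil,
      let results = request.results as? [VNCoreMLFeatureValueObservation]
    else { return }
    onResults(results)
  }
  // Stretch each camera frame to the model's fixed input resolution
  // (640x384 in the export command above), as the app does.
  request.imageCropAndScaleOption = .scaleFill
  return request
}
```

Because the pose packages are exported without `nms=True`, their output arrives as a single `MLMultiArray` feature value that still needs the decoding and non-maximum suppression performed by `PostProcessPose` before anything can be drawn.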
From bfbcc60c69c1d44782758bdb1bf91d813f9da6f7 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sun, 25 Aug 2024 04:32:45 +0900 Subject: [PATCH 21/26] resolve conflicts --- YOLO/VideoCapture.swift | 1 - YOLO/ViewController.swift | 3 --- 2 files changed, 4 deletions(-) diff --git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index c1a1395..426ee05 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -103,7 +103,6 @@ public class VideoCapture: NSObject { default: videoOutput.connection(with: .video)?.videoOrientation = .portrait } - } if let connection = videoOutput.connection(with: .video) { self.previewLayer?.connection?.videoOrientation = connection.videoOrientation diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index f88d752..59c4227 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -395,8 +395,6 @@ class ViewController: UIViewController { for box in self.boundingBoxViews { box.addToLayer(self.videoPreview.layer) } - t1 = CACurrentMediaTime() - t0 // inference dt - } // Once everything is set up, we can start capturing live video. self.videoCapture.start() @@ -524,7 +522,6 @@ class ViewController: UIViewController { // Reading // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} } - } // Save image file From 5288ce00a2db4eafbe05e84ca2af680a59e35a5b Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Sat, 24 Aug 2024 19:33:15 +0000 Subject: [PATCH 22/26] Auto-format by https://ultralytics.com/actions --- YOLO/ViewController.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 59c4227..cfa4c15 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -316,7 +316,7 @@ class ViewController: UIViewController { guard let videoInput1 = try? 
AVCaptureDeviceInput(device: device) else { return } - + self.videoCapture.captureSession.addInput(videoInput1) self.videoCapture.captureSession.commitConfiguration() } From 0ee392118a3b9737c482d54e314dfa3bc9d8b071 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sun, 25 Aug 2024 05:14:47 +0900 Subject: [PATCH 23/26] fix storyboard --- YOLO.xcodeproj/project.pbxproj | 81 +++++----- YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 268 +++++++++++++++++---------------- 3 files changed, 177 insertions(+), 174 deletions(-) diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index fcd5ff4..0d94a82 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -17,19 +17,20 @@ 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; 7333105F2C69CE95001D647B /* Colors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7333105E2C69CE95001D647B /* Colors.swift */; }; - 737FDB1A2C798277009A6696 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB152C798273009A6696 /* yolov8x.mlpackage */; }; - 737FDB1B2C798277009A6696 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB162C798274009A6696 /* yolov8s.mlpackage */; }; - 737FDB1C2C798277009A6696 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB172C798274009A6696 /* yolov8n.mlpackage */; }; - 737FDB1D2C798277009A6696 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB182C798276009A6696 /* yolov8l.mlpackage */; }; - 737FDB1E2C798277009A6696 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB192C798277009A6696 /* yolov8m.mlpackage */; }; - 737FDB242C7982A5009A6696 /* yolov8n-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */; }; - 737FDB252C7982A5009A6696 /* yolov8m-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */; }; - 737FDB262C7982A5009A6696 /* yolov8l-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */; }; - 737FDB272C7982A5009A6696 /* yolov8s-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */; }; - 737FDB282C7982A5009A6696 /* yolov8x-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */; }; + 737FDB332C7A6D19009A6696 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */; }; + 737FDB342C7A6D19009A6696 /* yolov8x-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */; }; + 737FDB352C7A6D19009A6696 /* yolov8l-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */; }; + 737FDB362C7A6D19009A6696 /* yolov8m-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */; }; + 737FDB372C7A6D19009A6696 /* yolov8s-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */; }; + 737FDB382C7A6D19009A6696 /* yolov8n-pose.mlpackage in Sources */ = 
{isa = PBXBuildFile; fileRef = 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */; }; + 737FDB392C7A6D19009A6696 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */; }; + 737FDB3A2C7A6D19009A6696 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB302C7A6D17009A6696 /* yolov8x.mlpackage */; }; + 737FDB3B2C7A6D19009A6696 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */; }; + 737FDB3C2C7A6D19009A6696 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */; }; 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ + /* Begin PBXFileReference section */ 6323C44D22186177008AE681 /* LaunchScreen.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = LaunchScreen.storyboard; sourceTree = ""; }; 6323C44F22186177008AE681 /* Main.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = Main.storyboard; sourceTree = ""; }; @@ -43,16 +44,16 @@ 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 7333105E2C69CE95001D647B /* Colors.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Colors.swift; sourceTree = ""; }; - 737FDB152C798273009A6696 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 737FDB162C798274009A6696 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 737FDB172C798274009A6696 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; - 737FDB182C798276009A6696 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 737FDB192C798277009A6696 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8n-pose.mlpackage"; sourceTree = ""; }; - 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8m-pose.mlpackage"; sourceTree = ""; }; - 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8l-pose.mlpackage"; sourceTree = ""; }; - 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8s-pose.mlpackage"; sourceTree = ""; }; - 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8x-pose.mlpackage"; sourceTree = ""; }; + 
737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; + 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8x-pose.mlpackage"; sourceTree = ""; }; + 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8l-pose.mlpackage"; sourceTree = ""; }; + 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8m-pose.mlpackage"; sourceTree = ""; }; + 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8s-pose.mlpackage"; sourceTree = ""; }; + 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8n-pose.mlpackage"; sourceTree = ""; }; + 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; + 737FDB302C7A6D17009A6696 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; + 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; + 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PostProcessPose.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; @@ -102,16 +103,16 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 737FDB182C798276009A6696 /* yolov8l.mlpackage */, - 737FDB192C798277009A6696 /* yolov8m.mlpackage */, - 737FDB172C798274009A6696 /* yolov8n.mlpackage */, - 737FDB162C798274009A6696 /* yolov8s.mlpackage */, - 737FDB152C798273009A6696 /* yolov8x.mlpackage */, - 737FDB212C7982A1009A6696 /* yolov8l-pose.mlpackage */, - 737FDB202C79829F009A6696 /* yolov8m-pose.mlpackage */, - 737FDB1F2C79829E009A6696 /* yolov8n-pose.mlpackage */, - 737FDB222C7982A2009A6696 /* yolov8s-pose.mlpackage */, - 737FDB232C7982A5009A6696 /* yolov8x-pose.mlpackage */, + 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */, + 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */, + 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */, + 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */, + 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */, + 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */, + 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */, + 737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */, + 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */, + 737FDB302C7A6D17009A6696 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -230,23 +231,23 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 737FDB352C7A6D19009A6696 /* 
yolov8l-pose.mlpackage in Sources */, + 737FDB332C7A6D19009A6696 /* yolov8s.mlpackage in Sources */, 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */, - 737FDB272C7982A5009A6696 /* yolov8s-pose.mlpackage in Sources */, - 737FDB1C2C798277009A6696 /* yolov8n.mlpackage in Sources */, + 737FDB3C2C7A6D19009A6696 /* yolov8l.mlpackage in Sources */, + 737FDB3B2C7A6D19009A6696 /* yolov8n.mlpackage in Sources */, + 737FDB342C7A6D19009A6696 /* yolov8x-pose.mlpackage in Sources */, 7333105F2C69CE95001D647B /* Colors.swift in Sources */, - 737FDB1D2C798277009A6696 /* yolov8l.mlpackage in Sources */, + 737FDB372C7A6D19009A6696 /* yolov8s-pose.mlpackage in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, - 737FDB1B2C798277009A6696 /* yolov8s.mlpackage in Sources */, + 737FDB382C7A6D19009A6696 /* yolov8n-pose.mlpackage in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 737FDB282C7982A5009A6696 /* yolov8x-pose.mlpackage in Sources */, - 737FDB242C7982A5009A6696 /* yolov8n-pose.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, - 737FDB1A2C798277009A6696 /* yolov8x.mlpackage in Sources */, - 737FDB262C7982A5009A6696 /* yolov8l-pose.mlpackage in Sources */, + 737FDB392C7A6D19009A6696 /* yolov8m.mlpackage in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, - 737FDB1E2C798277009A6696 /* yolov8m.mlpackage in Sources */, + 737FDB362C7A6D19009A6696 /* yolov8m-pose.mlpackage in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, - 737FDB252C7982A5009A6696 /* yolov8m-pose.mlpackage in Sources */, + 737FDB3A2C7A6D19009A6696 /* yolov8x.mlpackage in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index a7022ec..2e153b8 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 25 + 29 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index a525056..0fb4888 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,5 +1,5 @@ - + @@ -10,20 +10,20 @@ - + - - + + - + - - + - - - - - - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - + + - + - + From f93b5761ddaeb08e7d66b676be7bc44848e62561 Mon Sep 17 00:00:00 2001 From: MLBoy_DaisukeMajima Date: Sun, 25 Aug 2024 05:21:48 +0900 Subject: [PATCH 24/26] Update Info.plist --- YOLO/Info.plist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 2e153b8..a7022ec 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 29 + 25 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS From e5124ce4d35c860978e33a0e93daea54f815eee7 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sat, 31 Aug 2024 06:05:13 +0900 Subject: [PATCH 25/26] add comments and rename overlay layer. 
--- YOLO/Utilities/Colors.swift | 6 ++++-- YOLO/Utilities/PostProcessPose.swift | 15 ++++++++++----- YOLO/ViewController.swift | 14 +++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/YOLO/Utilities/Colors.swift b/YOLO/Utilities/Colors.swift index 966723f..77d3b5a 100644 --- a/YOLO/Utilities/Colors.swift +++ b/YOLO/Utilities/Colors.swift @@ -1,6 +1,8 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License // -// Colors.swift -// YOLO +// Colors for Ultralytics YOLO App + +// These colors are set to use the same Ultralytics color palette as the Python version. import Foundation import UIKit diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift index e410c64..da56e87 100644 --- a/YOLO/Utilities/PostProcessPose.swift +++ b/YOLO/Utilities/PostProcessPose.swift @@ -1,3 +1,8 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessSegment for Ultralytics YOLO App +// These functions are designed to post-process inference results from the YOLOv8-Pose model in the Ultralytics YOLO app to display a Pose skeleton. + import CoreML import Foundation import UIKit @@ -6,7 +11,7 @@ import UIKit extension ViewController { - func setupMaskLayer() { + func setupOverlayLayer() { let width = videoPreview.bounds.width let height = videoPreview.bounds.height @@ -22,22 +27,22 @@ extension ViewController { if view.bounds.width < view.bounds.height { offSet = height / ratio margin = (offSet - self.videoPreview.bounds.width) / 2 - self.maskLayer.frame = CGRect( + self.overlayLayer.frame = CGRect( x: -margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) } else { offSet = width / ratio margin = (offSet - self.videoPreview.bounds.height) / 2 - self.maskLayer.frame = CGRect( + self.overlayLayer.frame = CGRect( x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) } } func removeAllMaskSubLayers() { - self.maskLayer.sublayers?.forEach { layer in + self.overlayLayer.sublayers?.forEach { layer in layer.removeFromSuperlayer() } - self.maskLayer.sublayers = nil + self.overlayLayer.sublayers = nil } func PostProcessPose(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index cfa4c15..f9d1148 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -43,7 +43,7 @@ class ViewController: UIViewController { @IBOutlet weak var activityIndicator: UIActivityIndicatorView! @IBOutlet weak var forcus: UIImageView! @IBOutlet weak var toolBar: UIToolbar! - var maskLayer: CALayer = CALayer() + var overlayLayer: CALayer = CALayer() let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) @@ -131,7 +131,7 @@ class ViewController: UIViewController { alongsideTransition: { context in }, completion: { context in - self.setupMaskLayer() + self.setupOverlayLayer() }) } @@ -388,8 +388,8 @@ class ViewController: UIViewController { self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } - self.setupMaskLayer() - self.videoPreview.layer.addSublayer(self.maskLayer) + self.setupOverlayLayer() + self.videoPreview.layer.addSublayer(self.overlayLayer) // Add the bounding box layers to the UI, on top of the video preview. 
for box in self.boundingBoxViews { @@ -479,11 +479,11 @@ class ViewController: UIViewController { kpts.append(pred.2) } self.show(predictions: [], predsPose: preds) - self.maskLayer.sublayers?.forEach { $0.removeFromSuperlayer() } + self.overlayLayer.sublayers?.forEach { $0.removeFromSuperlayer() } self.drawKeypoints( - keypointsList: kpts, boundingBoxes: boxes, on: maskLayer, - imageViewSize: maskLayer.bounds.size, originalImageSize: maskLayer.bounds.size) + keypointsList: kpts, boundingBoxes: boxes, on: overlayLayer, + imageViewSize: overlayLayer.bounds.size, originalImageSize: overlayLayer.bounds.size) } else { self.show(predictions: [], predsPose: []) From c6eabcee0f07e0ea816eed0a6817d2bfa5b72a63 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sat, 31 Aug 2024 09:46:13 +0900 Subject: [PATCH 26/26] fix comment --- YOLO/Utilities/PostProcessPose.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift index da56e87..d75ce27 100644 --- a/YOLO/Utilities/PostProcessPose.swift +++ b/YOLO/Utilities/PostProcessPose.swift @@ -1,6 +1,6 @@ // Ultralytics YOLO 🚀 - AGPL-3.0 License // -// PostProcessSegment for Ultralytics YOLO App +// PostProcessPose for Ultralytics YOLO App // These functions are designed to post-process inference results from the YOLOv8-Pose model in the Ultralytics YOLO app to display a Pose skeleton. import CoreML
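The patches above call a `drawKeypoints` helper to render the decoded pose onto `overlayLayer`, but its body is not included in this series. The following is a simplified sketch of that idea, with several assumptions made explicit: `drawKeypointDots`, the default threshold, the dot radius, and the green fill are illustrative; each person's keypoints are assumed to arrive as flat `[x, y, confidence]` triplets (the YOLOv8-Pose layout) already scaled to the overlay's bounds, whereas the app scales between the model input size and the overlay and additionally draws the skeleton limb connections using the Ultralytics palette from `Colors.swift`.

```swift
import UIKit

// Simplified sketch of rendering pose keypoints onto the overlay layer.
// Assumes each person's keypoints are packed as [x0, y0, conf0, x1, y1, conf1, ...]
// and that the coordinates are already in the overlay layer's coordinate space.
func drawKeypointDots(
  keypointsList: [[Float]],
  on layer: CALayer,
  confidenceThreshold: Float = 0.25,
  radius: CGFloat = 3
) {
  for keypoints in keypointsList {
    // Walk the flat array three values at a time: x, y, confidence.
    for i in stride(from: 0, to: keypoints.count - 2, by: 3) {
      let conf = keypoints[i + 2]
      guard conf >= confidenceThreshold else { continue }  // skip occluded joints
      let point = CGPoint(x: CGFloat(keypoints[i]), y: CGFloat(keypoints[i + 1]))
      let dot = CAShapeLayer()
      dot.path = UIBezierPath(
        arcCenter: point, radius: radius,
        startAngle: 0, endAngle: .pi * 2, clockwise: true
      ).cgPath
      dot.fillColor = UIColor.systemGreen.cgColor
      layer.addSublayer(dot)
    }
  }
}
```

Clearing the previous frame's sublayers before calling a routine like this, as `self.overlayLayer.sublayers?.forEach { $0.removeFromSuperlayer() }` does in `processObservations`, keeps the overlay from accumulating stale keypoints across frames.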