Ambisonic Recordings and RealityKit
At the moment, RealityKit 2.0 doesn't support Ambisonics, and it's hard to say whether it ever will.
Using Vision and RealityKit Rotates Counterclockwise and Distorts(Stretches?) Video
After days of trying to figure this out, with research and more research, I came across this question and answer that provides the solution. Please note that both answers are valid, it just depends upon the structure of your app.
The crux of the issue is that causing a state change in RealityKitView
causes the ARView
to be re-instantiated. However, this time it is instantiated with a size of 0, which is what causes the error `[CAMetalLayer nextDrawable] returning nil because allocation failed`. Initializing it with some nonzero size like this:
let arView = ARView(frame: .init(x: 1, y: 1, width: 1, height: 1), cameraMode: .ar, automaticallyConfigureSession: false)
resolves that issue.
For the sake of those who are attempting this in the future, here is the current working UIViewRepresentable
:
import SwiftUI
import ARKit
import RealityKit
import Vision
/// A SwiftUI wrapper around an `ARView` that runs world tracking and feeds
/// camera frames to a Vision + Core ML object detector.
///
/// Detection state is reported back to SwiftUI through the three bindings.
struct RealityKitView: UIViewRepresentable {
    // Created with a nonzero frame on purpose: when SwiftUI re-instantiates the
    // representable after a state change, a zero-size ARView triggers
    // "[CAMetalLayer nextDrawable] returning nil because allocation failed".
    let arView = ARView(frame: .init(x: 1, y: 1, width: 1, height: 1), cameraMode: .ar, automaticallyConfigureSession: false)

    // Making this implicitly unwrapped. If this fails, the app should crash anyway...
    let model: VNCoreMLModel! = RealityKitView.returnMLModel()

    @Binding var isDetecting: Bool // This turns Vision on and off
    @Binding var success: Bool     // This is the state of Vision's finding the object
    @Binding var message: String   // This allows different messages to be communicated to the user

    // Last bounding box reported by Vision (normalized Vision coordinates), if any.
    @State var boundingBox: CGRect?

    /// Starts the AR session, attaches the coaching overlay, and wires the
    /// coordinator up as the session delegate before handing the view to SwiftUI.
    func makeUIView(context: Context) -> some UIView {
        // Start AR Session
        let session = configureSession()
        // Add coaching overlay
        addCoachingOverlay(session: session)
        // Handle ARSession events via delegate
        session.delegate = context.coordinator
        return arView
    }

    /// Adds ARKit's standard coaching overlay, tied to `session`, asking the
    /// user to find a horizontal plane.
    func addCoachingOverlay(session: ARSession) {
        let coachingOverlay = ARCoachingOverlayView()
        coachingOverlay.autoresizingMask = [.flexibleWidth, .flexibleHeight]
        coachingOverlay.session = session
        coachingOverlay.goal = .horizontalPlane
        arView.addSubview(coachingOverlay)
    }

    /// Runs world tracking with horizontal and vertical plane detection and
    /// automatic environment texturing on the ARView's own session.
    /// - Returns: The running `ARSession`.
    func configureSession() -> ARSession {
        let session = arView.session
        let config = ARWorldTrackingConfiguration()
        config.planeDetection = [.horizontal, .vertical]
        config.environmentTexturing = .automatic
        session.run(config)
        return session
    }

    /// Loads the generated `AppleRemoteDetector` Core ML model and wraps it for
    /// Vision. Returns `nil` (after logging) if model creation fails.
    static func returnMLModel() -> VNCoreMLModel? {
        do {
            let config = MLModelConfiguration()
            config.computeUnits = .all
            let detector = try AppleRemoteDetector()
            let model = try VNCoreMLModel(for: detector.model)
            return model
        } catch {
            print("RealityKitView:returnMLModel failed with error: \(error)")
        }
        return nil
    }

    func updateUIView(_ uiView: UIViewType, context: Context) {
        // Intentionally empty: all updates flow through the bindings and the
        // coordinator; nothing needs to be pushed into the view here.
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(self)
    }

    /// ARSession delegate that throttles camera frames into Vision requests and
    /// publishes classification results back to the SwiftUI bindings.
    class Coordinator: NSObject, ARSessionDelegate {
        var parent: RealityKitView

        init(_ parent: RealityKitView) {
            self.parent = parent
        }

        func session(_ session: ARSession, didUpdate frame: ARFrame) {
            if parent.isDetecting {
                // Do not enqueue other buffers for processing while another Vision task is still running.
                // The camera stream has only a finite amount of buffers available; holding too many buffers for analysis would starve the camera.
                guard currentBuffer == nil, case .normal = frame.camera.trackingState else {
                    return
                }
                // Retain the image buffer for Vision processing.
                self.currentBuffer = frame.capturedImage
                classifyCurrentImage()
            }
        }

        // MARK: - Vision classification

        // Vision classification request and model
        /// - Tag: ClassificationRequest
        private lazy var classificationRequest: VNCoreMLRequest = {
            // Instantiate the model from its generated Swift class.
            let request = VNCoreMLRequest(model: parent.model, completionHandler: { [weak self] request, error in
                self?.processClassifications(for: request, error: error)
            })
            // Crop input images to square area at center, matching the way the ML model was trained.
            request.imageCropAndScaleOption = .scaleFill
            // Use CPU for Vision processing to ensure that there are adequate GPU resources for rendering.
            request.usesCPUOnly = true
            return request
        }()

        // The pixel buffer being held for analysis; used to serialize Vision requests.
        // NOTE(review): written from the session delegate callback and cleared on
        // visionQueue — assumes those never race on this property; confirm.
        private var currentBuffer: CVPixelBuffer?

        // Queue for dispatching vision classification requests
        private let visionQueue = DispatchQueue(label: "com.alelin.Find-My-Apple-Remote.ARKitVision.serialVisionQueue")

        // Run the Vision+ML classifier on the current image buffer.
        /// - Tag: ClassifyCurrentImage
        private func classifyCurrentImage() {
            guard let currentBuffer = currentBuffer else {
                return
            }
            // Most computer vision tasks are not rotation agnostic so it is important to pass in the orientation of the image with respect to device.
            // This is an extension on CGImagePropertyOrientation
            let orientation = CGImagePropertyOrientation(UIDevice.current.orientation)
            let input = AppleRemoteDetectorInput(image: currentBuffer)
            parent.model.featureProvider = input
            let requestHandler = VNImageRequestHandler(cvPixelBuffer: currentBuffer, orientation: orientation, options: [:])
            visionQueue.async {
                do {
                    // Release the pixel buffer when done, allowing the next buffer to be processed.
                    defer { self.currentBuffer = nil }
                    try requestHandler.perform([self.classificationRequest])
                } catch {
                    print("Error: Vision request failed with error \"\(error)\"")
                }
            }
        }

        // Handle completion of the Vision request and choose results to display.
        /// - Tag: ProcessClassifications
        func processClassifications(for request: VNRequest, error: Error?) {
            guard
                let results = request.results,
                !results.isEmpty,
                let recognizedObjectObservations = results as? [VNRecognizedObjectObservation],
                let recognizedObjectObservation = recognizedObjectObservations.first,
                let bestResult = recognizedObjectObservation.labels.first(where: { result in result.confidence > 0.5 }),
                let label = bestResult.identifier.split(separator: ",").first
            else {
                // FIX: this completion handler runs on visionQueue; SwiftUI
                // bindings must be mutated on the main thread (same pattern as
                // session(_:didFailWithError:) below).
                DispatchQueue.main.async {
                    self.parent.boundingBox = nil
                    self.parent.success = false
                }
                if let error = error {
                    print("Unable to classify image.\n\(error.localizedDescription)")
                }
                return
            }
            print("\(recognizedObjectObservation.boundingBox)")
            // Show a label for the highest-confidence result (but only above a minimum confidence threshold).
            let confidence = String(format: "%.0f", bestResult.confidence * 100)
            let labelString = String(label)
            // FIX: publish all binding updates on the main thread.
            DispatchQueue.main.async {
                self.parent.success = true
                self.parent.boundingBox = recognizedObjectObservation.boundingBox
                self.parent.message = "\(labelString) at \(confidence)"
            }
        }

        func session(_ session: ARSession, didFailWithError error: Error) {
            guard error is ARError else { return }
            let errorWithInfo = error as NSError
            let messages = [
                errorWithInfo.localizedDescription,
                errorWithInfo.localizedFailureReason,
                errorWithInfo.localizedRecoverySuggestion
            ]
            // Filter out optional error messages.
            let errorMessage = messages.compactMap({ $0 }).joined(separator: "\n")
            DispatchQueue.main.async {
                self.parent.message = "The AR session failed with error: \(errorMessage)"
            }
        }
    }
}
Record video from front facing camera during ARKit ARSession on iPhone X
ARKit runs its own AVCaptureSession
, and there can be only one capture session running at a time — if you run a capture session, you preempt ARKit’s, which prevents ARKit from working.
However, ARKit does provide access to the camera pixel buffers it receives from its capture session, so you can record video by feeding those sample buffers to an AVAssetWriter
. (It’s basically the same workflow you’d use when recording video from AVCaptureVideoDataOutput
... a lower-level way of doing video recording compared to AVCaptureMovieFileOutput
.)
You can also feed the ARKit camera pixel buffers (see ARFrame.capturedImage
) to other technologies that work with live camera imagery, like the Vision framework. Apple has a sample code project demonstrating such usage.
Related Topics
Nspredicate with Swift and Core Data
How in Swift to Convert Int16 to Two Uint8 Bytes
Swift Struct Doesn't Conform to Protocol Equatable
How to Get the Download Progress with the New Try Await Urlsession.Shared.Download(...)
How to Convert a Float Value to Byte Array in Swift
How to Define an Enum as a Subset of Another Enum's Cases
Giving Physics to Tiles of Sktilemapnode in Xcode 8
Swift Subtle Difference Between Curried and Higher Order Function
How to Install Swift Package via Package Manager
Add an Extension/Method to All Objects in Swift
"Missing Required Entitlement" for Nfctagreadersession
Swift Error When Trying to Access Dictionary: 'Could Not Find Member 'Subscript''
Collection View Cell Button Not Triggering Action
Calculating Angle Between Two Points on Edge of Circle Swift Spritekit
How to Convert Nsset to [String] Array
Difference Between String Interpolation and String Concatenation