// Provenance (from the original git patch header):
//   Commit:  186b6a0a7e815604f2ae8654de1f5d559dfd307e
//   Author:  Carsten Abele
//   Date:    Tue, 7 Apr 2026 19:39:16 +0200
//   Subject: [PATCH] feat: add AudioCapture with AVAudioEngine mic tap and PCM conversion
//   Path:    MyVoxtral/MyVoxtral/Audio/AudioCapture.swift (new file, index 0000000..f72b3f4)

import AVFoundation

/// Captures audio from the engine's input node, converts it to 16 kHz mono
/// 16-bit interleaved PCM, and hands it to `onChunk` in fixed-size chunks.
final class AudioCapture {
    private let engine = AVAudioEngine()
    private let targetSampleRate: Double = 16000
    private let chunkDurationMs: Double = 480

    /// Receives converted PCM bytes. Every call made while capturing carries
    /// exactly `bytesPerChunk` bytes; the final call made by `stop()` may
    /// carry fewer (whatever was left in the accumulator).
    ///
    /// NOTE(review): during capture this is invoked from inside the tap block;
    /// AVAudioNode documents that tap blocks may run off the main thread, so
    /// callers should not assume main-thread delivery — confirm against usage.
    var onChunk: ((Data) -> Void)?

    /// Accumulates converted bytes until at least one full chunk is available.
    private var buffer = Data()
    private let bytesPerChunk: Int

    init() {
        // 16kHz * 2 bytes (16-bit) * 1 channel * 0.48s = 15360 bytes
        bytesPerChunk = Int(targetSampleRate * 2 * chunkDurationMs / 1000)
    }

    /// Installs a tap on the input node and starts the audio engine.
    ///
    /// - Throws: `AudioCaptureError.converterCreationFailed` when the target
    ///   format or the format converter cannot be created, or whatever error
    ///   `AVAudioEngine.start()` throws. If the engine fails to start, the tap
    ///   installed by this call is removed again before the error propagates.
    func start() throws {
        let inputNode = engine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)

        // Both initializers are failable; surface either failure as a thrown
        // error instead of force-unwrapping.
        guard
            let targetFormat = AVAudioFormat(
                commonFormat: .pcmFormatInt16,
                sampleRate: targetSampleRate,
                channels: 1,
                interleaved: true
            ),
            let converter = AVAudioConverter(from: inputFormat, to: targetFormat)
        else {
            throw AudioCaptureError.converterCreationFailed
        }

        // Request tap buffers of roughly one chunk duration at the input rate.
        let bufferSize = AVAudioFrameCount(inputFormat.sampleRate * chunkDurationMs / 1000)
        let inputSampleRate = inputFormat.sampleRate

        inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: inputFormat) { [weak self] pcmBuffer, _ in
            guard let self else { return }
            guard let data = self.convertToTargetPCM(
                pcmBuffer,
                using: converter,
                targetFormat: targetFormat,
                inputSampleRate: inputSampleRate
            ) else { return }

            self.buffer.append(data)
            self.emitCompleteChunks()
        }

        engine.prepare()
        do {
            try engine.start()
        } catch {
            // Do not leave a tap behind on failure; otherwise a retry of
            // start() would try to install a second tap on the same bus.
            inputNode.removeTap(onBus: 0)
            throw error
        }
    }

    /// Removes the tap, stops the engine, and flushes any bytes still held in
    /// the accumulator to `onChunk` as one final (possibly short) chunk.
    func stop() {
        engine.inputNode.removeTap(onBus: 0)
        engine.stop()

        // Flush remaining buffer
        if !buffer.isEmpty {
            onChunk?(buffer)
            buffer = Data()
        }
    }

    /// Converts one tap buffer to the target format and returns its raw bytes,
    /// or `nil` when the output buffer cannot be allocated or conversion fails.
    private func convertToTargetPCM(
        _ pcmBuffer: AVAudioPCMBuffer,
        using converter: AVAudioConverter,
        targetFormat: AVAudioFormat,
        inputSampleRate: Double
    ) -> Data? {
        // Output capacity is the input length scaled by the sample-rate ratio.
        let frameCount = AVAudioFrameCount(
            Double(pcmBuffer.frameLength) * targetSampleRate / inputSampleRate
        )
        guard let convertedBuffer = AVAudioPCMBuffer(
            pcmFormat: targetFormat,
            frameCapacity: frameCount
        ) else { return nil }

        // The converter may ask for input more than once during a single
        // convert call. Hand over the tap buffer exactly once, then report
        // that no more data is available right now; returning the same buffer
        // again would feed the same audio into the converter repeatedly.
        var inputConsumed = false
        var error: NSError?
        let status = converter.convert(to: convertedBuffer, error: &error) { _, outStatus in
            if inputConsumed {
                outStatus.pointee = .noDataNow
                return nil
            }
            inputConsumed = true
            outStatus.pointee = .haveData
            return pcmBuffer
        }

        guard status != .error, error == nil else { return nil }
        guard let int16Ptr = convertedBuffer.int16ChannelData?[0] else { return nil }

        let byteCount = Int(convertedBuffer.frameLength) * 2 // 16-bit = 2 bytes
        return Data(bytes: int16Ptr, count: byteCount)
    }

    /// Delivers every complete `bytesPerChunk`-sized chunk currently held in
    /// the accumulator, leaving any remainder for the next call.
    private func emitCompleteChunks() {
        while buffer.count >= bytesPerChunk {
            // Copy into fresh Data values so both the chunk and the remainder
            // are zero-based rather than slices of the old storage.
            let chunk = Data(buffer.prefix(bytesPerChunk))
            buffer = Data(buffer.dropFirst(bytesPerChunk))
            onChunk?(chunk)
        }
    }
}

/// Errors thrown by `AudioCapture.start()`.
enum AudioCaptureError: Error, LocalizedError {
    /// The target audio format or the format converter could not be created.
    case converterCreationFailed

    var errorDescription: String? {
        switch self {
        case .converterCreationFailed:
            return "Failed to create audio format converter"
        }
    }
}