AudioSourceProviderAVFObjC.mm   [plain text]


/*
 * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#import "config.h"
#import "AudioSourceProviderAVFObjC.h"

#if ENABLE(WEB_AUDIO) && USE(MEDIATOOLBOX)

#import "AudioBus.h"
#import "AudioChannel.h"
#import "AudioSourceProviderClient.h"
#import "CARingBuffer.h"
#import "Logging.h"
#import <AVFoundation/AVAssetTrack.h>
#import <AVFoundation/AVAudioMix.h>
#import <AVFoundation/AVMediaFormat.h>
#import <AVFoundation/AVPlayerItem.h>
#import <objc/runtime.h>
#import <pal/avfoundation/MediaTimeAVFoundation.h>
#import <wtf/Lock.h>
#import <wtf/MainThread.h>

#if !LOG_DISABLED
#import <wtf/StringPrintStream.h>
#endif

#import <pal/cf/CoreMediaSoftLink.h>
#import <pal/cocoa/AVFoundationSoftLink.h>

SOFT_LINK_FRAMEWORK(MediaToolbox)
SOFT_LINK_FRAMEWORK(AudioToolbox)

SOFT_LINK(AudioToolbox, AudioConverterConvertComplexBuffer, OSStatus, (AudioConverterRef inAudioConverter, UInt32 inNumberPCMFrames, const AudioBufferList* inInputData, AudioBufferList* outOutputData), (inAudioConverter, inNumberPCMFrames, inInputData, outOutputData))
SOFT_LINK(AudioToolbox, AudioConverterNew, OSStatus, (const AudioStreamBasicDescription* inSourceFormat, const AudioStreamBasicDescription* inDestinationFormat, AudioConverterRef* outAudioConverter), (inSourceFormat, inDestinationFormat, outAudioConverter))

SOFT_LINK(MediaToolbox, MTAudioProcessingTapGetStorage, void*, (MTAudioProcessingTapRef tap), (tap))
SOFT_LINK(MediaToolbox, MTAudioProcessingTapGetSourceAudio, OSStatus, (MTAudioProcessingTapRef tap, CMItemCount numberFrames, AudioBufferList *bufferListInOut, MTAudioProcessingTapFlags *flagsOut, CMTimeRange *timeRangeOut, CMItemCount *numberFramesOut), (tap, numberFrames, bufferListInOut, flagsOut, timeRangeOut, numberFramesOut))
SOFT_LINK_MAY_FAIL(MediaToolbox, MTAudioProcessingTapCreate, OSStatus, (CFAllocatorRef allocator, const MTAudioProcessingTapCallbacks *callbacks, MTAudioProcessingTapCreationFlags flags, MTAudioProcessingTapRef *tapOut), (allocator, callbacks, flags, tapOut))

namespace WebCore {

using namespace PAL;
static const double kRingBufferDuration = 1;

class AudioSourceProviderAVFObjC::TapStorage : public ThreadSafeRefCounted<AudioSourceProviderAVFObjC::TapStorage> {
public:
    TapStorage(AudioSourceProviderAVFObjC* _this) : _this(_this) { }
    AudioSourceProviderAVFObjC* _this;
    Lock lock;
};

RefPtr<AudioSourceProviderAVFObjC> AudioSourceProviderAVFObjC::create(AVPlayerItem *item)
{
    if (!canLoadMTAudioProcessingTapCreate())
        return nullptr;
    return adoptRef(*new AudioSourceProviderAVFObjC(item));
}

AudioSourceProviderAVFObjC::AudioSourceProviderAVFObjC(AVPlayerItem *item)
    : m_avPlayerItem(item)
    , m_ringBufferCreationCallback([] { return makeUniqueRef<CARingBuffer>(); })
{
}

AudioSourceProviderAVFObjC::~AudioSourceProviderAVFObjC()
{
    // FIXME: this is not correct, as this indicates that there might be simultaneous calls
    // to the destructor and a member function. This undefined behavior will be addressed in the future
    // commits. https://bugs.webkit.org/show_bug.cgi?id=224480
    setClient(nullptr);
}

void AudioSourceProviderAVFObjC::provideInput(AudioBus* bus, size_t framesToProcess)
{
    // Protect access to m_ringBuffer by using tryHoldLock(). If we failed
    // to aquire, a re-configure is underway, and m_ringBuffer is unsafe to access.
    // Emit silence.
    if (!m_tapStorage) {
        bus->zero();
        return;
    }

    auto locker = tryHoldLock(m_tapStorage->lock);
    if (!locker || !m_ringBuffer) {
        bus->zero();
        return;
    }

    uint64_t startFrame = 0;
    uint64_t endFrame = 0;
    uint64_t seekTo = m_seekTo.exchange(NoSeek);
    uint64_t writeAheadCount = m_writeAheadCount.load();
    if (seekTo != NoSeek)
        m_readCount = seekTo;

    m_ringBuffer->getCurrentFrameBounds(startFrame, endFrame);

    size_t framesAvailable = static_cast<size_t>(endFrame - (m_readCount + writeAheadCount));
    if (!framesAvailable) {
        bus->zero();
        return;
    }

    if (framesAvailable < framesToProcess) {
        framesToProcess = framesAvailable;
        bus->zero();
    }

    ASSERT(bus->numberOfChannels() == m_ringBuffer->channelCount());

    for (unsigned i = 0; i < m_list->mNumberBuffers; ++i) {
        AudioChannel* channel = bus->channel(i);
        m_list->mBuffers[i].mNumberChannels = 1;
        m_list->mBuffers[i].mData = channel->mutableData();
        m_list->mBuffers[i].mDataByteSize = channel->length() * sizeof(float);
    }

    m_ringBuffer->fetch(m_list.get(), framesToProcess, m_readCount);
    m_readCount += framesToProcess;

    if (m_converter)
        AudioConverterConvertComplexBuffer(m_converter.get(), framesToProcess, m_list.get(), m_list.get());
}

void AudioSourceProviderAVFObjC::setClient(AudioSourceProviderClient* client)
{
    if (m_client == client)
        return;
    destroyMixIfNeeded();
    m_client = client;
    createMixIfNeeded();
}

void AudioSourceProviderAVFObjC::setPlayerItem(AVPlayerItem *avPlayerItem)
{
    if (m_avPlayerItem == avPlayerItem)
        return;
    destroyMixIfNeeded();
    m_avPlayerItem = avPlayerItem;
    createMixIfNeeded();
}

void AudioSourceProviderAVFObjC::setAudioTrack(AVAssetTrack *avAssetTrack)
{
    if (m_avAssetTrack == avAssetTrack)
        return;
    destroyMixIfNeeded();
    m_avAssetTrack = avAssetTrack;
    createMixIfNeeded();
}

void AudioSourceProviderAVFObjC::destroyMixIfNeeded()
{
    if (!m_avAudioMix)
        return;
    ASSERT(m_tapStorage);
    {
        auto locker = holdLock(m_tapStorage->lock);
        if (m_avPlayerItem)
            [m_avPlayerItem setAudioMix:nil];
        [m_avAudioMix setInputParameters:@[ ]];
        m_avAudioMix.clear();
        m_tap.clear();
        m_tapStorage->_this = nullptr;
        // Call unprepare, since Tap cannot call it after clear.
        unprepare();
        m_weakFactory.revokeAll();
    }
    m_tapStorage = nullptr;
}

void AudioSourceProviderAVFObjC::createMixIfNeeded()
{
    if (!m_client || !m_avPlayerItem || !m_avAssetTrack)
        return;

    ASSERT(!m_avAudioMix);
    ASSERT(!m_tapStorage);
    ASSERT(!m_tap);

    auto tapStorage = adoptRef(new TapStorage(this));
    auto locker = holdLock(tapStorage->lock);

    MTAudioProcessingTapCallbacks callbacks = {
        0,
        tapStorage.get(),
        initCallback,
        finalizeCallback,
        prepareCallback,
        unprepareCallback,
        processCallback,
    };

    MTAudioProcessingTapRef tap = nullptr;
    OSStatus status = MTAudioProcessingTapCreate(kCFAllocatorDefault, &callbacks, 1, &tap);
    if (status != noErr) {
        if (tap)
            CFRelease(tap);
        return;
    }
    m_tap = adoptCF(tap);
    m_tapStorage = WTFMove(tapStorage);
    m_avAudioMix = adoptNS([PAL::allocAVMutableAudioMixInstance() init]);

    RetainPtr<AVMutableAudioMixInputParameters> parameters = adoptNS([PAL::allocAVMutableAudioMixInputParametersInstance() init]);
    [parameters setAudioTapProcessor:m_tap.get()];

    CMPersistentTrackID trackID = m_avAssetTrack.get().trackID;
    [parameters setTrackID:trackID];
    
    [m_avAudioMix setInputParameters:@[parameters.get()]];
    [m_avPlayerItem setAudioMix:m_avAudioMix.get()];
    m_weakFactory.initializeIfNeeded(*this);
}

void AudioSourceProviderAVFObjC::initCallback(MTAudioProcessingTapRef tap, void* clientInfo, void** tapStorageOut)
{
    ASSERT_UNUSED(tap, tap);
    TapStorage* tapStorage = static_cast<TapStorage*>(clientInfo);
    *tapStorageOut = tapStorage;
    // ref balanced by deref in finalizeCallback:
    tapStorage->ref();
}

void AudioSourceProviderAVFObjC::finalizeCallback(MTAudioProcessingTapRef tap)
{
    ASSERT(tap);
    TapStorage* tapStorage = static_cast<TapStorage*>(MTAudioProcessingTapGetStorage(tap));
    tapStorage->deref();
}

void AudioSourceProviderAVFObjC::prepareCallback(MTAudioProcessingTapRef tap, CMItemCount maxFrames, const AudioStreamBasicDescription *processingFormat)
{
    ASSERT(tap);
    TapStorage* tapStorage = static_cast<TapStorage*>(MTAudioProcessingTapGetStorage(tap));

    auto locker = holdLock(tapStorage->lock);

    if (tapStorage->_this)
        tapStorage->_this->prepare(maxFrames, processingFormat);
}

void AudioSourceProviderAVFObjC::unprepareCallback(MTAudioProcessingTapRef tap)
{
    ASSERT(tap);
    TapStorage* tapStorage = static_cast<TapStorage*>(MTAudioProcessingTapGetStorage(tap));

    auto locker = holdLock(tapStorage->lock);

    if (tapStorage->_this)
        tapStorage->_this->unprepare();
}

void AudioSourceProviderAVFObjC::processCallback(MTAudioProcessingTapRef tap, CMItemCount numberFrames, MTAudioProcessingTapFlags flags, AudioBufferList *bufferListInOut, CMItemCount *numberFramesOut, MTAudioProcessingTapFlags *flagsOut)
{
    ASSERT(tap);
    TapStorage* tapStorage = static_cast<TapStorage*>(MTAudioProcessingTapGetStorage(tap));

    auto locker = holdLock(tapStorage->lock);

    if (tapStorage->_this)
        tapStorage->_this->process(tap, numberFrames, flags, bufferListInOut, numberFramesOut, flagsOut);
}

void AudioSourceProviderAVFObjC::prepare(CMItemCount maxFrames, const AudioStreamBasicDescription *processingFormat)
{
    ASSERT(maxFrames >= 0);

    m_tapDescription = makeUniqueWithoutFastMallocCheck<AudioStreamBasicDescription>(*processingFormat);
    int numberOfChannels = processingFormat->mChannelsPerFrame;
    double sampleRate = processingFormat->mSampleRate;
    ASSERT(sampleRate >= 0);

    m_outputDescription = makeUniqueWithoutFastMallocCheck<AudioStreamBasicDescription>();
    m_outputDescription->mSampleRate = sampleRate;
    m_outputDescription->mFormatID = kAudioFormatLinearPCM;
    m_outputDescription->mFormatFlags = kAudioFormatFlagsNativeFloatPacked;
    m_outputDescription->mBitsPerChannel = 8 * sizeof(Float32);
    m_outputDescription->mChannelsPerFrame = numberOfChannels;
    m_outputDescription->mFramesPerPacket = 1;
    m_outputDescription->mBytesPerPacket = sizeof(Float32);
    m_outputDescription->mBytesPerFrame = sizeof(Float32);
    m_outputDescription->mFormatFlags |= kAudioFormatFlagIsNonInterleaved;

    if (*m_tapDescription != *m_outputDescription) {
        AudioConverterRef outConverter = nullptr;
        AudioConverterNew(m_tapDescription.get(), m_outputDescription.get(), &outConverter);
        m_converter = outConverter;
    }

    // Make the ringbuffer large enough to store at least two callbacks worth of audio, or 1s, whichever is larger.
    size_t capacity = std::max(static_cast<size_t>(2 * maxFrames), static_cast<size_t>(kRingBufferDuration * sampleRate));

    CAAudioStreamDescription description { *processingFormat };
    if (!m_ringBuffer)
        m_ringBuffer = m_ringBufferCreationCallback().moveToUniquePtr();
    m_ringBuffer->allocate(description, capacity);

    // AudioBufferList is a variable-length struct, so create on the heap with a generic new() operator
    // with a custom size, and initialize the struct manually.
    size_t bufferListSize = sizeof(AudioBufferList) + (sizeof(AudioBuffer) * std::max(1, numberOfChannels - 1));
    m_list = std::unique_ptr<AudioBufferList>((AudioBufferList*) ::operator new (bufferListSize));
    memset(m_list.get(), 0, bufferListSize);
    m_list->mNumberBuffers = numberOfChannels;

    callOnMainThread([weakThis = m_weakFactory.createWeakPtr(*this), numberOfChannels, sampleRate] {
        auto* self = weakThis.get();
        if (self && self->m_client)
            self->m_client->setFormat(numberOfChannels, sampleRate);
    });
}

void AudioSourceProviderAVFObjC::unprepare()
{
    m_tapDescription = nullptr;
    m_outputDescription = nullptr;
    m_ringBuffer = nullptr;
    m_list = nullptr;
}

void AudioSourceProviderAVFObjC::process(MTAudioProcessingTapRef tap, CMItemCount numberOfFrames, MTAudioProcessingTapFlags flags, AudioBufferList* bufferListInOut, CMItemCount* numberFramesOut, MTAudioProcessingTapFlags* flagsOut)
{
    UNUSED_PARAM(flags);
    
    CMItemCount itemCount = 0;
    CMTimeRange rangeOut;
    OSStatus status = MTAudioProcessingTapGetSourceAudio(tap, numberOfFrames, bufferListInOut, flagsOut, &rangeOut, &itemCount);
    if (status != noErr || !itemCount)
        return;

    MediaTime rangeStart = PAL::toMediaTime(rangeOut.start);
    MediaTime rangeDuration = PAL::toMediaTime(rangeOut.duration);

    if (rangeStart.isInvalid())
        return;

    MediaTime currentTime = PAL::toMediaTime(PAL::CMTimebaseGetTime([m_avPlayerItem timebase]));
    if (currentTime.isInvalid())
        return;

    // The audio tap will generate silence when the media is paused, and will not advance the
    // tap currentTime.
    if (rangeStart == m_startTimeAtLastProcess || rangeDuration == MediaTime::zeroTime()) {
        m_paused = true;
        return;
    }

    if (m_paused) {
        // Only check the write-ahead time when playback begins.
        m_paused = false;
        MediaTime earlyBy = rangeStart - currentTime;
        m_writeAheadCount.store(m_tapDescription->mSampleRate * earlyBy.toDouble());
    }

    uint64_t startFrame = 0;
    uint64_t endFrame = 0;
    m_ringBuffer->getCurrentFrameBounds(startFrame, endFrame);

    // Check to see if the underlying media has seeked, which would require us to "flush"
    // our outstanding buffers.
    if (rangeStart != m_endTimeAtLastProcess)
        m_seekTo.store(endFrame);

    m_startTimeAtLastProcess = rangeStart;
    m_endTimeAtLastProcess = rangeStart + rangeDuration;

    // StartOfStream indicates a discontinuity, such as when an AVPlayerItem is re-added
    // to an AVPlayer, so "flush" outstanding buffers.
    if (flagsOut && *flagsOut & kMTAudioProcessingTapFlag_StartOfStream)
        m_seekTo.store(endFrame);

    m_ringBuffer->store(bufferListInOut, itemCount, endFrame);

    // Mute the default audio playback by zeroing the tap-owned buffers.
    for (uint32_t i = 0; i < bufferListInOut->mNumberBuffers; ++i) {
        AudioBuffer& buffer = bufferListInOut->mBuffers[i];
        memset(buffer.mData, 0, buffer.mDataByteSize);
    }
    *numberFramesOut = 0;

    if (m_audioCallback)
        m_audioCallback(endFrame, itemCount);
}

void AudioSourceProviderAVFObjC::setAudioCallback(AudioCallback&& callback)
{
    ASSERT(!m_avAudioMix);
    m_audioCallback = WTFMove(callback);
}

void AudioSourceProviderAVFObjC::setRingBufferCreationCallback(RingBufferCreationCallback&& callback)
{
    ASSERT(!m_avAudioMix);
    m_ringBufferCreationCallback = WTFMove(callback);
}

}

#endif // ENABLE(WEB_AUDIO) && USE(MEDIATOOLBOX)