// // libtgvoip is free and unencumbered public domain software. // For more information, see http://unlicense.org or the UNLICENSE file // you should have received with this source code distribution. // #include "AudioInputWASAPI.h" #include "../../VoIPController.h" #include "../../logging.h" #include #define BUFFER_SIZE 960 #define CHECK_RES(res, msg) \ { \ if (FAILED(res)) \ { \ LOGE("%s failed: HRESULT=0x%08X", msg, res); \ failed = true; \ return; \ } \ } #define SCHECK_RES(res, msg) \ { \ if (FAILED(res)) \ { \ LOGE("%s failed: HRESULT=0x%08X", msg, res); \ return; \ } \ } template void SafeRelease(T** ppT) { if (*ppT) { (*ppT)->Release(); *ppT = nullptr; } } using namespace tgvoip::audio; AudioInputWASAPI::AudioInputWASAPI(std::string deviceID) { isRecording = false; remainingDataLen = 0; refCount = 1; HRESULT res; res = CoInitializeEx(nullptr, COINIT_MULTITHREADED); if (FAILED(res) && res != RPC_E_CHANGED_MODE) { CHECK_RES(res, "CoInitializeEx"); } #ifdef TGVOIP_WINXP_COMPAT HANDLE(WINAPI * __CreateEventExA) (LPSECURITY_ATTRIBUTES lpEventAttributes, LPCSTR lpName, DWORD dwFlags, DWORD dwDesiredAccess); __CreateEventExA = (HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES, LPCSTR, DWORD, DWORD))GetProcAddress(GetModuleHandleA("kernel32.dll"), "CreateEventExA"); #undef CreateEventEx #define CreateEventEx __CreateEventExA #endif shutdownEvent = CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE); audioSamplesReadyEvent = CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE); streamSwitchEvent = CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE); ZeroMemory(&format, sizeof(format)); format.wFormatTag = WAVE_FORMAT_PCM; format.nChannels = 1; format.nSamplesPerSec = 48000; format.nBlockAlign = 2; format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign; format.wBitsPerSample = 16; #ifdef TGVOIP_WINDOWS_DESKTOP res = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&enumerator)); CHECK_RES(res, "CoCreateInstance(MMDeviceEnumerator)"); res = enumerator->RegisterEndpointNotificationCallback(this); CHECK_RES(res, "enumerator->RegisterEndpointNotificationCallback"); audioSessionControl = nullptr; device = nullptr; #endif audioClient = nullptr; captureClient = nullptr; thread = nullptr; started = false; SetCurrentDevice(deviceID); } AudioInputWASAPI::~AudioInputWASAPI() { if (audioClient && started) { audioClient->Stop(); } #ifdef TGVOIP_WINDOWS_DESKTOP if (audioSessionControl) { audioSessionControl->UnregisterAudioSessionNotification(this); } #endif SetEvent(shutdownEvent); if (thread) { WaitForSingleObjectEx(thread, INFINITE, false); CloseHandle(thread); } #ifdef TGVOIP_WINDOWS_DESKTOP SafeRelease(&audioSessionControl); #endif SafeRelease(&captureClient); SafeRelease(&audioClient); #ifdef TGVOIP_WINDOWS_DESKTOP SafeRelease(&device); #endif CloseHandle(shutdownEvent); CloseHandle(audioSamplesReadyEvent); CloseHandle(streamSwitchEvent); #ifdef TGVOIP_WINDOWS_DESKTOP if (enumerator) enumerator->UnregisterEndpointNotificationCallback(this); SafeRelease(&enumerator); #endif } void AudioInputWASAPI::Start() { isRecording = true; if (!thread) { thread = CreateThread(nullptr, 0, AudioInputWASAPI::StartThread, this, 0, nullptr); } if (audioClient && !started) { LOGI("audioClient->Start"); audioClient->Start(); started = true; } } void AudioInputWASAPI::Stop() { isRecording = false; } bool AudioInputWASAPI::IsRecording() { return isRecording; } void AudioInputWASAPI::EnumerateDevices(std::vector& devs) { #ifdef TGVOIP_WINDOWS_DESKTOP HRESULT res; res = CoInitializeEx(nullptr, COINIT_MULTITHREADED); if (FAILED(res) && res != RPC_E_CHANGED_MODE) { SCHECK_RES(res, "CoInitializeEx"); } IMMDeviceEnumerator* deviceEnumerator = nullptr; IMMDeviceCollection* deviceCollection = nullptr; res = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&deviceEnumerator)); SCHECK_RES(res, "CoCreateInstance(MMDeviceEnumerator)"); res = deviceEnumerator->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &deviceCollection); SCHECK_RES(res, "EnumAudioEndpoints"); UINT devCount; res = deviceCollection->GetCount(&devCount); SCHECK_RES(res, "GetCount"); for (UINT i = 0; i < devCount; i++) { IMMDevice* device; res = deviceCollection->Item(i, &device); SCHECK_RES(res, "GetDeviceItem"); wchar_t* devID; res = device->GetId(&devID); SCHECK_RES(res, "get device id"); IPropertyStore* propStore; res = device->OpenPropertyStore(STGM_READ, &propStore); SafeRelease(&device); SCHECK_RES(res, "OpenPropertyStore"); PROPVARIANT friendlyName; PropVariantInit(&friendlyName); res = propStore->GetValue(PKEY_Device_FriendlyName, &friendlyName); SafeRelease(&propStore); AudioInputDevice dev; wchar_t actualFriendlyName[128]; if (friendlyName.vt == VT_LPWSTR) { wcsncpy(actualFriendlyName, friendlyName.pwszVal, sizeof(actualFriendlyName) / sizeof(wchar_t)); } else { wcscpy(actualFriendlyName, L"Unknown"); } PropVariantClear(&friendlyName); char buf[256]; WideCharToMultiByte(CP_UTF8, 0, devID, -1, buf, sizeof(buf), nullptr, nullptr); dev.id = buf; WideCharToMultiByte(CP_UTF8, 0, actualFriendlyName, -1, buf, sizeof(buf), nullptr, nullptr); dev.displayName = buf; devs.emplace_back(dev); CoTaskMemFree(devID); } SafeRelease(&deviceCollection); SafeRelease(&deviceEnumerator); #endif } void AudioInputWASAPI::SetCurrentDevice(std::string deviceID) { if (thread) { streamChangeToDevice = deviceID; SetEvent(streamSwitchEvent); } else { ActuallySetCurrentDevice(deviceID); } } void AudioInputWASAPI::ActuallySetCurrentDevice(std::string deviceID) { m_currentDevice = deviceID; HRESULT res; if (audioClient) { res = audioClient->Stop(); CHECK_RES(res, "audioClient->Stop"); } #ifdef TGVOIP_WINDOWS_DESKTOP if (audioSessionControl) { res = audioSessionControl->UnregisterAudioSessionNotification(this); CHECK_RES(res, "audioSessionControl->UnregisterAudioSessionNotification"); } SafeRelease(&audioSessionControl); #endif SafeRelease(&captureClient); SafeRelease(&audioClient); #ifdef TGVOIP_WINDOWS_DESKTOP SafeRelease(&device); IMMDeviceCollection* deviceCollection = nullptr; if (deviceID == "default") { isDefaultDevice = true; res = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications, &device); CHECK_RES(res, "GetDefaultAudioEndpoint"); } else { isDefaultDevice = false; res = enumerator->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &deviceCollection); CHECK_RES(res, "EnumAudioEndpoints"); UINT devCount; res = deviceCollection->GetCount(&devCount); CHECK_RES(res, "GetCount"); for (UINT i = 0; i < devCount; i++) { IMMDevice* device; res = deviceCollection->Item(i, &device); CHECK_RES(res, "GetDeviceItem"); wchar_t* _devID; res = device->GetId(&_devID); CHECK_RES(res, "get device id"); char devID[128]; WideCharToMultiByte(CP_UTF8, 0, _devID, -1, devID, 128, nullptr, nullptr); CoTaskMemFree(_devID); if (deviceID == devID) { this->device = device; //device->AddRef(); break; } } } if (deviceCollection) SafeRelease(&deviceCollection); if (!device) { LOGE("Didn't find capture device; failing"); m_failed = true; return; } res = device->Activate(__uuidof(IAudioClient), CLSCTX_INPROC_SERVER, nullptr, (void**)&audioClient); CHECK_RES(res, "device->Activate"); #else std::wstring devID; if (deviceID == "default") { Platform::String ^ defaultDevID = Windows::Media::Devices::MediaDevice::GetDefaultAudioCaptureId(Windows::Media::Devices::AudioDeviceRole::Communications); if (defaultDevID == nullptr) { LOGE("Didn't find capture device; failing"); failed = true; return; } else { isDefaultDevice = true; devID = defaultDevID->Data(); } } else { int wchars_num = MultiByteToWideChar(CP_UTF8, 0, deviceID.c_str(), -1, nullptr, 0); wchar_t* wstr = new wchar_t[wchars_num]; MultiByteToWideChar(CP_UTF8, 0, deviceID.c_str(), -1, wstr, wchars_num); devID = wstr; } HRESULT res1, res2; IAudioClient2* audioClient2 = WindowsSandboxUtils::ActivateAudioDevice(devID.c_str(), &res1, &res2); CHECK_RES(res1, "activate1"); CHECK_RES(res2, "activate2"); AudioClientProperties properties = {}; properties.cbSize = sizeof AudioClientProperties; properties.eCategory = AudioCategory_Communications; res = audioClient2->SetClientProperties(&properties); CHECK_RES(res, "audioClient2->SetClientProperties"); audioClient = audioClient2; #endif // {2C693079-3F59-49FD-964F-61C005EAA5D3} const GUID guid = {0x2c693079, 0x3f59, 0x49fd, {0x96, 0x4f, 0x61, 0xc0, 0x5, 0xea, 0xa5, 0xd3}}; // Use 1000ms buffer to avoid resampling glitches on Windows 8.1 and older. This should not increase latency. res = audioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST | AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, 1000 * 10000, 0, &format, &guid); CHECK_RES(res, "audioClient->Initialize"); std::uint32_t bufSize; res = audioClient->GetBufferSize(&bufSize); CHECK_RES(res, "audioClient->GetBufferSize"); LOGV("buffer size: %u", bufSize); m_estimatedDelay = 0; REFERENCE_TIME latency, devicePeriod; if (SUCCEEDED(audioClient->GetStreamLatency(&latency))) { if (SUCCEEDED(audioClient->GetDevicePeriod(&devicePeriod, nullptr))) { m_estimatedDelay = (std::int32_t)(latency / 10000 + devicePeriod / 10000); } } res = audioClient->SetEventHandle(audioSamplesReadyEvent); CHECK_RES(res, "audioClient->SetEventHandle"); res = audioClient->GetService(IID_PPV_ARGS(&captureClient)); CHECK_RES(res, "audioClient->GetService"); #ifdef TGVOIP_WINDOWS_DESKTOP res = audioClient->GetService(IID_PPV_ARGS(&audioSessionControl)); CHECK_RES(res, "audioClient->GetService(IAudioSessionControl)"); res = audioSessionControl->RegisterAudioSessionNotification(this); CHECK_RES(res, "audioSessionControl->RegisterAudioSessionNotification"); #endif if (isRecording) audioClient->Start(); LOGV("set current input device done"); } DWORD WINAPI AudioInputWASAPI::StartThread(void* arg) { LOGV("WASAPI capture thread starting"); ((AudioInputWASAPI*)arg)->RunThread(); return 0; } void AudioInputWASAPI::RunThread() { if (m_failed) return; SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); HANDLE waitArray[] = {shutdownEvent, streamSwitchEvent, audioSamplesReadyEvent}; HRESULT res = CoInitializeEx(nullptr, COINIT_MULTITHREADED); CHECK_RES(res, "CoInitializeEx in capture thread"); std::uint32_t bufferSize = 0; std::uint64_t framesWritten = 0; bool running = true; //double prevCallback=VoIPController::GetCurrentTime(); while (running) { DWORD waitResult = WaitForMultipleObjectsEx(3, waitArray, false, INFINITE, false); if (waitResult == WAIT_OBJECT_0) { // shutdownEvent LOGV("capture thread shutting down"); running = false; } else if (waitResult == WAIT_OBJECT_0 + 1) { // streamSwitchEvent LOGV("stream switch"); ActuallySetCurrentDevice(streamChangeToDevice); ResetEvent(streamSwitchEvent); bufferSize = 0; LOGV("stream switch done"); } else if (waitResult == WAIT_OBJECT_0 + 2) { // audioSamplesReadyEvent if (!audioClient) continue; res = captureClient->GetNextPacketSize(&bufferSize); CHECK_RES(res, "captureClient->GetNextPacketSize"); BYTE* data; std::uint32_t framesAvailable = 0; DWORD flags; res = captureClient->GetBuffer(&data, &framesAvailable, &flags, nullptr, nullptr); CHECK_RES(res, "captureClient->GetBuffer"); std::size_t dataLen = framesAvailable * 2; assert(remainingDataLen + dataLen < sizeof(remainingData)); if (flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY) { LOGW("Audio capture data discontinuity"); } //double t=VoIPController::GetCurrentTime(); //LOGV("audio capture: %u, time %f, flags %u", framesAvailable, t-prevCallback, flags); //prevCallback=t; std::memcpy(remainingData + remainingDataLen, data, dataLen); remainingDataLen += dataLen; while (remainingDataLen > 960 * 2) { if (isRecording) InvokeCallback(remainingData, 960 * 2); //LOGV("remaining data len %u", remainingDataLen); memmove(remainingData, remainingData + (960 * 2), remainingDataLen - 960 * 2); remainingDataLen -= 960 * 2; } res = captureClient->ReleaseBuffer(framesAvailable); CHECK_RES(res, "captureClient->ReleaseBuffer"); //estimatedDelay=(std::int32_t)((devicePosition-framesWritten)/48); framesWritten += framesAvailable; } } } #ifdef TGVOIP_WINDOWS_DESKTOP HRESULT AudioInputWASAPI::OnSessionDisconnected(AudioSessionDisconnectReason reason) { if (!isDefaultDevice) { streamChangeToDevice = "default"; SetEvent(streamSwitchEvent); } return S_OK; } HRESULT AudioInputWASAPI::OnDefaultDeviceChanged(EDataFlow flow, ERole role, LPCWSTR newDevID) { if (flow == eCapture && role == eCommunications && isDefaultDevice) { streamChangeToDevice = "default"; SetEvent(streamSwitchEvent); } return S_OK; } ULONG AudioInputWASAPI::AddRef() { return InterlockedIncrement(&refCount); } ULONG AudioInputWASAPI::Release() { return InterlockedDecrement(&refCount); } HRESULT AudioInputWASAPI::QueryInterface(REFIID iid, void** obj) { if (!obj) { return E_POINTER; } *obj = nullptr; if (iid == IID_IUnknown) { *obj = static_cast(static_cast(this)); AddRef(); } else if (iid == __uuidof(IMMNotificationClient)) { *obj = static_cast(this); AddRef(); } else if (iid == __uuidof(IAudioSessionEvents)) { *obj = static_cast(this); AddRef(); } else { return E_NOINTERFACE; } return S_OK; } #endif