From a02af1a297eb2ea62e58e3208fcf03680c6f9908 Mon Sep 17 00:00:00 2001 From: jenz Date: Mon, 1 Jun 2026 20:52:20 +0100 Subject: [PATCH] this is good enough for nosteamers. primary focus point was working on decoding the steam voice packets correctly into subframes --- extension.cpp | 339 ++++++++++++++++++++++++-------------------------- extension.h | 2 - 2 files changed, 164 insertions(+), 177 deletions(-) diff --git a/extension.cpp b/extension.cpp index c8985b8..ad9d499 100644 --- a/extension.cpp +++ b/extension.cpp @@ -231,7 +231,6 @@ DETOUR_DECL_MEMBER1(ProcessVoiceData, bool, void *, msg) { //convert steam OPUS packet to CELT for no steam clients g_Interface.PushPlayerVoiceData(playerSlot, nBytes, voiceDataBuffer); - g_Interface.HandlePlayerVoiceData(playerSlot); //send celt packets to nosteamers. // Send steam Opus packet to Steam clients int maxClients = iserver->GetClientCount(); for (int i = 0; i < maxClients; i++) @@ -252,7 +251,6 @@ DETOUR_DECL_MEMBER1(ProcessVoiceData, bool, void *, msg) SendVoiceDataMsg(playerSlot, pToClient, (unsigned char *)voiceDataBuffer, nBytes, voiceMsg->m_xuid); } } - g_fLastVoiceData[clientIndex] = gpGlobals->curtime; return true; } @@ -260,25 +258,6 @@ DETOUR_DECL_STATIC4(SV_BroadcastVoiceData, void, IClient *, pClient, int, nBytes { } -#ifdef _WIN32 -DETOUR_DECL_STATIC2(SV_BroadcastVoiceData_LTCG, void, char *, data, int64, xuid) -{ - IClient *pClient = NULL; - int nBytes = 0; - - __asm mov pClient, ecx; - __asm mov nBytes, edx; - - bool ret = g_Interface.OnBroadcastVoiceData(pClient, nBytes, data); - - __asm mov ecx, pClient; - __asm mov edx, nBytes; - - if(ret) - DETOUR_STATIC_CALL(SV_BroadcastVoiceData_LTCG)(data, xuid); -} -#endif - double getTime() { struct timespec tv; @@ -327,7 +306,6 @@ CVoice::CVoice() m_pCeltModePlayer = NULL; m_torchMonoAccumLen = 0; m_torchResampleAccum = 0; - memset(m_playerResampleAccum, 0, sizeof(m_playerResampleAccum)); memset(m_torchMonoAccum, 0, sizeof(m_torchMonoAccum)); memset(m_nosteamSeqNum, 0, sizeof(m_nosteamSeqNum)); @@ -430,18 +408,7 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late) // Setup voice detour. CDetourManager::Init(g_pSM->GetScriptingEngine(), NULL); -#ifdef _WIN32 - if (engineVersion == SOURCE_ENGINE_CSGO || engineVersion == SOURCE_ENGINE_INSURGENCY) - { - m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData_LTCG, adrVoiceData); - } - else - { - m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData, adrVoiceData); - } -#else m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData, adrVoiceData); -#endif if (!m_VoiceDetour) { @@ -462,7 +429,6 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late) //opus edit int err; - //m_OpusEncoder = opus_encoder_create(24000, 2, OPUS_APPLICATION_AUDIO, &err); m_OpusEncoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_AUDIO, &err); if (err<0) { @@ -494,7 +460,8 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late) // CELT encoder (22050 Hz mono, 512 samples/frame, 64 byte packets) m_CeltEncoderSettings.SampleRate_Hz = 22050; - m_CeltEncoderSettings.TargetBitRate_Kbps = 64; + //m_CeltEncoderSettings.TargetBitRate_Kbps = 64; + m_CeltEncoderSettings.TargetBitRate_Kbps = 48; m_CeltEncoderSettings.FrameSize = 512; m_CeltEncoderSettings.PacketSize = 64; m_CeltEncoderSettings.Complexity = 10; @@ -594,7 +561,7 @@ cell_t Native_SendCeltVoiceInit(IPluginContext *pContext, const cell_t *params) SVC_VoiceInit msg("vaudio_celt", 22050); pClient->SendNetMsg(msg); - smutils->LogMessage(myself, "Sent SVC_VoiceInit vaudio_celt to client %d", client); + //smutils->LogMessage(myself, "Sent SVC_VoiceInit vaudio_celt to client %d", client); return 1; } @@ -807,16 +774,24 @@ void CVoice::OnGameFrame(bool simulating) HandleVoiceData(); //torchlight audio emitting HandleNoSteamVoiceData(); //send opus packets to steamers. + //send celt packets to nosteamers + int maxClients = iserver->GetClientCount(); + for (int i = 0; i < maxClients; i++) + { + if (!m_pCeltCodecPlayer[i]) + continue; + + // Keep draining 512-sample blocks until the buffer has less than one full frame remaining + while (m_playerVoiceBuffer[i].TotalLength() >= m_CeltEncoderSettings.FrameSize) + { + HandlePlayerVoiceData(i); + } + } + // Reset per-client voice byte counter to 0 every frame. memset(g_aFrameVoiceBytes, 0, sizeof(g_aFrameVoiceBytes)); } -bool CVoice::OnBroadcastVoiceData(IClient *pClient, int nBytes, char *data) -{ - //not actually used anymore anyways - return true; -} - void CVoice::HandleNetwork() { if(m_ListenSocket == -1) @@ -1237,11 +1212,43 @@ void CVoice::PushPlayerVoiceData(int playerSlot, int nBytes, char *data) } } + double now = (double)gpGlobals->curtime; + double timeSinceLastVoice = now - g_fLastVoiceData[playerSlot + 1]; + if (timeSinceLastVoice > 0.5 && g_fLastVoiceData[playerSlot + 1] != 0.0) + { + celt_encoder_ctl(m_pCeltCodecPlayer[playerSlot], CELT_RESET_STATE_REQUEST, NULL); + opus_decoder_ctl(m_PlayerOpusDecoder[playerSlot], OPUS_RESET_STATE); + + // Clear out any stale, partial remaining samples left over in the queue + while(m_playerVoiceBuffer[playerSlot].TotalLength() > 0) + { + int16_t trash[512]; + size_t toPop = m_playerVoiceBuffer[playerSlot].TotalLength() > 512 ? 512 : m_playerVoiceBuffer[playerSlot].TotalLength(); + m_playerVoiceBuffer[playerSlot].Pop(trash, toPop); + } + + //smutils->LogMessage(myself, "Voice states flushed cleanly. playerSlot: %d", playerSlot); + } + + // Update the timestamp to mark this active packet's arrival + g_fLastVoiceData[playerSlot + 1] = now; + const unsigned char *p = (const unsigned char *)data; - // Verify raw network packets match standard Valve Opus voice signatures - if (nBytes < 18 || p[8] != 0x0B || (p[11] != 0x05 && p[11] != 0x06)) - return; + /* + int dumpLen = nBytes < 24 ? nBytes : 24; + char hexBuf[24 * 3 + 1]; + int pos = 0; + for (int i = 0; i < dumpLen; i++) + { + static const char hex[] = "0123456789ABCDEF"; + hexBuf[pos++] = hex[(p[i] >> 4) & 0xF]; + hexBuf[pos++] = hex[p[i] & 0xF]; + hexBuf[pos++] = ' '; + } + hexBuf[pos] = '\0'; + smutils->LogMessage(myself, "[INBOUND STEAM] nBytes=%d, First bytes: %s", nBytes, hexBuf); + */ uint16_t totalDataLength; memcpy(&totalDataLength, p + 12, sizeof(uint16_t)); @@ -1258,68 +1265,112 @@ void CVoice::PushPlayerVoiceData(int playerSlot, int nBytes, char *data) if (offset + 4 <= end) { - uint16_t frameLen; - memcpy(&frameLen, p + offset, sizeof(uint16_t)); - offset += 2; - offset += 2; + uint16_t trueAudioPayloadLen; + memcpy(&trueAudioPayloadLen, p + 12, sizeof(uint16_t)); - if (frameLen <= 2 || offset + (int)(frameLen - 2) > end) - return; + // Frame 1 always starts exactly at offset 18 + int frame1Start = 18; + if (frame1Start >= end) return; - const unsigned char *opusFrame = p + offset; - - // Maximum safe decoded PCM space buffer allocation (120ms frame at 24kHz = 2880 samples) - int16_t pcmBuf[2880]; - int decoded = opus_decode(m_PlayerOpusDecoder[playerSlot], - opusFrame, frameLen - 2, - pcmBuf, 2880, 0); - if (decoded <= 0) + const unsigned char *frame1Ptr = p + frame1Start; + + // Find the start of Frame 2 by scanning for the next 0x68 TOC marker + int frame2Start = -1; + + // A standard 24kHz Opus frame will practically never be shorter than 30 bytes + for (int i = frame1Start + 30; i < end - 4; i++) { - smutils->LogError(myself, "PushPlayerVoiceData: opus_decode failed: %s", opus_strerror(decoded)); - return; - } - - // PHASE-PERFECT FIXED POINT RESAMPLING (24000 Hz -> 22050 Hz) - // Shift left into 16.16 fixed-point space to prevent rounding/truncation drift errors - uint32_t step_fp = ((uint32_t)24000 << 16) / (uint32_t)m_CeltEncoderSettings.SampleRate_Hz; - uint32_t curr_fp = m_playerResampleAccum[playerSlot]; // Treat tracking value storage as raw fixed-point register - - // Allocate temporary staging stack vector array to drop into ring-buffer cleanly in one pass - int16_t resampledStaging[2880]; - int outSamplesCount = 0; - - while (true) - { - uint32_t srcIndex = curr_fp >> 16; - if (srcIndex >= (uint32_t)decoded) + if (p[i] == 0x68) //TOC -> 0x68 + { + // Verify if this is the start of Frame 2 + frame2Start = i; break; - - // Linear Interpolation over adjacent samples to prevent digital harmonic hiss - int16_t s1 = pcmBuf[srcIndex]; - int16_t s2 = (srcIndex + 1 < (uint32_t)decoded) ? pcmBuf[srcIndex + 1] : s1; - - uint32_t frac = curr_fp & 0xFFFF; - int32_t interpolatedSample = s1 + (((int32_t)(s2 - s1) * (int32_t)frac) >> 16); - - resampledStaging[outSamplesCount++] = (int16_t)interpolatedSample; - curr_fp += step_fp; + } } - // Normalize state trackers back relative to zero base offset - m_playerResampleAccum[playerSlot] = curr_fp - ((uint32_t)decoded << 16); + int16_t pcmFrameBuffer[960]; + int totalDecodedSamples = 0; - // Bulk push data down onto RingBuffer payload safely in one operation - if (outSamplesCount > 0 && (size_t)outSamplesCount <= m_playerVoiceBuffer[playerSlot].CurrentFree()) + // Calculate explicit size for Frame 1 + int frame1Size = (frame2Start != -1) ? (frame2Start - frame1Start) : (end - frame1Start); + + if (frame1Size > 0) { - m_playerVoiceBuffer[playerSlot].Push(resampledStaging, outSamplesCount); + //smutils->LogMessage(myself, "[VOICE-FIX] Frame 1 Determined Size: %d bytes. TOC: 0x%02X", frame1Size, frame1Ptr[0]); + + int samples1 = opus_decode(m_PlayerOpusDecoder[playerSlot], + frame1Ptr, frame1Size, + &pcmFrameBuffer[0], 480, 0); + + if (samples1 > 0) totalDecodedSamples += samples1; + } + + // Process Frame 2 if a second TOC marker was identified + if (frame2Start != -1) + { + const unsigned char *frame2Ptr = p + frame2Start; + int frame2Size = end - frame2Start; // Frame 2 spans to the end of the payload buffer + + if (frame2Size > 0) + { + //smutils->LogMessage(myself, "[VOICE-FIX] Frame 2 Determined Size: %d bytes. TOC: 0x%02X", frame2Size, frame2Ptr[0]); + + int samples2 = opus_decode(m_PlayerOpusDecoder[playerSlot], + frame2Ptr, frame2Size, + &pcmFrameBuffer[totalDecodedSamples], 480, 0); + + if (samples2 > 0) totalDecodedSamples += samples2; + } + } + + int16_t resampledFrameBuffer[960]; + int totalResampledSamples = 0; + + if (totalDecodedSamples > 0) + { + // The exact conversion ratio factor is 147 / 160 + // 480 samples at 24kHz converts precisely into 441 samples at 22.05kHz + // 960 samples (2 frames) converts precisely into 882 samples + totalResampledSamples = (totalDecodedSamples * 147) / 160; + + for (int i = 0; i < totalResampledSamples; i++) + { + // Determine where this output sample lands on the input timeline + double srcPosition = (double)i * 160.0 / 147.0; + int srcIndex = (int)srcPosition; + double fraction = srcPosition - (double)srcIndex; + + if (srcIndex + 1 < totalDecodedSamples) + { + // Linear interpolate between the two closest matching input samples + int16_t sampleA = pcmFrameBuffer[srcIndex]; + int16_t sampleB = pcmFrameBuffer[srcIndex + 1]; + resampledFrameBuffer[i] = (int16_t)(sampleA + fraction * (sampleB - sampleA)); + } + else + { + // Handle boundary edge case for the final trailing sample + resampledFrameBuffer[i] = pcmFrameBuffer[srcIndex]; + } + } + } + // --- PUSH SOUND TIMELINES SEQUENTIALLY --- + if (totalResampledSamples > 0) + { + size_t freeSpace = m_playerVoiceBuffer[playerSlot].CurrentFree(); + if ((size_t)totalResampledSamples <= freeSpace) + { + // Push the perfectly timed 22050Hz stream to the CELT ring buffer + m_playerVoiceBuffer[playerSlot].Push(resampledFrameBuffer, totalResampledSamples); + } } } } void CVoice::HandlePlayerVoiceData(int playerSlot) { - const int CELT_FRAME_SIZE = m_CeltEncoderSettings.FrameSize; - const int CELT_PACKET_SIZE = m_CeltEncoderSettings.PacketSize; + const int CELT_FRAME_SIZE = m_CeltEncoderSettings.FrameSize; + const int CELT_PACKET_SIZE = m_CeltEncoderSettings.PacketSize; int maxClients = iserver->GetClientCount(); if (!m_pCeltCodecPlayer[playerSlot]) @@ -1327,102 +1378,40 @@ void CVoice::HandlePlayerVoiceData(int playerSlot) size_t currentBufferLength = m_playerVoiceBuffer[playerSlot].TotalLength(); if (currentBufferLength < (size_t)CELT_FRAME_SIZE) - return; - - // Use Engine Time instead of System Time to stay perfectly in sync with server tickrates - double now = (double)gpGlobals->curtime; - - double timeSinceLastVoice = now - g_fLastVoiceData[playerSlot + 1]; - if (timeSinceLastVoice > 0.5) // Reduced to 500ms for responsiveness { - m_playerAvailableTime[playerSlot + 1] = 0.0; - - // Completely clear internal codec history matrices to ensure clean starts - if (m_pCeltCodecPlayer[playerSlot]) { - celt_encoder_ctl(m_pCeltCodecPlayer[playerSlot], CELT_RESET_STATE_REQUEST, NULL); - } - if (m_PlayerOpusDecoder[playerSlot]) { - opus_decoder_ctl(m_PlayerOpusDecoder[playerSlot], OPUS_RESET_STATE); - } + return; // Not enough data yet } - if (m_playerAvailableTime[playerSlot + 1] == 0.0) + // We have a solid block! Extract exactly one frame's worth of samples + int16_t celtInput[512]; + if (!m_playerVoiceBuffer[playerSlot].Pop(celtInput, CELT_FRAME_SIZE)) { - m_playerAvailableTime[playerSlot + 1] = now; return; } - double elapsed = now - m_playerAvailableTime[playerSlot + 1]; - int framesToEmit = (int)(elapsed / m_CeltEncoderSettings.FrameTime); + // Run the encoder pass on our clean block + unsigned char celtPacket[64]; + int celtBytes = celt_encode(m_pCeltCodecPlayer[playerSlot], celtInput, + CELT_FRAME_SIZE, celtPacket, CELT_PACKET_SIZE); - // DYNAMIC JITTER BUFFER CATCH-UP - // If the server lag spikes and calculates a massive frame burst, check what is actually in the buffer. - // There's no point trying to emit 37 frames if the client has only sent 4 frames of real data! - int framesInRealBuffer = (int)(currentBufferLength / (size_t)CELT_FRAME_SIZE); - - if (framesToEmit > framesInRealBuffer) + if (celtBytes > 0) { - framesToEmit = framesInRealBuffer; - } - //smutils->LogMessage(myself, "framesToEmit: %d", framesToEmit); - - // Smooth-cap the maximum frames processed per server frame to avoid robotic bursts - // 4 frames = ~92ms of audio, which is an ideal ceiling for single-frame catchups. - if (framesToEmit > 4) - { - framesToEmit = 4; - } - - if (framesToEmit <= 0) - return; - - int framesProcessed = 0; - - while (framesProcessed < framesToEmit && - m_playerVoiceBuffer[playerSlot].TotalLength() >= (size_t)CELT_FRAME_SIZE) - { - int16_t celtInput[CELT_FRAME_SIZE]; - if (!m_playerVoiceBuffer[playerSlot].Pop(celtInput, CELT_FRAME_SIZE)) + for (int i = 0; i < maxClients; i++) { - break; - } + IClient *pToClient = iserver->GetClient(i); + if (!pToClient || !pToClient->IsConnected() || !pToClient->IsActive()) + continue; + if (!g_bIsNoSteam[i + 1]) + continue; + if (g_bClientMuted[i + 1][playerSlot + 1]) + continue; - unsigned char celtPacket[CELT_PACKET_SIZE]; - int celtBytes = celt_encode(m_pCeltCodecPlayer[playerSlot], celtInput, - CELT_FRAME_SIZE, celtPacket, CELT_PACKET_SIZE); - if (celtBytes > 0) - { - for (int i = 0; i < maxClients; i++) - { - IClient *pToClient = iserver->GetClient(i); - if (!pToClient || !pToClient->IsConnected() || !pToClient->IsActive()) - continue; - if (!g_bIsNoSteam[i + 1]) - continue; - if (g_bClientMuted[i + 1][playerSlot + 1]) - continue; - SendVoiceDataMsg(playerSlot, pToClient, celtPacket, celtBytes, 0); - } + SendVoiceDataMsg(playerSlot, pToClient, celtPacket, celtBytes, 0); } - else - { - smutils->LogError(myself, "HandlePlayerVoiceData: celt_encode failed: %d", celtBytes); - break; - } - - framesProcessed++; } - - // Advance our tracking clock safely based on what we processed - if (framesProcessed > 0) + else { - m_playerAvailableTime[playerSlot + 1] += (double)framesProcessed * m_CeltEncoderSettings.FrameTime; - } - else if (framesToEmit > 0) - { - // If we wanted to emit frames but the buffer was empty, pull the clock forward - // to 'now' so we don't build up a permanent structural timing lag. - m_playerAvailableTime[playerSlot + 1] = now; + smutils->LogError(myself, "HandlePlayerVoiceData: celt_encode failed: %d", celtBytes); } } diff --git a/extension.h b/extension.h index 9086e80..2fc74e8 100644 --- a/extension.h +++ b/extension.h @@ -137,7 +137,6 @@ public: // IConCommandBaseAccessor public: CVoice(); void OnGameFrame(bool simulating); - bool OnBroadcastVoiceData(IClient *pClient, int nBytes, char *data); void ListenSocket(); void PushPlayerVoiceData(int playerSlot, int nBytes, char *data); @@ -174,7 +173,6 @@ private: // Player transcode state (24000 Hz mono -> 22050 Hz mono -> CELT) OpusDecoder *m_PlayerOpusDecoder[SM_MAXPLAYERS + 1]; - int m_playerResampleAccum[SM_MAXPLAYERS + 1]; double m_AvailableTime; double m_playerAvailableTime[SM_MAXPLAYERS + 1];