This is good enough for nosteamers. The primary focus was on correctly decoding the Steam voice packets into subframes.

This commit is contained in:
jenz 2026-06-01 20:52:20 +01:00
parent 7d46e3b9eb
commit a02af1a297
2 changed files with 164 additions and 177 deletions

View File

@ -231,7 +231,6 @@ DETOUR_DECL_MEMBER1(ProcessVoiceData, bool, void *, msg)
{
//convert steam OPUS packet to CELT for no steam clients
g_Interface.PushPlayerVoiceData(playerSlot, nBytes, voiceDataBuffer);
g_Interface.HandlePlayerVoiceData(playerSlot); //send celt packets to nosteamers.
// Send steam Opus packet to Steam clients
int maxClients = iserver->GetClientCount();
for (int i = 0; i < maxClients; i++)
@ -252,7 +251,6 @@ DETOUR_DECL_MEMBER1(ProcessVoiceData, bool, void *, msg)
SendVoiceDataMsg(playerSlot, pToClient, (unsigned char *)voiceDataBuffer, nBytes, voiceMsg->m_xuid);
}
}
g_fLastVoiceData[clientIndex] = gpGlobals->curtime;
return true;
}
@ -260,25 +258,6 @@ DETOUR_DECL_STATIC4(SV_BroadcastVoiceData, void, IClient *, pClient, int, nBytes
{
}
#ifdef _WIN32
// Windows-only detour for the variant of SV_BroadcastVoiceData that is declared
// with only two explicit arguments (data, xuid). The client pointer and byte
// count are not regular parameters here; they are read from the ecx and edx
// registers on entry.
// NOTE(review): presumably this matches a link-time-optimized (LTCG) engine
// build that passes pClient/nBytes in registers -- confirm against the engine
// binary this detour is installed on.
DETOUR_DECL_STATIC2(SV_BroadcastVoiceData_LTCG, void, char *, data, int64, xuid)
{
IClient *pClient = NULL;
int nBytes = 0;
// Capture the register-passed values before any other code can overwrite them.
__asm mov pClient, ecx;
__asm mov nBytes, edx;
// Let the extension inspect the voice data; the return value decides whether
// the original function is invoked below.
bool ret = g_Interface.OnBroadcastVoiceData(pClient, nBytes, data);
// Put the captured values back into ecx/edx so the original function sees the
// same register contents it would have received without the detour.
// NOTE(review): nothing here prevents the compiler from touching ecx/edx
// between these statements and the call below -- verify the generated code.
__asm mov ecx, pClient;
__asm mov edx, nBytes;
// Forward to the original function only when the handler returned true.
if(ret)
DETOUR_STATIC_CALL(SV_BroadcastVoiceData_LTCG)(data, xuid);
}
#endif
double getTime()
{
struct timespec tv;
@ -327,7 +306,6 @@ CVoice::CVoice()
m_pCeltModePlayer = NULL;
m_torchMonoAccumLen = 0;
m_torchResampleAccum = 0;
memset(m_playerResampleAccum, 0, sizeof(m_playerResampleAccum));
memset(m_torchMonoAccum, 0, sizeof(m_torchMonoAccum));
memset(m_nosteamSeqNum, 0, sizeof(m_nosteamSeqNum));
@ -430,18 +408,7 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late)
// Setup voice detour.
CDetourManager::Init(g_pSM->GetScriptingEngine(), NULL);
#ifdef _WIN32
if (engineVersion == SOURCE_ENGINE_CSGO || engineVersion == SOURCE_ENGINE_INSURGENCY)
{
m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData_LTCG, adrVoiceData);
}
else
{
m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData, adrVoiceData);
}
#else
m_VoiceDetour = DETOUR_CREATE_STATIC(SV_BroadcastVoiceData, adrVoiceData);
#endif
if (!m_VoiceDetour)
{
@ -462,7 +429,6 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late)
//opus edit
int err;
//m_OpusEncoder = opus_encoder_create(24000, 2, OPUS_APPLICATION_AUDIO, &err);
m_OpusEncoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_AUDIO, &err);
if (err<0)
{
@ -494,7 +460,8 @@ bool CVoice::SDK_OnLoad(char *error, size_t maxlength, bool late)
// CELT encoder (22050 Hz mono, 512 samples/frame, 64 byte packets)
m_CeltEncoderSettings.SampleRate_Hz = 22050;
m_CeltEncoderSettings.TargetBitRate_Kbps = 64;
//m_CeltEncoderSettings.TargetBitRate_Kbps = 64;
m_CeltEncoderSettings.TargetBitRate_Kbps = 48;
m_CeltEncoderSettings.FrameSize = 512;
m_CeltEncoderSettings.PacketSize = 64;
m_CeltEncoderSettings.Complexity = 10;
@ -594,7 +561,7 @@ cell_t Native_SendCeltVoiceInit(IPluginContext *pContext, const cell_t *params)
SVC_VoiceInit msg("vaudio_celt", 22050);
pClient->SendNetMsg(msg);
smutils->LogMessage(myself, "Sent SVC_VoiceInit vaudio_celt to client %d", client);
//smutils->LogMessage(myself, "Sent SVC_VoiceInit vaudio_celt to client %d", client);
return 1;
}
@ -807,16 +774,24 @@ void CVoice::OnGameFrame(bool simulating)
HandleVoiceData(); //torchlight audio emitting
HandleNoSteamVoiceData(); //send opus packets to steamers.
//send celt packets to nosteamers
int maxClients = iserver->GetClientCount();
for (int i = 0; i < maxClients; i++)
{
if (!m_pCeltCodecPlayer[i])
continue;
// Keep draining 512-sample blocks until the buffer has less than one full frame remaining
while (m_playerVoiceBuffer[i].TotalLength() >= m_CeltEncoderSettings.FrameSize)
{
HandlePlayerVoiceData(i);
}
}
// Reset per-client voice byte counter to 0 every frame.
memset(g_aFrameVoiceBytes, 0, sizeof(g_aFrameVoiceBytes));
}
/**
 * Legacy broadcast hook kept for interface compatibility.
 *
 * None of the arguments are examined; the function always reports success.
 *
 * @param pClient  Client the voice data belongs to (ignored).
 * @param nBytes   Size of the voice payload in bytes (ignored).
 * @param data     Voice payload (ignored).
 * @return         Always true.
 */
bool CVoice::OnBroadcastVoiceData(IClient *pClient, int nBytes, char *data)
{
	// No longer part of the active voice path, so there is nothing to inspect.
	(void)pClient;
	(void)nBytes;
	(void)data;

	const bool allowBroadcast = true;
	return allowBroadcast;
}
void CVoice::HandleNetwork()
{
if(m_ListenSocket == -1)
@ -1237,11 +1212,43 @@ void CVoice::PushPlayerVoiceData(int playerSlot, int nBytes, char *data)
}
}
double now = (double)gpGlobals->curtime;
double timeSinceLastVoice = now - g_fLastVoiceData[playerSlot + 1];
if (timeSinceLastVoice > 0.5 && g_fLastVoiceData[playerSlot + 1] != 0.0)
{
celt_encoder_ctl(m_pCeltCodecPlayer[playerSlot], CELT_RESET_STATE_REQUEST, NULL);
opus_decoder_ctl(m_PlayerOpusDecoder[playerSlot], OPUS_RESET_STATE);
// Clear out any stale, partial remaining samples left over in the queue
while(m_playerVoiceBuffer[playerSlot].TotalLength() > 0)
{
int16_t trash[512];
size_t toPop = m_playerVoiceBuffer[playerSlot].TotalLength() > 512 ? 512 : m_playerVoiceBuffer[playerSlot].TotalLength();
m_playerVoiceBuffer[playerSlot].Pop(trash, toPop);
}
//smutils->LogMessage(myself, "Voice states flushed cleanly. playerSlot: %d", playerSlot);
}
// Update the timestamp to mark this active packet's arrival
g_fLastVoiceData[playerSlot + 1] = now;
const unsigned char *p = (const unsigned char *)data;
// Verify raw network packets match standard Valve Opus voice signatures
if (nBytes < 18 || p[8] != 0x0B || (p[11] != 0x05 && p[11] != 0x06))
return;
/*
int dumpLen = nBytes < 24 ? nBytes : 24;
char hexBuf[24 * 3 + 1];
int pos = 0;
for (int i = 0; i < dumpLen; i++)
{
static const char hex[] = "0123456789ABCDEF";
hexBuf[pos++] = hex[(p[i] >> 4) & 0xF];
hexBuf[pos++] = hex[p[i] & 0xF];
hexBuf[pos++] = ' ';
}
hexBuf[pos] = '\0';
smutils->LogMessage(myself, "[INBOUND STEAM] nBytes=%d, First bytes: %s", nBytes, hexBuf);
*/
uint16_t totalDataLength;
memcpy(&totalDataLength, p + 12, sizeof(uint16_t));
@ -1258,68 +1265,112 @@ void CVoice::PushPlayerVoiceData(int playerSlot, int nBytes, char *data)
if (offset + 4 <= end)
{
uint16_t frameLen;
memcpy(&frameLen, p + offset, sizeof(uint16_t));
offset += 2;
offset += 2;
uint16_t trueAudioPayloadLen;
memcpy(&trueAudioPayloadLen, p + 12, sizeof(uint16_t));
if (frameLen <= 2 || offset + (int)(frameLen - 2) > end)
return;
// Frame 1 always starts exactly at offset 18
int frame1Start = 18;
if (frame1Start >= end) return;
const unsigned char *opusFrame = p + offset;
// Maximum safe decoded PCM space buffer allocation (120ms frame at 24kHz = 2880 samples)
int16_t pcmBuf[2880];
int decoded = opus_decode(m_PlayerOpusDecoder[playerSlot],
opusFrame, frameLen - 2,
pcmBuf, 2880, 0);
if (decoded <= 0)
const unsigned char *frame1Ptr = p + frame1Start;
// Find the start of Frame 2 by scanning for the next 0x68 TOC marker
int frame2Start = -1;
// A standard 24kHz Opus frame will practically never be shorter than 30 bytes
for (int i = frame1Start + 30; i < end - 4; i++)
{
smutils->LogError(myself, "PushPlayerVoiceData: opus_decode failed: %s", opus_strerror(decoded));
return;
}
// PHASE-PERFECT FIXED POINT RESAMPLING (24000 Hz -> 22050 Hz)
// Shift left into 16.16 fixed-point space to prevent rounding/truncation drift errors
uint32_t step_fp = ((uint32_t)24000 << 16) / (uint32_t)m_CeltEncoderSettings.SampleRate_Hz;
uint32_t curr_fp = m_playerResampleAccum[playerSlot]; // Treat tracking value storage as raw fixed-point register
// Allocate temporary staging stack vector array to drop into ring-buffer cleanly in one pass
int16_t resampledStaging[2880];
int outSamplesCount = 0;
while (true)
{
uint32_t srcIndex = curr_fp >> 16;
if (srcIndex >= (uint32_t)decoded)
if (p[i] == 0x68) //TOC -> 0x68
{
// Verify if this is the start of Frame 2
frame2Start = i;
break;
// Linear Interpolation over adjacent samples to prevent digital harmonic hiss
int16_t s1 = pcmBuf[srcIndex];
int16_t s2 = (srcIndex + 1 < (uint32_t)decoded) ? pcmBuf[srcIndex + 1] : s1;
uint32_t frac = curr_fp & 0xFFFF;
int32_t interpolatedSample = s1 + (((int32_t)(s2 - s1) * (int32_t)frac) >> 16);
resampledStaging[outSamplesCount++] = (int16_t)interpolatedSample;
curr_fp += step_fp;
}
}
// Normalize state trackers back relative to zero base offset
m_playerResampleAccum[playerSlot] = curr_fp - ((uint32_t)decoded << 16);
int16_t pcmFrameBuffer[960];
int totalDecodedSamples = 0;
// Bulk push data down onto RingBuffer payload safely in one operation
if (outSamplesCount > 0 && (size_t)outSamplesCount <= m_playerVoiceBuffer[playerSlot].CurrentFree())
// Calculate explicit size for Frame 1
int frame1Size = (frame2Start != -1) ? (frame2Start - frame1Start) : (end - frame1Start);
if (frame1Size > 0)
{
m_playerVoiceBuffer[playerSlot].Push(resampledStaging, outSamplesCount);
//smutils->LogMessage(myself, "[VOICE-FIX] Frame 1 Determined Size: %d bytes. TOC: 0x%02X", frame1Size, frame1Ptr[0]);
int samples1 = opus_decode(m_PlayerOpusDecoder[playerSlot],
frame1Ptr, frame1Size,
&pcmFrameBuffer[0], 480, 0);
if (samples1 > 0) totalDecodedSamples += samples1;
}
// Process Frame 2 if a second TOC marker was identified
if (frame2Start != -1)
{
const unsigned char *frame2Ptr = p + frame2Start;
int frame2Size = end - frame2Start; // Frame 2 spans to the end of the payload buffer
if (frame2Size > 0)
{
//smutils->LogMessage(myself, "[VOICE-FIX] Frame 2 Determined Size: %d bytes. TOC: 0x%02X", frame2Size, frame2Ptr[0]);
int samples2 = opus_decode(m_PlayerOpusDecoder[playerSlot],
frame2Ptr, frame2Size,
&pcmFrameBuffer[totalDecodedSamples], 480, 0);
if (samples2 > 0) totalDecodedSamples += samples2;
}
}
int16_t resampledFrameBuffer[960];
int totalResampledSamples = 0;
if (totalDecodedSamples > 0)
{
// The exact conversion ratio factor is 147 / 160
// 480 samples at 24kHz converts precisely into 441 samples at 22.05kHz
// 960 samples (2 frames) converts precisely into 882 samples
totalResampledSamples = (totalDecodedSamples * 147) / 160;
for (int i = 0; i < totalResampledSamples; i++)
{
// Determine where this output sample lands on the input timeline
double srcPosition = (double)i * 160.0 / 147.0;
int srcIndex = (int)srcPosition;
double fraction = srcPosition - (double)srcIndex;
if (srcIndex + 1 < totalDecodedSamples)
{
// Linear interpolate between the two closest matching input samples
int16_t sampleA = pcmFrameBuffer[srcIndex];
int16_t sampleB = pcmFrameBuffer[srcIndex + 1];
resampledFrameBuffer[i] = (int16_t)(sampleA + fraction * (sampleB - sampleA));
}
else
{
// Handle boundary edge case for the final trailing sample
resampledFrameBuffer[i] = pcmFrameBuffer[srcIndex];
}
}
}
// --- PUSH SOUND TIMELINES SEQUENTIALLY ---
if (totalResampledSamples > 0)
{
size_t freeSpace = m_playerVoiceBuffer[playerSlot].CurrentFree();
if ((size_t)totalResampledSamples <= freeSpace)
{
// Push the perfectly timed 22050Hz stream to the CELT ring buffer
m_playerVoiceBuffer[playerSlot].Push(resampledFrameBuffer, totalResampledSamples);
}
}
}
}
void CVoice::HandlePlayerVoiceData(int playerSlot)
{
const int CELT_FRAME_SIZE = m_CeltEncoderSettings.FrameSize;
const int CELT_PACKET_SIZE = m_CeltEncoderSettings.PacketSize;
const int CELT_FRAME_SIZE = m_CeltEncoderSettings.FrameSize;
const int CELT_PACKET_SIZE = m_CeltEncoderSettings.PacketSize;
int maxClients = iserver->GetClientCount();
if (!m_pCeltCodecPlayer[playerSlot])
@ -1327,102 +1378,40 @@ void CVoice::HandlePlayerVoiceData(int playerSlot)
size_t currentBufferLength = m_playerVoiceBuffer[playerSlot].TotalLength();
if (currentBufferLength < (size_t)CELT_FRAME_SIZE)
return;
// Use Engine Time instead of System Time to stay perfectly in sync with server tickrates
double now = (double)gpGlobals->curtime;
double timeSinceLastVoice = now - g_fLastVoiceData[playerSlot + 1];
if (timeSinceLastVoice > 0.5) // Reduced to 500ms for responsiveness
{
m_playerAvailableTime[playerSlot + 1] = 0.0;
// Completely clear internal codec history matrices to ensure clean starts
if (m_pCeltCodecPlayer[playerSlot]) {
celt_encoder_ctl(m_pCeltCodecPlayer[playerSlot], CELT_RESET_STATE_REQUEST, NULL);
}
if (m_PlayerOpusDecoder[playerSlot]) {
opus_decoder_ctl(m_PlayerOpusDecoder[playerSlot], OPUS_RESET_STATE);
}
return; // Not enough data yet
}
if (m_playerAvailableTime[playerSlot + 1] == 0.0)
// We have a solid block! Extract exactly one frame's worth of samples
int16_t celtInput[512];
if (!m_playerVoiceBuffer[playerSlot].Pop(celtInput, CELT_FRAME_SIZE))
{
m_playerAvailableTime[playerSlot + 1] = now;
return;
}
double elapsed = now - m_playerAvailableTime[playerSlot + 1];
int framesToEmit = (int)(elapsed / m_CeltEncoderSettings.FrameTime);
// Run the encoder pass on our clean block
unsigned char celtPacket[64];
int celtBytes = celt_encode(m_pCeltCodecPlayer[playerSlot], celtInput,
CELT_FRAME_SIZE, celtPacket, CELT_PACKET_SIZE);
// DYNAMIC JITTER BUFFER CATCH-UP
// If the server lag spikes and calculates a massive frame burst, check what is actually in the buffer.
// There's no point trying to emit 37 frames if the client has only sent 4 frames of real data!
int framesInRealBuffer = (int)(currentBufferLength / (size_t)CELT_FRAME_SIZE);
if (framesToEmit > framesInRealBuffer)
if (celtBytes > 0)
{
framesToEmit = framesInRealBuffer;
}
//smutils->LogMessage(myself, "framesToEmit: %d", framesToEmit);
// Smooth-cap the maximum frames processed per server frame to avoid robotic bursts
// 4 frames = ~92ms of audio, which is an ideal ceiling for single-frame catchups.
if (framesToEmit > 4)
{
framesToEmit = 4;
}
if (framesToEmit <= 0)
return;
int framesProcessed = 0;
while (framesProcessed < framesToEmit &&
m_playerVoiceBuffer[playerSlot].TotalLength() >= (size_t)CELT_FRAME_SIZE)
{
int16_t celtInput[CELT_FRAME_SIZE];
if (!m_playerVoiceBuffer[playerSlot].Pop(celtInput, CELT_FRAME_SIZE))
for (int i = 0; i < maxClients; i++)
{
break;
}
IClient *pToClient = iserver->GetClient(i);
if (!pToClient || !pToClient->IsConnected() || !pToClient->IsActive())
continue;
if (!g_bIsNoSteam[i + 1])
continue;
if (g_bClientMuted[i + 1][playerSlot + 1])
continue;
unsigned char celtPacket[CELT_PACKET_SIZE];
int celtBytes = celt_encode(m_pCeltCodecPlayer[playerSlot], celtInput,
CELT_FRAME_SIZE, celtPacket, CELT_PACKET_SIZE);
if (celtBytes > 0)
{
for (int i = 0; i < maxClients; i++)
{
IClient *pToClient = iserver->GetClient(i);
if (!pToClient || !pToClient->IsConnected() || !pToClient->IsActive())
continue;
if (!g_bIsNoSteam[i + 1])
continue;
if (g_bClientMuted[i + 1][playerSlot + 1])
continue;
SendVoiceDataMsg(playerSlot, pToClient, celtPacket, celtBytes, 0);
}
SendVoiceDataMsg(playerSlot, pToClient, celtPacket, celtBytes, 0);
}
else
{
smutils->LogError(myself, "HandlePlayerVoiceData: celt_encode failed: %d", celtBytes);
break;
}
framesProcessed++;
}
// Advance our tracking clock safely based on what we processed
if (framesProcessed > 0)
else
{
m_playerAvailableTime[playerSlot + 1] += (double)framesProcessed * m_CeltEncoderSettings.FrameTime;
}
else if (framesToEmit > 0)
{
// If we wanted to emit frames but the buffer was empty, pull the clock forward
// to 'now' so we don't build up a permanent structural timing lag.
m_playerAvailableTime[playerSlot + 1] = now;
smutils->LogError(myself, "HandlePlayerVoiceData: celt_encode failed: %d", celtBytes);
}
}

View File

@ -137,7 +137,6 @@ public: // IConCommandBaseAccessor
public:
CVoice();
void OnGameFrame(bool simulating);
bool OnBroadcastVoiceData(IClient *pClient, int nBytes, char *data);
void ListenSocket();
void PushPlayerVoiceData(int playerSlot, int nBytes, char *data);
@ -174,7 +173,6 @@ private:
// Player transcode state (24000 Hz mono -> 22050 Hz mono -> CELT)
OpusDecoder *m_PlayerOpusDecoder[SM_MAXPLAYERS + 1];
int m_playerResampleAccum[SM_MAXPLAYERS + 1];
double m_AvailableTime;
double m_playerAvailableTime[SM_MAXPLAYERS + 1];