use aligned alloc for screen and column buffers

16-byte alignment

should speed up everything a bit
This commit is contained in:
Alug 2025-10-19 20:48:16 +02:00
parent bad7b9f980
commit f0e0edbc1d
2 changed files with 29 additions and 2 deletions

View file

@ -546,12 +546,24 @@ void R_InitViewBuffer(INT32 width, INT32 height)
if (temp_dc.buf)
{
#if defined(__SSE__)
aligned_free(temp_dc.buf);
#else
Z_Free(temp_dc.buf);
#endif
}
memset(&temp_dc, 0, sizeof(temp_dc));
temp_dc.buf = static_cast<UINT8*>(Z_Calloc(bufsize, PU_STATIC, NULL));
#if defined(__SSE__)
while (bufsize & 15)
bufsize++;
temp_dc.buf = static_cast<UINT8*>(aligned_alloc(16, bufsize));
#else
temp_dc.buf = static_cast<UINT8*>(Z_Malloc(bufsize, PU_STATIC, NULL));
#endif
memset(temp_dc.buf, 0, bufsize);
linesize = vid.width; // killough 11/98
renderscreen = vid.screens[0]; // haleyjd 07/02/14

View file

@ -3873,12 +3873,19 @@ UINT8 GetColorLUTDirect(colorlookup_t *lut, UINT8 r, UINT8 g, UINT8 b)
void V_Init(void)
{
INT32 i;
const INT32 screensize = vid.rowbytes * vid.height;
INT32 screensize = vid.rowbytes * vid.height;
for (i = 0; i < NUMSCREENS; i++)
{
if (vid.screens[i])
{
#if defined(__SSE__)
aligned_free(vid.screens[i]);
#else
free(vid.screens[i]);
#endif
}
vid.screens[i] = NULL;
}
@ -3887,7 +3894,15 @@ void V_Init(void)
{
for (i = 0; i < NUMSCREENS; i++)
{
// we need to allocate these relative to their cpu restrictions to not trigger segfaults
// TODO: add support for sve and neon
#if defined(__SSE__)
while (screensize & 15)
screensize++;
vid.screens[i] = aligned_alloc(16, screensize);
#else
vid.screens[i] = malloc(screensize);
#endif
memset(vid.screens[i], 0, screensize);
}
}