// Patch commentary (review notes; the diff itself follows unchanged).
//
// Purpose: when __SSE__ is defined, allocate the renderer's column buffer and
// the video screens with aligned_alloc(16, ...) instead of the previous
// allocators, and release them with aligned_free().
//
// src/r_draw.cpp, hunk in R_InitViewBuffer:
//   - An existing temp_dc.buf is released with aligned_free() under __SSE__,
//     otherwise with Z_Free() as before.
//   - The Z_Calloc(bufsize, PU_STATIC, NULL) call is removed. Under __SSE__,
//     bufsize is incremented until its low four bits are clear (a multiple of
//     16) and the buffer comes from aligned_alloc(16, bufsize); otherwise it
//     comes from Z_Malloc(bufsize, PU_STATIC, NULL).
//   - A memset(temp_dc.buf, 0, bufsize) is added after the #if/#else so both
//     paths zero the buffer explicitly.
//
// src/v_video.c, hunks in V_Init:
//   - screensize loses its const qualifier so it can be rounded up later.
//   - Existing vid.screens[i] entries are released with aligned_free() under
//     __SSE__, otherwise with free(); the pointer is then set to NULL.
//   - Each screen is allocated with aligned_alloc(16, screensize) under
//     __SSE__ (after rounding screensize up to a multiple of 16), otherwise
//     with malloc(screensize); the existing memset zeroes it.
//
// NOTE(review): neither aligned_alloc() result is checked for NULL in the
//   lines shown before being passed to memset() -- confirm whether a failure
//   path is needed.
// NOTE(review): aligned_free() is not declared anywhere in this diff;
//   presumably a project/platform helper -- confirm. Memory obtained from the
//   standard aligned_alloc() is released with free().
// NOTE(review): the static_cast(...) calls appear here without a type
//   argument; this may be an artifact of how the patch text was extracted --
//   verify against the original patch.
// NOTE(review): in V_Init the rounding loop sits inside the per-screen loop;
//   it only changes screensize on the first iteration. Both rounding loops
//   could be written as (size + 15) & ~15.
diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e8cfde488..23835fe4f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -546,12 +546,24 @@ void R_InitViewBuffer(INT32 width, INT32 height) if (temp_dc.buf) { +#if defined(__SSE__) + aligned_free(temp_dc.buf); +#else Z_Free(temp_dc.buf); +#endif } memset(&temp_dc, 0, sizeof(temp_dc)); - temp_dc.buf = static_cast(Z_Calloc(bufsize, PU_STATIC, NULL)); +#if defined(__SSE__) + while (bufsize & 15) + bufsize++; + temp_dc.buf = static_cast(aligned_alloc(16, bufsize)); +#else + temp_dc.buf = static_cast(Z_Malloc(bufsize, PU_STATIC, NULL)); +#endif + + memset(temp_dc.buf, 0, bufsize); linesize = vid.width; // killough 11/98 renderscreen = vid.screens[0]; // haleyjd 07/02/14 diff --git a/src/v_video.c b/src/v_video.c index 34a88cecf..ccc6fcdac 100644 --- a/src/v_video.c +++ b/src/v_video.c @@ -3873,12 +3873,19 @@ UINT8 GetColorLUTDirect(colorlookup_t *lut, UINT8 r, UINT8 g, UINT8 b) void V_Init(void) { INT32 i; - const INT32 screensize = vid.rowbytes * vid.height; + INT32 screensize = vid.rowbytes * vid.height; for (i = 0; i < NUMSCREENS; i++) { if (vid.screens[i]) + { +#if defined(__SSE__) + aligned_free(vid.screens[i]); +#else free(vid.screens[i]); +#endif + } + vid.screens[i] = NULL; } @@ -3887,7 +3894,15 @@ void V_Init(void) { for (i = 0; i < NUMSCREENS; i++) { + // we need to allocate these relative to their cpu restrictions to not trigger segfaults + // TODO: add support for sve and neon +#if defined(__SSE__) + while (screensize & 15) + screensize++; + vid.screens[i] = aligned_alloc(16, screensize); +#else vid.screens[i] = malloc(screensize); +#endif memset(vid.screens[i], 0, screensize); } }