From b865638cb863b6a5d320d3bef9fdcc35db6d1879 Mon Sep 17 00:00:00 2001 From: NepDisk Date: Mon, 18 Aug 2025 09:53:12 -0400 Subject: [PATCH] Port restrict and LIKELY/UNLIKELY macros and mark stuff for blitting loop as const and with restrict --- src/doomdef.h | 21 +++++++++++++++++++++ src/hardware/hw_cache.c | 8 ++++---- src/hardware/hw_main.c | 40 ++++++++++++++++++++++++++-------------- src/m_aatree.c | 4 ++-- src/p_saveg.c | 8 ++++---- src/r_draw_span.cpp | 2 +- src/r_things.cpp | 8 ++++---- src/sdl/i_video.cpp | 7 ++++--- 8 files changed, 66 insertions(+), 32 deletions(-) diff --git a/src/doomdef.h b/src/doomdef.h index 3b1b4eef8..001d61bd3 100644 --- a/src/doomdef.h +++ b/src/doomdef.h @@ -455,6 +455,27 @@ UINT32 quickncasehash (const char *p, size_t n) #define DBL_EPSILON 2.2204460492503131e-16l #endif +#if defined(__GNUC__) || defined(__clang__) +#ifndef LIKELY +#define LIKELY(x) __builtin_expect(!!(x), 1) +#endif + +#ifndef UNLIKELY +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#endif +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +#ifdef __cplusplus +#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) +#define restrict __restrict +#else +#define restrict +#endif +#endif + // An assert-type mechanism. // NOTE: USE SRB2_ASSERT FOR C++ CODE INSTEAD #ifdef PARANOIA diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c index 2028f2479..8940d4ee1 100644 --- a/src/hardware/hw_cache.c +++ b/src/hardware/hw_cache.c @@ -469,8 +469,8 @@ static void HWR_GenerateTexture(GLMapTexture_t *grtex, INT32 texnum, boolean noe grtex->mipmap.format = textureformat; // hack the Legacy skies.. 
- if (memcmp(texture->name, "SKY", 3) == 0 && - (texture->name[4] == 0 || texture->name[5] == 0)) + if (UNLIKELY(memcmp(texture->name, "SKY", 3) == 0 && + (texture->name[4] == 0 || texture->name[5] == 0))) { skyspecial = true; grtex->mipmap.flags &= ~TF_CHROMAKEYED; // don't use the chromakey for sky @@ -1310,10 +1310,10 @@ void HWR_GetMappedPatch(patch_t *patch, const UINT8 *colormap) // search for the mipmap // skip the first (no colormap translated) - for (grMipmap = grPatch->mipmap; grMipmap->nextcolormap; ) + for (grMipmap = grPatch->mipmap; LIKELY(grMipmap->nextcolormap); ) { grMipmap = grMipmap->nextcolormap; - if (grMipmap->colormap && grMipmap->colormap->source == colormap) + if (UNLIKELY(grMipmap->colormap && grMipmap->colormap->source == colormap)) { if (memcmp(grMipmap->colormap->data, colormap, 256 * sizeof(UINT8))) { diff --git a/src/hardware/hw_main.c b/src/hardware/hw_main.c index 58279e1d0..1f2131732 100644 --- a/src/hardware/hw_main.c +++ b/src/hardware/hw_main.c @@ -952,7 +952,7 @@ static boolean HWR_BlendMidtextureSurface(FSurfaceInfo *pSurf) pSurf->PolyColor.s.alpha = 0xFF; - if (!gl_curline->polyseg) + if (LIKELY(!gl_curline->polyseg)) { if (gl_linedef->blendmode && gl_linedef->blendmode != AST_FOG) { @@ -1008,7 +1008,7 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom gl_sidedef = gl_curline->sidedef; gl_linedef = gl_curline->linedef; - if (gl_curline->pv1) + if (LIKELY(gl_curline->pv1)) { vs.x = ((polyvertex_t *)gl_curline->pv1)->x; vs.y = ((polyvertex_t *)gl_curline->pv1)->y; @@ -1018,7 +1018,7 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom vs.x = FIXED_TO_FLOAT(gl_curline->v1->x); vs.y = FIXED_TO_FLOAT(gl_curline->v1->y); } - if (gl_curline->pv2) + if (LIKELY(gl_curline->pv2)) { ve.x = ((polyvertex_t *)gl_curline->pv2)->x; ve.y = ((polyvertex_t *)gl_curline->pv2)->y; @@ -2006,7 +2006,7 @@ static boolean CheckClip(seg_t * seg, sector_t * afrontsector, sector_t * abacks if 
(afrontsector->f_slope || afrontsector->c_slope || abacksector->f_slope || abacksector->c_slope) { fixed_t v1x, v1y, v2x, v2y; // the seg's vertexes as fixed_t - if (gl_curline->pv1) + if (LIKELY(gl_curline->pv1)) { v1x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->x); v1y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->y); @@ -2016,7 +2016,7 @@ static boolean CheckClip(seg_t * seg, sector_t * afrontsector, sector_t * abacks v1x = gl_curline->v1->x; v1y = gl_curline->v1->y; } - if (gl_curline->pv2) + if (LIKELY(gl_curline->pv2)) { v2x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->x); v2y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->y); @@ -2188,7 +2188,7 @@ static void HWR_AddLine(seg_t * line) gl_curline = line; - if (gl_curline->pv1) + if (LIKELY(gl_curline->pv1)) { v1x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->x); v1y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->y); @@ -2198,7 +2198,7 @@ static void HWR_AddLine(seg_t * line) v1x = gl_curline->v1->x; v1y = gl_curline->v1->y; } - if (gl_curline->pv2) + if (LIKELY(gl_curline->pv2)) { v2x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->x); v2y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->y); @@ -2866,12 +2866,24 @@ static void HWR_Subsector(size_t num) // without talking about the overdraw of course. 
sub->sector->validcount = validcount;/// \todo fix that in a better way - while (count--) + if (UNLIKELY(numPolyObjects)) { - if (!line->glseg && !line->polyseg) // ignore segs that belong to polyobjects + while (count--) + { + + if (LIKELY(!line->polyseg)) // ignore segs that belong to polyobjects + HWR_AddLine(line); + line++; + } + } + else + { + while (count--) + { HWR_AddLine(line); - line++; + line++; + } } } @@ -4574,7 +4586,7 @@ static void HWR_DrawSprites(void) if (spr->mobj && spr->mobj->skin && spr->mobj->sprite == SPR_PLAY) { - if (!cv_glmodels.value || md2_playermodels[(skin_t*)spr->mobj->skin-skins].notfound || md2_playermodels[(skin_t*)spr->mobj->skin-skins].scale < 0.0f) + if (LIKELY(!cv_glmodels.value || md2_playermodels[(skin_t*)spr->mobj->skin-skins].notfound || md2_playermodels[(skin_t*)spr->mobj->skin-skins].scale < 0.0f)) HWR_DrawSprite(spr); else { @@ -4584,7 +4596,7 @@ static void HWR_DrawSprites(void) } else { - if (!cv_glmodels.value || md2_models[spr->mobj->sprite].notfound || md2_models[spr->mobj->sprite].scale < 0.0f) + if (LIKELY(!cv_glmodels.value || md2_models[spr->mobj->sprite].notfound || md2_models[spr->mobj->sprite].scale < 0.0f)) HWR_DrawSprite(spr); else { @@ -5873,7 +5885,7 @@ void HWR_RenderSkyboxView(player_t *player) validcount++; - if (cv_glbatching.value) + if (LIKELY(cv_glbatching.value)) HWR_StartBatching(); #ifdef HWPRECIP @@ -5882,7 +5894,7 @@ void HWR_RenderSkyboxView(player_t *player) HWR_RenderBSPNode((INT32)numnodes-1); - if (cv_glbatching.value) + if (LIKELY(cv_glbatching.value)) HWR_RenderBatches(); // Check for new console commands. diff --git a/src/m_aatree.c b/src/m_aatree.c index 23f1faf29..c67800a1c 100644 --- a/src/m_aatree.c +++ b/src/m_aatree.c @@ -151,9 +151,9 @@ void M_AATreeSet(aatree_t *aatree, INT32 key, void* value) // and nodes with value == NULL. 
static void *M_AATreeGet_Node(aatree_node_t *node, INT32 key) { - if (node) + if (LIKELY(node)) { - if (node->key == key) + if (UNLIKELY(node->key == key)) return node->value; else if(node->key < key) return M_AATreeGet_Node(node->right, key); diff --git a/src/p_saveg.c b/src/p_saveg.c index 80de3ea72..4d5b01278 100644 --- a/src/p_saveg.c +++ b/src/p_saveg.c @@ -4921,9 +4921,9 @@ static void P_RelinkPointers(void) mobj = (mobj_t *)currentthinker; - if (mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER + if (UNLIKELY(mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER // MT_SPARK: used for debug stuff - || mobj->type == MT_SPARK) + || mobj->type == MT_SPARK)) continue; if (mobj->tracer) @@ -5598,9 +5598,9 @@ void P_SaveNetGame(savebuffer_t *save, boolean resending) continue; mobj = (mobj_t *)th; - if (mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER + if (UNLIKELY(mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER // MT_SPARK: used for debug stuff - || mobj->type == MT_SPARK) + || mobj->type == MT_SPARK)) continue; mobj->mobjnum = i++; } diff --git a/src/r_draw_span.cpp b/src/r_draw_span.cpp index 77d707be3..2fd3e9c22 100644 --- a/src/r_draw_span.cpp +++ b/src/r_draw_span.cpp @@ -149,7 +149,7 @@ static void R_DrawSpanTemplate(drawspandata_t* ds) UINT8 *dsrc; const INT32 vidwidth = vid.width; - const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; + const UINT8 * restrict deststop = screens[0] + vid.rowbytes * vid.height; size_t count = (ds->x2 - ds->x1 + 1); size_t i; diff --git a/src/r_things.cpp b/src/r_things.cpp index db3aeeb6d..0cf72358f 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -3697,16 +3697,16 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) /* Check if thing may be drawn from our current view. 
*/ boolean R_ThingVisible (mobj_t *thing) { - if (thing->sprite == SPR_NULL) + if (UNLIKELY(thing->sprite == SPR_NULL)) return false; - if (r_viewmobj && (thing == r_viewmobj || (r_viewmobj->player && r_viewmobj->player->followmobj == thing))) + if (UNLIKELY(r_viewmobj && (thing == r_viewmobj || (r_viewmobj->player && r_viewmobj->player->followmobj == thing)))) return false; - if ((viewssnum == 0 && (thing->renderflags & RF_DONTDRAWP1)) + if (UNLIKELY((viewssnum == 0 && (thing->renderflags & RF_DONTDRAWP1)) || (viewssnum == 1 && (thing->renderflags & RF_DONTDRAWP2)) || (viewssnum == 2 && (thing->renderflags & RF_DONTDRAWP3)) - || (viewssnum == 3 && (thing->renderflags & RF_DONTDRAWP4))) + || (viewssnum == 3 && (thing->renderflags & RF_DONTDRAWP4)))) return false; return true; diff --git a/src/sdl/i_video.cpp b/src/sdl/i_video.cpp index dd50ff13c..9e8e11e56 100644 --- a/src/sdl/i_video.cpp +++ b/src/sdl/i_video.cpp @@ -1330,9 +1330,10 @@ void I_FinishUpdate(void) { SDL_LockSurface(vidSurface); // copy pixels ourselves to the video surface (prevents a crash in libsdl) - UINT32 *dst = (UINT32*)vidSurface->pixels; - UINT8 *src = screens[0]; - for (int32_t i = 0; i < vid.width * vid.height; i++) + UINT32 *restrict dst = (UINT32*)vidSurface->pixels; + const UINT8 *restrict src = screens[0]; + const INT32 count = vid.width * vid.height; + for (INT32 i = 0; i < count; i++) *dst++ = localPalette[*src++]; SDL_UnlockSurface(vidSurface); // Fury -- there's no way around UpdateTexture, the GL backend uses it anyway