Port restrict and LIKELY/UNLIKELY macros, and mark the blitting-loop variables as const and restrict

This commit is contained in:
NepDisk 2025-08-18 09:53:12 -04:00
parent a4b695da04
commit b865638cb8
8 changed files with 66 additions and 32 deletions

View file

@ -455,6 +455,27 @@ UINT32 quickncasehash (const char *p, size_t n)
#define DBL_EPSILON 2.2204460492503131e-16l
#endif
// Branch-prediction hints.
// LIKELY(x) / UNLIKELY(x) evaluate x exactly once and yield its truth value
// (0 or 1) on every compiler. On GCC and Clang they additionally tell the
// optimizer which outcome to expect via __builtin_expect; elsewhere they are
// a plain truth test.
// Each macro is guarded individually so a definition supplied earlier (by
// another header or the build system) is respected on every compiler, not
// only on the GNU-compatible ones.
#ifndef LIKELY
#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#else
#define LIKELY(x) (!!(x))
#endif
#endif
#ifndef UNLIKELY
#if defined(__GNUC__) || defined(__clang__)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define UNLIKELY(x) (!!(x))
#endif
#endif
// C++ translation units only: make the C99 `restrict` qualifier usable.
// GCC, Clang and MSVC get their `__restrict` extension; any other C++
// compiler sees `restrict` expand to nothing. C translation units are left
// untouched.
#if defined(__cplusplus)
#if !(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
#define restrict
#else
#define restrict __restrict
#endif
#endif // __cplusplus
// An assert-type mechanism.
// NOTE: USE SRB2_ASSERT FOR C++ CODE INSTEAD
#ifdef PARANOIA

View file

@ -469,8 +469,8 @@ static void HWR_GenerateTexture(GLMapTexture_t *grtex, INT32 texnum, boolean noe
grtex->mipmap.format = textureformat;
// hack the Legacy skies..
if (memcmp(texture->name, "SKY", 3) == 0 &&
(texture->name[4] == 0 || texture->name[5] == 0))
if (UNLIKELY(memcmp(texture->name, "SKY", 3) == 0 &&
(texture->name[4] == 0 || texture->name[5] == 0)))
{
skyspecial = true;
grtex->mipmap.flags &= ~TF_CHROMAKEYED; // don't use the chromakey for sky
@ -1310,10 +1310,10 @@ void HWR_GetMappedPatch(patch_t *patch, const UINT8 *colormap)
// search for the mipmap
// skip the first (no colormap translated)
for (grMipmap = grPatch->mipmap; grMipmap->nextcolormap; )
for (grMipmap = grPatch->mipmap; LIKELY(grMipmap->nextcolormap); )
{
grMipmap = grMipmap->nextcolormap;
if (grMipmap->colormap && grMipmap->colormap->source == colormap)
if (UNLIKELY(grMipmap->colormap && grMipmap->colormap->source == colormap))
{
if (memcmp(grMipmap->colormap->data, colormap, 256 * sizeof(UINT8)))
{

View file

@ -952,7 +952,7 @@ static boolean HWR_BlendMidtextureSurface(FSurfaceInfo *pSurf)
pSurf->PolyColor.s.alpha = 0xFF;
if (!gl_curline->polyseg)
if (LIKELY(!gl_curline->polyseg))
{
if (gl_linedef->blendmode && gl_linedef->blendmode != AST_FOG)
{
@ -1008,7 +1008,7 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom
gl_sidedef = gl_curline->sidedef;
gl_linedef = gl_curline->linedef;
if (gl_curline->pv1)
if (LIKELY(gl_curline->pv1))
{
vs.x = ((polyvertex_t *)gl_curline->pv1)->x;
vs.y = ((polyvertex_t *)gl_curline->pv1)->y;
@ -1018,7 +1018,7 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom
vs.x = FIXED_TO_FLOAT(gl_curline->v1->x);
vs.y = FIXED_TO_FLOAT(gl_curline->v1->y);
}
if (gl_curline->pv2)
if (LIKELY(gl_curline->pv2))
{
ve.x = ((polyvertex_t *)gl_curline->pv2)->x;
ve.y = ((polyvertex_t *)gl_curline->pv2)->y;
@ -2006,7 +2006,7 @@ static boolean CheckClip(seg_t * seg, sector_t * afrontsector, sector_t * abacks
if (afrontsector->f_slope || afrontsector->c_slope || abacksector->f_slope || abacksector->c_slope)
{
fixed_t v1x, v1y, v2x, v2y; // the seg's vertexes as fixed_t
if (gl_curline->pv1)
if (LIKELY(gl_curline->pv1))
{
v1x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->x);
v1y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->y);
@ -2016,7 +2016,7 @@ static boolean CheckClip(seg_t * seg, sector_t * afrontsector, sector_t * abacks
v1x = gl_curline->v1->x;
v1y = gl_curline->v1->y;
}
if (gl_curline->pv2)
if (LIKELY(gl_curline->pv2))
{
v2x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->x);
v2y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->y);
@ -2188,7 +2188,7 @@ static void HWR_AddLine(seg_t * line)
gl_curline = line;
if (gl_curline->pv1)
if (LIKELY(gl_curline->pv1))
{
v1x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->x);
v1y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv1)->y);
@ -2198,7 +2198,7 @@ static void HWR_AddLine(seg_t * line)
v1x = gl_curline->v1->x;
v1y = gl_curline->v1->y;
}
if (gl_curline->pv2)
if (LIKELY(gl_curline->pv2))
{
v2x = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->x);
v2y = FLOAT_TO_FIXED(((polyvertex_t *)gl_curline->pv2)->y);
@ -2866,12 +2866,24 @@ static void HWR_Subsector(size_t num)
// without talking about the overdraw of course.
sub->sector->validcount = validcount;/// \todo fix that in a better way
while (count--)
if (UNLIKELY(numPolyObjects))
{
if (!line->glseg && !line->polyseg) // ignore segs that belong to polyobjects
while (count--)
{
if (LIKELY(!line->polyseg)) // ignore segs that belong to polyobjects
HWR_AddLine(line);
line++;
}
}
else
{
while (count--)
{
HWR_AddLine(line);
line++;
line++;
}
}
}
@ -4574,7 +4586,7 @@ static void HWR_DrawSprites(void)
if (spr->mobj && spr->mobj->skin && spr->mobj->sprite == SPR_PLAY)
{
if (!cv_glmodels.value || md2_playermodels[(skin_t*)spr->mobj->skin-skins].notfound || md2_playermodels[(skin_t*)spr->mobj->skin-skins].scale < 0.0f)
if (LIKELY(!cv_glmodels.value || md2_playermodels[(skin_t*)spr->mobj->skin-skins].notfound || md2_playermodels[(skin_t*)spr->mobj->skin-skins].scale < 0.0f))
HWR_DrawSprite(spr);
else
{
@ -4584,7 +4596,7 @@ static void HWR_DrawSprites(void)
}
else
{
if (!cv_glmodels.value || md2_models[spr->mobj->sprite].notfound || md2_models[spr->mobj->sprite].scale < 0.0f)
if (LIKELY(!cv_glmodels.value || md2_models[spr->mobj->sprite].notfound || md2_models[spr->mobj->sprite].scale < 0.0f))
HWR_DrawSprite(spr);
else
{
@ -5873,7 +5885,7 @@ void HWR_RenderSkyboxView(player_t *player)
validcount++;
if (cv_glbatching.value)
if (LIKELY(cv_glbatching.value))
HWR_StartBatching();
#ifdef HWPRECIP
@ -5882,7 +5894,7 @@ void HWR_RenderSkyboxView(player_t *player)
HWR_RenderBSPNode((INT32)numnodes-1);
if (cv_glbatching.value)
if (LIKELY(cv_glbatching.value))
HWR_RenderBatches();
// Check for new console commands.

View file

@ -151,9 +151,9 @@ void M_AATreeSet(aatree_t *aatree, INT32 key, void* value)
// and nodes with value == NULL.
static void *M_AATreeGet_Node(aatree_node_t *node, INT32 key)
{
if (node)
if (LIKELY(node))
{
if (node->key == key)
if (UNLIKELY(node->key == key))
return node->value;
else if(node->key < key)
return M_AATreeGet_Node(node->right, key);

View file

@ -4921,9 +4921,9 @@ static void P_RelinkPointers(void)
mobj = (mobj_t *)currentthinker;
if (mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER
if (UNLIKELY(mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER
// MT_SPARK: used for debug stuff
|| mobj->type == MT_SPARK)
|| mobj->type == MT_SPARK))
continue;
if (mobj->tracer)
@ -5598,9 +5598,9 @@ void P_SaveNetGame(savebuffer_t *save, boolean resending)
continue;
mobj = (mobj_t *)th;
if (mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER
if (UNLIKELY(mobj->type == MT_HOOP || mobj->type == MT_HOOPCOLLIDE || mobj->type == MT_HOOPCENTER
// MT_SPARK: used for debug stuff
|| mobj->type == MT_SPARK)
|| mobj->type == MT_SPARK))
continue;
mobj->mobjnum = i++;
}

View file

@ -149,7 +149,7 @@ static void R_DrawSpanTemplate(drawspandata_t* ds)
UINT8 *dsrc;
const INT32 vidwidth = vid.width;
const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
const UINT8 * restrict deststop = screens[0] + vid.rowbytes * vid.height;
size_t count = (ds->x2 - ds->x1 + 1);
size_t i;

View file

@ -3697,16 +3697,16 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal)
/* Check if thing may be drawn from our current view. */
boolean R_ThingVisible (mobj_t *thing)
{
if (thing->sprite == SPR_NULL)
if (UNLIKELY(thing->sprite == SPR_NULL))
return false;
if (r_viewmobj && (thing == r_viewmobj || (r_viewmobj->player && r_viewmobj->player->followmobj == thing)))
if (UNLIKELY(r_viewmobj && (thing == r_viewmobj || (r_viewmobj->player && r_viewmobj->player->followmobj == thing))))
return false;
if ((viewssnum == 0 && (thing->renderflags & RF_DONTDRAWP1))
if (UNLIKELY((viewssnum == 0 && (thing->renderflags & RF_DONTDRAWP1))
|| (viewssnum == 1 && (thing->renderflags & RF_DONTDRAWP2))
|| (viewssnum == 2 && (thing->renderflags & RF_DONTDRAWP3))
|| (viewssnum == 3 && (thing->renderflags & RF_DONTDRAWP4)))
|| (viewssnum == 3 && (thing->renderflags & RF_DONTDRAWP4))))
return false;
return true;

View file

@ -1330,9 +1330,10 @@ void I_FinishUpdate(void)
{
SDL_LockSurface(vidSurface);
// copy pixels ourselves to the video surface (prevents a crash in libsdl)
UINT32 *dst = (UINT32*)vidSurface->pixels;
UINT8 *src = screens[0];
for (int32_t i = 0; i < vid.width * vid.height; i++)
UINT32 *restrict dst = (UINT32*)vidSurface->pixels;
const UINT8 *restrict src = screens[0];
const INT32 count = vid.width * vid.height;
for (INT32 i = 0; i < count; i++)
*dst++ = localPalette[*src++];
SDL_UnlockSurface(vidSurface);
// Fury -- there's no way around UpdateTexture, the GL backend uses it anyway