From 76fc2b21d7f9254d7159df540a18a07e6acc218c Mon Sep 17 00:00:00 2001 From: Alug Date: Fri, 27 Jun 2025 11:47:53 +0200 Subject: [PATCH] Fix software sky threading synchronization issues By splitting sky drawing from plane drawing, so both dont step on each other during drawing this also marginally improves performance --- src/r_main.cpp | 96 ++++++++++-------------- src/r_plane.cpp | 195 ++++++++++++++++++++++++++++-------------------- 2 files changed, 155 insertions(+), 136 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 064de28ef..d3e994aab 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -1533,47 +1533,30 @@ static void R_RenderViewpoint(maskcount_t* mask, INT32 cachenum) void R_RenderPlayerView(void) { - INT32 nummasks = 1; - maskcount_t* masks = static_cast(malloc(sizeof(maskcount_t))); - player_t * player = &players[displayplayers[viewssnum]]; - const boolean skybox = (skyboxmo[0] && cv_skybox.value); - UINT8 i; + INT32 nummasks = 1; + maskcount_t* masks = static_cast(malloc(sizeof(maskcount_t))); + player_t * player = &players[displayplayers[viewssnum]]; + const boolean skybox = (skyboxmo[0] && cv_skybox.value); + const boolean oldsky = (skybox && mapnamespace == MNS_SRB2KART); - srb2::ThreadPool::Sema tp_sema; - srb2::g_main_threadpool->begin_sema(); + // using sky portals on kart maps usually results in pretty bad performance + skyVisible = oldsky && skyVisiblePerPlayer[viewssnum]; - // using sky portals on kart maps usually results in very bad performance - if (mapnamespace == MNS_SRB2KART) + if (skyVisible) { - // load previous saved value of skyVisible for the player - for (i = 0; i <= splitscreen; i++) - { - if (player != &players[displayplayers[i]]) - continue; - skyVisible = skyVisiblePerPlayer[i]; - break; - } - if (skybox && skyVisible) - { - R_SkyboxFrame(viewssnum); - R_ClearClipSegs(); - R_ClearDrawSegs(); - R_ClearSegTables(); - R_ClearPlanes(); - R_ClearSprites(); - Mask_Pre(&masks[nummasks - 1]); - R_RenderBSPNode((INT32)numnodes - 1); - Mask_Post(&masks[nummasks - 1]); - R_ClipSprites(drawsegs, NULL); - R_DrawPlanes(); - // well sometimes synchronization is off and may result in some visual glitching, oh well - tp_sema = srb2::g_main_threadpool->end_sema(); - srb2::g_main_threadpool->notify_sema(tp_sema); - srb2::g_main_threadpool->wait_sema(tp_sema); - srb2::g_main_threadpool->begin_sema(); - R_DrawMasked(masks, nummasks); - } + R_SkyboxFrame(viewssnum); + R_ClearClipSegs(); + R_ClearDrawSegs(); + R_ClearSegTables(); + R_ClearPlanes(); + R_ClearSprites(); + R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1); + R_ClipSprites(drawsegs, NULL); + R_DrawSkyPlanes(); + R_DrawPlanes(); + R_DrawMasked(masks, nummasks); } + R_SetupFrame(viewssnum, skybox); skyVisible = false; framecount++; @@ -1589,7 +1572,7 @@ void R_RenderPlayerView(void) portalclipend = viewwidth-viewmorph[viewssnum].x1-1; R_PortalClearClipSegs(portalclipstart, portalclipend); memcpy(ceilingclip, viewmorph[viewssnum].ceilingclip, sizeof(INT16)*vid.width); - memcpy(floorclip, viewmorph[viewssnum].floorclip, sizeof(INT16)*vid.width); + memcpy(floorclip, viewmorph[viewssnum].floorclip, sizeof(INT16)*vid.width); } else { @@ -1611,6 +1594,7 @@ void R_RenderPlayerView(void) mytotal = 0; ProfZeroTimer(); #endif + ps_numbspcalls = ps_numpolyobjects = ps_numdrawnodes = 0; ps_bsptime = I_GetPreciseTime(); @@ -1631,7 +1615,7 @@ void R_RenderPlayerView(void) ps_sw_spritecliptime = I_GetPreciseTime() - ps_sw_spritecliptime; // Add skybox portals caused by sky visplanes. - if (mapnamespace != MNS_SRB2KART && skybox) + if (skybox && !oldsky) Portal_AddSkyboxPortals(); // Portal rendering. Hijacks the BSP traversal. @@ -1672,7 +1656,6 @@ void R_RenderPlayerView(void) // Render the BSP from the new viewpoint, and clip // any sprites with the new clipsegs and window. - R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1); //portalskipprecipmobjs = false; @@ -1690,10 +1673,8 @@ void R_RenderPlayerView(void) ps_sw_portaltime = I_GetPreciseTime() - ps_sw_portaltime; ps_sw_planetime = I_GetPreciseTime(); + R_DrawSkyPlanes(); R_DrawPlanes(); - tp_sema = srb2::g_main_threadpool->end_sema(); - srb2::g_main_threadpool->notify_sema(tp_sema); - srb2::g_main_threadpool->wait_sema(tp_sema); ps_sw_planetime = I_GetPreciseTime() - ps_sw_planetime; // draw mid texture and sprite @@ -1710,6 +1691,7 @@ void R_RenderPlayerView(void) { if (pl->minx > pl->maxx) continue; + auto col = [](int x, int top, int bot) { if (top > bot) @@ -1718,7 +1700,9 @@ void R_RenderPlayerView(void) top = 0; if (bot > viewheight-1) bot = viewheight-1; + UINT8* p = &topleft[x + top * vid.width]; + while (top <= bot) { *p = 35; @@ -1726,21 +1710,27 @@ void R_RenderPlayerView(void) top++; } }; + auto span = [col](int x, int top, int bot) { if (top <= bot) col(x, top, bot); }; + INT32 top = pl->top[pl->minx]; INT32 bottom = pl->bottom[pl->minx]; + span(pl->minx, top, bottom); span(pl->maxx, pl->top[pl->maxx], pl->bottom[pl->maxx]); + for (INT32 x = pl->minx + 1; x < std::min(pl->maxx, viewwidth); ++x) { INT32 new_top = pl->top[x]; INT32 new_bottom = pl->bottom[x]; + if (new_top > new_bottom) continue; + if (top > bottom) { col(x, new_top, new_top); @@ -1751,6 +1741,7 @@ void R_RenderPlayerView(void) col(x, top, new_top); col(x, bottom, new_bottom); } + top = new_top; bottom = new_bottom; } @@ -1761,13 +1752,13 @@ void R_RenderPlayerView(void) // debugrender_portal: fill portals with red, draw over everything if (portal_base && cv_debugrender_portal.value) { - const UINT8 pal = 0x23; // red portal_t *portal; + constexpr UINT8 pal = 0x23; // red for (portal = portal_base; portal; portal = portal_base) { - INT32 width = (portal->end - portal->start); INT32 i; + INT32 width = (portal->end - portal->start); for (i = 0; i < std::min(width, viewwidth); ++i) { @@ -1784,18 +1775,9 @@ void R_RenderPlayerView(void) } } - if (mapnamespace == MNS_SRB2KART) - { - // save value to skyVisiblePerPlayer - // this is so that P1 can't affect whether P2 can see a skybox or not, or vice versa - for (i = 0; i <= splitscreen; i++) - { - if (player != &players[displayplayers[i]]) - continue; - skyVisiblePerPlayer[i] = skyVisible; - break; - } - } + // save value to skyVisiblePerPlayer + // this is so that P1 can't affect whether P2 can see a skybox or not, or vice versa + skyVisiblePerPlayer[viewssnum] = (!oldsky || skyVisible); // if we dont draw the sky using the old method, always mark this as true free(masks); } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index cd0bd462d..291c17fbb 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -149,12 +149,13 @@ static void R_MapPlane(drawspandata_t *ds, spandrawfunc_t *spanfunc, INT32 y, IN if (!R_CheckMapPlane(__func__, y, x1, x2)) return; - angle = (ds->currentplane->viewangle + ds->currentplane->plangle)>>ANGLETOFINESHIFT; + angle = (ds->currentplane->viewangle + ds->currentplane->plangle)>>ANGLETOFINESHIFT; planecos = FINECOSINE(angle); planesin = FINESINE(angle); distance = FixedMul(ds->planeheight, yslope[y]); - span = abs(centery - y); + span = abs(centery - y); + if (span) // Don't divide by zero { ds->xstep = FixedMul(planesin, ds->planeheight) / span; @@ -214,7 +215,7 @@ static void R_MapPlane(drawspandata_t *ds, spandrawfunc_t *spanfunc, INT32 y, IN } } - ds->y = y; + ds->y = y; ds->x1 = x1; ds->x2 = x2; @@ -306,6 +307,7 @@ void R_ClearPlanes(void) static visplane_t *new_visplane(unsigned hash) { visplane_t *check = freetail; + if (!check) { check = static_cast(calloc(1, sizeof (*check))); @@ -317,6 +319,7 @@ static visplane_t *new_visplane(unsigned hash) if (!freetail) freehead = &freetail; } + check->next = visplanes[hash]; visplanes[hash] = check; @@ -409,6 +412,7 @@ visplane_t *R_FindPlane(fixed_t height, INT32 picnum, INT32 lightlevel, if (!pfloor) { hash = visplane_hash(picnum, lightlevel, height); + for (check = visplanes[hash]; check; check = check->next) { if (polyobj != check->polyobj) @@ -456,8 +460,8 @@ visplane_t *R_FindPlane(fixed_t height, INT32 picnum, INT32 lightlevel, check->ripple = ripple; check->damage = damage; - memset(check->top, 0xff, sizeof (check->top)); - memset(check->bottom, 0x00, sizeof (check->bottom)); + memset(check->top, 0xff, sizeof(check->top)); + memset(check->bottom, 0x00, sizeof(check->bottom)); return check; } @@ -512,8 +516,7 @@ visplane_t *R_CheckPlane(visplane_t *pl, INT32 start, INT32 stop) } else { - unsigned hash = - visplane_hash(pl->picnum, pl->lightlevel, pl->height); + unsigned hash = visplane_hash(pl->picnum, pl->lightlevel, pl->height); new_pl = new_visplane(hash); } @@ -537,8 +540,8 @@ visplane_t *R_CheckPlane(visplane_t *pl, INT32 start, INT32 stop) pl = new_pl; pl->minx = start; pl->maxx = stop; - memset(pl->top, 0xff, sizeof pl->top); - memset(pl->bottom, 0x00, sizeof pl->bottom); + memset(pl->top, 0xff, sizeof(pl->top)); + memset(pl->bottom, 0x00, sizeof(pl->bottom)); } return pl; } @@ -570,7 +573,7 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw if (b1 >= vid.height) b1 = vid.height-1; if (t2 >= vid.height) t2 = vid.height-1; if (b2 >= vid.height) b2 = vid.height-1; - if (x-1 >= vid.width) x = vid.width; + if (x-1 >= vid.width) x = vid.width; // We want to draw N spans per subtask to ensure the work is // coarse enough to not be too slow due to task scheduling overhead. @@ -582,6 +585,7 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw { INT32 spanstartcopy[kSpanTaskGranularity] = {0}; INT32 taskspans = 0; + for (int i = 0; i < kSpanTaskGranularity; i++) { if (!((t1 + i) < t2 && (t1 + i) <= b1)) @@ -591,12 +595,14 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw spanstartcopy[i] = spanstart[t1 + i]; taskspans += 1; } + auto task = [=]() mutable -> void { for (int i = 0; i < taskspans; i++) { mapfunc(&dc_copy, spanfunc, t1 + i, spanstartcopy[i], x - 1, false); } }; + if (allow_parallel) { srb2::g_main_threadpool->schedule(std::move(task)); @@ -605,12 +611,14 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw { (task)(); } + t1 += taskspans; } while (b1 > b2 && b1 >= t1) { INT32 spanstartcopy[kSpanTaskGranularity] = {0}; INT32 taskspans = 0; + for (int i = 0; i < kSpanTaskGranularity; i++) { if (!((b1 - i) > b2 && (b1 - i) >= t1)) @@ -620,12 +628,14 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw spanstartcopy[i] = spanstart[b1 - i]; taskspans += 1; } + auto task = [=]() mutable -> void { for (int i = 0; i < taskspans; i++) { mapfunc(&dc_copy, spanfunc, b1 - i, spanstartcopy[i], x - 1, false); } }; + if (allow_parallel) { srb2::g_main_threadpool->schedule(std::move(task)); @@ -634,6 +644,7 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw { (task)(); } + b1 -= taskspans; } @@ -643,28 +654,6 @@ static void R_MakeSpans(void (*mapfunc)(drawspandata_t* ds, void(*spanfunc)(draw spanstart[b2--] = x; } -void R_DrawPlanes(void) -{ - visplane_t *pl; - INT32 i; - drawspandata_t ds {0}; - - ZoneScoped; - - R_UpdatePlaneRipple(&ds); - - for (i = 0; i < MAXVISPLANES; i++, pl++) - { - for (pl = visplanes[i]; pl; pl = pl->next) - { - if (pl->ffloor != NULL || pl->polyobj != NULL) - continue; - - R_DrawSinglePlane(&ds, pl, cv_parallelsoftware.value); - } - } -} - // R_DrawSkyPlane // // Draws the sky within the plane's top/bottom bounds @@ -759,6 +748,75 @@ static void R_DrawSkyPlane(visplane_t *pl, void(*colfunc)(drawcolumndata_t*), bo } } +void R_DrawSkyPlanes(void) +{ + INT32 i; + visplane_t *pl; + + ZoneScoped; + + srb2::ThreadPool::Sema tp_sema; + srb2::g_main_threadpool->begin_sema(); + for (i = 0; i < MAXVISPLANES; i++, pl++) + { + for (pl = visplanes[i]; pl; pl = pl->next) + { + if (pl->picnum != skyflatnum || pl->ffloor || pl->polyobj) + continue; + + INT16 highlight = R_PlaneIsHighlighted(pl); + + if (highlight != -1) + { + drawcolumndata_t dc = {}; + dc.r8_flatcolor = highlight; + dc.lightmap = colormaps; + + for (dc.x = pl->minx; dc.x <= pl->maxx; ++dc.x) + { + dc.yl = pl->top[dc.x]; + dc.yh = pl->bottom[dc.x]; + R_DrawColumn_Flat(&dc); + } + } + else + { + R_DrawSkyPlane(pl, colfunc, cv_parallelsoftware.value); + } + } + } + tp_sema = srb2::g_main_threadpool->end_sema(); + srb2::g_main_threadpool->notify_sema(tp_sema); + srb2::g_main_threadpool->wait_sema(tp_sema); +} + +void R_DrawPlanes(void) +{ + visplane_t *pl; + INT32 i; + drawspandata_t ds {0}; + + ZoneScoped; + + R_UpdatePlaneRipple(&ds); + + srb2::ThreadPool::Sema tp_sema; + srb2::g_main_threadpool->begin_sema(); + for (i = 0; i < MAXVISPLANES; i++, pl++) + { + for (pl = visplanes[i]; pl; pl = pl->next) + { + if (pl->ffloor != NULL || pl->polyobj != NULL) + continue; + + R_DrawSinglePlane(&ds, pl, cv_parallelsoftware.value); + } + } + tp_sema = srb2::g_main_threadpool->end_sema(); + srb2::g_main_threadpool->notify_sema(tp_sema); + srb2::g_main_threadpool->wait_sema(tp_sema); +} + // Returns the height of the sloped plane at (x, y) as a 32.16 fixed_t static INT64 R_GetSlopeZAt(const pslope_t *slope, fixed_t x, fixed_t y) { @@ -939,38 +997,23 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle INT32 spanfunctype = BASEDRAWFUNC; debugrender_highlight_t debug = debugrender_highlight_t::SW_HI_PLANES; void (*mapfunc)(drawspandata_t*, void(*)(drawspandata_t*), INT32, INT32, INT32, boolean) = R_MapPlane; - INT16 highlight = R_PlaneIsHighlighted(pl); + INT16 highlight; if (!(pl->minx <= pl->maxx)) return; ZoneScoped; - R_UpdatePlaneRipple(ds); - - // sky flat + // we check this here due to drawmasked if (pl->picnum == skyflatnum) { - if (highlight != -1) - { - drawcolumndata_t dc = {}; - dc.r8_flatcolor = highlight; - dc.lightmap = colormaps; - - for (dc.x = pl->minx; dc.x <= pl->maxx; ++dc.x) - { - dc.yl = pl->top[dc.x]; - dc.yh = pl->bottom[dc.x]; - R_DrawColumn_Flat(&dc); - } - } - else - { - R_DrawSkyPlane(pl, colfunc, allow_parallel); - } return; } + R_UpdatePlaneRipple(ds); + + highlight = R_PlaneIsHighlighted(pl); + ds->planeripple.active = false; ds->brightmap = NULL; R_SetSpanFunc(BASEDRAWFUNC, false, false); @@ -980,13 +1023,13 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle // Hacked up support for alpha value in software mode Tails 09-24-2002 (sidenote: ported to polys 10-15-2014, there was no time travel involved -Red) if (pl->polyobj->translucency >= NUMTRANSMAPS) return; // Don't even draw it - else if (pl->polyobj->translucency > 0) - { - spanfunctype = (pl->polyobj->flags & POF_SPLAT) ? SPANDRAWFUNC_TRANSSPLAT : SPANDRAWFUNC_TRANS; - ds->transmap = R_GetTranslucencyTable(pl->polyobj->translucency); - } - else if (pl->polyobj->flags & POF_SPLAT) // Opaque, but allow transparent flat pixels - spanfunctype = SPANDRAWFUNC_SPLAT; + else if (pl->polyobj->translucency > 0) + { + spanfunctype = (pl->polyobj->flags & POF_SPLAT) ? SPANDRAWFUNC_TRANSSPLAT : SPANDRAWFUNC_TRANS; + ds->transmap = R_GetTranslucencyTable(pl->polyobj->translucency); + } + else if (pl->polyobj->flags & POF_SPLAT) // Opaque, but allow transparent flat pixels + spanfunctype = SPANDRAWFUNC_SPLAT; if (pl->polyobj->translucency == 0 || (pl->extra_colormap && (pl->extra_colormap->flags & CMF_FOG))) light = (pl->lightlevel >> LIGHTSEGSHIFT); @@ -1050,7 +1093,6 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle debug = SW_HI_PLANES; } - #ifndef NOWATER if (pl->ripple) { INT32 top, bottom; @@ -1061,13 +1103,8 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle spanfunctype = SPANDRAWFUNC_WATER; // Copy the current scene, ugh - top = pl->high-8; - bottom = pl->low+8; - - if (top < 0) - top = 0; - if (bottom > viewheight) - bottom = viewheight; + top = std::max(0, pl->high - 8); + bottom = std::min(viewheight, pl->low + 8); // Only copy the part of the screen we need UINT8 i = R_GetViewNumber(); @@ -1104,7 +1141,6 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle vid.width, vid.width); } } - #endif } ds->currentplane = pl; @@ -1116,7 +1152,7 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle return; texture_t *texture = textures[R_GetTextureNumForFlat(levelflat)]; - ds->flatwidth = texture->width; + ds->flatwidth = texture->width; ds->flatheight = texture->height; /*(if (R_CheckSolidColorFlat()) @@ -1228,8 +1264,8 @@ void R_DrawSinglePlane(drawspandata_t *ds, visplane_t *pl, boolean allow_paralle } // set the maximum value for unsigned - pl->top[pl->maxx+1] = 0xffff; - pl->top[pl->minx-1] = 0xffff; + pl->top[pl->maxx+1] = 0xffff; + pl->top[pl->minx-1] = 0xffff; pl->bottom[pl->maxx+1] = 0x0000; pl->bottom[pl->minx-1] = 0x0000; @@ -1250,10 +1286,11 @@ void R_PlaneBounds(visplane_t *plane) for (i = plane->minx + 1; i <= plane->maxx; i++) { if (plane->top[i] < hi) - hi = plane->top[i]; + hi = plane->top[i]; if (plane->bottom[i] > low) - low = plane->bottom[i]; + low = plane->bottom[i]; } + plane->high = hi; plane->low = low; } @@ -1262,11 +1299,11 @@ INT16 R_PlaneIsHighlighted(const visplane_t *pl) { switch (pl->damage) { - case SD_DEATHPIT: - case SD_INSTAKILL: - return 35; // red + case SD_DEATHPIT: + case SD_INSTAKILL: + return 35; // red - default: - return -1; + default: + return -1; } }