From 659aa667eb1c2d66e68aa914e0497c82eb2a6327 Mon Sep 17 00:00:00 2001 From: NepDisk Date: Sun, 19 Oct 2025 12:12:33 -0400 Subject: [PATCH 1/8] Software Renderer: Implement Column buffering system A port of https://github.com/Indev450/SRB2Kart-Saturn/pull/205, currently doesn't work --- src/r_draw.cpp | 106 ++++++++++++++++++ src/r_draw.h | 22 ++++ src/r_draw_column.cpp | 73 ++++++++++--- src/r_draw_flush.cpp | 247 ++++++++++++++++++++++++++++++++++++++++++ src/r_draw_span.cpp | 64 +++++++---- src/r_main.cpp | 8 ++ src/r_things.cpp | 10 +- src/screen.c | 43 ++++++-- src/screen.h | 4 +- src/sdl/i_video.cpp | 3 +- src/v_video.c | 9 -- src/v_video.h | 12 ++ 12 files changed, 540 insertions(+), 61 deletions(-) create mode 100644 src/r_draw_flush.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index eb809cbf0..1cffcf8a8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -434,6 +434,92 @@ UINT16 R_GetSuperColorByName(const char *name) // in reality, the few routines that can work for either mode, are // put here +enum columncontext_e columncontext = COLUMNCONTEXT_DIRECT; + +enum ColumnFlushType +{ + FLUSH_NONE = 0x0000, + FLUSH_OPAQUE = 0x0001, + FLUSH_TRANS = 0x0002, + FLUSH_COLORMAP = 0x0004, + FLUSH_COLORMAP_TRANS = 0x0008, +}; + +typedef struct drawcolumndata_temp_s +{ + INT32 x; + INT32 yl[8], yh[8]; + + // e6y: resolution limitation is removed + UINT8 *buf; + + INT32 startx; + ColumnFlushType type; + INT32 commontop, commonbot; + UINT8 *transmap; + // SoM 7-28-04: Fix the fuzz problem. + UINT8 *translation; +} drawcolumndata_temp_t; + +drawcolumndata_temp_t temp_dc = {}; + +// +// Error functions that will abort if R_FlushColumns tries to flush +// columns without a column type. 
+// +FUNCNORETURN static ATTRNORETURN void R_FlushWholeError(void) +{ + I_Error("R_FlushWholeColumns called without being initialized.\n"); +} + +FUNCNORETURN static ATTRNORETURN void R_FlushHTError(void) +{ + I_Error("R_FlushHTColumns called without being initialized.\n"); +} + +FUNCNORETURN static ATTRNORETURN void R_QuadFlushError(void) +{ + I_Error("R_FlushQuadColumn called without being initialized.\n"); +} + +static void (*R_FlushWholeColumns)(void) = R_FlushWholeError; +static void (*R_FlushHTColumns)(void) = R_FlushHTError; +static void (*R_FlushQuadColumn)(void) = R_QuadFlushError; + +static void R_FlushColumns(void) +{ + if (temp_dc.x != 8 || temp_dc.commontop >= temp_dc.commonbot) + R_FlushWholeColumns(); + else + { + R_FlushHTColumns(); + R_FlushQuadColumn(); + } + + temp_dc.x = 0; +} + +// +// R_ResetColumnBuffer +// +// haleyjd 09/13/04: new function to call from main rendering loop +// which gets rid of the unnecessary reset of various variables during +// column drawing. +// +void R_ResetColumnBuffer(void) +{ + // haleyjd 10/06/05: this must not be done if x == 0! 
+ if (temp_dc.x) + { + R_FlushColumns(); + } + + temp_dc.type = FLUSH_NONE; + R_FlushWholeColumns = R_FlushWholeError; + R_FlushHTColumns = R_FlushHTError; + R_FlushQuadColumn = R_QuadFlushError; +} + /** \brief The R_InitViewBuffer function Creates lookup tables for getting the framebuffer address @@ -456,6 +542,26 @@ void R_InitViewBuffer(INT32 width, INT32 height) viewwindowx = 0; viewwindowy = 0; + INT32 bufsize = (vid.width * 8) * sizeof(*temp_dc.buf); + + if (temp_dc.buf) + { +#if defined(__SSE__) + aligned_free(temp_dc.buf); +#else + Z_Free(temp_dc.buf); +#endif + } + + memset(&temp_dc, 0, sizeof(temp_dc)); + +#if defined(__SSE__) + while (bufsize & 15) + bufsize++; + temp_dc.buf = static_cast(aligned_alloc(16, bufsize)); +#else + temp_dc.buf = static_cast(Z_Calloc(bufsize, PU_STATIC, NULL)); +#endif linesize = vid.width; // killough 11/98 renderscreen = vid.screens[0]; // haleyjd 07/02/14 diff --git a/src/r_draw.h b/src/r_draw.h index a7e446006..5d2a0c036 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -44,6 +44,18 @@ extern floatv3_t *ds_su, *ds_sv, *ds_sz; extern float focallengthf[MAXSPLITSCREENPLAYERS]; extern float zeroheight; +enum columncontext_e +{ + COLUMNCONTEXT_DIRECT = 0, + COLUMNCONTEXT_FLUSH, +}; + +extern enum columncontext_e columncontext; +void R_SetColumnContext(enum columncontext_e _columncontext); +void SCR_SetDrawFuncs(enum columncontext_e _columncontext); + +void R_ResetColumnBuffer(void); + /// \brief Top border #define BRDR_T 0 /// \brief Bottom border @@ -207,6 +219,16 @@ void R_DrawTranslatedColumn(drawcolumndata_t* dc); void R_DrawTranslatedTranslucentColumn(drawcolumndata_t* dc); void R_Draw2sMultiPatchColumn(drawcolumndata_t* dc); void R_Draw2sMultiPatchTranslucentColumn(drawcolumndata_t* dc); + +// column drawers which use buffered drawing with flush +void R_DrawColumnFlush(drawcolumndata_t* dc); +void R_DrawTranslucentColumnFlush(drawcolumndata_t* dc); +void R_DrawTranslatedColumnFlush(drawcolumndata_t* dc); +void 
R_DrawColumnShadowedFlush(drawcolumndata_t* dc); +void R_DrawTranslatedTranslucentColumnFlush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchColumnFlush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchTranslucentColumnFlush(drawcolumndata_t* dc); + void R_DrawFogColumn(drawcolumndata_t* dc); void R_DrawColumnShadowed(drawcolumndata_t* dc); diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index a1b79c38c..8a8a6ed3c 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -21,6 +21,12 @@ // a has a constant z depth from top to bottom. // +#include "r_draw.h" +#include + + +#include "r_draw_flush.cpp" + enum DrawColumnType { DC_BASIC = 0x0000, @@ -29,10 +35,11 @@ enum DrawColumnType DC_BRIGHTMAP = 0x0004, DC_HOLES = 0x0008, DC_LIGHTLIST = 0x0010, + DC_DIRECT = 0x0020, // draw our columns directly to screen! }; template -static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) { if constexpr (Type & DrawColumnType::DC_COLORMAP) { @@ -45,7 +52,7 @@ static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) } template -static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col) { col = R_GetColumnTranslated(dc, col); @@ -60,8 +67,9 @@ static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, return dc->colormap[col]; } +// translucency is handled on flush side now! 
template -static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col) { col = R_GetColumnBrightmapped(dc, bit, col); @@ -76,7 +84,7 @@ static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, } template -static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit) { UINT8 col = dc->source[bit]; @@ -88,7 +96,14 @@ static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT } } - return R_GetColumnTranslucent(dc, dest, bit, col); + if constexpr (Type & DrawColumnType::DC_DIRECT) + { // if we don't buffer our columns, we need to handle translucency again + return R_GetColumnTranslucent(dc, dest, bit, col); + } + else + { + return R_GetColumnTranslated(dc, col); + } } /** \brief The R_DrawColumn function @@ -102,8 +117,18 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) const INT32 vidheight = vid.height; const INT32 vidwidth = vid.width; + // leban 1/17/99: + // removed the + 1 here, adjusted the if test, and added an increment + // later. this helps a compiler pipeline a bit better. the x86 + // assembler also does this. count = dc->yh - dc->yl; + // leban 1/17/99: + // this case isn't executed too often. depending on how many instructions + // there are between here and the second if test below, this case could + // be moved down and might save instructions overall. since there are + // probably different wads that favor one way or the other, i'll leave + // this alone for now. if (count < 0) // Zero length, column does not exceed a pixel. 
{ return; @@ -174,6 +199,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) } R_DrawColumnTemplate(&dc_copy); + if (solid) { dc_copy.yl = bheight; @@ -209,7 +235,17 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) // Framebuffer destination address. - dest = R_Address(dc->x, dc->yl); + if constexpr (Type & DrawColumnType::DC_DIRECT) + dest = R_Address(dc->x, dc->yl); + else if constexpr ((Type & (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP)) + == (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP)) + dest = R_GetBufferColormapTrans(dc); + else if constexpr (Type & DrawColumnType::DC_TRANSMAP) + dest = R_GetBufferTrans(dc); + else if constexpr (Type & DrawColumnType::DC_COLORMAP) + dest = R_GetBufferColormap(dc); + else + dest = R_GetBufferOpaque(dc); count++; @@ -333,13 +369,24 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) DEFINE_COLUMN_FUNC(name, flags) \ DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_BRIGHTMAP) -DEFINE_COLUMN_COMBO(R_DrawColumn, DC_BASIC) -DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_COLORMAP) -DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_LIGHTLIST) -DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_COLORMAP|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_HOLES) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_HOLES|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawColumn, DC_DIRECT|DC_BASIC) +DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_DIRECT|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_DIRECT|DC_COLORMAP) +DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_DIRECT|DC_LIGHTLIST) +DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_DIRECT|DC_COLORMAP|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_DIRECT|DC_HOLES) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_DIRECT|DC_HOLES|DC_TRANSMAP) + +DEFINE_COLUMN_COMBO(R_DrawColumnFlush, DC_BASIC) 
+DEFINE_COLUMN_COMBO(R_DrawTranslucentColumnFlush, DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawTranslatedColumnFlush, DC_COLORMAP) +DEFINE_COLUMN_COMBO(R_DrawColumnShadowedFlush, DC_LIGHTLIST) +DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumnFlush, DC_COLORMAP|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumnFlush, DC_HOLES) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumnFlush, DC_HOLES|DC_TRANSMAP) + +//skymyass +//DEFINE_COLUMN_FUNC(R_DrawSkyColumn, DC_SKY) void R_DrawFogColumn(drawcolumndata_t *dc) { diff --git a/src/r_draw_flush.cpp b/src/r_draw_flush.cpp new file mode 100644 index 000000000..80da61705 --- /dev/null +++ b/src/r_draw_flush.cpp @@ -0,0 +1,247 @@ +// SONIC ROBO BLAST 2 KART +//----------------------------------------------------------------------------- +// Copyright (C) 2025 by Kart Krew. +// Copyright (C) 2020 by Sonic Team Junior. +// Copyright (C) 2000 by DooM Legacy Team. +// Copyright (C) 1996 by id Software, Inc. +// Copyright (C) 1999 by Chi Hoang, Lee Killough, Jim Flynn, Rand Phares, Ty Halderman +// Copyright (C) 1999-2000 by Jess Haas, Nicolas Kalkhof, Colin Phipps, Florian Schulze +// Copyright (C) Copyright 2005, 2006 by Florian Schulze, Colin Phipps, Neil Stevens, Andrey Budko +// Copyright (C) 2013 by James Haley, Stephen McGranahan, et al. +// +// This program is free software distributed under the +// terms of the GNU General Public License, version 2. +// See the 'LICENSE' file for more details. +//----------------------------------------------------------------------------- +/// \file r_draw_flush.cpp +/// \brief Optimized quad column buffer code. By SoM. 
+/// \note no includes because this is included as part of r_draw.cpp + +template +FUNCINLINE static ATTRINLINE constexpr UINT8 +R_GetFlushPixelTranslated(const drawcolumndata_temp_t *t_dc, UINT8 col) +{ + if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + col = t_dc->translation[col]; + } + + return col; +} + +template +FUNCINLINE static ATTRINLINE constexpr UINT8 +R_GetFlushPixelTranslucent(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, UINT8 col) +{ + col = R_GetFlushPixelTranslated(t_dc, col); + + if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + // haleyjd 09/11/04: use temptranmap here + return *(t_dc->transmap + (col << 8) + (*dest)); + } + else + { + return col; + } +} + +template +FUNCINLINE static ATTRINLINE constexpr UINT8 +R_DrawFlushPixel(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, const UINT8 * restrict source) +{ + UINT8 col = *source; + return R_GetFlushPixelTranslucent(t_dc, dest, col); +} + +// +// R_FlushWhole +// +// Flushes the entire columns in the buffer, one at a time. +// This is used when a quad flush isn't possible. +// +template +static void R_FlushWhole(void) +{ + UINT8 * restrict source; + UINT8 * restrict dest; + INT32 count, yl; + const INT32 stride = vid.width; + drawcolumndata_temp_t *t_dc = &temp_dc; + UINT8 *restrict buf = t_dc->buf; + + while (--t_dc->x >= 0) + { + yl = t_dc->yl[t_dc->x]; + source = &buf[t_dc->x + (yl << 3)]; + dest = R_Address(t_dc->startx + t_dc->x, yl); + count = t_dc->yh[t_dc->x] - yl + 1; + + while (--count >= 0) + { + *dest = R_DrawFlushPixel(t_dc, dest, source); + source += 8; + dest += stride; + } + } +} + +// +// R_FlushHT +// +// Flushes the head and tail of columns in the buffer in +// preparation for a quad flush. 
+// +template +static void R_FlushHT(void) +{ + UINT8 * restrict source; + UINT8 * restrict dest; + INT32 count, colnum = 0; + INT32 yl, yh; + const INT32 stride = vid.width; + const drawcolumndata_temp_t *t_dc = &temp_dc; + UINT8 *restrict buf = t_dc->buf; + + while (colnum < 8) + { + yl = t_dc->yl[colnum]; + yh = t_dc->yh[colnum]; + + // flush column head + if (yl < t_dc->commontop) + { + source = &buf[colnum + (yl << 3)]; + dest = R_Address(t_dc->startx + colnum, yl); + count = t_dc->commontop - yl; + + while (--count >= 0) + { + *dest = R_DrawFlushPixel(t_dc, dest, source); + source += 8; + dest += stride; + } + } + + // flush column tail + if (yh > t_dc->commonbot) + { + source = &buf[colnum + ((t_dc->commonbot + 1) << 3)]; + dest = R_Address(t_dc->startx + colnum, t_dc->commonbot + 1); + count = yh - t_dc->commonbot; + + while (--count >= 0) + { + *dest = R_DrawFlushPixel(t_dc, dest, source); + source += 8; + dest += stride; + } + } + + ++colnum; + } +} + +// Begin: Quad column flushing functions. 
+template +static void R_FlushQuad(void) +{ + const INT32 stride = vid.width; + const drawcolumndata_temp_t *t_dc = &temp_dc; + INT32 count = t_dc->commonbot - t_dc->commontop + 1; + const UINT8 *restrict buf = t_dc->buf; + + if constexpr (Type & ColumnFlushType::FLUSH_OPAQUE) + { + const INT64 *source = reinterpret_cast(buf + (t_dc->commontop << 3)); + INT64 *dest = reinterpret_cast(R_Address(t_dc->startx, t_dc->commontop)); + const INT32 deststep = stride / 8; + + while (--count >= 0) + { + *dest = *source++; + dest += deststep; + } + } + else + { + const UINT8 * restrict source = buf + (t_dc->commontop << 3); + UINT8 * restrict dest = R_Address(t_dc->startx, t_dc->commontop); + + while (--count >= 0) + { + dest[0] = R_DrawFlushPixel(t_dc, &dest[0], &source[0]); + dest[1] = R_DrawFlushPixel(t_dc, &dest[1], &source[1]); + dest[2] = R_DrawFlushPixel(t_dc, &dest[2], &source[2]); + dest[3] = R_DrawFlushPixel(t_dc, &dest[3], &source[3]); + dest[4] = R_DrawFlushPixel(t_dc, &dest[4], &source[4]); + dest[5] = R_DrawFlushPixel(t_dc, &dest[5], &source[5]); + dest[6] = R_DrawFlushPixel(t_dc, &dest[6], &source[6]); + dest[7] = R_DrawFlushPixel(t_dc, &dest[7], &source[7]); + source += 8; + dest += stride; + } + } +} + +// haleyjd 09/12/04: split up R_GetBuffer into various different +// functions to minimize the number of branches and take advantage +// of as much precalculated information as possible. 
+template +static UINT8 *R_GetBuffer(drawcolumndata_t *dc) +{ + drawcolumndata_temp_t *t_dc = &temp_dc; + + // haleyjd: reordered predicates + if (t_dc->x == 8 || + (t_dc->x && (t_dc->type != Type || t_dc->x + t_dc->startx != dc->x))) + R_FlushColumns(); + + if (!t_dc->x) + { + ++t_dc->x; + t_dc->startx = dc->x; + t_dc->yl[0] = t_dc->commontop = dc->yl; + t_dc->yh[0] = t_dc->commonbot = dc->yh; + t_dc->type = Type; + + if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + t_dc->transmap = dc->transmap; + } + + if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + t_dc->translation = dc->translation; + } + + R_FlushWholeColumns = R_FlushWhole; + R_FlushHTColumns = R_FlushHT; + R_FlushQuadColumn = R_FlushQuad; + + return &t_dc->buf[dc->yl << 3]; + } + + t_dc->yl[t_dc->x] = dc->yl; + t_dc->yh[t_dc->x] = dc->yh; + + if (dc->yl > t_dc->commontop) + t_dc->commontop = dc->yl; + if (dc->yh < t_dc->commonbot) + t_dc->commonbot = dc->yh; + + return &t_dc->buf[(dc->yl << 3) + t_dc->x++]; +} + +#define DEFINE_GETBUF_FUNC(name, flags) \ + FUNCINLINE static ATTRINLINE UINT8 *name(drawcolumndata_t *dc) \ + { \ + constexpr ColumnFlushType opt = static_cast(flags); \ + return R_GetBuffer(dc); \ + } + +DEFINE_GETBUF_FUNC(R_GetBufferOpaque, FLUSH_OPAQUE) +DEFINE_GETBUF_FUNC(R_GetBufferTrans, FLUSH_TRANS) +DEFINE_GETBUF_FUNC(R_GetBufferColormap, FLUSH_COLORMAP) +DEFINE_GETBUF_FUNC(R_GetBufferColormapTrans, FLUSH_COLORMAP_TRANS) + diff --git a/src/r_draw_span.cpp b/src/r_draw_span.cpp index a8a171b95..a4228a453 100644 --- a/src/r_draw_span.cpp +++ b/src/r_draw_span.cpp @@ -13,6 +13,9 @@ /// \brief span drawer functions /// \note no includes because this is included as part of r_draw.cpp +#include "r_draw.h" +#include + using namespace libdivide; // ========================================================================== @@ -39,7 +42,7 @@ enum DrawSpanType }; template -static constexpr UINT8 
R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) { if constexpr (Type & DrawSpanType::DS_COLORMAP) { @@ -52,7 +55,7 @@ static constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) } template -static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col) { col = R_GetSpanTranslated(ds, col); @@ -85,7 +88,7 @@ static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap } template -static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col) { col = R_GetSpanBrightmapped(ds, colormap, bit, col); @@ -100,7 +103,7 @@ static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UIN } template -static constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit) { UINT8 col = 0; @@ -197,14 +200,18 @@ static void R_DrawSpanTemplate(drawspandata_t* ds) { bit = (((UINT32)yposition >> ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift); - dest[i] = R_DrawSpanPixel(ds, &dsrc[i], ds->colormap, bit); + if constexpr (Type & DS_RIPPLE) + dest[i] = R_DrawSpanPixel(ds, &dsrc[i], ds->colormap, bit); + else + dest[i] = R_DrawSpanPixel(ds, &dest[i], ds->colormap, bit); xposition += xstep; yposition += ystep; } dest += 8; - dsrc += 8; + if constexpr (Type & DS_RIPPLE) + dsrc += 8; count -= 8; } @@ -213,10 +220,14 @@ static void R_DrawSpanTemplate(drawspandata_t* ds) { bit = (((UINT32)yposition >> 
ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift); - *dest = R_DrawSpanPixel(ds, dsrc, ds->colormap, bit); + if constexpr (Type & DS_RIPPLE) + *dest = R_DrawSpanPixel(ds, dsrc, ds->colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, ds->colormap, bit); dest++; - dsrc++; + if constexpr (Type & DS_RIPPLE) + dsrc++; xposition += xstep; yposition += ystep; @@ -278,6 +289,8 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) const INT32 nflatmask = ds->nflatmask; iz = ds->szp.z + ds->szp.y*(centery-ds->y) + ds->szp.x*(ds->x1-centerx); + uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx); + vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx); // Lighting is simple. It's just linear interpolation from start to end if constexpr (!(Type & DS_SPRITE)) @@ -292,9 +305,6 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) //CONS_Printf("tilted lighting %f to %f (foc %f)\n", lightstart, lightend, focallengthf); } - uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx); - vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx); - colormap = ds->colormap; if constexpr (Type & DS_RIPPLE) @@ -364,12 +374,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) colormap = ds->planezlight[tiltlighting[x1 + i]] + (ds->colormap - colormaps); } - dest[i] = R_DrawSpanPixel(ds, &dsrc[i], colormap, bit); + if constexpr (Type & DS_RIPPLE) + dest[i] = R_DrawSpanPixel(ds, &dsrc[i], colormap, bit); + else + dest[i] = R_DrawSpanPixel(ds, &dest[i], colormap, bit); } ds->x1 += SPANSIZE; dest += SPANSIZE; - dsrc += SPANSIZE; + if constexpr (Type & DS_RIPPLE) + dsrc += SPANSIZE; startu = endu; startv = endv; width -= SPANSIZE; @@ -386,7 +400,11 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) { colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps); } - *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + if constexpr (Type & DS_RIPPLE) + 
*dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, colormap, bit); + ds->x1++; } else @@ -412,10 +430,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) { colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps); } - *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + + if constexpr (Type & DS_RIPPLE) + *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, colormap, bit); + dest++; + if constexpr (Type & DS_RIPPLE) + dsrc++; ds->x1++; - dsrc++; u += stepu; v += stepv; } @@ -768,18 +792,16 @@ void R_DrawFogSpan(drawspandata_t* ds) { ZoneScoped; + INT32 count = ds->x2 - ds->x1 + 1; + UINT8 *colormap; UINT8 *dest; const INT32 vidwidth = vid.width; - size_t count; - colormap = ds->colormap; dest = R_Address(ds->x1, ds->y); - count = ds->x2 - ds->x1 + 1; - while (count >= 4) { dest[0] = colormap[dest[0]]; @@ -787,7 +809,7 @@ void R_DrawFogSpan(drawspandata_t* ds) dest[2] = colormap[dest[2]]; dest[3] = colormap[dest[3]]; - dest += 4; + dest += 4; count -= 4; } diff --git a/src/r_main.cpp b/src/r_main.cpp index f5c4904f2..9a7d3d7e2 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -1538,8 +1538,11 @@ void R_RenderPlayerView(void) R_ClearSegTables(); R_ClearPlanes(); R_ClearSprites(); + R_SetColumnContext(COLUMNCONTEXT_FLUSH); R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, false); R_ClipSprites(drawsegs, NULL); + R_ResetColumnBuffer(); + R_SetColumnContext(COLUMNCONTEXT_DIRECT); R_DrawSkyPlanes(); R_DrawPlanes(); R_DrawMasked(masks, nummasks); @@ -1577,6 +1580,7 @@ void R_RenderPlayerView(void) NetUpdate(); // The head node is the last node output. 
+ R_SetColumnContext(COLUMNCONTEXT_FLUSH); ps_numbspcalls = ps_numpolyobjects = ps_numdrawnodes = 0; ps_bsptime = I_GetPreciseTime(); R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, true); @@ -1585,6 +1589,7 @@ void R_RenderPlayerView(void) ps_sw_spritecliptime = I_GetPreciseTime(); R_ClipSprites(drawsegs, NULL); ps_sw_spritecliptime = I_GetPreciseTime() - ps_sw_spritecliptime; + R_ResetColumnBuffer(); // Add skybox portals caused by sky visplanes. if (skybox && !oldsky) @@ -1629,6 +1634,8 @@ void R_RenderPlayerView(void) R_ClipSprites(ds_p - (masks[nummasks - 1].drawsegs[1] - masks[nummasks - 1].drawsegs[0]), portal); + R_ResetColumnBuffer(); + Portal_Remove(portal); } @@ -1639,6 +1646,7 @@ void R_RenderPlayerView(void) } ps_sw_portaltime = I_GetPreciseTime() - ps_sw_portaltime; + R_SetColumnContext(COLUMNCONTEXT_DIRECT); ps_sw_planetime = I_GetPreciseTime(); R_DrawSkyPlanes(); R_DrawPlanes(); diff --git a/src/r_things.cpp b/src/r_things.cpp index dcba2ec97..66ef4eeca 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -3602,6 +3602,11 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) drawseg_t* ds; INT32 i; + if (visspritecount - clippedvissprites <= 0) + { + return; + } + // e6y // Reducing of cache misses in the following R_DrawSprite() // Makes sense for scenes with huge amount of drawsegs. 
@@ -3611,11 +3616,6 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) drawsegs_xranges[i].count = 0; } - if (visspritecount - clippedvissprites <= 0) - { - return; - } - if (drawsegs_xrange_size < maxdrawsegs) { // haleyjd: fix reallocation to track 2x size diff --git a/src/screen.c b/src/screen.c index aeee8254a..4db2536fb 100644 --- a/src/screen.c +++ b/src/screen.c @@ -98,19 +98,33 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_ // ========================================================================= -void SCR_SetDrawFuncs(void) +void SCR_SetDrawFuncs(enum columncontext_e _columncontext) { // // setup the right draw routines // - colfuncs[BASEDRAWFUNC] = R_DrawColumn; - colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn; - colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn; - colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed; - colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn; + if (_columncontext == COLUMNCONTEXT_FLUSH) + { + colfuncs[BASEDRAWFUNC] = R_DrawColumnFlush; + colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush; + colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush; + colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush; + colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumnFlush; + } + else + { + colfuncs[BASEDRAWFUNC] = R_DrawColumn; + colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn; + colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn; + colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed; + colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = 
R_Draw2sMultiPatchColumn; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn; + } + colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn; colfuncs[COLDRAWFUNC_DROPSHADOW] = R_DrawDropShadowColumn; @@ -215,6 +229,17 @@ void SCR_SetDrawFuncs(void) R_SetSpanFunc(BASEDRAWFUNC, false, false); } +// used to switch between column buffering and drawing them directly to screen +// our sky "plane" drawer cannot handle the buffer system due to multithreading +// (that would require a lot of extra complexity for something with massively diminishing returns) +// Our masked drawing step draws things in a very particular order, which results in a lot of flushing to screen +// effectively adding massive overhead due to excessive flushing, so we draw our masked things directly to screen instead +void R_SetColumnContext(enum columncontext_e _columncontext) +{ + columncontext = _columncontext; + SCR_SetDrawFuncs(_columncontext); // set our column drawers +} + void R_SetColumnFunc(size_t id, boolean brightmapped) { I_Assert(id < COLDRAWFUNC_MAX); @@ -336,7 +361,7 @@ void SCR_SetMode(void) V_SetPalette(0); - SCR_SetDrawFuncs(); + SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT); // Shoot! The screen texture was flushed! Y_CleanupScreenBuffer(); diff --git a/src/screen.h b/src/screen.h index 1d4333111..f1f25a2ff 100644 --- a/src/screen.h +++ b/src/screen.h @@ -112,10 +112,8 @@ void SCR_Startup(void); // Change video mode, only at the start of a refresh. void SCR_SetMode(void); -// Set drawer functions for Software -void SCR_SetDrawFuncs(void); - // Set current column / span drawers +//void R_SetColumnContext(enum columncontext_e _columncontext); // declared in r_draw! 
void R_SetColumnFunc(size_t id, boolean brightmapped); void R_SetSpanFunc(size_t id, boolean npo2, boolean brightmapped); boolean R_SetSpanFuncFlat(size_t id); // flat color diff --git a/src/sdl/i_video.cpp b/src/sdl/i_video.cpp index db017f4fa..72ca37956 100644 --- a/src/sdl/i_video.cpp +++ b/src/sdl/i_video.cpp @@ -67,6 +67,7 @@ #include "../console.h" #include "../command.h" #include "../r_main.h" +#include "../r_draw.h" #include "../lua_hook.h" #include "sdlmain.h" #include "../i_system.h" @@ -1485,7 +1486,7 @@ boolean VID_CheckRenderer(void) if (rendermode == render_soft) { vid.rowbytes = vid.width; - SCR_SetDrawFuncs(); + SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT); } #ifdef HWRENDER else if (rendermode == render_opengl && rendererchanged) diff --git a/src/v_video.c b/src/v_video.c index f710528e9..34a88cecf 100644 --- a/src/v_video.c +++ b/src/v_video.c @@ -32,15 +32,6 @@ #include "doomstat.h" #include "r_fps.h" -#if defined(__SSE__) || defined(__AVX__) -#ifdef _WIN32 -#include -#define aligned_alloc(align, size) _aligned_malloc(size, align) -#endif - -#include -#endif - #ifdef HWRENDER #include "hardware/hw_glob.h" #endif diff --git a/src/v_video.h b/src/v_video.h index 0ea17220a..e0a20f0dd 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -19,6 +19,18 @@ #include "r_defs.h" #include "r_main.h" +#if defined(__SSE__) +#ifdef _WIN32 +#include +#define aligned_alloc(align, size) _aligned_malloc(size, align) +#define aligned_free(ptr) _aligned_free(ptr) +#else +#define aligned_free(ptr) free(ptr) +#endif + +#include +#endif + // SRB2Kart #include "hu_stuff.h" // fonts From a3689c694c3e5ffafd569be195e96b899479448f Mon Sep 17 00:00:00 2001 From: NepDisk Date: Sun, 19 Oct 2025 12:19:13 -0400 Subject: [PATCH 2/8] Add brightmap versions --- src/r_draw.h | 10 ++++++++++ src/screen.c | 29 ++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 5d2a0c036..5a1ef0f1d 100644 --- a/src/r_draw.h +++ 
b/src/r_draw.h @@ -240,6 +240,16 @@ void R_Draw2sMultiPatchColumn_Brightmap(drawcolumndata_t* dc); void R_Draw2sMultiPatchTranslucentColumn_Brightmap(drawcolumndata_t* dc); void R_DrawColumnShadowed_Brightmap(drawcolumndata_t* dc); +// column drawers which use buffered drawing with flush +void R_DrawColumnFlush_Brightmap(drawcolumndata_t* dc); +void R_DrawTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); +void R_DrawTranslatedColumnFlush_Brightmap(drawcolumndata_t* dc); +void R_DrawColumnShadowedFlush_Brightmap(drawcolumndata_t* dc); +void R_DrawTranslatedTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); +void R_Draw2sMultiPatchColumnFlush_Brightmap(drawcolumndata_t* dc); +void R_Draw2sMultiPatchTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); + + void R_DrawSpan(drawspandata_t* ds); void R_DrawTranslucentSpan(drawspandata_t* ds); void R_DrawSplat(drawspandata_t* ds); diff --git a/src/screen.c b/src/screen.c index 4db2536fb..860501b47 100644 --- a/src/screen.c +++ b/src/screen.c @@ -128,13 +128,28 @@ void SCR_SetDrawFuncs(enum columncontext_e _columncontext) colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn; colfuncs[COLDRAWFUNC_DROPSHADOW] = R_DrawDropShadowColumn; - colfuncs_bm[BASEDRAWFUNC] = R_DrawColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_Brightmap; + + if (_columncontext == COLUMNCONTEXT_FLUSH) + { + colfuncs_bm[BASEDRAWFUNC] = R_DrawColumnFlush_Brightmap; + colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush_Brightmap; + 
colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumnFlush_Brightmap; + } + else + { + colfuncs_bm[BASEDRAWFUNC] = R_DrawColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_Brightmap; + } + colfuncs_bm[COLDRAWFUNC_FOG] = NULL; // Not needed colfuncs_bm[COLDRAWFUNC_DROPSHADOW] = NULL; // Not needed From 2ce83bb48d13e1a93ce238e32b55834199c32246 Mon Sep 17 00:00:00 2001 From: NepDisk Date: Sun, 19 Oct 2025 13:07:23 -0400 Subject: [PATCH 3/8] Revert "Add brightmap versions" This reverts commit a3689c694c3e5ffafd569be195e96b899479448f. 
--- src/r_draw.h | 10 ---------- src/screen.c | 29 +++++++---------------------- 2 files changed, 7 insertions(+), 32 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 5a1ef0f1d..5d2a0c036 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -240,16 +240,6 @@ void R_Draw2sMultiPatchColumn_Brightmap(drawcolumndata_t* dc); void R_Draw2sMultiPatchTranslucentColumn_Brightmap(drawcolumndata_t* dc); void R_DrawColumnShadowed_Brightmap(drawcolumndata_t* dc); -// column drawers which use buffered drawing with flush -void R_DrawColumnFlush_Brightmap(drawcolumndata_t* dc); -void R_DrawTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); -void R_DrawTranslatedColumnFlush_Brightmap(drawcolumndata_t* dc); -void R_DrawColumnShadowedFlush_Brightmap(drawcolumndata_t* dc); -void R_DrawTranslatedTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); -void R_Draw2sMultiPatchColumnFlush_Brightmap(drawcolumndata_t* dc); -void R_Draw2sMultiPatchTranslucentColumnFlush_Brightmap(drawcolumndata_t* dc); - - void R_DrawSpan(drawspandata_t* ds); void R_DrawTranslucentSpan(drawspandata_t* ds); void R_DrawSplat(drawspandata_t* ds); diff --git a/src/screen.c b/src/screen.c index 860501b47..4db2536fb 100644 --- a/src/screen.c +++ b/src/screen.c @@ -128,28 +128,13 @@ void SCR_SetDrawFuncs(enum columncontext_e _columncontext) colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn; colfuncs[COLDRAWFUNC_DROPSHADOW] = R_DrawDropShadowColumn; - - if (_columncontext == COLUMNCONTEXT_FLUSH) - { - colfuncs_bm[BASEDRAWFUNC] = R_DrawColumnFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = 
R_Draw2sMultiPatchTranslucentColumnFlush_Brightmap; - } - else - { - colfuncs_bm[BASEDRAWFUNC] = R_DrawColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed_Brightmap; - colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_Brightmap; - colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_Brightmap; - } - + colfuncs_bm[BASEDRAWFUNC] = R_DrawColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed_Brightmap; + colfuncs_bm[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_Brightmap; + colfuncs_bm[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_Brightmap; colfuncs_bm[COLDRAWFUNC_FOG] = NULL; // Not needed colfuncs_bm[COLDRAWFUNC_DROPSHADOW] = NULL; // Not needed From e8e72db32f405ef8814e39c6f1fbca37430e2ff8 Mon Sep 17 00:00:00 2001 From: Alug Date: Sun, 19 Oct 2025 20:38:22 +0200 Subject: [PATCH 4/8] fix columnbuf crashes -our buffered uhh buffer needs to be only advanced per column not per screenwidth, caused the buffer to advance beyond its boundaries -brightmap drawers get the DC_DIRECT flag to make sure they never attempt to draw buffered removed tempbuffer alignment for now as reported by ubsan --- src/r_defs.h | 2 +- src/r_draw.cpp | 10 ---------- src/r_draw_column.cpp | 20 +++++++++----------- src/r_things.cpp | 4 ++-- 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/r_defs.h b/src/r_defs.h index 758438616..1aac18eb3 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -906,7 +906,7 @@ struct 
rotsprite_t // Patches are used for sprites and all masked pictures, and we compose // textures from the TEXTURES list of patches. // -typedef enum +typedef enum { PATCHALIGN_AUTOCENTER = 1<<0, PATCHALIGN_USEPIVOTS = 1<<1, diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 1cffcf8a8..e8cfde488 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -546,22 +546,12 @@ void R_InitViewBuffer(INT32 width, INT32 height) if (temp_dc.buf) { -#if defined(__SSE__) - aligned_free(temp_dc.buf); -#else Z_Free(temp_dc.buf); -#endif } memset(&temp_dc, 0, sizeof(temp_dc)); -#if defined(__SSE__) - while (bufsize & 15) - bufsize++; - temp_dc.buf = static_cast(aligned_alloc(16, bufsize)); -#else temp_dc.buf = static_cast(Z_Calloc(bufsize, PU_STATIC, NULL)); -#endif linesize = vid.width; // killough 11/98 renderscreen = vid.screens[0]; // haleyjd 07/02/14 diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index 8a8a6ed3c..f6f1085c7 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -96,14 +96,8 @@ FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* } } - if constexpr (Type & DrawColumnType::DC_DIRECT) - { // if we dont buffer our columns, we need to handle translucency again - return R_GetColumnTranslucent(dc, dest, bit, col); - } - else - { - return R_GetColumnTranslated(dc, col); - } + // if we dont buffer our columns, we need to handle translucency again + return R_GetColumnTranslucent(dc, dest, bit, col); } /** \brief The R_DrawColumn function @@ -115,7 +109,6 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) INT32 count; UINT8 *dest; const INT32 vidheight = vid.height; - const INT32 vidwidth = vid.width; // leban 1/17/99: // removed the + 1 here, adjusted the if test, and added an increment @@ -134,7 +127,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) return; } - if ((unsigned)dc->x >= (unsigned)vidwidth || dc->yl < 0 || dc->yh >= vidheight) + if ((unsigned)dc->x >= (unsigned)vid.width || dc->yl < 0 || dc->yh 
>= vidheight) { return; } @@ -247,6 +240,11 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) else dest = R_GetBufferOpaque(dc); + INT32 vidwidth = 8; //SoM: Oh, Oh it's MAGIC! You know... + + if constexpr (Type & DrawColumnType::DC_DIRECT) + vidwidth = vid.width; + count++; // Determine scaling, which is the only mapping to be done. @@ -367,7 +365,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) #define DEFINE_COLUMN_COMBO(name, flags) \ DEFINE_COLUMN_FUNC(name, flags) \ - DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_BRIGHTMAP) + DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_DIRECT|DC_BRIGHTMAP) DEFINE_COLUMN_COMBO(R_DrawColumn, DC_DIRECT|DC_BASIC) DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_DIRECT|DC_TRANSMAP) diff --git a/src/r_things.cpp b/src/r_things.cpp index 66ef4eeca..e4a420918 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -867,9 +867,9 @@ UINT8 *R_GetSpriteTranslation(vissprite_t *vis) if (vis->mobj->color) { // New colormap stuff for skins Tails 06-07-2002 - + if (!(vis->cut & SC_PRECIP) && vis->mobj->colorized) - { + { return R_GetTranslationColormap(R_IsOverlayingInvinciblePlayer(vis->mobj) ? 
TC_BLINK : TC_RAINBOW, static_cast(vis->mobj->color), GTC_CACHE); From bad7b9f9808efe029a86e475d2175d7575f1c048 Mon Sep 17 00:00:00 2001 From: Alug Date: Sun, 19 Oct 2025 20:40:51 +0200 Subject: [PATCH 5/8] remove unused hires variable from drawcolumndata --- src/r_defs.h | 1 - src/r_draw_column.cpp | 2 +- src/r_things.cpp | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/r_defs.h b/src/r_defs.h index 1aac18eb3..44c3bf56d 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1093,7 +1093,6 @@ typedef struct INT32 yh; fixed_t iscale; fixed_t texturemid; - UINT8 hires; UINT8 shadowcolor; UINT8* source; // first pixel in a column diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index f6f1085c7..1824fb442 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -250,7 +250,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) // Determine scaling, which is the only mapping to be done. fracstep = dc->iscale; //frac = dc_texturemid + (dc_yl - centery)*fracstep; - frac = (dc->texturemid + FixedMul((dc->yl << FRACBITS) - centeryfrac, fracstep)) * (!dc->hires); + frac = dc->texturemid + FixedMul((dc->yl << FRACBITS) - centeryfrac, fracstep); // Inner loop that does the actual texture mapping, e.g. a DDA-like scaling. // This is as fast as it gets. 
diff --git a/src/r_things.cpp b/src/r_things.cpp index e4a420918..3b8e69808 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1141,7 +1141,6 @@ static void R_DrawVisSprite(vissprite_t *vis) } R_SetColumnFunc(BASEDRAWFUNC, false); - dc.hires = 0; vis->x1 = x1; vis->x2 = x2; From f0e0edbc1dd04053b13286aa2eb8538acc053aaf Mon Sep 17 00:00:00 2001 From: Alug Date: Sun, 19 Oct 2025 20:48:16 +0200 Subject: [PATCH 6/8] use aligned alloc for screen and column buffers 16 byte alignment should speed up everything a bit --- src/r_draw.cpp | 14 +++++++++++++- src/v_video.c | 17 ++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e8cfde488..23835fe4f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -546,12 +546,24 @@ void R_InitViewBuffer(INT32 width, INT32 height) if (temp_dc.buf) { +#if defined(__SSE__) + aligned_free(temp_dc.buf); +#else Z_Free(temp_dc.buf); +#endif } memset(&temp_dc, 0, sizeof(temp_dc)); - temp_dc.buf = static_cast(Z_Calloc(bufsize, PU_STATIC, NULL)); +#if defined(__SSE__) + while (bufsize & 15) + bufsize++; + temp_dc.buf = static_cast(aligned_alloc(16, bufsize)); +#else + temp_dc.buf = static_cast(Z_Malloc(bufsize, PU_STATIC, NULL)); +#endif + + memset(temp_dc.buf, 0, bufsize); linesize = vid.width; // killough 11/98 renderscreen = vid.screens[0]; // haleyjd 07/02/14 diff --git a/src/v_video.c b/src/v_video.c index 34a88cecf..ccc6fcdac 100644 --- a/src/v_video.c +++ b/src/v_video.c @@ -3873,12 +3873,19 @@ UINT8 GetColorLUTDirect(colorlookup_t *lut, UINT8 r, UINT8 g, UINT8 b) void V_Init(void) { INT32 i; - const INT32 screensize = vid.rowbytes * vid.height; + INT32 screensize = vid.rowbytes * vid.height; for (i = 0; i < NUMSCREENS; i++) { if (vid.screens[i]) + { +#if defined(__SSE__) + aligned_free(vid.screens[i]); +#else free(vid.screens[i]); +#endif + } + vid.screens[i] = NULL; } @@ -3887,7 +3894,15 @@ void V_Init(void) { for (i = 0; i < NUMSCREENS; i++) { + // we need to allocate 
these relative to their cpu restrictions to not trigger segfaults + // TODO: add support for sve and neon +#if defined(__SSE__) + while (screensize & 15) + screensize++; + vid.screens[i] = aligned_alloc(16, screensize); +#else vid.screens[i] = malloc(screensize); +#endif memset(vid.screens[i], 0, screensize); } } From 6b31428b08147bd9998f28dc94a5edcadb3e6520 Mon Sep 17 00:00:00 2001 From: Alug Date: Sun, 19 Oct 2025 21:04:36 +0200 Subject: [PATCH 7/8] use DEFINE_COLUMN_COMBO macro for flush function definitions --- src/r_draw.h | 14 +++++++------- src/r_draw_column.cpp | 29 ++++++++++------------------- src/screen.c | 14 +++++++------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 5d2a0c036..216870767 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -221,13 +221,13 @@ void R_Draw2sMultiPatchColumn(drawcolumndata_t* dc); void R_Draw2sMultiPatchTranslucentColumn(drawcolumndata_t* dc); // column drawers which use buffered drawing with flush -void R_DrawColumnFlush(drawcolumndata_t* dc); -void R_DrawTranslucentColumnFlush(drawcolumndata_t* dc); -void R_DrawTranslatedColumnFlush(drawcolumndata_t* dc); -void R_DrawColumnShadowedFlush(drawcolumndata_t* dc); -void R_DrawTranslatedTranslucentColumnFlush(drawcolumndata_t* dc); -void R_Draw2sMultiPatchColumnFlush(drawcolumndata_t* dc); -void R_Draw2sMultiPatchTranslucentColumnFlush(drawcolumndata_t* dc); +void R_DrawColumn_Flush(drawcolumndata_t* dc); +void R_DrawTranslucentColumn_Flush(drawcolumndata_t* dc); +void R_DrawTranslatedColumn_Flush(drawcolumndata_t* dc); +void R_DrawColumnShadowed_Flush(drawcolumndata_t* dc); +void R_DrawTranslatedTranslucentColumn_Flush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchColumn_Flush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchTranslucentColumn_Flush(drawcolumndata_t* dc); void R_DrawFogColumn(drawcolumndata_t* dc); void R_DrawColumnShadowed(drawcolumndata_t* dc); diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index 
1824fb442..f84ec2ea6 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -364,27 +364,18 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) } #define DEFINE_COLUMN_COMBO(name, flags) \ - DEFINE_COLUMN_FUNC(name, flags) \ - DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_DIRECT|DC_BRIGHTMAP) + DEFINE_COLUMN_FUNC(name, flags|DC_DIRECT) \ + DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_DIRECT|DC_BRIGHTMAP) \ + DEFINE_COLUMN_FUNC(name ## _Flush, flags) -DEFINE_COLUMN_COMBO(R_DrawColumn, DC_DIRECT|DC_BASIC) -DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_DIRECT|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_DIRECT|DC_COLORMAP) -DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_DIRECT|DC_LIGHTLIST) -DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_DIRECT|DC_COLORMAP|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_DIRECT|DC_HOLES) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_DIRECT|DC_HOLES|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawColumn, DC_BASIC) +DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_COLORMAP) +DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_LIGHTLIST) +DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_COLORMAP|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_HOLES) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_HOLES|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_DrawColumnFlush, DC_BASIC) -DEFINE_COLUMN_COMBO(R_DrawTranslucentColumnFlush, DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_DrawTranslatedColumnFlush, DC_COLORMAP) -DEFINE_COLUMN_COMBO(R_DrawColumnShadowedFlush, DC_LIGHTLIST) -DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumnFlush, DC_COLORMAP|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumnFlush, DC_HOLES) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumnFlush, DC_HOLES|DC_TRANSMAP) - -//skymyass -//DEFINE_COLUMN_FUNC(R_DrawSkyColumn, DC_SKY) void R_DrawFogColumn(drawcolumndata_t *dc) { 
diff --git a/src/screen.c b/src/screen.c index 4db2536fb..b9e233841 100644 --- a/src/screen.c +++ b/src/screen.c @@ -106,13 +106,13 @@ void SCR_SetDrawFuncs(enum columncontext_e _columncontext) if (_columncontext == COLUMNCONTEXT_FLUSH) { - colfuncs[BASEDRAWFUNC] = R_DrawColumnFlush; - colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush; - colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush; - colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush; - colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumnFlush; + colfuncs[BASEDRAWFUNC] = R_DrawColumn_Flush; + colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_Flush; + colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn_Flush; + colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed_Flush; + colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn_Flush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_Flush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_Flush; } else { From a802a49647f0af0d60fee340903a2dd9ef9735bb Mon Sep 17 00:00:00 2001 From: Alug Date: Sun, 19 Oct 2025 21:06:34 +0200 Subject: [PATCH 8/8] rename to stride more sensible name in this new case --- src/r_draw_column.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index f84ec2ea6..ca61218ca 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -240,10 +240,10 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) else dest = R_GetBufferOpaque(dc); - INT32 vidwidth = 8; //SoM: Oh, Oh it's MAGIC! You know... + INT32 stride = 8; // SoM: Oh, Oh it's MAGIC! You know... 
if constexpr (Type & DrawColumnType::DC_DIRECT) - vidwidth = vid.width; + stride = vid.width; count++; @@ -268,7 +268,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) while (--count > 0) { *dest = R_DrawColumnPixel(dc, dest, frac>>FRACBITS); - dest += vidwidth; + dest += stride; frac += fracstep; } } @@ -313,7 +313,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) *dest = R_DrawColumnPixel(dc, dest, n); } - dest += vidwidth; + dest += stride; // Avoid overflow. if (fracstep > 0x7FFFFFFF - frac) @@ -338,12 +338,12 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) { *dest = R_DrawColumnPixel(dc, dest, (frac>>FRACBITS) & heightmask); - dest += vidwidth; + dest += stride; frac += fracstep; *dest = R_DrawColumnPixel(dc, dest, (frac>>FRACBITS) & heightmask); - dest += vidwidth; + dest += stride; frac += fracstep; }