diff --git a/src/r_draw.cpp b/src/r_draw.cpp index eb809cbf0..1cffcf8a8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -434,6 +434,92 @@ UINT16 R_GetSuperColorByName(const char *name) // in reality, the few routines that can work for either mode, are // put here +enum columncontext_e columncontext = COLUMNCONTEXT_DIRECT; + +enum ColumnFlushType +{ + FLUSH_NONE = 0x0000, + FLUSH_OPAQUE = 0x0001, + FLUSH_TRANS = 0x0002, + FLUSH_COLORMAP = 0x0004, + FLUSH_COLORMAP_TRANS = 0x0008, +}; + +typedef struct drawcolumndata_temp_s +{ + INT32 x; + INT32 yl[8], yh[8]; + + // e6y: resolution limitation is removed + UINT8 *buf; + + INT32 startx; + ColumnFlushType type; + INT32 commontop, commonbot; + UINT8 *transmap; + // SoM 7-28-04: Fix the fuzz problem. + UINT8 *translation; +} drawcolumndata_temp_t; + +drawcolumndata_temp_t temp_dc = {}; + +// +// Error functions that will abort if R_FlushColumns tries to flush +// columns without a column type. +// +FUNCNORETURN static ATTRNORETURN void R_FlushWholeError(void) +{ + I_Error("R_FlushWholeColumns called without being initialized.\n"); +} + +FUNCNORETURN static ATTRNORETURN void R_FlushHTError(void) +{ + I_Error("R_FlushHTColumns called without being initialized.\n"); +} + +FUNCNORETURN static ATTRNORETURN void R_QuadFlushError(void) +{ + I_Error("R_FlushQuadColumn called without being initialized.\n"); +} + +static void (*R_FlushWholeColumns)(void) = R_FlushWholeError; +static void (*R_FlushHTColumns)(void) = R_FlushHTError; +static void (*R_FlushQuadColumn)(void) = R_QuadFlushError; + +static void R_FlushColumns(void) +{ + if (temp_dc.x != 8 || temp_dc.commontop >= temp_dc.commonbot) + R_FlushWholeColumns(); + else + { + R_FlushHTColumns(); + R_FlushQuadColumn(); + } + + temp_dc.x = 0; +} + +// +// R_ResetColumnBuffer +// +// haleyjd 09/13/04: new function to call from main rendering loop +// which gets rid of the unnecessary reset of various variables during +// column drawing. 
+// +void R_ResetColumnBuffer(void) +{ + // haleyjd 10/06/05: this must not be done if x == 0! + if (temp_dc.x) + { + R_FlushColumns(); + } + + temp_dc.type = FLUSH_NONE; + R_FlushWholeColumns = R_FlushWholeError; + R_FlushHTColumns = R_FlushHTError; + R_FlushQuadColumn = R_QuadFlushError; +} + /** \brief The R_InitViewBuffer function Creates lookup tables for getting the framebuffer address @@ -456,6 +542,26 @@ void R_InitViewBuffer(INT32 width, INT32 height) viewwindowx = 0; viewwindowy = 0; + INT32 bufsize = (vid.width * 8) * sizeof(*temp_dc.buf); + + if (temp_dc.buf) + { +#if defined(__SSE__) + aligned_free(temp_dc.buf); +#else + Z_Free(temp_dc.buf); +#endif + } + + memset(&temp_dc, 0, sizeof(temp_dc)); + +#if defined(__SSE__) + while (bufsize & 15) + bufsize++; + temp_dc.buf = static_cast(aligned_alloc(16, bufsize)); +#else + temp_dc.buf = static_cast(Z_Calloc(bufsize, PU_STATIC, NULL)); +#endif linesize = vid.width; // killough 11/98 renderscreen = vid.screens[0]; // haleyjd 07/02/14 diff --git a/src/r_draw.h b/src/r_draw.h index a7e446006..5d2a0c036 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -44,6 +44,18 @@ extern floatv3_t *ds_su, *ds_sv, *ds_sz; extern float focallengthf[MAXSPLITSCREENPLAYERS]; extern float zeroheight; +enum columncontext_e +{ + COLUMNCONTEXT_DIRECT = 0, + COLUMNCONTEXT_FLUSH, +}; + +extern enum columncontext_e columncontext; +void R_SetColumnContext(enum columncontext_e _columncontext); +void SCR_SetDrawFuncs(enum columncontext_e _columncontext); + +void R_ResetColumnBuffer(void); + /// \brief Top border #define BRDR_T 0 /// \brief Bottom border @@ -207,6 +219,16 @@ void R_DrawTranslatedColumn(drawcolumndata_t* dc); void R_DrawTranslatedTranslucentColumn(drawcolumndata_t* dc); void R_Draw2sMultiPatchColumn(drawcolumndata_t* dc); void R_Draw2sMultiPatchTranslucentColumn(drawcolumndata_t* dc); + +// column drawers which use buffered drawing with flush +void R_DrawColumnFlush(drawcolumndata_t* dc); +void 
R_DrawTranslucentColumnFlush(drawcolumndata_t* dc); +void R_DrawTranslatedColumnFlush(drawcolumndata_t* dc); +void R_DrawColumnShadowedFlush(drawcolumndata_t* dc); +void R_DrawTranslatedTranslucentColumnFlush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchColumnFlush(drawcolumndata_t* dc); +void R_Draw2sMultiPatchTranslucentColumnFlush(drawcolumndata_t* dc); + void R_DrawFogColumn(drawcolumndata_t* dc); void R_DrawColumnShadowed(drawcolumndata_t* dc); diff --git a/src/r_draw_column.cpp b/src/r_draw_column.cpp index a1b79c38c..8a8a6ed3c 100644 --- a/src/r_draw_column.cpp +++ b/src/r_draw_column.cpp @@ -21,6 +21,12 @@ // a has a constant z depth from top to bottom. // +#include "r_draw.h" +#include + + +#include "r_draw_flush.cpp" + enum DrawColumnType { DC_BASIC = 0x0000, @@ -29,10 +35,11 @@ enum DrawColumnType DC_BRIGHTMAP = 0x0004, DC_HOLES = 0x0008, DC_LIGHTLIST = 0x0010, + DC_DIRECT = 0x0020, // draw our columns directly to screen! }; template -static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) { if constexpr (Type & DrawColumnType::DC_COLORMAP) { @@ -45,7 +52,7 @@ static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col) } template -static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col) { col = R_GetColumnTranslated(dc, col); @@ -60,8 +67,9 @@ static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, return dc->colormap[col]; } +// translucency is handled on flush side now! 
template -static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col) { col = R_GetColumnBrightmapped(dc, bit, col); @@ -76,7 +84,7 @@ static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, } template -static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit) { UINT8 col = dc->source[bit]; @@ -88,7 +96,14 @@ static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT } } - return R_GetColumnTranslucent(dc, dest, bit, col); + if constexpr (Type & DrawColumnType::DC_DIRECT) + { // if we dont buffer our columns, we need to handle translucency again + return R_GetColumnTranslucent(dc, dest, bit, col); + } + else + { + return R_GetColumnTranslated(dc, col); + } } /** \brief The R_DrawColumn function @@ -102,8 +117,18 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) const INT32 vidheight = vid.height; const INT32 vidwidth = vid.width; + // leban 1/17/99: + // removed the + 1 here, adjusted the if test, and added an increment + // later. this helps a compiler pipeline a bit better. the x86 + // assembler also does this. count = dc->yh - dc->yl; + // leban 1/17/99: + // this case isn't executed too often. depending on how many instructions + // there are between here and the second if test below, this case could + // be moved down and might save instructions overall. since there are + // probably different wads that favor one way or the other, i'll leave + // this alone for now. if (count < 0) // Zero length, column does not exceed a pixel. 
{ return; @@ -174,6 +199,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) } R_DrawColumnTemplate(&dc_copy); + if (solid) { dc_copy.yl = bheight; @@ -209,7 +235,17 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) // Framebuffer destination address. - dest = R_Address(dc->x, dc->yl); + if constexpr (Type & DrawColumnType::DC_DIRECT) + dest = R_Address(dc->x, dc->yl); + else if constexpr ((Type & (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP)) + == (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP)) + dest = R_GetBufferColormapTrans(dc); + else if constexpr (Type & DrawColumnType::DC_TRANSMAP) + dest = R_GetBufferTrans(dc); + else if constexpr (Type & DrawColumnType::DC_COLORMAP) + dest = R_GetBufferColormap(dc); + else + dest = R_GetBufferOpaque(dc); count++; @@ -333,13 +369,24 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc) DEFINE_COLUMN_FUNC(name, flags) \ DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_BRIGHTMAP) -DEFINE_COLUMN_COMBO(R_DrawColumn, DC_BASIC) -DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_COLORMAP) -DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_LIGHTLIST) -DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_COLORMAP|DC_TRANSMAP) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_HOLES) -DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_HOLES|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawColumn, DC_DIRECT|DC_BASIC) +DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_DIRECT|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_DIRECT|DC_COLORMAP) +DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_DIRECT|DC_LIGHTLIST) +DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_DIRECT|DC_COLORMAP|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_DIRECT|DC_HOLES) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_DIRECT|DC_HOLES|DC_TRANSMAP) + +DEFINE_COLUMN_COMBO(R_DrawColumnFlush, DC_BASIC) 
+DEFINE_COLUMN_COMBO(R_DrawTranslucentColumnFlush, DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_DrawTranslatedColumnFlush, DC_COLORMAP) +DEFINE_COLUMN_COMBO(R_DrawColumnShadowedFlush, DC_LIGHTLIST) +DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumnFlush, DC_COLORMAP|DC_TRANSMAP) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumnFlush, DC_HOLES) +DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumnFlush, DC_HOLES|DC_TRANSMAP) + +//skymyass +//DEFINE_COLUMN_FUNC(R_DrawSkyColumn, DC_SKY) void R_DrawFogColumn(drawcolumndata_t *dc) { diff --git a/src/r_draw_flush.cpp b/src/r_draw_flush.cpp new file mode 100644 index 000000000..80da61705 --- /dev/null +++ b/src/r_draw_flush.cpp @@ -0,0 +1,247 @@ +// SONIC ROBO BLAST 2 KART +//----------------------------------------------------------------------------- +// Copyright (C) 2025 by Kart Krew. +// Copyright (C) 2020 by Sonic Team Junior. +// Copyright (C) 2000 by DooM Legacy Team. +// Copyright (C) 1996 by id Software, Inc. +// Copyright (C) 1999 by Chi Hoang, Lee Killough, Jim Flynn, Rand Phares, Ty Halderman +// Copyright (C) 1999-2000 by Jess Haas, Nicolas Kalkhof, Colin Phipps, Florian Schulze +// Copyright (C) Copyright 2005, 2006 by Florian Schulze, Colin Phipps, Neil Stevens, Andrey Budko +// Copyright (C) 2013 by James Haley, Stephen McGranahan, et al. +// +// This program is free software distributed under the +// terms of the GNU General Public License, version 2. +// See the 'LICENSE' file for more details. +//----------------------------------------------------------------------------- +/// \file r_draw_flush.cpp +/// \brief Optimized quad column buffer code. By SoM. 
+/// \note no includes because this is included as part of r_draw.cpp
+
+// NOTE(review): every `template` keyword below has no parameter list in this
+// text. The bodies test a compile-time value named Type against
+// ColumnFlushType flags, so the list is presumably `<ColumnFlushType Type>`
+// (and the helper calls presumably pass `<Type>`) -- confirm against the
+// real file.
+
+//
+// R_GetFlushPixelTranslated
+//
+// Returns the buffered palette index `col`, remapped through
+// t_dc->translation when Type has FLUSH_COLORMAP or FLUSH_COLORMAP_TRANS
+// set; otherwise returns `col` unchanged.
+//
+template
+FUNCINLINE static ATTRINLINE constexpr UINT8
+R_GetFlushPixelTranslated(const drawcolumndata_temp_t *t_dc, UINT8 col)
+{
+	if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS))
+	{
+		col = t_dc->translation[col];
+	}
+
+	return col;
+}
+
+//
+// R_GetFlushPixelTranslucent
+//
+// Translates `col` (see R_GetFlushPixelTranslated) and, when Type has
+// FLUSH_TRANS or FLUSH_COLORMAP_TRANS set, combines it with the pixel
+// already at `dest` through the t_dc->transmap table. `dest` is only read
+// here; the caller performs the store.
+//
+template
+FUNCINLINE static ATTRINLINE constexpr UINT8
+R_GetFlushPixelTranslucent(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, UINT8 col)
+{
+	col = R_GetFlushPixelTranslated(t_dc, col);
+
+	if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS))
+	{
+		// haleyjd 09/11/04: use temptranmap here
+		// table index = new colour * 256 + colour currently at dest
+		return *(t_dc->transmap + (col << 8) + (*dest));
+	}
+	else
+	{
+		return col;
+	}
+}
+
+//
+// R_DrawFlushPixel
+//
+// Reads one buffered pixel from `source` and returns the value to store at
+// `dest` after the translation / translucency steps selected by Type.
+//
+template
+FUNCINLINE static ATTRINLINE constexpr UINT8
+R_DrawFlushPixel(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, const UINT8 * restrict source)
+{
+	UINT8 col = *source;
+	return R_GetFlushPixelTranslucent(t_dc, dest, col);
+}
+
+//
+// R_FlushWhole
+//
+// Flushes the entire columns in the buffer, one at a time.
+// This is used when a quad flush isn't possible.
+//
+// Consumes temp_dc.x as the loop counter and leaves it at -1 on return;
+// R_FlushColumns sets it back to 0 after calling this.
+//
+template
+static void R_FlushWhole(void)
+{
+	UINT8 * restrict source;
+	UINT8 * restrict dest;
+	INT32 count, yl;
+	const INT32 stride = vid.width; // dest advances by this much per row
+	drawcolumndata_temp_t *t_dc = &temp_dc;
+	UINT8 *restrict buf = t_dc->buf;
+
+	// walk the buffered columns from the last one added down to slot 0
+	while (--t_dc->x >= 0)
+	{
+		yl = t_dc->yl[t_dc->x];
+		// buffer layout: 8 bytes per row, one byte per buffered column
+		source = &buf[t_dc->x + (yl << 3)];
+		dest = R_Address(t_dc->startx + t_dc->x, yl);
+		// inclusive span yl..yh
+		count = t_dc->yh[t_dc->x] - yl + 1;
+
+		while (--count >= 0)
+		{
+			*dest = R_DrawFlushPixel(t_dc, dest, source);
+			source += 8;
+			dest += stride;
+		}
+	}
+}
+
+//
+// R_FlushHT
+//
+// Flushes the head and tail of columns in the buffer in
+// preparation for a quad flush.
+//
+// For each of the 8 buffer slots, the head is rows yl..commontop-1 and the
+// tail is rows commonbot+1..yh. Rows commontop..commonbot are not touched
+// here (R_FlushQuad covers them). All 8 slots are always visited:
+// R_FlushColumns only takes the head/tail + quad path when temp_dc.x == 8.
+//
+// NOTE(review): the `template` keyword below has no parameter list in this
+// text; presumably `<ColumnFlushType Type>`, with `<Type>` passed to
+// R_DrawFlushPixel -- confirm against the real file.
+//
+template
+static void R_FlushHT(void)
+{
+	UINT8 * restrict source;
+	UINT8 * restrict dest;
+	INT32 count, colnum = 0;
+	INT32 yl, yh;
+	const INT32 stride = vid.width; // dest advances by this much per row
+	const drawcolumndata_temp_t *t_dc = &temp_dc;
+	UINT8 *restrict buf = t_dc->buf;
+
+	while (colnum < 8)
+	{
+		yl = t_dc->yl[colnum];
+		yh = t_dc->yh[colnum];
+
+		// flush column head
+		if (yl < t_dc->commontop)
+		{
+			// buffer layout: 8 bytes per row, one byte per buffered column
+			source = &buf[colnum + (yl << 3)];
+			dest = R_Address(t_dc->startx + colnum, yl);
+			// rows yl .. commontop-1
+			count = t_dc->commontop - yl;
+
+			while (--count >= 0)
+			{
+				*dest = R_DrawFlushPixel(t_dc, dest, source);
+				source += 8;
+				dest += stride;
+			}
+		}
+
+		// flush column tail
+		if (yh > t_dc->commonbot)
+		{
+			source = &buf[colnum + ((t_dc->commonbot + 1) << 3)];
+			dest = R_Address(t_dc->startx + colnum, t_dc->commonbot + 1);
+			// rows commonbot+1 .. yh
+			count = yh - t_dc->commonbot;
+
+			while (--count >= 0)
+			{
+				*dest = R_DrawFlushPixel(t_dc, dest, source);
+				source += 8;
+				dest += stride;
+			}
+		}
+
+		++colnum;
+	}
+}
+
+// Begin: Quad column flushing functions.
+//
+// R_FlushQuad
+//
+// Flushes rows commontop..commonbot, which all 8 buffered columns cover,
+// one 8-pixel row at a time. R_FlushColumns only reaches this (through
+// R_FlushQuadColumn) when 8 columns are buffered and commontop < commonbot.
+//
+// FLUSH_OPAQUE rows are a straight 8-byte copy. The copy uses memcpy and the
+// byte stride rather than storing through an INT64 pointer stepped by
+// stride / 8, because:
+//  - stride / 8 truncates when vid.width is not a multiple of 8, so every
+//    row after the first would land at the wrong address;
+//  - startx is whatever column the batch started at, so the destination is
+//    not guaranteed to be 8-byte aligned;
+//  - reading/writing UINT8 storage through an INT64 lvalue is type punning.
+// A fixed-size 8-byte memcpy is normally lowered to a single 64-bit
+// load/store. memcpy comes from <string.h>, which the including translation
+// unit already relies on (R_InitViewBuffer calls memset).
+//
+// Every other flush type runs each pixel through R_DrawFlushPixel.
+//
+// NOTE(review): the template parameter list was missing from the text this
+// was reviewed from; `<ColumnFlushType Type>` is restored from how Type is
+// used -- confirm against the real file.
+//
+template <ColumnFlushType Type>
+static void R_FlushQuad(void)
+{
+	const INT32 stride = vid.width; // dest advances by this much per row
+	const drawcolumndata_temp_t *t_dc = &temp_dc;
+	INT32 count = t_dc->commonbot - t_dc->commontop + 1; // inclusive span
+	const UINT8 *restrict buf = t_dc->buf;
+
+	// buffer layout: 8 bytes per row, one byte per buffered column
+	const UINT8 * restrict source = buf + (t_dc->commontop << 3);
+	UINT8 * restrict dest = R_Address(t_dc->startx, t_dc->commontop);
+
+	if constexpr (Type & ColumnFlushType::FLUSH_OPAQUE)
+	{
+		while (--count >= 0)
+		{
+			memcpy(dest, source, 8);
+			source += 8;
+			dest += stride;
+		}
+	}
+	else
+	{
+		while (--count >= 0)
+		{
+			dest[0] = R_DrawFlushPixel<Type>(t_dc, &dest[0], &source[0]);
+			dest[1] = R_DrawFlushPixel<Type>(t_dc, &dest[1], &source[1]);
+			dest[2] = R_DrawFlushPixel<Type>(t_dc, &dest[2], &source[2]);
+			dest[3] = R_DrawFlushPixel<Type>(t_dc, &dest[3], &source[3]);
+			dest[4] = R_DrawFlushPixel<Type>(t_dc, &dest[4], &source[4]);
+			dest[5] = R_DrawFlushPixel<Type>(t_dc, &dest[5], &source[5]);
+			dest[6] = R_DrawFlushPixel<Type>(t_dc, &dest[6], &source[6]);
+			dest[7] = R_DrawFlushPixel<Type>(t_dc, &dest[7], &source[7]);
+			source += 8;
+			dest += stride;
+		}
+	}
+}
+
+// haleyjd 09/12/04: split up R_GetBuffer into various different
+// functions to minimize the number of branches and take advantage
+// of as much precalculated information as possible.
+template +static UINT8 *R_GetBuffer(drawcolumndata_t *dc) +{ + drawcolumndata_temp_t *t_dc = &temp_dc; + + // haleyjd: reordered predicates + if (t_dc->x == 8 || + (t_dc->x && (t_dc->type != Type || t_dc->x + t_dc->startx != dc->x))) + R_FlushColumns(); + + if (!t_dc->x) + { + ++t_dc->x; + t_dc->startx = dc->x; + t_dc->yl[0] = t_dc->commontop = dc->yl; + t_dc->yh[0] = t_dc->commonbot = dc->yh; + t_dc->type = Type; + + if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + t_dc->transmap = dc->transmap; + } + + if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS)) + { + t_dc->translation = dc->translation; + } + + R_FlushWholeColumns = R_FlushWhole; + R_FlushHTColumns = R_FlushHT; + R_FlushQuadColumn = R_FlushQuad; + + return &t_dc->buf[dc->yl << 3]; + } + + t_dc->yl[t_dc->x] = dc->yl; + t_dc->yh[t_dc->x] = dc->yh; + + if (dc->yl > t_dc->commontop) + t_dc->commontop = dc->yl; + if (dc->yh < t_dc->commonbot) + t_dc->commonbot = dc->yh; + + return &t_dc->buf[(dc->yl << 3) + t_dc->x++]; +} + +#define DEFINE_GETBUF_FUNC(name, flags) \ + FUNCINLINE static ATTRINLINE UINT8 *name(drawcolumndata_t *dc) \ + { \ + constexpr ColumnFlushType opt = static_cast(flags); \ + return R_GetBuffer(dc); \ + } + +DEFINE_GETBUF_FUNC(R_GetBufferOpaque, FLUSH_OPAQUE) +DEFINE_GETBUF_FUNC(R_GetBufferTrans, FLUSH_TRANS) +DEFINE_GETBUF_FUNC(R_GetBufferColormap, FLUSH_COLORMAP) +DEFINE_GETBUF_FUNC(R_GetBufferColormapTrans, FLUSH_COLORMAP_TRANS) + diff --git a/src/r_draw_span.cpp b/src/r_draw_span.cpp index a8a171b95..a4228a453 100644 --- a/src/r_draw_span.cpp +++ b/src/r_draw_span.cpp @@ -13,6 +13,9 @@ /// \brief span drawer functions /// \note no includes because this is included as part of r_draw.cpp +#include "r_draw.h" +#include + using namespace libdivide; // ========================================================================== @@ -39,7 +42,7 @@ enum DrawSpanType }; template -static constexpr UINT8 
R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) { if constexpr (Type & DrawSpanType::DS_COLORMAP) { @@ -52,7 +55,7 @@ static constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col) } template -static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col) { col = R_GetSpanTranslated(ds, col); @@ -85,7 +88,7 @@ static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap } template -static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col) { col = R_GetSpanBrightmapped(ds, colormap, bit, col); @@ -100,7 +103,7 @@ static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UIN } template -static constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit) +FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit) { UINT8 col = 0; @@ -197,14 +200,18 @@ static void R_DrawSpanTemplate(drawspandata_t* ds) { bit = (((UINT32)yposition >> ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift); - dest[i] = R_DrawSpanPixel(ds, &dsrc[i], ds->colormap, bit); + if constexpr (Type & DS_RIPPLE) + dest[i] = R_DrawSpanPixel(ds, &dsrc[i], ds->colormap, bit); + else + dest[i] = R_DrawSpanPixel(ds, &dest[i], ds->colormap, bit); xposition += xstep; yposition += ystep; } dest += 8; - dsrc += 8; + if constexpr (Type & DS_RIPPLE) + dsrc += 8; count -= 8; } @@ -213,10 +220,14 @@ static void R_DrawSpanTemplate(drawspandata_t* ds) { bit = (((UINT32)yposition >> 
ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift); - *dest = R_DrawSpanPixel(ds, dsrc, ds->colormap, bit); + if constexpr (Type & DS_RIPPLE) + *dest = R_DrawSpanPixel(ds, dsrc, ds->colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, ds->colormap, bit); dest++; - dsrc++; + if constexpr (Type & DS_RIPPLE) + dsrc++; xposition += xstep; yposition += ystep; @@ -278,6 +289,8 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) const INT32 nflatmask = ds->nflatmask; iz = ds->szp.z + ds->szp.y*(centery-ds->y) + ds->szp.x*(ds->x1-centerx); + uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx); + vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx); // Lighting is simple. It's just linear interpolation from start to end if constexpr (!(Type & DS_SPRITE)) @@ -292,9 +305,6 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) //CONS_Printf("tilted lighting %f to %f (foc %f)\n", lightstart, lightend, focallengthf); } - uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx); - vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx); - colormap = ds->colormap; if constexpr (Type & DS_RIPPLE) @@ -364,12 +374,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) colormap = ds->planezlight[tiltlighting[x1 + i]] + (ds->colormap - colormaps); } - dest[i] = R_DrawSpanPixel(ds, &dsrc[i], colormap, bit); + if constexpr (Type & DS_RIPPLE) + dest[i] = R_DrawSpanPixel(ds, &dsrc[i], colormap, bit); + else + dest[i] = R_DrawSpanPixel(ds, &dest[i], colormap, bit); } ds->x1 += SPANSIZE; dest += SPANSIZE; - dsrc += SPANSIZE; + if constexpr (Type & DS_RIPPLE) + dsrc += SPANSIZE; startu = endu; startv = endv; width -= SPANSIZE; @@ -386,7 +400,11 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) { colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps); } - *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + if constexpr (Type & DS_RIPPLE) + 
*dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, colormap, bit); + ds->x1++; } else @@ -412,10 +430,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds) { colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps); } - *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + + if constexpr (Type & DS_RIPPLE) + *dest = R_DrawSpanPixel(ds, dsrc, colormap, bit); + else + *dest = R_DrawSpanPixel(ds, dest, colormap, bit); + dest++; + if constexpr (Type & DS_RIPPLE) + dsrc++; ds->x1++; - dsrc++; u += stepu; v += stepv; } @@ -768,18 +792,16 @@ void R_DrawFogSpan(drawspandata_t* ds) { ZoneScoped; + INT32 count = ds->x2 - ds->x1 + 1; + UINT8 *colormap; UINT8 *dest; const INT32 vidwidth = vid.width; - size_t count; - colormap = ds->colormap; dest = R_Address(ds->x1, ds->y); - count = ds->x2 - ds->x1 + 1; - while (count >= 4) { dest[0] = colormap[dest[0]]; @@ -787,7 +809,7 @@ void R_DrawFogSpan(drawspandata_t* ds) dest[2] = colormap[dest[2]]; dest[3] = colormap[dest[3]]; - dest += 4; + dest += 4; count -= 4; } diff --git a/src/r_main.cpp b/src/r_main.cpp index f5c4904f2..9a7d3d7e2 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -1538,8 +1538,11 @@ void R_RenderPlayerView(void) R_ClearSegTables(); R_ClearPlanes(); R_ClearSprites(); + R_SetColumnContext(COLUMNCONTEXT_FLUSH); R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, false); R_ClipSprites(drawsegs, NULL); + R_ResetColumnBuffer(); + R_SetColumnContext(COLUMNCONTEXT_DIRECT); R_DrawSkyPlanes(); R_DrawPlanes(); R_DrawMasked(masks, nummasks); @@ -1577,6 +1580,7 @@ void R_RenderPlayerView(void) NetUpdate(); // The head node is the last node output. 
+ R_SetColumnContext(COLUMNCONTEXT_FLUSH); ps_numbspcalls = ps_numpolyobjects = ps_numdrawnodes = 0; ps_bsptime = I_GetPreciseTime(); R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, true); @@ -1585,6 +1589,7 @@ void R_RenderPlayerView(void) ps_sw_spritecliptime = I_GetPreciseTime(); R_ClipSprites(drawsegs, NULL); ps_sw_spritecliptime = I_GetPreciseTime() - ps_sw_spritecliptime; + R_ResetColumnBuffer(); // Add skybox portals caused by sky visplanes. if (skybox && !oldsky) @@ -1629,6 +1634,8 @@ void R_RenderPlayerView(void) R_ClipSprites(ds_p - (masks[nummasks - 1].drawsegs[1] - masks[nummasks - 1].drawsegs[0]), portal); + R_ResetColumnBuffer(); + Portal_Remove(portal); } @@ -1639,6 +1646,7 @@ void R_RenderPlayerView(void) } ps_sw_portaltime = I_GetPreciseTime() - ps_sw_portaltime; + R_SetColumnContext(COLUMNCONTEXT_DIRECT); ps_sw_planetime = I_GetPreciseTime(); R_DrawSkyPlanes(); R_DrawPlanes(); diff --git a/src/r_things.cpp b/src/r_things.cpp index dcba2ec97..66ef4eeca 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -3602,6 +3602,11 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) drawseg_t* ds; INT32 i; + if (visspritecount - clippedvissprites <= 0) + { + return; + } + // e6y // Reducing of cache misses in the following R_DrawSprite() // Makes sense for scenes with huge amount of drawsegs. 
@@ -3611,11 +3616,6 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal) drawsegs_xranges[i].count = 0; } - if (visspritecount - clippedvissprites <= 0) - { - return; - } - if (drawsegs_xrange_size < maxdrawsegs) { // haleyjd: fix reallocation to track 2x size diff --git a/src/screen.c b/src/screen.c index aeee8254a..4db2536fb 100644 --- a/src/screen.c +++ b/src/screen.c @@ -98,19 +98,33 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_ // ========================================================================= -void SCR_SetDrawFuncs(void) +void SCR_SetDrawFuncs(enum columncontext_e _columncontext) { // // setup the right draw routines // - colfuncs[BASEDRAWFUNC] = R_DrawColumn; - colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn; - colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn; - colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed; - colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn; + if (_columncontext == COLUMNCONTEXT_FLUSH) + { + colfuncs[BASEDRAWFUNC] = R_DrawColumnFlush; + colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush; + colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush; + colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush; + colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumnFlush; + } + else + { + colfuncs[BASEDRAWFUNC] = R_DrawColumn; + colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn; + colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn; + colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed; + colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = 
R_Draw2sMultiPatchColumn; + colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn; + } + colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn; colfuncs[COLDRAWFUNC_DROPSHADOW] = R_DrawDropShadowColumn; @@ -215,6 +229,17 @@ void SCR_SetDrawFuncs(void) R_SetSpanFunc(BASEDRAWFUNC, false, false); } +// used to switch between column buffering and drawing them directly to screen +// our sky "plane" drawer cannot handle the buffer system due to multithreading +// (that would require alot of extra complexity for smth with massive diminishing results) +// Our masked drawing step draws things in a very particular order, which results in alot of flushing to screen +// effectively adding massive overhead due to excessive flushing, so we draw our masked thing directly to screen instead +void R_SetColumnContext(enum columncontext_e _columncontext) +{ + columncontext = _columncontext; + SCR_SetDrawFuncs(_columncontext); // set our column drawers +} + void R_SetColumnFunc(size_t id, boolean brightmapped) { I_Assert(id < COLDRAWFUNC_MAX); @@ -336,7 +361,7 @@ void SCR_SetMode(void) V_SetPalette(0); - SCR_SetDrawFuncs(); + SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT); // Shoot! The screen texture was flushed! Y_CleanupScreenBuffer(); diff --git a/src/screen.h b/src/screen.h index 1d4333111..f1f25a2ff 100644 --- a/src/screen.h +++ b/src/screen.h @@ -112,10 +112,8 @@ void SCR_Startup(void); // Change video mode, only at the start of a refresh. void SCR_SetMode(void); -// Set drawer functions for Software -void SCR_SetDrawFuncs(void); - // Set current column / span drawers +//void R_SetColumnContext(enum columncontext_e _columncontext); // declared in r_draw! 
void R_SetColumnFunc(size_t id, boolean brightmapped); void R_SetSpanFunc(size_t id, boolean npo2, boolean brightmapped); boolean R_SetSpanFuncFlat(size_t id); // flat color diff --git a/src/sdl/i_video.cpp b/src/sdl/i_video.cpp index db017f4fa..72ca37956 100644 --- a/src/sdl/i_video.cpp +++ b/src/sdl/i_video.cpp @@ -67,6 +67,7 @@ #include "../console.h" #include "../command.h" #include "../r_main.h" +#include "../r_draw.h" #include "../lua_hook.h" #include "sdlmain.h" #include "../i_system.h" @@ -1485,7 +1486,7 @@ boolean VID_CheckRenderer(void) if (rendermode == render_soft) { vid.rowbytes = vid.width; - SCR_SetDrawFuncs(); + SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT); } #ifdef HWRENDER else if (rendermode == render_opengl && rendererchanged) diff --git a/src/v_video.c b/src/v_video.c index f710528e9..34a88cecf 100644 --- a/src/v_video.c +++ b/src/v_video.c @@ -32,15 +32,6 @@ #include "doomstat.h" #include "r_fps.h" -#if defined(__SSE__) || defined(__AVX__) -#ifdef _WIN32 -#include -#define aligned_alloc(align, size) _aligned_malloc(size, align) -#endif - -#include -#endif - #ifdef HWRENDER #include "hardware/hw_glob.h" #endif diff --git a/src/v_video.h b/src/v_video.h index 0ea17220a..e0a20f0dd 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -19,6 +19,18 @@ #include "r_defs.h" #include "r_main.h" +#if defined(__SSE__) +#ifdef _WIN32 +#include +#define aligned_alloc(align, size) _aligned_malloc(size, align) +#define aligned_free(ptr) _aligned_free(ptr) +#else +#define aligned_free(ptr) free(ptr) +#endif + +#include +#endif + // SRB2Kart #include "hu_stuff.h" // fonts