Software Renderer: Implement Column buffering system

A port of https://github.com/Indev450/SRB2Kart-Saturn/pull/205, currently doesn't work
This commit is contained in:
NepDisk 2025-10-19 12:12:33 -04:00
parent 8288b6779f
commit 659aa667eb
12 changed files with 540 additions and 61 deletions

View file

@ -434,6 +434,92 @@ UINT16 R_GetSuperColorByName(const char *name)
// in reality, the few routines that can work for either mode, are
// put here
enum columncontext_e columncontext = COLUMNCONTEXT_DIRECT;
// Kinds of column batch the flush buffer can hold. Every kind other than
// FLUSH_NONE is its own bit, so the flush templates can test for several
// kinds at once with a single mask.
enum ColumnFlushType
{
	FLUSH_NONE           = 0,      // no batch type selected
	FLUSH_OPAQUE         = 1 << 0, // buffered pixels are written out as they are
	FLUSH_TRANS          = 1 << 1, // blended through a translucency table on flush
	FLUSH_COLORMAP       = 1 << 2, // remapped through a translation table on flush
	FLUSH_COLORMAP_TRANS = 1 << 3, // remapped, then blended, on flush
};
// State of the column flush buffer. Up to eight horizontally adjacent columns
// are collected here and written to the screen together by the flush routines.
typedef struct drawcolumndata_temp_s
{
	// number of columns currently held in the buffer (0..8)
	INT32 x;

	// first and last row of each buffered column
	INT32 yl[8];
	INT32 yh[8];

	// e6y: resolution limitation is removed
	// pixel storage: eight bytes per row, addressed as (row << 3) + column
	UINT8 *buf;

	// screen x of the first buffered column
	INT32 startx;

	// flush type of the batch, recorded when its first column is added
	ColumnFlushType type;

	// largest yl and smallest yh over the batch: the rows every buffered
	// column covers
	INT32 commontop;
	INT32 commonbot;

	// translucency table recorded from the first column of the batch
	UINT8 *transmap;

	// SoM 7-28-04: Fix the fuzz problem.
	// translation table recorded from the first column of the batch
	UINT8 *translation;
} drawcolumndata_temp_t;

// the single flush buffer shared by all buffered column drawers
drawcolumndata_temp_t temp_dc = {};
//
// Error functions that will abort if R_FlushColumns tries to flush
// columns without a column type.
//
// Each one reports a fatal error through I_Error. They are the initial
// targets of the three flush function pointers declared below, and
// R_ResetColumnBuffer points those pointers back at them.
//
FUNCNORETURN static ATTRNORETURN void R_FlushWholeError(void)
{
I_Error("R_FlushWholeColumns called without being initialized.\n");
}
FUNCNORETURN static ATTRNORETURN void R_FlushHTError(void)
{
I_Error("R_FlushHTColumns called without being initialized.\n");
}
FUNCNORETURN static ATTRNORETURN void R_QuadFlushError(void)
{
I_Error("R_FlushQuadColumn called without being initialized.\n");
}
// Flush routines currently in effect; R_FlushColumns calls through these.
// R_GetBuffer replaces them with the instantiations matching the batch type.
static void (*R_FlushWholeColumns)(void) = R_FlushWholeError;
static void (*R_FlushHTColumns)(void) = R_FlushHTError;
static void (*R_FlushQuadColumn)(void) = R_QuadFlushError;
//
// R_FlushColumns
//
// Writes the buffered columns out through the current flush routines and
// empties the buffer. A full batch of eight columns whose common row range
// spans more than one row is flushed as head/tail pieces followed by the
// shared block; every other case is flushed one whole column at a time.
//
static void R_FlushColumns(void)
{
	if (temp_dc.x == 8 && temp_dc.commontop < temp_dc.commonbot)
	{
		R_FlushHTColumns();
		R_FlushQuadColumn();
	}
	else
	{
		R_FlushWholeColumns();
	}

	// the buffer is empty again
	temp_dc.x = 0;
}
//
// R_ResetColumnBuffer
//
// haleyjd 09/13/04: new function to call from main rendering loop
// which gets rid of the unnecessary reset of various variables during
// column drawing.
//
// Flushes whatever is still waiting in the column buffer, then returns the
// buffer to its untyped state: the batch type becomes FLUSH_NONE and the
// flush function pointers are aimed back at the error handlers.
//
void R_ResetColumnBuffer(void)
{
	// haleyjd 10/06/05: this must not be done if x == 0!
	if (temp_dc.x != 0)
		R_FlushColumns();

	R_FlushQuadColumn = R_QuadFlushError;
	R_FlushHTColumns = R_FlushHTError;
	R_FlushWholeColumns = R_FlushWholeError;
	temp_dc.type = FLUSH_NONE;
}
/** \brief The R_InitViewBuffer function
Creates lookup tables for getting the framebuffer address
@ -456,6 +542,26 @@ void R_InitViewBuffer(INT32 width, INT32 height)
viewwindowx = 0;
viewwindowy = 0;
INT32 bufsize = (vid.width * 8) * sizeof(*temp_dc.buf);
if (temp_dc.buf)
{
#if defined(__SSE__)
aligned_free(temp_dc.buf);
#else
Z_Free(temp_dc.buf);
#endif
}
memset(&temp_dc, 0, sizeof(temp_dc));
#if defined(__SSE__)
while (bufsize & 15)
bufsize++;
temp_dc.buf = static_cast<UINT8*>(aligned_alloc(16, bufsize));
#else
temp_dc.buf = static_cast<UINT8*>(Z_Calloc(bufsize, PU_STATIC, NULL));
#endif
linesize = vid.width; // killough 11/98
renderscreen = vid.screens[0]; // haleyjd 07/02/14

View file

@ -44,6 +44,18 @@ extern floatv3_t *ds_su, *ds_sv, *ds_sz;
extern float focallengthf[MAXSPLITSCREENPLAYERS];
extern float zeroheight;
// Selects which family of column drawers is installed.
enum columncontext_e
{
COLUMNCONTEXT_DIRECT = 0, // drawers that write straight to the screen
COLUMNCONTEXT_FLUSH, // drawers that write into the column buffer, flushed later
};
// Context most recently selected through R_SetColumnContext.
extern enum columncontext_e columncontext;
// Records the context and installs the matching column drawers.
void R_SetColumnContext(enum columncontext_e _columncontext);
// Fills in the column drawer table for the given context.
void SCR_SetDrawFuncs(enum columncontext_e _columncontext);
// Flushes any columns still buffered and clears the buffer's flush type.
void R_ResetColumnBuffer(void);
/// \brief Top border
#define BRDR_T 0
/// \brief Bottom border
@ -207,6 +219,16 @@ void R_DrawTranslatedColumn(drawcolumndata_t* dc);
void R_DrawTranslatedTranslucentColumn(drawcolumndata_t* dc);
void R_Draw2sMultiPatchColumn(drawcolumndata_t* dc);
void R_Draw2sMultiPatchTranslucentColumn(drawcolumndata_t* dc);
// column drawers which use buffered drawing with flush
void R_DrawColumnFlush(drawcolumndata_t* dc);
void R_DrawTranslucentColumnFlush(drawcolumndata_t* dc);
void R_DrawTranslatedColumnFlush(drawcolumndata_t* dc);
void R_DrawColumnShadowedFlush(drawcolumndata_t* dc);
void R_DrawTranslatedTranslucentColumnFlush(drawcolumndata_t* dc);
void R_Draw2sMultiPatchColumnFlush(drawcolumndata_t* dc);
void R_Draw2sMultiPatchTranslucentColumnFlush(drawcolumndata_t* dc);
void R_DrawFogColumn(drawcolumndata_t* dc);
void R_DrawColumnShadowed(drawcolumndata_t* dc);

View file

@ -21,6 +21,12 @@
// a has a constant z depth from top to bottom.
//
#include "r_draw.h"
#include <tracy/tracy/Tracy.hpp>
#include "r_draw_flush.cpp"
enum DrawColumnType
{
DC_BASIC = 0x0000,
@ -29,10 +35,11 @@ enum DrawColumnType
DC_BRIGHTMAP = 0x0004,
DC_HOLES = 0x0008,
DC_LIGHTLIST = 0x0010,
DC_DIRECT = 0x0020, // draw our columns directly to screen!
};
template<DrawColumnType Type>
static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col)
{
if constexpr (Type & DrawColumnType::DC_COLORMAP)
{
@ -45,7 +52,7 @@ static constexpr UINT8 R_GetColumnTranslated(drawcolumndata_t* dc, UINT8 col)
}
template<DrawColumnType Type>
static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit, UINT8 col)
{
col = R_GetColumnTranslated<Type>(dc, col);
@ -60,8 +67,9 @@ static constexpr UINT8 R_GetColumnBrightmapped(drawcolumndata_t* dc, UINT32 bit,
return dc->colormap[col];
}
// translucency is handled on flush side now!
template<DrawColumnType Type>
static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit, UINT8 col)
{
col = R_GetColumnBrightmapped<Type>(dc, bit, col);
@ -76,7 +84,7 @@ static constexpr UINT8 R_GetColumnTranslucent(drawcolumndata_t* dc, UINT8 *dest,
}
template<DrawColumnType Type>
static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT32 bit)
{
UINT8 col = dc->source[bit];
@ -88,7 +96,14 @@ static constexpr UINT8 R_DrawColumnPixel(drawcolumndata_t* dc, UINT8 *dest, UINT
}
}
return R_GetColumnTranslucent<Type>(dc, dest, bit, col);
if constexpr (Type & DrawColumnType::DC_DIRECT)
{ // if we dont buffer our columns, we need to handle translucency again
return R_GetColumnTranslucent<Type>(dc, dest, bit, col);
}
else
{
return R_GetColumnTranslated<Type>(dc, col);
}
}
/** \brief The R_DrawColumn function
@ -102,8 +117,18 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc)
const INT32 vidheight = vid.height;
const INT32 vidwidth = vid.width;
// leban 1/17/99:
// removed the + 1 here, adjusted the if test, and added an increment
// later. this helps a compiler pipeline a bit better. the x86
// assembler also does this.
count = dc->yh - dc->yl;
// leban 1/17/99:
// this case isn't executed too often. depending on how many instructions
// there are between here and the second if test below, this case could
// be moved down and might save instructions overall. since there are
// probably different wads that favor one way or the other, i'll leave
// this alone for now.
if (count < 0) // Zero length, column does not exceed a pixel.
{
return;
@ -174,6 +199,7 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc)
}
R_DrawColumnTemplate<NewType>(&dc_copy);
if (solid)
{
dc_copy.yl = bheight;
@ -209,7 +235,17 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc)
// Framebuffer destination address.
dest = R_Address(dc->x, dc->yl);
if constexpr (Type & DrawColumnType::DC_DIRECT)
dest = R_Address(dc->x, dc->yl);
else if constexpr ((Type & (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP))
== (DrawColumnType::DC_COLORMAP | DrawColumnType::DC_TRANSMAP))
dest = R_GetBufferColormapTrans(dc);
else if constexpr (Type & DrawColumnType::DC_TRANSMAP)
dest = R_GetBufferTrans(dc);
else if constexpr (Type & DrawColumnType::DC_COLORMAP)
dest = R_GetBufferColormap(dc);
else
dest = R_GetBufferOpaque(dc);
count++;
@ -333,13 +369,24 @@ static void R_DrawColumnTemplate(drawcolumndata_t *dc)
DEFINE_COLUMN_FUNC(name, flags) \
DEFINE_COLUMN_FUNC(name ## _Brightmap, flags|DC_BRIGHTMAP)
DEFINE_COLUMN_COMBO(R_DrawColumn, DC_BASIC)
DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_COLORMAP)
DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_LIGHTLIST)
DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_COLORMAP|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_HOLES)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_HOLES|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_DrawColumn, DC_DIRECT|DC_BASIC)
DEFINE_COLUMN_COMBO(R_DrawTranslucentColumn, DC_DIRECT|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_DrawTranslatedColumn, DC_DIRECT|DC_COLORMAP)
DEFINE_COLUMN_COMBO(R_DrawColumnShadowed, DC_DIRECT|DC_LIGHTLIST)
DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumn, DC_DIRECT|DC_COLORMAP|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumn, DC_DIRECT|DC_HOLES)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumn, DC_DIRECT|DC_HOLES|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_DrawColumnFlush, DC_BASIC)
DEFINE_COLUMN_COMBO(R_DrawTranslucentColumnFlush, DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_DrawTranslatedColumnFlush, DC_COLORMAP)
DEFINE_COLUMN_COMBO(R_DrawColumnShadowedFlush, DC_LIGHTLIST)
DEFINE_COLUMN_COMBO(R_DrawTranslatedTranslucentColumnFlush, DC_COLORMAP|DC_TRANSMAP)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchColumnFlush, DC_HOLES)
DEFINE_COLUMN_COMBO(R_Draw2sMultiPatchTranslucentColumnFlush, DC_HOLES|DC_TRANSMAP)
//skymyass
//DEFINE_COLUMN_FUNC(R_DrawSkyColumn, DC_SKY)
void R_DrawFogColumn(drawcolumndata_t *dc)
{

247
src/r_draw_flush.cpp Normal file
View file

@ -0,0 +1,247 @@
// SONIC ROBO BLAST 2 KART
//-----------------------------------------------------------------------------
// Copyright (C) 2025 by Kart Krew.
// Copyright (C) 2020 by Sonic Team Junior.
// Copyright (C) 2000 by DooM Legacy Team.
// Copyright (C) 1996 by id Software, Inc.
// Copyright (C) 1999 by Chi Hoang, Lee Killough, Jim Flynn, Rand Phares, Ty Halderman
// Copyright (C) 1999-2000 by Jess Haas, Nicolas Kalkhof, Colin Phipps, Florian Schulze
// Copyright (C) Copyright 2005, 2006 by Florian Schulze, Colin Phipps, Neil Stevens, Andrey Budko
// Copyright (C) 2013 by James Haley, Stephen McGranahan, et al.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file r_draw_flush.cpp
/// \brief Optimized quad column buffer code. By SoM.
/// \note no includes because this is included as part of r_draw.cpp
//
// R_GetFlushPixelTranslated
//
// Returns the buffered pixel, passed through the batch's translation table
// when the flush type is one of the colormapped kinds; otherwise unchanged.
//
template<ColumnFlushType Type>
FUNCINLINE static ATTRINLINE constexpr UINT8
R_GetFlushPixelTranslated(const drawcolumndata_temp_t *t_dc, UINT8 col)
{
	constexpr int remappedkinds =
		ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS;

	if constexpr (!(Type & remappedkinds))
	{
		return col;
	}
	else
	{
		return t_dc->translation[col];
	}
}
//
// R_GetFlushPixelTranslucent
//
// Applies the translation step, then, for the translucent flush kinds, looks
// the result up in the batch's translucency table against the pixel already
// at *dest. Non-translucent kinds return the translated pixel directly.
//
template<ColumnFlushType Type>
FUNCINLINE static ATTRINLINE constexpr UINT8
R_GetFlushPixelTranslucent(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, UINT8 col)
{
	constexpr int blendedkinds =
		ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS;

	const UINT8 fg = R_GetFlushPixelTranslated<Type>(t_dc, col);

	if constexpr (!(Type & blendedkinds))
	{
		return fg;
	}
	else
	{
		// haleyjd 09/11/04: use temptranmap here
		// table row is chosen by the new pixel, column by the existing one
		return t_dc->transmap[(fg << 8) + *dest];
	}
}
//
// R_DrawFlushPixel
//
// Reads one buffered pixel from *source and returns the value to store at
// *dest for the given flush type.
//
template<ColumnFlushType Type>
FUNCINLINE static ATTRINLINE constexpr UINT8
R_DrawFlushPixel(const drawcolumndata_temp_t *t_dc, UINT8 * restrict dest, const UINT8 * restrict source)
{
	return R_GetFlushPixelTranslucent<Type>(t_dc, dest, *source);
}
//
// R_FlushWhole
//
// Flushes the entire columns in the buffer, one at a time.
// This is used when a quad flush isn't possible.
//
template<ColumnFlushType Type>
static void R_FlushWhole(void)
{
UINT8 * restrict source;
UINT8 * restrict dest;
INT32 count, yl;
const INT32 stride = vid.width;
drawcolumndata_temp_t *t_dc = &temp_dc;
UINT8 *restrict buf = t_dc->buf;
while (--t_dc->x >= 0)
{
yl = t_dc->yl[t_dc->x];
source = &buf[t_dc->x + (yl << 3)];
dest = R_Address(t_dc->startx + t_dc->x, yl);
count = t_dc->yh[t_dc->x] - yl + 1;
while (--count >= 0)
{
*dest = R_DrawFlushPixel<Type>(t_dc, dest, source);
source += 8;
dest += stride;
}
}
}
//
// R_FlushHT
//
// Flushes the head and tail of columns in the buffer in
// preparation for a quad flush.
//
template<ColumnFlushType Type>
static void R_FlushHT(void)
{
UINT8 * restrict source;
UINT8 * restrict dest;
INT32 count, colnum = 0;
INT32 yl, yh;
const INT32 stride = vid.width;
const drawcolumndata_temp_t *t_dc = &temp_dc;
UINT8 *restrict buf = t_dc->buf;
while (colnum < 8)
{
yl = t_dc->yl[colnum];
yh = t_dc->yh[colnum];
// flush column head
if (yl < t_dc->commontop)
{
source = &buf[colnum + (yl << 3)];
dest = R_Address(t_dc->startx + colnum, yl);
count = t_dc->commontop - yl;
while (--count >= 0)
{
*dest = R_DrawFlushPixel<Type>(t_dc, dest, source);
source += 8;
dest += stride;
}
}
// flush column tail
if (yh > t_dc->commonbot)
{
source = &buf[colnum + ((t_dc->commonbot + 1) << 3)];
dest = R_Address(t_dc->startx + colnum, t_dc->commonbot + 1);
count = yh - t_dc->commonbot;
while (--count >= 0)
{
*dest = R_DrawFlushPixel<Type>(t_dc, dest, source);
source += 8;
dest += stride;
}
}
++colnum;
}
}
// Begin: Quad column flushing functions.
//
// R_FlushQuad
//
// Flushes the rows shared by all eight buffered columns (commontop through
// commonbot), eight horizontally adjacent pixels per row.
//
// Opaque batches copy each row as one 8-byte block. The copy goes through
// memcpy and advances by the byte stride: the destination starts at an
// arbitrary screen x, so it is not guaranteed to be 8-byte aligned, and
// stepping in 8-byte units (stride / 8) would land on the wrong row whenever
// the screen width is not a multiple of 8. All other batch types are
// resolved per pixel through R_DrawFlushPixel.
//
template<ColumnFlushType Type>
static void R_FlushQuad(void)
{
	const INT32 stride = vid.width;
	const drawcolumndata_temp_t *t_dc = &temp_dc;
	INT32 count = t_dc->commonbot - t_dc->commontop + 1;

	// first shared row, in the buffer and on the screen
	const UINT8 * restrict source = t_dc->buf + (t_dc->commontop << 3);
	UINT8 * restrict dest = R_Address(t_dc->startx, t_dc->commontop);

	if constexpr (Type & ColumnFlushType::FLUSH_OPAQUE)
	{
		while (--count >= 0)
		{
			// fixed-size copy; alignment- and aliasing-safe
			memcpy(dest, source, 8);
			source += 8;
			dest += stride;
		}
	}
	else
	{
		while (--count >= 0)
		{
			dest[0] = R_DrawFlushPixel<Type>(t_dc, &dest[0], &source[0]);
			dest[1] = R_DrawFlushPixel<Type>(t_dc, &dest[1], &source[1]);
			dest[2] = R_DrawFlushPixel<Type>(t_dc, &dest[2], &source[2]);
			dest[3] = R_DrawFlushPixel<Type>(t_dc, &dest[3], &source[3]);
			dest[4] = R_DrawFlushPixel<Type>(t_dc, &dest[4], &source[4]);
			dest[5] = R_DrawFlushPixel<Type>(t_dc, &dest[5], &source[5]);
			dest[6] = R_DrawFlushPixel<Type>(t_dc, &dest[6], &source[6]);
			dest[7] = R_DrawFlushPixel<Type>(t_dc, &dest[7], &source[7]);
			source += 8;
			dest += stride;
		}
	}
}
// haleyjd 09/12/04: split up R_GetBuffer into various different
// functions to minimize the number of branches and take advantage
// of as much precalculated information as possible.
//
// R_GetBuffer
//
// Reserves the next slot of the column buffer for the column described by dc
// and returns the address of that column's first pixel (row dc->yl) in the
// buffer; successive rows are 8 bytes apart.
//
// The pending batch is flushed first when it is full, when its flush type
// differs from Type, when dc->x is not the next adjacent screen column, or
// when the translucency / translation table differs from the one recorded
// for the batch. The tables are applied at flush time from the copy taken
// with the first column, so a column using a different table cannot share
// the batch.
//
template<ColumnFlushType Type>
static UINT8 *R_GetBuffer(drawcolumndata_t *dc)
{
	drawcolumndata_temp_t *t_dc = &temp_dc;

	if (t_dc->x)
	{
		// haleyjd: reordered predicates
		bool mustflush = (t_dc->x == 8
			|| t_dc->type != Type
			|| t_dc->x + t_dc->startx != dc->x);

		if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS))
		{
			mustflush = mustflush || (t_dc->transmap != dc->transmap);
		}

		if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS))
		{
			mustflush = mustflush || (t_dc->translation != dc->translation);
		}

		if (mustflush)
			R_FlushColumns();
	}

	if (!t_dc->x)
	{
		// first column of a new batch: record everything the flush needs
		++t_dc->x;
		t_dc->startx = dc->x;
		t_dc->yl[0] = t_dc->commontop = dc->yl;
		t_dc->yh[0] = t_dc->commonbot = dc->yh;
		t_dc->type = Type;

		if constexpr (Type & (ColumnFlushType::FLUSH_TRANS | ColumnFlushType::FLUSH_COLORMAP_TRANS))
		{
			t_dc->transmap = dc->transmap;
		}

		if constexpr (Type & (ColumnFlushType::FLUSH_COLORMAP | ColumnFlushType::FLUSH_COLORMAP_TRANS))
		{
			t_dc->translation = dc->translation;
		}

		R_FlushWholeColumns = R_FlushWhole<Type>;
		R_FlushHTColumns = R_FlushHT<Type>;
		R_FlushQuadColumn = R_FlushQuad<Type>;

		return &t_dc->buf[dc->yl << 3];
	}

	// append to the current batch and narrow the common row range
	t_dc->yl[t_dc->x] = dc->yl;
	t_dc->yh[t_dc->x] = dc->yh;

	if (dc->yl > t_dc->commontop)
		t_dc->commontop = dc->yl;
	if (dc->yh < t_dc->commonbot)
		t_dc->commonbot = dc->yh;

	return &t_dc->buf[(dc->yl << 3) + t_dc->x++];
}
// Defines a named, non-template wrapper around one R_GetBuffer instantiation:
// `name` is the wrapper to create, `flags` the ColumnFlushType it requests.
#define DEFINE_GETBUF_FUNC(name, flags) \
FUNCINLINE static ATTRINLINE UINT8 *name(drawcolumndata_t *dc) \
{ \
constexpr ColumnFlushType opt = static_cast<ColumnFlushType>(flags); \
return R_GetBuffer<opt>(dc); \
}
// One wrapper per flush type; the column drawer template picks among these
// according to its own translucency / translation flags.
DEFINE_GETBUF_FUNC(R_GetBufferOpaque, FLUSH_OPAQUE)
DEFINE_GETBUF_FUNC(R_GetBufferTrans, FLUSH_TRANS)
DEFINE_GETBUF_FUNC(R_GetBufferColormap, FLUSH_COLORMAP)
DEFINE_GETBUF_FUNC(R_GetBufferColormapTrans, FLUSH_COLORMAP_TRANS)

View file

@ -13,6 +13,9 @@
/// \brief span drawer functions
/// \note no includes because this is included as part of r_draw.cpp
#include "r_draw.h"
#include <tracy/tracy/Tracy.hpp>
using namespace libdivide;
// ==========================================================================
@ -39,7 +42,7 @@ enum DrawSpanType
};
template<DrawSpanType Type>
static constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col)
{
if constexpr (Type & DrawSpanType::DS_COLORMAP)
{
@ -52,7 +55,7 @@ static constexpr UINT8 R_GetSpanTranslated(drawspandata_t* ds, UINT8 col)
}
template<DrawSpanType Type>
static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap, UINT32 bit, UINT8 col)
{
col = R_GetSpanTranslated<Type>(ds, col);
@ -85,7 +88,7 @@ static constexpr UINT8 R_GetSpanBrightmapped(drawspandata_t* ds, UINT8 *colormap
}
template<DrawSpanType Type>
static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit, UINT8 col)
{
col = R_GetSpanBrightmapped<Type>(ds, colormap, bit, col);
@ -100,7 +103,7 @@ static constexpr UINT8 R_GetSpanTranslucent(drawspandata_t* ds, UINT8 *dsrc, UIN
}
template<DrawSpanType Type>
static constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit)
FUNCINLINE static ATTRINLINE constexpr UINT8 R_DrawSpanPixel(drawspandata_t* ds, UINT8 *dsrc, UINT8 *colormap, UINT32 bit)
{
UINT8 col = 0;
@ -197,14 +200,18 @@ static void R_DrawSpanTemplate(drawspandata_t* ds)
{
bit = (((UINT32)yposition >> ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift);
dest[i] = R_DrawSpanPixel<Type>(ds, &dsrc[i], ds->colormap, bit);
if constexpr (Type & DS_RIPPLE)
dest[i] = R_DrawSpanPixel<Type>(ds, &dsrc[i], ds->colormap, bit);
else
dest[i] = R_DrawSpanPixel<Type>(ds, &dest[i], ds->colormap, bit);
xposition += xstep;
yposition += ystep;
}
dest += 8;
dsrc += 8;
if constexpr (Type & DS_RIPPLE)
dsrc += 8;
count -= 8;
}
@ -213,10 +220,14 @@ static void R_DrawSpanTemplate(drawspandata_t* ds)
{
bit = (((UINT32)yposition >> ds->nflatyshift) & ds->nflatmask) | ((UINT32)xposition >> ds->nflatxshift);
*dest = R_DrawSpanPixel<Type>(ds, dsrc, ds->colormap, bit);
if constexpr (Type & DS_RIPPLE)
*dest = R_DrawSpanPixel<Type>(ds, dsrc, ds->colormap, bit);
else
*dest = R_DrawSpanPixel<Type>(ds, dest, ds->colormap, bit);
dest++;
dsrc++;
if constexpr (Type & DS_RIPPLE)
dsrc++;
xposition += xstep;
yposition += ystep;
@ -278,6 +289,8 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds)
const INT32 nflatmask = ds->nflatmask;
iz = ds->szp.z + ds->szp.y*(centery-ds->y) + ds->szp.x*(ds->x1-centerx);
uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx);
vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
if constexpr (!(Type & DS_SPRITE))
@ -292,9 +305,6 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds)
//CONS_Printf("tilted lighting %f to %f (foc %f)\n", lightstart, lightend, focallengthf);
}
uz = ds->sup.z + ds->sup.y*(centery-ds->y) + ds->sup.x*(ds->x1-centerx);
vz = ds->svp.z + ds->svp.y*(centery-ds->y) + ds->svp.x*(ds->x1-centerx);
colormap = ds->colormap;
if constexpr (Type & DS_RIPPLE)
@ -364,12 +374,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds)
colormap = ds->planezlight[tiltlighting[x1 + i]] + (ds->colormap - colormaps);
}
dest[i] = R_DrawSpanPixel<Type>(ds, &dsrc[i], colormap, bit);
if constexpr (Type & DS_RIPPLE)
dest[i] = R_DrawSpanPixel<Type>(ds, &dsrc[i], colormap, bit);
else
dest[i] = R_DrawSpanPixel<Type>(ds, &dest[i], colormap, bit);
}
ds->x1 += SPANSIZE;
dest += SPANSIZE;
dsrc += SPANSIZE;
if constexpr (Type & DS_RIPPLE)
dsrc += SPANSIZE;
startu = endu;
startv = endv;
width -= SPANSIZE;
@ -386,7 +400,11 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds)
{
colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps);
}
*dest = R_DrawSpanPixel<Type>(ds, dsrc, colormap, bit);
if constexpr (Type & DS_RIPPLE)
*dest = R_DrawSpanPixel<Type>(ds, dsrc, colormap, bit);
else
*dest = R_DrawSpanPixel<Type>(ds, dest, colormap, bit);
ds->x1++;
}
else
@ -412,10 +430,16 @@ static void R_DrawTiltedSpanTemplate(drawspandata_t* ds)
{
colormap = ds->planezlight[tiltlighting[ds->x1]] + (ds->colormap - colormaps);
}
*dest = R_DrawSpanPixel<Type>(ds, dsrc, colormap, bit);
if constexpr (Type & DS_RIPPLE)
*dest = R_DrawSpanPixel<Type>(ds, dsrc, colormap, bit);
else
*dest = R_DrawSpanPixel<Type>(ds, dest, colormap, bit);
dest++;
if constexpr (Type & DS_RIPPLE)
dsrc++;
ds->x1++;
dsrc++;
u += stepu;
v += stepv;
}
@ -768,18 +792,16 @@ void R_DrawFogSpan(drawspandata_t* ds)
{
ZoneScoped;
INT32 count = ds->x2 - ds->x1 + 1;
UINT8 *colormap;
UINT8 *dest;
const INT32 vidwidth = vid.width;
size_t count;
colormap = ds->colormap;
dest = R_Address(ds->x1, ds->y);
count = ds->x2 - ds->x1 + 1;
while (count >= 4)
{
dest[0] = colormap[dest[0]];
@ -787,7 +809,7 @@ void R_DrawFogSpan(drawspandata_t* ds)
dest[2] = colormap[dest[2]];
dest[3] = colormap[dest[3]];
dest += 4;
dest += 4;
count -= 4;
}

View file

@ -1538,8 +1538,11 @@ void R_RenderPlayerView(void)
R_ClearSegTables();
R_ClearPlanes();
R_ClearSprites();
R_SetColumnContext(COLUMNCONTEXT_FLUSH);
R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, false);
R_ClipSprites(drawsegs, NULL);
R_ResetColumnBuffer();
R_SetColumnContext(COLUMNCONTEXT_DIRECT);
R_DrawSkyPlanes();
R_DrawPlanes();
R_DrawMasked(masks, nummasks);
@ -1577,6 +1580,7 @@ void R_RenderPlayerView(void)
NetUpdate();
// The head node is the last node output.
R_SetColumnContext(COLUMNCONTEXT_FLUSH);
ps_numbspcalls = ps_numpolyobjects = ps_numdrawnodes = 0;
ps_bsptime = I_GetPreciseTime();
R_RenderViewpoint(&masks[nummasks - 1], nummasks - 1, true);
@ -1585,6 +1589,7 @@ void R_RenderPlayerView(void)
ps_sw_spritecliptime = I_GetPreciseTime();
R_ClipSprites(drawsegs, NULL);
ps_sw_spritecliptime = I_GetPreciseTime() - ps_sw_spritecliptime;
R_ResetColumnBuffer();
// Add skybox portals caused by sky visplanes.
if (skybox && !oldsky)
@ -1629,6 +1634,8 @@ void R_RenderPlayerView(void)
R_ClipSprites(ds_p - (masks[nummasks - 1].drawsegs[1] - masks[nummasks - 1].drawsegs[0]), portal);
R_ResetColumnBuffer();
Portal_Remove(portal);
}
@ -1639,6 +1646,7 @@ void R_RenderPlayerView(void)
}
ps_sw_portaltime = I_GetPreciseTime() - ps_sw_portaltime;
R_SetColumnContext(COLUMNCONTEXT_DIRECT);
ps_sw_planetime = I_GetPreciseTime();
R_DrawSkyPlanes();
R_DrawPlanes();

View file

@ -3602,6 +3602,11 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal)
drawseg_t* ds;
INT32 i;
if (visspritecount - clippedvissprites <= 0)
{
return;
}
// e6y
// Reducing of cache misses in the following R_DrawSprite()
// Makes sense for scenes with huge amount of drawsegs.
@ -3611,11 +3616,6 @@ void R_ClipSprites(drawseg_t* dsstart, portal_t* portal)
drawsegs_xranges[i].count = 0;
}
if (visspritecount - clippedvissprites <= 0)
{
return;
}
if (drawsegs_xrange_size < maxdrawsegs)
{
// haleyjd: fix reallocation to track 2x size

View file

@ -98,19 +98,33 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// =========================================================================
void SCR_SetDrawFuncs(void)
void SCR_SetDrawFuncs(enum columncontext_e _columncontext)
{
//
// setup the right draw routines
//
colfuncs[BASEDRAWFUNC] = R_DrawColumn;
colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn;
colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn;
colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed;
colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn;
if (_columncontext == COLUMNCONTEXT_FLUSH)
{
colfuncs[BASEDRAWFUNC] = R_DrawColumnFlush;
colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumnFlush;
colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumnFlush;
colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowedFlush;
colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumnFlush;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumnFlush;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumnFlush;
}
else
{
colfuncs[BASEDRAWFUNC] = R_DrawColumn;
colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn;
colfuncs[COLDRAWFUNC_TRANS] = R_DrawTranslatedColumn;
colfuncs[COLDRAWFUNC_SHADOWED] = R_DrawColumnShadowed;
colfuncs[COLDRAWFUNC_TRANSTRANS] = R_DrawTranslatedTranslucentColumn;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn;
}
colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn;
colfuncs[COLDRAWFUNC_DROPSHADOW] = R_DrawDropShadowColumn;
@ -215,6 +229,17 @@ void SCR_SetDrawFuncs(void)
R_SetSpanFunc(BASEDRAWFUNC, false, false);
}
// Switches between buffered column drawing and drawing columns directly to
// the screen.
//
// The sky "plane" drawer cannot use the buffer system because it is
// multithreaded (supporting that would add a lot of complexity for very
// little gain).
// The masked drawing step draws things in a very particular order, which
// would cause a lot of flushes to the screen and therefore a large overhead,
// so masked things are drawn directly to the screen instead.
//
// Stores the requested context in `columncontext` and installs the matching
// set of column drawers through SCR_SetDrawFuncs.
void R_SetColumnContext(enum columncontext_e _columncontext)
{
columncontext = _columncontext;
SCR_SetDrawFuncs(_columncontext); // set our column drawers
}
void R_SetColumnFunc(size_t id, boolean brightmapped)
{
I_Assert(id < COLDRAWFUNC_MAX);
@ -336,7 +361,7 @@ void SCR_SetMode(void)
V_SetPalette(0);
SCR_SetDrawFuncs();
SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT);
// Shoot! The screen texture was flushed!
Y_CleanupScreenBuffer();

View file

@ -112,10 +112,8 @@ void SCR_Startup(void);
// Change video mode, only at the start of a refresh.
void SCR_SetMode(void);
// Set drawer functions for Software
void SCR_SetDrawFuncs(void);
// Set current column / span drawers
//void R_SetColumnContext(enum columncontext_e _columncontext); // declared in r_draw!
void R_SetColumnFunc(size_t id, boolean brightmapped);
void R_SetSpanFunc(size_t id, boolean npo2, boolean brightmapped);
boolean R_SetSpanFuncFlat(size_t id); // flat color

View file

@ -67,6 +67,7 @@
#include "../console.h"
#include "../command.h"
#include "../r_main.h"
#include "../r_draw.h"
#include "../lua_hook.h"
#include "sdlmain.h"
#include "../i_system.h"
@ -1485,7 +1486,7 @@ boolean VID_CheckRenderer(void)
if (rendermode == render_soft)
{
vid.rowbytes = vid.width;
SCR_SetDrawFuncs();
SCR_SetDrawFuncs(COLUMNCONTEXT_DIRECT);
}
#ifdef HWRENDER
else if (rendermode == render_opengl && rendererchanged)

View file

@ -32,15 +32,6 @@
#include "doomstat.h"
#include "r_fps.h"
#if defined(__SSE__) || defined(__AVX__)
#ifdef _WIN32
#include <malloc.h>
#define aligned_alloc(align, size) _aligned_malloc(size, align)
#endif
#include <immintrin.h>
#endif
#ifdef HWRENDER
#include "hardware/hw_glob.h"
#endif

View file

@ -19,6 +19,18 @@
#include "r_defs.h"
#include "r_main.h"
#if defined(__SSE__)
#ifdef _WIN32
#include <malloc.h>
#define aligned_alloc(align, size) _aligned_malloc(size, align)
#define aligned_free(ptr) _aligned_free(ptr)
#else
#define aligned_free(ptr) free(ptr)
#endif
#include <immintrin.h>
#endif
// SRB2Kart
#include "hu_stuff.h" // fonts