Make affine patch drawing use dynamic clipping

The whole thing can actually draw in Software now!
This commit is contained in:
yamamama 2026-02-14 21:26:27 -05:00
parent bd1e0fc013
commit 40d9ce7e71
3 changed files with 148 additions and 13 deletions

View file

@ -1094,11 +1094,19 @@ struct spritedef_t
// the dimensions of an affine patch.
struct affine_bounding_t
{
INT32 l; // Leftmost bounding
INT32 r; // Rightmost bounding
// NOTE(review): V_GetAffineBounds never writes this member; it uses the
// transform's ox/oy as the pivot instead. Confirm who is expected to fill it in.
vector2_t pivot;
INT32 t; // Highest bounding
INT32 b; // Lowest bounding
// Differences between the "pivot" of the bound and each corner position.
// As computed by V_GetAffineBounds, with the pivot shifted down by FRACBITS:
//   xleft  = pivot x - l      xright = r - pivot x
//   yup    = pivot y - t      ydown  = b - pivot y
INT32 xleft, yup, xright, ydown;
// General length between each bounding position:
// xlen = abs(r - l), ylen = abs(b - t).
INT32 xlen, ylen;
INT32 l; // Leftmost bounding
INT32 r; // Rightmost bounding
INT32 t; // Highest bounding
INT32 b; // Lowest bounding
};
// an affine transformation matrix. maps screen coordinates to texture coordinates

View file

@ -810,6 +810,106 @@ static int sortcoords(const void *a, const void *b)
}
*/
// Computes the screen-space bounding box of an affine-transformed patch.
//
// transform maps screen coordinates to texel coordinates, so its 2x2 part is
// inverted here to find where the four corners of the patch land on screen.
// The box around those corners is then clamped against the current resolution.
//
// transform: screen-to-texel matrix terms (a, b, c, d) and origin (ox, oy).
// patch:     supplies the texel-space width and height.
// out:       receives l/r/t/b plus the derived xlen/ylen/xleft/xright/yup/ydown.
//            out->pivot is not touched.
// Returns out.
//
// If the matrix cannot be inverted (zero determinant), every bound and every
// derived field is set to zero so the caller always gets a fully written,
// empty box rather than INT32_MAX/INT32_MIN sentinels.
//
// The inversion follows HWR_GetAffinePatch, converted to fixed point
// (kudos to Generic). Background on inverting this kind of matrix:
// https://nigeltao.github.io/blog/2021/inverting-3x2-affine-transformation-matrix.html
affine_bounding_t* V_GetAffineBounds(const affine_t* transform,
	patch_t* patch,
	affine_bounding_t* out)
{
	const fixed_t fa = transform->a / vid.dup;
	const fixed_t fb = transform->b / vid.dup;
	const fixed_t fc = transform->c / vid.dup;
	const fixed_t fd = transform->d / vid.dup;
	const fixed_t fx = transform->ox;
	const fixed_t fy = transform->oy;

	// Determinant of the 2x2 part: (fa * fd) - (fb * fc).
	const fixed_t determinant = FixedMul(fa, fd) - FixedMul(fb, fc);

	if (determinant == 0)
	{
		// Not invertible: there are no corner positions to measure.
		// Report an empty box instead of leaving the output half-initialised.
		out->l = out->r = out->t = out->b = 0;
		out->xlen = out->ylen = 0;
		out->xleft = out->xright = out->yup = out->ydown = 0;
		return out;
	}

	// Inverse of the 2x2 part (texel -> screen).
	const fixed_t ba = FixedDiv(fd, determinant);
	const fixed_t bb = FixedDiv(-fb, determinant);
	const fixed_t bc = FixedDiv(-fc, determinant);
	const fixed_t bd = FixedDiv(fa, determinant);

	const fixed_t pw = patch->width * FRACUNIT;
	const fixed_t ph = patch->height * FRACUNIT;

	// Each corner is inverse * (texel - origin) + origin. The per-edge products
	// are shared between corners, so compute each one once. The operand forms
	// match the original expressions exactly, so rounding is unchanged.
	const fixed_t left_x   = FixedMul(ba, -fx);
	const fixed_t right_x  = FixedMul(ba, pw - fx);
	const fixed_t left_y   = FixedMul(bc, -fx);
	const fixed_t right_y  = FixedMul(bc, pw - fx);
	const fixed_t top_x    = FixedMul(bb, fy);      // subtracted below
	const fixed_t bottom_x = FixedMul(bb, ph - fy);
	const fixed_t top_y    = FixedMul(bd, fy);      // subtracted below
	const fixed_t bottom_y = FixedMul(bd, ph - fy);

	// Corner layout (texel space):
	// 3--2
	// |  |
	// 0--1
	const vector2_t v[4] = {
		[3] = { .x = (left_x  - top_x)    + fx, .y = (left_y  - top_y)    + fy },
		[2] = { .x = (right_x - top_x)    + fx, .y = (right_y - top_y)    + fy },
		[0] = { .x = (left_x  + bottom_x) + fx, .y = (left_y  + bottom_y) + fy },
		[1] = { .x = (right_x + bottom_x) + fx, .y = (right_y + bottom_y) + fy },
	};

	// Clamp limits for the current resolution.
	const INT32 vw = vid.width, vh = vid.height;
	const INT32 leftmost = (BASEVIDWIDTH - vw) / 2;
	const INT32 uppermost = (BASEVIDHEIGHT - vh) / 2;

	// Start from an inverted box so the first corner always replaces it.
	out->l = INT32_MAX;
	out->t = INT32_MAX;
	out->r = INT32_MIN;
	out->b = INT32_MIN;

	// Fold one corner into the running bounds, clamping as we go.
	// Kept unrolled on purpose; with only four corners there is nothing to gain from a loop.
#define BOUNDCHECK(i) \
	do { \
		out->l = max(leftmost, min(v[i].x >> FRACBITS, out->l)); \
		out->r = min(vw, max(v[i].x >> FRACBITS, out->r)); \
		out->t = max(uppermost, min(v[i].y >> FRACBITS, out->t)); \
		out->b = min(vh, max(v[i].y >> FRACBITS, out->b)); \
	} while (0)

	BOUNDCHECK(0);
	BOUNDCHECK(1);
	BOUNDCHECK(2);
	BOUNDCHECK(3);

#undef BOUNDCHECK

	// Derived values the draw loops reference.
	// NOTE(review): when every corner lies past the same screen edge, the
	// clamped r can end up below l (or b below t); abs() then still reports a
	// positive length. Confirm whether the drawer depends on that.
	out->xlen = abs(out->r - out->l);
	out->ylen = abs(out->b - out->t);

	out->xleft = (fx >> FRACBITS) - out->l;
	out->xright = out->r - (fx >> FRACBITS);
	out->yup = (fy >> FRACBITS) - out->t;
	out->ydown = out->b - (fy >> FRACBITS);

	return out;
}
void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap)
{
if (rendermode == render_none)
@ -858,6 +958,9 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
}
#endif
affine_bounding_t bounds = {0};
V_GetAffineBounds(transform, patch, &bounds);
Patch_GenerateFlat(patch, 0);
const UINT16 *src = patch->flats[0];
if (src == NULL)
@ -867,8 +970,8 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
const fixed_t b = transform->b / dup;
const fixed_t c = transform->c / dup;
const fixed_t d = transform->d / dup;
const fixed_t cx = transform->ox;
const fixed_t cy = transform->oy;
fixed_t cx = transform->ox;
fixed_t cy = transform->oy;
const INT32 scrwidth = vid.width;
const INT32 pw = patch->width, ph = patch->height;
@ -892,20 +995,43 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
fixed_t y4 = FixedMul(bc, pw*FRACUNIT - cx) + FixedMul(bd, ph*FRACUNIT - cy) + cy;
*/
UINT8 * const destbase = vid.screens[0] + y * vid.width + x;
INT32 dx = 0, dy = 0;
INT32 ydiff = (bounds.yup - (transform->oy >> FRACBITS));
INT32 xdiff = (bounds.xleft - (transform->ox >> FRACBITS));
for (dy = 0; dy < ph; dy++)
// Get the clipping values, since that can vary per-resolution.
INT32 yclip = min(y - ydiff, 0) * -1;
INT32 xclip = min(x - xdiff, 0) * -1;
ydiff -= yclip;
xdiff -= xclip;
// Get the leftmost and uppermost bounds of the current resolution.
const INT32 vw = vid.width, vh = vid.height;
INT32 yy = CLAMP(y - ydiff, 0, vh);
INT32 xx = CLAMP(x - xdiff, 0, vw);
INT32 xmax = max(bounds.xlen - xclip, pw), ymax = max(bounds.ylen - yclip, ph);
// Offset our X and Y positions by the bounding differences.
fixed_t cxx = cx + (xdiff * FRACUNIT);
fixed_t cyy = cy + (ydiff * FRACUNIT);
intptr_t dest_y = (intptr_t)(vid.screens[0]) + yy * vw;
UINT8 * const destbase = (UINT8 * const)(dest_y + xx);
INT32 dx = 0, dy = 0;
for (dy = 0; dy < ymax; dy++)
{
// yoinked from NovaSquirrel's mode 7 preview
// ...which is in turn yoinked from Mesen's S-PPU code
// i can't do matrix math to save my life :face_holding_back_tears:
// (m7xofs and m7yofs are already factored in by destbase)
fixed_t ux = FixedMul(a, -cx) + FixedMul(b, -cy) + b*dy + cx;
fixed_t uy = FixedMul(c, -cx) + FixedMul(d, -cy) + d*dy + cy;
UINT8 *dest = destbase + dy * scrwidth;
fixed_t ux = (FixedMul(a, -cxx) + FixedMul(b, -cyy) + b*(dy) + cx);
fixed_t uy = FixedMul(c, -cxx) + FixedMul(d, -cyy) + d*(dy) + cy;
UINT8 *dest = (destbase) + (dy) * scrwidth;
for (dx = 0; dx < pw; dx++, dest++)
for (dx = 0; dx < xmax; dx++, dest++)
{
const INT32 srcx = ux >> FRACBITS;
const INT32 srcy = uy >> FRACBITS;

View file

@ -218,6 +218,7 @@ void V_ClearClipRect(void);
void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);
void V_DrawCroppedPatch(fixed_t x, fixed_t y, fixed_t pscale, INT32 scrn, patch_t *patch, fixed_t sx, fixed_t sy, fixed_t w, fixed_t h);
affine_bounding_t *V_GetAffineBounds(const affine_t *transform, patch_t *patch, affine_bounding_t *out);
void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap);
void V_DrawRotatedPatch(fixed_t x, fixed_t y, angle_t angle, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);