Make affine patch drawing use dynamic clipping

The whole thing can actually draw in Software now!
2026-02-14 21:26:27 -05:00 · 2026-02-14 21:26:27 -05:00 · 40d9ce7e71
commit 40d9ce7e71
parent bd1e0fc013
3 changed files with 148 additions and 13 deletions
--- a/src/r_defs.h
+++ b/src/r_defs.h
@ -1094,11 +1094,19 @@ struct spritedef_t
 // the dimensions of an affine patch.
 struct affine_bounding_t
 {
-	INT32 l; // Leftmost bounding
-	INT32 r; // Rightmost bounding
+	vector2_t pivot; // NOTE(review): V_GetAffineBounds never assigns this; confirm intended use

-	INT32 t; // Highest bounding
-	INT32 b; // Lowest bounding
+	// Whole-pixel distances from the transform origin (ox, oy) to each edge of the bounds.
+	INT32 xleft, yup, xright, ydown;
+
+	// Width (|r - l|) and height (|b - t|) of the bounds, in whole pixels.
+	INT32 xlen, ylen;
+
+	INT32 l;  // Leftmost bound (whole pixels)
+	INT32 r;  // Rightmost bound (whole pixels)
+
+	INT32 t;  // Highest bound (whole pixels)
+	INT32 b;  // Lowest bound (whole pixels)
 };

 // an affine transformation matrix. maps screen coordinates to texture coordinates
--- a/src/v_video.c
+++ b/src/v_video.c
@ -810,6 +810,106 @@ static int sortcoords(const void *a, const void *b)
 }
 */

+affine_bounding_t* V_GetAffineBounds(const affine_t* transform,
+				     patch_t* patch,
+				     affine_bounding_t* out)
+{
+	// Computes the bounding box of the patch's four corners after mapping them through
+	// the inverse of `transform`, clamps it to the screen limits, stores it in `out`
+	// (plus the lengths and origin-relative extents), and returns `out`.
+	// The vertex math is a fixed-point port of HWR_GetAffinePatch (kudos to Generic).
+	// NOTE(review): out->pivot is never written by this function.
+	// Start from sentinel bounds so the first min/max against a real vertex always wins.
+	out->l = INT32_MAX;
+	out->t = INT32_MAX;
+	out->r = INT32_MIN;
+	out->b = INT32_MIN;
+
+	fixed_t fa = (transform->a / vid.dup);
+	fixed_t fd = (transform->d / vid.dup);
+	fixed_t fc = (transform->c / vid.dup);
+	fixed_t fb = (transform->b / vid.dup);
+	fixed_t fx = (transform->ox);
+	fixed_t fy = (transform->oy);
+
+	// The matrix passed to this function maps screen coordinates to texel coordinates,
+	// but to find the bounds we need to know where each corner (or vertex) of the
+	// patch ends up on the screen.
+	// That means mapping texel coordinates to screen coordinates,
+	// which means inverting the matrix.
+	// For the 2x2 linear part [a b; c d] the inverse is
+	//   (1 / det) * [d -b; -c a], where det = a*d - b*c.
+	// The origin (fx, fy) is applied separately when the corners are built below.
+	// Reference:
+	// https://nigeltao.github.io/blog/2021/inverting-3x2-affine-transformation-matrix.html
+
+	// Determinant of the 2x2 part.
+	// Per the original author's note, the compiled assembly groups this as two
+	// separate products followed by a subtraction, so the same grouping is used here:
+	// (fa * fd) - (fb * fc)
+	fixed_t determinant = FixedMul(fa, fd) - FixedMul(fb, fc);
+
+	if (determinant == 0) // singular matrix: no inverse, only the sentinel l/t/r/b are set
+		return out;
+
+	fixed_t ba = FixedDiv(fd, determinant);
+	fixed_t bb = FixedDiv(-fb, determinant);
+	fixed_t bc = FixedDiv(-fc, determinant);
+	fixed_t bd = FixedDiv(fa, determinant);
+
+	// Map each patch corner through the inverse matrix; vertices are numbered:
+	//  3--2
+	//  | /|
+	//  |/ |
+	//  0--1
+	fixed_t pw = (patch->width << FRACBITS);
+	fixed_t ph = (patch->height << FRACBITS);
+	vector2_t v[4] = {
+	    [3] = {.x = (FixedMul(ba, -fx) - FixedMul(bb, fy)) + fx,
+		        .y = (FixedMul(bc, -fx) - FixedMul(bd, fy)) + fy },
+		[2] = { .x = (FixedMul(ba, (pw - fx)) - FixedMul(bb, fy)) + fx,
+		        .y = (FixedMul(bc, (pw - fx)) - FixedMul(bd, fy)) + fy },
+		[0] = { .x = (FixedMul(ba, -fx) + FixedMul(bb, (ph - fy))) + fx,
+		        .y = (FixedMul(bc, -fx) + FixedMul(bd, (ph - fy))) + fy },
+		[1] = { .x = (FixedMul(ba, (pw - fx)) + FixedMul(bb, (ph - fy))) + fx,
+		        .y = (FixedMul(bc, (pw - fx)) + FixedMul(bd, (ph - fy))) + fy },
+	};
+
+	// Clamp limits: right/bottom = screen size, left/top = half of (base size - screen size).
+	const INT32 vw = vid.width, vh = vid.height;
+	const INT32 leftmost = ((BASEVIDWIDTH - vw) / 2), uppermost = ((BASEVIDHEIGHT - vh) / 2);
+
+	// ...okay, now comb through all four vertices and set the output bounds based on this.
+	// "Why not a loop?" According to SM64 programming wizard Kaze Emanuar,
+	// loops take more processing time. If we can help it, it's better to just cut corners.
+#define BOUNDCHECK(i)                                     \
+	{                                                 \
+		out->l = max(leftmost, min(v[i].x >> FRACBITS, out->l)); \
+		out->r = min(vw, max(v[i].x >> FRACBITS, out->r)); \
+		out->t = max(uppermost, min(v[i].y >> FRACBITS, out->t)); \
+		out->b = min(vh, max(v[i].y >> FRACBITS, out->b)); \
+	}
+
+	BOUNDCHECK(0);
+	BOUNDCHECK(1);
+	BOUNDCHECK(2);
+	BOUNDCHECK(3);
+
+#undef BOUNDCHECK
+
+	// Bounds are final. Derive the lengths and origin-relative extents for the draw loops.
+	out->xlen = abs(out->r - out->l);
+	out->ylen = abs(out->b - out->t);
+
+	out->xleft = (fx >> FRACBITS) - out->l;
+	out->xright = out->r - (fx >> FRACBITS);
+
+	out->yup = (fy >> FRACBITS) - out->t;
+	out->ydown = out->b - (fy >> FRACBITS);
+
+	return out;
+}
+
 void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap)
 {
 	if (rendermode == render_none)
@ -858,6 +958,9 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	}
 #endif

+	affine_bounding_t bounds = {0};
+	V_GetAffineBounds(transform, patch, &bounds);
+
 	Patch_GenerateFlat(patch, 0);
 	const UINT16 *src = patch->flats[0];
 	if (src == NULL)
@ -867,8 +970,8 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	const fixed_t b = transform->b / dup;
 	const fixed_t c = transform->c / dup;
 	const fixed_t d = transform->d / dup;
-	const fixed_t cx = transform->ox;
-	const fixed_t cy = transform->oy;
+	fixed_t cx = transform->ox;
+	fixed_t cy = transform->oy;

 	const INT32 scrwidth = vid.width;
 	const INT32 pw = patch->width, ph = patch->height;
@ -892,20 +995,43 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	fixed_t y4 = FixedMul(bc, pw*FRACUNIT - cx) + FixedMul(bd, ph*FRACUNIT - cy) + cy;
 	*/

-	UINT8 * const destbase = vid.screens[0] + y * vid.width + x;
-	INT32 dx = 0, dy = 0;
+	INT32 ydiff = (bounds.yup - (transform->oy >> FRACBITS));
+	INT32 xdiff = (bounds.xleft - (transform->ox >> FRACBITS));

-	for (dy = 0; dy < ph; dy++)
+	// Get the clipping values, since that can vary per-resolution.
+	INT32 yclip = min(y - ydiff, 0) * -1;
+	INT32 xclip = min(x - xdiff, 0) * -1;
+
+	ydiff -= yclip;
+	xdiff -= xclip;
+
+	// Cache the screen width and height of the current resolution.
+	const INT32 vw = vid.width, vh = vid.height;
+
+	INT32 yy = CLAMP(y - ydiff, 0, vh);
+	INT32 xx = CLAMP(x - xdiff, 0, vw);
+
+	INT32 xmax = max(bounds.xlen - xclip, pw), ymax = max(bounds.ylen - yclip, ph);
+
+	// Offset our X and Y positions by the bounding differences.
+	fixed_t cxx = cx + (xdiff * FRACUNIT);
+	fixed_t cyy = cy + (ydiff * FRACUNIT);
+
+	intptr_t dest_y = (intptr_t)(vid.screens[0]) + yy * vw;
+
+	UINT8 * const destbase = (UINT8 * const)(dest_y + xx);
+	INT32 dx = 0, dy = 0;
+	for (dy = 0; dy < ymax; dy++)
 	{
 		// yoinked from NovaSquirrel's mode 7 preview
 		// ...which is in turn yoinked from Mesen's S-PPU code
 		// i can't do matrix math to save my life :face_holding_back_tears:
 		// (m7xofs and m7yofs are already factored in by destbase)
-		fixed_t ux = FixedMul(a, -cx) + FixedMul(b, -cy) + b*dy + cx;
-		fixed_t uy = FixedMul(c, -cx) + FixedMul(d, -cy) + d*dy + cy;
-		UINT8 *dest = destbase + dy * scrwidth;
+		fixed_t ux = (FixedMul(a, -cxx) + FixedMul(b, -cyy) + b*(dy) + cx);
+		fixed_t uy = FixedMul(c, -cxx) + FixedMul(d, -cyy) + d*(dy) + cy;
+		UINT8 *dest = (destbase) + (dy) * scrwidth;

-		for (dx = 0; dx < pw; dx++, dest++)
+		for (dx = 0; dx < xmax; dx++, dest++)
 		{
 			const INT32 srcx = ux >> FRACBITS;
 			const INT32 srcy = uy >> FRACBITS;
--- a/src/v_video.h
+++ b/src/v_video.h
@ -218,6 +218,7 @@ void V_ClearClipRect(void);
 void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);
 void V_DrawCroppedPatch(fixed_t x, fixed_t y, fixed_t pscale, INT32 scrn, patch_t *patch, fixed_t sx, fixed_t sy, fixed_t w, fixed_t h);

+affine_bounding_t *V_GetAffineBounds(const affine_t *transform, patch_t *patch, affine_bounding_t *out);
 void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap);
 void V_DrawRotatedPatch(fixed_t x, fixed_t y, angle_t angle, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);