diff --git a/src/r_defs.h b/src/r_defs.h
index dc24fda3d..27b89a930 100644
--- a/src/r_defs.h
+++ b/src/r_defs.h
@@ -1094,11 +1094,19 @@ struct spritedef_t
 // the dimensions of an affine patch.
 struct affine_bounding_t
 {
-	INT32 l; // Leftmost bounding
-	INT32 r; // Rightmost bounding
+	vector2_t pivot; // NOTE(review): V_GetAffineBounds does not write this; confirm who owns it
 
-	INT32 t; // Highest bounding
-	INT32 b; // Lowest bounding
+	// Distances, in whole pixels, from the transform origin (ox, oy) to each bound.
+	INT32 xleft, yup, xright, ydown;
+
+	// Distance between opposite bounds: |r - l| and |b - t|.
+	INT32 xlen, ylen;
+
+	INT32 l; // Leftmost bounding
+	INT32 r; // Rightmost bounding
+
+	INT32 t; // Highest bounding
+	INT32 b; // Lowest bounding
 };
 
 // an affine transformation matrix. maps screen coordinates to texture coordinates
diff --git a/src/v_video.c b/src/v_video.c
index ec1f477ab..18fd336a4 100644
--- a/src/v_video.c
+++ b/src/v_video.c
@@ -810,6 +810,112 @@ static int sortcoords(const void *a, const void *b)
 }
 */
 
+// Computes the on-screen bounding box of `patch` once `transform` is applied.
+//
+// `transform` maps screen coordinates to texel coordinates, so the matrix is
+// inverted here to find where each corner of the patch lands on the screen.
+// A decent chunk of this is HWR_GetAffinePatch converted to fixed point;
+// kudos to Generic for that code.
+//
+// transform: affine matrix (a, b, c, d) plus origin (ox, oy).
+// patch:     only its width and height are read.
+// out:       receives l/r/t/b plus the derived xlen/ylen/xleft/xright/yup/ydown
+//            fields; `pivot` is not written.
+//
+// Returns `out`. If the matrix is singular (zero determinant), l/t/r/b keep
+// their INT32_MAX / INT32_MIN dummy values and the derived fields are zeroed.
+affine_bounding_t *V_GetAffineBounds(const affine_t *transform, patch_t *patch, affine_bounding_t *out)
+{
+	// First, set the bounds to dummy values that any real vertex will replace.
+	out->l = INT32_MAX;
+	out->t = INT32_MAX;
+	out->r = INT32_MIN;
+	out->b = INT32_MIN;
+
+	fixed_t fa = (transform->a / vid.dup);
+	fixed_t fd = (transform->d / vid.dup);
+	fixed_t fc = (transform->c / vid.dup);
+	fixed_t fb = (transform->b / vid.dup);
+	fixed_t fx = (transform->ox);
+	fixed_t fy = (transform->oy);
+
+	// The matrix passed to this function maps screen coordinates to texel
+	// coordinates, but we need to know where each corner (vertex) of the patch
+	// ends up on the screen, i.e. texel coordinates mapped to screen coordinates.
+	// That means inverting the matrix; the method is explained here:
+	// https://nigeltao.github.io/blog/2021/inverting-3x2-affine-transformation-matrix.html
+
+	// Determinant of the 2x2 part, grouped as (fa * fd) - (fb * fc).
+	fixed_t determinant = FixedMul(fa, fd) - FixedMul(fb, fc);
+
+	if (determinant == 0)
+	{
+		// Singular matrix, nothing to invert. Zero the derived fields so a
+		// caller never reads stale values out of an uninitialised struct.
+		out->xlen = out->ylen = 0;
+		out->xleft = out->xright = 0;
+		out->yup = out->ydown = 0;
+		return out;
+	}
+
+	fixed_t ba = FixedDiv(fd, determinant);
+	fixed_t bb = FixedDiv(-fb, determinant);
+	fixed_t bc = FixedDiv(-fc, determinant);
+	fixed_t bd = FixedDiv(fa, determinant);
+
+	// set the polygon vertices to the right positions
+	// 3--2
+	// | /|
+	// |/ |
+	// 0--1
+	fixed_t pw = (patch->width << FRACBITS);
+	fixed_t ph = (patch->height << FRACBITS);
+	vector2_t v[4] = {
+		[3] = { .x = (FixedMul(ba, -fx) - FixedMul(bb, fy)) + fx,
+				.y = (FixedMul(bc, -fx) - FixedMul(bd, fy)) + fy },
+		[2] = { .x = (FixedMul(ba, (pw - fx)) - FixedMul(bb, fy)) + fx,
+				.y = (FixedMul(bc, (pw - fx)) - FixedMul(bd, fy)) + fy },
+		[0] = { .x = (FixedMul(ba, -fx) + FixedMul(bb, (ph - fy))) + fx,
+				.y = (FixedMul(bc, -fx) + FixedMul(bd, (ph - fy))) + fy },
+		[1] = { .x = (FixedMul(ba, (pw - fx)) + FixedMul(bb, (ph - fy))) + fx,
+				.y = (FixedMul(bc, (pw - fx)) + FixedMul(bd, (ph - fy))) + fy },
+	};
+
+	// Clamp limits: left/top come from the gap between the base resolution and
+	// the current one, right/bottom from the current resolution itself.
+	const INT32 vw = vid.width, vh = vid.height;
+	const INT32 leftmost = ((BASEVIDWIDTH - vw) / 2), uppermost = ((BASEVIDHEIGHT - vh) / 2);
+
+	// Fold all four vertices into the output bounds. There are only four, so the
+	// checks are written out by hand instead of looped.
+#define BOUNDCHECK(i) \
+	do { \
+		out->l = max(leftmost, min(v[(i)].x >> FRACBITS, out->l)); \
+		out->r = min(vw, max(v[(i)].x >> FRACBITS, out->r)); \
+		out->t = max(uppermost, min(v[(i)].y >> FRACBITS, out->t)); \
+		out->b = min(vh, max(v[(i)].y >> FRACBITS, out->b)); \
+	} while (0)
+
+	BOUNDCHECK(0);
+	BOUNDCHECK(1);
+	BOUNDCHECK(2);
+	BOUNDCHECK(3);
+
+#undef BOUNDCHECK
+
+	// Cool, we have our bounds. Get the diffs so the loops have something to reference.
+	out->xlen = abs(out->r - out->l);
+	out->ylen = abs(out->b - out->t);
+
+	out->xleft = (fx >> FRACBITS) - out->l;
+	out->xright = out->r - (fx >> FRACBITS);
+
+	out->yup = (fy >> FRACBITS) - out->t;
+	out->ydown = out->b - (fy >> FRACBITS);
+
+	return out;
+}
+
 void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap)
 {
 	if (rendermode == render_none)
@@ -858,6 +964,9 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	}
 #endif
 
+	affine_bounding_t bounds = {0};
+	V_GetAffineBounds(transform, patch, &bounds);
+
 	Patch_GenerateFlat(patch, 0);
 	const UINT16 *src = patch->flats[0];
 	if (src == NULL)
@@ -867,8 +976,8 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	const fixed_t b = transform->b / dup;
 	const fixed_t c = transform->c / dup;
 	const fixed_t d = transform->d / dup;
-	const fixed_t cx = transform->ox;
-	const fixed_t cy = transform->oy;
+	fixed_t cx = transform->ox;
+	fixed_t cy = transform->oy;
 
 	const INT32 scrwidth = vid.width;
 	const INT32 pw = patch->width, ph = patch->height;
@@ -892,20 +1001,43 @@ void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 sc
 	fixed_t y4 = FixedMul(bc, pw*FRACUNIT - cx) + FixedMul(bd, ph*FRACUNIT - cy) + cy;
 	*/
 
-	UINT8 * const destbase = vid.screens[0] + y * vid.width + x;
-	INT32 dx = 0, dy = 0;
+	INT32 ydiff = (bounds.yup - (transform->oy >> FRACBITS));
+	INT32 xdiff = (bounds.xleft - (transform->ox >> FRACBITS));
 
-	for (dy = 0; dy < ph; dy++)
+	// Get the clipping values, since that can vary per-resolution.
+	INT32 yclip = min(y - ydiff, 0) * -1;
+	INT32 xclip = min(x - xdiff, 0) * -1;
+
+	ydiff -= yclip;
+	xdiff -= xclip;
+
+	// Get the leftmost and uppermost bounds of the current resolution.
+	const INT32 vw = vid.width, vh = vid.height;
+
+	INT32 yy = CLAMP(y - ydiff, 0, vh);
+	INT32 xx = CLAMP(x - xdiff, 0, vw);
+
+	INT32 xmax = max(bounds.xlen - xclip, pw), ymax = max(bounds.ylen - yclip, ph);
+
+	// Offset our X and Y positions by the bounding differences.
+	fixed_t cxx = cx + (xdiff * FRACUNIT);
+	fixed_t cyy = cy + (ydiff * FRACUNIT);
+
+	intptr_t dest_y = (intptr_t)(vid.screens[0]) + yy * vw;
+
+	UINT8 * const destbase = (UINT8 * const)(dest_y + xx);
+	INT32 dx = 0, dy = 0;
+	for (dy = 0; dy < ymax; dy++)
 	{
 		// yoinked from NovaSquirrel's mode 7 preview
 		// ...which is in turn yoinked from Mesen's S-PPU code
 		// i can't do matrix math to save my life :face_holding_back_tears:
 		// (m7xofs and m7yofs are already factored in by destbase)
-		fixed_t ux = FixedMul(a, -cx) + FixedMul(b, -cy) + b*dy + cx;
-		fixed_t uy = FixedMul(c, -cx) + FixedMul(d, -cy) + d*dy + cy;
-		UINT8 *dest = destbase + dy * scrwidth;
+		fixed_t ux = (FixedMul(a, -cxx) + FixedMul(b, -cyy) + b*(dy) + cx);
+		fixed_t uy = FixedMul(c, -cxx) + FixedMul(d, -cyy) + d*(dy) + cy;
+		UINT8 *dest = (destbase) + (dy) * scrwidth;
 
-		for (dx = 0; dx < pw; dx++, dest++)
+		for (dx = 0; dx < xmax; dx++, dest++)
 		{
 			const INT32 srcx = ux >> FRACBITS;
 			const INT32 srcy = uy >> FRACBITS;
diff --git a/src/v_video.h b/src/v_video.h
index a49d1b622..fb40f6afc 100644
--- a/src/v_video.h
+++ b/src/v_video.h
@@ -218,6 +218,7 @@ void V_ClearClipRect(void);
 
 void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);
 void V_DrawCroppedPatch(fixed_t x, fixed_t y, fixed_t pscale, INT32 scrn, patch_t *patch, fixed_t sx, fixed_t sy, fixed_t w, fixed_t h);
+affine_bounding_t *V_GetAffineBounds(const affine_t *transform, patch_t *patch, affine_bounding_t *out);
 void V_DrawAffinePatch(fixed_t x, fixed_t y, const affine_t *transform, INT32 scrn, patch_t *patch, const UINT8 *colormap);
 void V_DrawRotatedPatch(fixed_t x, fixed_t y, angle_t angle, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap);
 