From e92362f36c7ddd3fc284a5ae155be8678bc810ba Mon Sep 17 00:00:00 2001 From: GenericHeroGuy Date: Tue, 9 Sep 2025 22:37:42 +0200 Subject: [PATCH 1/3] (WIP) Rewrite screen capture code for GIFs Downscaling is now performed during screen capture instead of GIF writing This may or may not make GIF recording faster? Maybe it would be faster if I knew how OpenGL worked... Regardless, the m*th in the GIF code is gone, fixing the high/odd res issues Also includes a little deduped hardware code, and GL 4.3 debugging code --- src/dummy/i_video.c | 2 +- src/f_wipe.c | 4 +- src/hardware/hw_draw.c | 30 +----- src/hardware/hw_drv.h | 4 +- src/hardware/hw_main.h | 3 +- src/hardware/r_opengl/r_opengl.c | 167 ++++++++++++++++++++++--------- src/i_video.h | 2 +- src/m_anigif.c | 162 +++++++++++------------------- src/m_misc.cpp | 39 ++++---- src/sdl/hwsym_sdl.c | 2 +- src/sdl/i_video.cpp | 16 ++- 11 files changed, 222 insertions(+), 209 deletions(-) diff --git a/src/dummy/i_video.c b/src/dummy/i_video.c index 3b0a12a32..914f56aa9 100644 --- a/src/dummy/i_video.c +++ b/src/dummy/i_video.c @@ -68,7 +68,7 @@ void I_WaitVBL(INT32 count) (void)count; } -void I_ReadScreen(UINT8 *scr) +void I_ReadScreen(UINT8 * restrict scr, INT32 scale) { (void)scr; } diff --git a/src/f_wipe.c b/src/f_wipe.c index 9dc414237..61f8303bd 100644 --- a/src/f_wipe.c +++ b/src/f_wipe.c @@ -316,7 +316,7 @@ void F_WipeStartScreen(void) } #endif wipe_scr_start = screens[3]; - I_ReadScreen(wipe_scr_start); + I_ReadScreen(wipe_scr_start, 1); #endif } @@ -333,7 +333,7 @@ void F_WipeEndScreen(void) } #endif wipe_scr_end = screens[4]; - I_ReadScreen(wipe_scr_end); + I_ReadScreen(wipe_scr_end, 1); V_DrawBlock(0, 0, 0, vid.width, vid.height, wipe_scr_start); #endif } diff --git a/src/hardware/hw_draw.c b/src/hardware/hw_draw.c index 0c42fd201..8c0ad0345 100644 --- a/src/hardware/hw_draw.c +++ b/src/hardware/hw_draw.c @@ -21,7 +21,6 @@ #include "hw_glob.h" #include "hw_drv.h" -#include "../m_misc.h" //FIL_WriteFile() 
#include "../r_draw.h" //viewborderlump #include "../r_main.h" #include "../w_wad.h" @@ -1252,41 +1251,18 @@ static inline boolean saveTGA(const char *file_name, void *buffer, // screen shot // -------------------------------------------------------------------------- -UINT8 *HWR_GetScreenshot(void) +UINT8 *HWR_GetScreenshot(INT32 scale) { static UINT8 *buf = NULL; - buf = realloc(buf, vid.width * vid.height * 3); + buf = realloc(buf, (vid.width/scale)*(vid.height/scale)*3); if (!buf) return NULL; // returns 24bit 888 RGB - HWD.pfnReadRect(0, 0, vid.width, vid.height, vid.width * 3, (void *)buf); + HWD.pfnReadScreenFinalTexture(buf, scale); return buf; } -boolean HWR_Screenshot(const char *pathname) -{ - boolean ret; - UINT8 *buf = malloc(vid.width * vid.height * 3 * sizeof (*buf)); - - if (!buf) - { - CONS_Debug(DBG_RENDER, "HWR_Screenshot: Failed to allocate memory\n"); - return false; - } - - // returns 24bit 888 RGB - HWD.pfnReadRect(0, 0, vid.width, vid.height, vid.width * 3, (void *)buf); - -#ifdef USE_PNG - ret = M_SavePNG(pathname, buf, vid.width, vid.height, NULL); -#else - ret = saveTGA(pathname, buf, vid.width, vid.height); -#endif - free(buf); - return ret; -} - #endif //HWRENDER diff --git a/src/hardware/hw_drv.h b/src/hardware/hw_drv.h index 5ec09450d..63d78e549 100644 --- a/src/hardware/hw_drv.h +++ b/src/hardware/hw_drv.h @@ -44,7 +44,7 @@ EXPORT void HWRAPI(ClearBuffer) (FBOOLEAN ColorMask, FBOOLEAN DepthMask, FRGBAFl EXPORT void HWRAPI(SetTexture) (GLMipmap_t *TexInfo); EXPORT void HWRAPI(UpdateTexture) (GLMipmap_t *TexInfo); EXPORT void HWRAPI(DeleteTexture) (GLMipmap_t *TexInfo); -EXPORT void HWRAPI(ReadRect) (INT32 x, INT32 y, INT32 width, INT32 height, INT32 dst_stride, UINT16 *dst_data); +EXPORT void HWRAPI(ReadScreenFinalTexture) (UINT8 * restrict dest, INT32 scale); EXPORT void HWRAPI(GClipRect) (INT32 minx, INT32 miny, INT32 maxx, INT32 maxy, float nearclip); EXPORT void HWRAPI(ClearMipMapCache) (void); @@ -99,7 +99,7 @@ struct hwdriver_s 
SetTexture pfnSetTexture; UpdateTexture pfnUpdateTexture; DeleteTexture pfnDeleteTexture; - ReadRect pfnReadRect; + ReadScreenFinalTexture pfnReadScreenFinalTexture; GClipRect pfnGClipRect; ClearMipMapCache pfnClearMipMapCache; SetSpecialState pfnSetSpecialState;//Hurdler: added for backward compatibility diff --git a/src/hardware/hw_main.h b/src/hardware/hw_main.h index 83503176c..9fa695a61 100644 --- a/src/hardware/hw_main.h +++ b/src/hardware/hw_main.h @@ -54,8 +54,7 @@ void HWR_DrawConsoleFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 color, UINT32 void HWR_DrawDiag(INT32 x, INT32 y, INT32 wh, INT32 color); void HWR_DrawPic(INT32 x,INT32 y,lumpnum_t lumpnum); -UINT8 *HWR_GetScreenshot(void); -boolean HWR_Screenshot(const char *pathname); +UINT8 *HWR_GetScreenshot(INT32 scale); void HWR_AddCommands(void); void HWR_AddSessionCommands(void); diff --git a/src/hardware/r_opengl/r_opengl.c b/src/hardware/r_opengl/r_opengl.c index 144cf00c2..ea5f95987 100644 --- a/src/hardware/r_opengl/r_opengl.c +++ b/src/hardware/r_opengl/r_opengl.c @@ -25,6 +25,9 @@ #include "r_opengl.h" #include "r_vbo.h" +// requires GL 4.3 +//#define GLDEBUGMESSAGE + #if defined (HWRENDER) && !defined (NOROPENGL) struct GLRGBAFloat @@ -265,6 +268,7 @@ static void GL_MSG_Error(const char *format, ...) /* Raster functions */ #define pglPixelStorei glPixelStorei #define pglReadPixels glReadPixels +#define pglGetTexImage glGetTexImage /* Texture mapping */ #define pglTexEnvi glTexEnvi @@ -281,6 +285,10 @@ static void GL_MSG_Error(const char *format, ...) 
#define pglCopyTexImage2D glCopyTexImage2D #define pglCopyTexSubImage2D glCopyTexSubImage2D +#ifdef GLDEBUGMESSAGE +#define pglDebugMessageCallback glDebugMessageCallback +#endif + #else //!STATIC_OPENGL /* 1.0 functions */ @@ -373,6 +381,8 @@ typedef void (APIENTRY * PFNglPixelStorei) (GLenum pname, GLint param); static PFNglPixelStorei pglPixelStorei; typedef void (APIENTRY * PFNglReadPixels) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels); static PFNglReadPixels pglReadPixels; +typedef void (APIENTRY * PFNglGetTexImage) (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels); +static PFNglGetTexImage pglGetTexImage; /* Texture mapping */ typedef void (APIENTRY * PFNglTexEnvi) (GLenum target, GLenum pname, GLint param); @@ -423,6 +433,11 @@ static PFNglDeleteBuffers pglDeleteBuffers; typedef void (APIENTRY * PFNglBlendEquation) (GLenum mode); static PFNglBlendEquation pglBlendEquation; +#ifdef GLDEBUGMESSAGE +typedef void (APIENTRY * PFNglDebugMessageCallback) (void (APIENTRY *DEBUGPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam), const void *userParam); +static PFNglDebugMessageCallback pglDebugMessageCallback; +#endif + /* 1.2 Parms */ /* GL_CLAMP_TO_EDGE_EXT */ @@ -511,6 +526,7 @@ boolean SetupGLfunc(void) GETOPENGLFUNC(pglPixelStorei, glPixelStorei) GETOPENGLFUNC(pglReadPixels, glReadPixels) + GETOPENGLFUNC(pglGetTexImage, glGetTexImage) GETOPENGLFUNC(pglTexEnvi, glTexEnvi) GETOPENGLFUNC(pglTexParameteri, glTexParameteri) @@ -983,6 +999,10 @@ void SetupGLFunc4(void) *(void**)&pglUniform3fv = GetGLFunc("glUniform3fv"); *(void**)&pglGetUniformLocation = GetGLFunc("glGetUniformLocation"); #endif + +#ifdef GLDEBUGMESSAGE + *(void**)&pglDebugMessageCallback = GetGLFunc("glDebugMessageCallback"); +#endif } EXPORT boolean HWRAPI(CompileShaders) (void) @@ -1247,6 +1267,7 @@ static void GLPerspective(GLfloat fovy, GLfloat aspect) 
pglMultMatrixf(&m[0][0]); } +#if 0 static void GLProject(GLfloat objX, GLfloat objY, GLfloat objZ, GLfloat* winX, GLfloat* winY, GLfloat* winZ) { @@ -1286,6 +1307,7 @@ static void GLProject(GLfloat objX, GLfloat objY, GLfloat objZ, *winY=in[1]; *winZ=in[2]; } +#endif // -----------------+ // SetModelView : @@ -1335,6 +1357,50 @@ void SetModelView(GLint w, GLint h) pglGetFloatv(GL_PROJECTION_MATRIX, projMatrix); } +#ifdef GLDEBUGMESSAGE +static void APIENTRY DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam) +{ + const char *debugsource, *debugtype; + + switch (source) + { +#define S(s) case GL_DEBUG_SOURCE_##s: debugsource = #s; break + S(API); + S(WINDOW_SYSTEM); + S(SHADER_COMPILER); + S(THIRD_PARTY); + S(APPLICATION); + S(OTHER); +#undef S + default: + debugsource = "unknown"; + break; + } + + switch (type) + { +#define S(s) case GL_DEBUG_TYPE_##s: debugtype = #s; break + S(ERROR); + S(DEPRECATED_BEHAVIOR); + S(UNDEFINED_BEHAVIOR); + S(PORTABILITY); + S(PERFORMANCE); + S(MARKER); + S(PUSH_GROUP); + S(POP_GROUP); + S(OTHER); +#undef S + default: + debugtype = "unknown"; + break; + } + + alerttype_t level = severity == GL_DEBUG_SEVERITY_HIGH ? CONS_ERROR + : severity == GL_DEBUG_SEVERITY_MEDIUM ? CONS_WARNING + : CONS_NOTICE; + CONS_Alert(level, "OpenGL (%s) (%s): %s\n", debugsource, debugtype, message); +} +#endif // -----------------+ // SetStates : Set permanent states @@ -1395,6 +1461,12 @@ void SetStates(void) pglLoadIdentity(); pglScalef(1.0f, 1.0f, -1.0f); pglGetFloatv(GL_MODELVIEW_MATRIX, modelMatrix); // added for new coronas' code (without depth buffer) + +#ifdef GLDEBUGMESSAGE + pglEnable(GL_DEBUG_OUTPUT); + pglEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + pglDebugMessageCallback(&DebugMessage, NULL); +#endif } @@ -1514,58 +1586,53 @@ EXPORT void HWRAPI(ClearMipMapCache) (void) Flush(); } +// nope, definitely not faster. 
software has the same flickering/z-fighting bugs with laggy GIF recording anyway +//#define GETTEXIMAGE -// -----------------+ -// ReadRect : Read a rectangle region of the truecolor framebuffer -// : store pixels as 16bit 565 RGB -// Returns : 16bit 565 RGB pixel array stored in dst_data -// -----------------+ -EXPORT void HWRAPI(ReadRect) (INT32 x, INT32 y, INT32 width, INT32 height, - INT32 dst_stride, UINT16 * dst_data) +#ifdef GETTEXIMAGE +#define TEXWIDTH texsize +#else +#define TEXWIDTH screen_width +#endif + +// -----------------------+ +// ReadScreenFinalTexture : Reads out the final screen texture +// Returns : 24bit RGB pixel array stored in dest +// -----------------------+ +EXPORT void HWRAPI(ReadScreenFinalTexture) (UINT8 * restrict dest, INT32 scale) { - INT32 i; - // GL_DBG_Printf ("ReadRect()\n"); - if (dst_stride == width*3) - { - GLubyte*top = (GLvoid*)dst_data, *bottom = top + dst_stride * (height - 1); - GLubyte *row = malloc(dst_stride); - if (!row) return; - pglPixelStorei(GL_PACK_ALIGNMENT, 1); - pglReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, dst_data); - pglPixelStorei(GL_UNPACK_ALIGNMENT, 1); - for(i = 0; i < height/2; i++) - { - memcpy(row, top, dst_stride); - memcpy(top, bottom, dst_stride); - memcpy(bottom, row, dst_stride); - top += dst_stride; - bottom -= dst_stride; - } - free(row); - } - else - { - INT32 j; - GLubyte *image = malloc(width*height*3*sizeof (*image)); - if (!image) return; - pglPixelStorei(GL_PACK_ALIGNMENT, 1); - pglReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, image); - pglPixelStorei(GL_UNPACK_ALIGNMENT, 1); - for (i = height-1; i >= 0; i--) - { - for (j = 0; j < width; j++) - { - dst_data[(height-1-i)*width+j] = - (UINT16)( - ((image[(i*width+j)*3]>>3)<<11) | - ((image[(i*width+j)*3+1]>>2)<<5) | - ((image[(i*width+j)*3+2]>>3))); - } - } - free(image); - } -} + const INT32 stride = (screen_width/scale)*3; + INT32 scanlines = screen_height; + GLubyte * restrict image; +#ifdef GETTEXIMAGE + 
image = malloc(texsize*texsize*3); // oof + pglBindTexture(GL_TEXTURE_2D, finalScreenTexture); + pglGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, image); + tex_downloaded = finalScreenTexture; +#else + image = malloc(screen_width*screen_height*3); + pglPixelStorei(GL_PACK_ALIGNMENT, 1); + pglReadPixels(0, 0, screen_width, screen_height, GL_RGB, GL_UNSIGNED_BYTE, image); +#endif + + // TODO the downscaling happens in the screen capture code now, + // yet we're still doing this on the CPU? sheesh... + // this is where actual knowledge of OpenGL would've come in handy + image += scanlines*TEXWIDTH*3; + while ((scanlines -= scale) >= 0) + { + image -= TEXWIDTH*scale*3; + if (scale == 1) + memcpy(dest, image, stride); + else for (size_t i = 0; i < stride; i += 3) + memcpy(dest + i, image + i*scale, 3); + dest += stride; + } + + // ...yet still, restrict doesn't make the inner loop any faster + free(image - ((screen_height % scale) * TEXWIDTH*3)); +} // -----------------+ // GClipRect : Defines the 2D hardware clipping window @@ -2347,6 +2414,7 @@ static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD // this test is added for new coronas' code (without depth buffer) // I think I should do a separate function for drawing coronas, so it will be a little faster +#if 0 if (PolyFlags & PF_Corona) // check to see if we need to draw the corona { FUINT i; @@ -2407,6 +2475,7 @@ static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD c[3] = (unsigned char)(alpha * 255); pglColor4ubv(c); } +#endif Shader_SetUniforms(pSurf, &poly, &tint, &fade); } diff --git a/src/i_video.h b/src/i_video.h index 4bebe7c81..b35e2c997 100644 --- a/src/i_video.h +++ b/src/i_video.h @@ -141,7 +141,7 @@ void I_WaitVBL(INT32 count); \return void */ -void I_ReadScreen(UINT8 *scr); +void I_ReadScreen(UINT8 * restrict scr, INT32 scale); /** \brief Start disk icon */ diff --git a/src/m_anigif.c b/src/m_anigif.c index 75d055d73..ab145f8e3 100644 --- 
a/src/m_anigif.c +++ b/src/m_anigif.c @@ -59,12 +59,20 @@ static UINT8 gif_writeover = 0; typedef struct { - void *pixels; + UINT8 *pixels; size_t size; boolean owns_pixels; } gif_screen_t; static gif_screen_t gif_screens[2]; +// SCReen BUFfer (obviously) +// --- +static UINT8 *scrbuf_pos; +static UINT8 *scrbuf_linebegin; +static UINT8 *scrbuf_lineend; +static UINT8 *scrbuf_writeend; +static INT16 scrbuf_downscaleamt = 1; +static UINT16 scrbuf_width, scrbuf_height; // OPTIMIZE gif output // --- @@ -79,13 +87,13 @@ static gif_screen_t gif_screens[2]; static UINT8 GIF_optimizecmprow(const UINT8 *dst, const UINT8 *src, INT32 row, INT32 *last, INT32 *left, INT32 *right) { - const UINT8 *dp = dst + (vid.width * row); - const UINT8 *sp = src + (vid.width * row); + const UINT8 *dp = dst + (scrbuf_width * row); + const UINT8 *sp = src + (scrbuf_width * row); const UINT8 *dtmp, *stmp; UINT8 doleft = 1, doright = 1; INT32 i = 0; - if (!memcmp(sp, dp, vid.width)) + if (!memcmp(sp, dp, scrbuf_width)) return 0; // unchanged. *last = row; @@ -112,14 +120,14 @@ static UINT8 GIF_optimizecmprow(const UINT8 *dst, const UINT8 *src, INT32 row, } // right side - i = vid.width - 1; - if (*right == vid.width - 1) // edge reached + i = scrbuf_width - 1; + if (*right == scrbuf_width - 1) // edge reached doright = 0; else if (*right >= 0) // right set, non-end-of-width { dtmp = dp + *right + 1; stmp = sp + *right + 1; - if (!memcmp(stmp, dtmp, vid.width - (*right + 1))) + if (!memcmp(stmp, dtmp, scrbuf_width - (*right + 1))) doright = 0; // right side not changed } while (doright) @@ -149,7 +157,7 @@ static UINT8 GIF_optimizecmprow(const UINT8 *dst, const UINT8 *src, INT32 row, static void GIF_optimizeregion(const UINT8 *dst, const UINT8 *src, INT32 *x, INT32 *y, INT32 *w, INT32 *h) { - INT32 st = 0, sb = vid.height - 1; // work from both directions + INT32 st = 0, sb = scrbuf_height - 1; // work from both directions INT32 firstchg_t = -1, firstchg_b = -1; // store first changed row. 
INT32 lastchg_t = -1, lastchg_b = -1; // Store last row... just in case INT32 lmpix = -1, rmpix = -1; // store left and rightmost change @@ -160,7 +168,7 @@ static void GIF_optimizeregion(const UINT8 *dst, const UINT8 *src, if (!stopt) { if (GIF_optimizecmprow(dst, src, st++, &lastchg_t, &lmpix, &rmpix) - && lmpix == 0 && rmpix == vid.width - 1) + && lmpix == 0 && rmpix == scrbuf_width - 1) stopt = 1; if (firstchg_t < 0 && lastchg_t >= 0) firstchg_t = lastchg_t; @@ -168,7 +176,7 @@ static void GIF_optimizeregion(const UINT8 *dst, const UINT8 *src, if (!stopb) { if (GIF_optimizecmprow(dst, src, sb--, &lastchg_b, &lmpix, &rmpix) - && lmpix == 0 && rmpix == vid.width - 1) + && lmpix == 0 && rmpix == scrbuf_width - 1) stopb = 1; if (firstchg_b < 0 && lastchg_b >= 0) firstchg_b = lastchg_b; @@ -238,18 +246,6 @@ static void GIF_bwrwrite(UINT32 idata) } } - - -// SCReen BUFfer (obviously) -// --- -static UINT8 *scrbuf_pos; -static UINT8 *scrbuf_linebegin; -static UINT8 *scrbuf_lineend; -static UINT8 *scrbuf_writeend; -static INT16 scrbuf_downscaleamt = 1; - - - // GIF LZW algorithm // --- #define GIFLZW_TABLECLR 0x100 @@ -375,10 +371,10 @@ static void GIF_lzw(void) GIF_bwrwrite(GIFLZW_TABLECLR); GIF_prepareLZW(); } - if ((scrbuf_pos += scrbuf_downscaleamt) >= scrbuf_lineend) + if (++scrbuf_pos >= scrbuf_lineend) { - scrbuf_lineend += (vid.width * scrbuf_downscaleamt); - scrbuf_linebegin += (vid.width * scrbuf_downscaleamt); + scrbuf_lineend += scrbuf_width; + scrbuf_linebegin += scrbuf_width; scrbuf_pos = scrbuf_linebegin; } // Just a bit of overflow prevention @@ -459,7 +455,6 @@ static void GIF_headwrite(void) { UINT8 *gifhead = Z_Malloc(800, PU_STATIC, NULL); UINT8 *p = gifhead; - UINT16 rwidth, rheight; if (!gif_out) return; @@ -470,18 +465,18 @@ static void GIF_headwrite(void) if (gif_downscale) { scrbuf_downscaleamt = vid.dupx; - rwidth = (vid.width / scrbuf_downscaleamt); - rheight = (vid.height / scrbuf_downscaleamt); + scrbuf_width = (vid.width / 
scrbuf_downscaleamt); + scrbuf_height = (vid.height / scrbuf_downscaleamt); } else { scrbuf_downscaleamt = 1; - rwidth = vid.width; - rheight = vid.height; + scrbuf_width = vid.width; + scrbuf_height = vid.height; } - WRITEUINT16(p, rwidth); - WRITEUINT16(p, rheight); + WRITEUINT16(p, scrbuf_width); + WRITEUINT16(p, scrbuf_height); // colors, aspect, etc WRITEUINT8(p, 0xF7); // (0xF7 = 1111 0111) @@ -515,28 +510,16 @@ static size_t gifframe_size = 8192; #ifdef HWRENDER static colorlookup_t gif_colorlookup; -static void GIF_rgbconvert(UINT8 *linear, UINT8 *scr) +static void GIF_rgbconvert(UINT8 * restrict linear, UINT8 * restrict scr) { - UINT8 r, g, b; - size_t src, dest; - int x, y; - InitColorLUT(&gif_colorlookup, (gif_localcolortable) ? gif_framepalette : gif_headerpalette, true); - for (x = 0; x < vid.width; x += scrbuf_downscaleamt) + for (INT32 i = 0; i < scrbuf_width*scrbuf_height; i++) { - for (y = 0; y < vid.height; y += scrbuf_downscaleamt) - { - dest = y*vid.width + x; - src = dest*3; - - r = (UINT8)linear[src]; - g = (UINT8)linear[src + 1]; - b = (UINT8)linear[src + 2]; - scr[dest] = GetColorLUTDirect(&gif_colorlookup, r, g, b); - src += (3 * scrbuf_downscaleamt); - dest += scrbuf_downscaleamt; - } + UINT8 r = *linear++; + UINT8 g = *linear++; + UINT8 b = *linear++; + *scr++ = GetColorLUTDirect(&gif_colorlookup, r, g, b); } } #endif @@ -569,48 +552,32 @@ static void GIF_framewrite(void) else palchanged = false; + // blit to temp screen + if (rendermode == render_soft) + I_ReadScreen(movie_screen, scrbuf_downscaleamt); +#ifdef HWRENDER + else if (rendermode == render_opengl) + { + // save previous frame (software already does this elsewhere...?) 
+ memcpy(base_screen, movie_screen, scrbuf_width * scrbuf_height); + + UINT8 *linear = HWR_GetScreenshot(scrbuf_downscaleamt); + GIF_rgbconvert(linear, movie_screen); + //free(linear); // Allocated 'statically', no need to free now + } +#endif + // Compare image data (for optimizing GIF) // If the palette has changed, the entire frame is considered to be different. if (gif_optimize && gif_frames > 0 && (!palchanged)) { - // before blit movie_screen points to last frame, cur_screen points to this frame - UINT8 *cur_screen = base_screen; - GIF_optimizeregion(cur_screen, movie_screen, &blitx, &blity, &blitw, &blith); - - // blit to temp screen - if (rendermode == render_soft) - I_ReadScreen(movie_screen); -#ifdef HWRENDER - else if (rendermode == render_opengl) - { - UINT8 *linear = HWR_GetScreenshot(); - GIF_rgbconvert(linear, movie_screen); - //free(linear); // Allocated 'statically', no need to free now - } -#endif + GIF_optimizeregion(base_screen, movie_screen, &blitx, &blity, &blitw, &blith); } else { blitx = blity = 0; - blitw = vid.width; - blith = vid.height; - -#ifdef HWRENDER - // Copy the current OpenGL frame into the base screen - if (rendermode == render_opengl) - { - UINT8 *linear = HWR_GetScreenshot(); - GIF_rgbconvert(linear, base_screen); - //free(linear); // Allocated 'statically', no need to free now - } -#endif - - // Copy the first frame into the movie screen - // OpenGL already does the same above. - if (gif_frames == 0 && rendermode == render_soft) - I_ReadScreen(movie_screen); - - movie_screen = base_screen; + blitw = scrbuf_width; + blith = scrbuf_height; } // screen regions are handled in GIF_lzw @@ -654,20 +621,11 @@ static void GIF_framewrite(void) WRITEUINT8(p, 0); WRITEUINT8(p, 0); // end of GCE - if (scrbuf_downscaleamt > 1) - { - // Ensure our downscaled blitx/y starts and ends on a pixel. 
- blitx -= (blitx % scrbuf_downscaleamt); - blity -= (blity % scrbuf_downscaleamt); - blitw = ((blitw + (scrbuf_downscaleamt - 1)) / scrbuf_downscaleamt) * scrbuf_downscaleamt; - blith = ((blith + (scrbuf_downscaleamt - 1)) / scrbuf_downscaleamt) * scrbuf_downscaleamt; - } - WRITEUINT8(p, 0x2C); - WRITEUINT16(p, (UINT16)(blitx / scrbuf_downscaleamt)); - WRITEUINT16(p, (UINT16)(blity / scrbuf_downscaleamt)); - WRITEUINT16(p, (UINT16)(blitw / scrbuf_downscaleamt)); - WRITEUINT16(p, (UINT16)(blith / scrbuf_downscaleamt)); + WRITEUINT16(p, (UINT16)blitx); + WRITEUINT16(p, (UINT16)blity); + WRITEUINT16(p, (UINT16)blitw); + WRITEUINT16(p, (UINT16)blith); if (!gif_localcolortable) WRITEUINT8(p, 0); // no local table of colors @@ -683,8 +641,8 @@ static void GIF_framewrite(void) WRITEUINT8(p, 0); // They are equal, no Local Color Table needed. } - scrbuf_pos = movie_screen + blitx + (blity * vid.width); - scrbuf_writeend = scrbuf_pos + (blitw - 1) + ((blith - 1) * vid.width); + scrbuf_pos = movie_screen + blitx + (blity * scrbuf_width); + scrbuf_writeend = scrbuf_pos + (blitw - 1) + ((blith - 1) * scrbuf_width); if (!gifbwr_buf) gifbwr_buf = Z_Malloc(256, PU_STATIC, NULL); @@ -694,8 +652,8 @@ static void GIF_framewrite(void) giflzw_workingCode = UINT16_MAX; WRITEUINT8(p, gifbwr_bits_min - 1); - startline = (scrbuf_pos - movie_screen) / vid.width; - scrbuf_linebegin = movie_screen + (startline * vid.width) + blitx; + startline = (scrbuf_pos - movie_screen) / scrbuf_width; + scrbuf_linebegin = movie_screen + (startline * scrbuf_width) + blitx; scrbuf_lineend = scrbuf_linebegin + blitw; //prewrite a table clear @@ -779,7 +737,7 @@ static void GIF_checkscreens(void) } else { - size_t sz = vid.width * vid.height * vid.bpp; + size_t sz = scrbuf_width * scrbuf_height; if (!gif_screens[i].owns_pixels) { diff --git a/src/m_misc.cpp b/src/m_misc.cpp index d8250fd10..ec861d2a5 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -1303,7 +1303,7 @@ void M_SaveFrame(void) { // munge 
planar buffer to linear linear = screens[2]; - I_ReadScreen(linear); + I_ReadScreen(linear, 1); } #ifdef HWRENDER else @@ -1576,6 +1576,7 @@ void M_DoScreenShot(void) char pathname[MAX_WADPATH]; boolean ret = false; UINT8 *linear = NULL; + UINT8 *palette; // Don't take multiple screenshots, obviously takescreenshot = false; @@ -1608,30 +1609,30 @@ void M_DoScreenShot(void) freename = Newsnapshotfile(pathname,"tga"); #endif + if (!freename) + goto failure; + if (rendermode == render_soft) { // munge planar buffer to linear linear = screens[2]; - I_ReadScreen(linear); - } - - if (!freename) - goto failure; - - // save the pcx file -#ifdef HWRENDER - if (rendermode == render_opengl) - ret = HWR_Screenshot(va(pandf,pathname,freename)); - else -#endif - { + I_ReadScreen(linear, 1); M_CreateScreenShotPalette(); -#ifdef USE_PNG - ret = M_SavePNG(va(pandf,pathname,freename), linear, vid.width, vid.height, screenshot_palette); -#else - ret = WritePCXfile(va(pandf,pathname,freename), linear, vid.width, vid.height, screenshot_palette); -#endif + palette = screenshot_palette; } +#ifdef HWRENDER + else if (rendermode == render_opengl) + { + linear = HWR_GetScreenshot(1); + palette = nullptr; + } +#endif + +#ifdef USE_PNG + ret = M_SavePNG(va(pandf,pathname,freename), linear, vid.width, vid.height, palette); +#else + ret = WritePCXfile(va(pandf,pathname,freename), linear, vid.width, vid.height, palette); +#endif failure: if (ret) diff --git a/src/sdl/hwsym_sdl.c b/src/sdl/hwsym_sdl.c index a00995698..890dc47ce 100644 --- a/src/sdl/hwsym_sdl.c +++ b/src/sdl/hwsym_sdl.c @@ -87,7 +87,7 @@ void *hwSym(const char *funcName,void *handle) GETFUNC(SetTexture); GETFUNC(UpdateTexture); GETFUNC(DeleteTexture); - GETFUNC(ReadRect); + GETFUNC(ReadScreenFinalTexture); GETFUNC(GClipRect); GETFUNC(ClearMipMapCache); GETFUNC(SetSpecialState); diff --git a/src/sdl/i_video.cpp b/src/sdl/i_video.cpp index 4b6bc1200..9eace8973 100644 --- a/src/sdl/i_video.cpp +++ b/src/sdl/i_video.cpp @@ -1324,14 
+1324,24 @@ void I_UpdateNoVsync(void) // // I_ReadScreen // -void I_ReadScreen(UINT8 *scr) +void I_ReadScreen(UINT8 * restrict scr, INT32 scale) { if (rendermode != render_soft) I_Error ("I_ReadScreen: called while in non-software mode"); - else + else if (scale == 1) VID_BlitLinearScreen(screens[0], scr, vid.width*vid.bpp, vid.height, vid.rowbytes, vid.rowbytes); + else + { + UINT8 * restrict source = screens[0]; + INT32 w = vid.width/scale*scale, h = vid.height/scale*scale; + + // size_t saves a lea + movsxd over INT32. mind your types! + for (size_t y = 0; y < h; y += scale) + for (size_t x = 0; x < w; x += scale) + *scr++ = source[y*vid.width + x]; + } } // @@ -1728,7 +1738,7 @@ static void Impl_InitOpenGL(void) *(void**)&HWD.pfnSetTexture = hwSym("SetTexture",NULL); *(void**)&HWD.pfnUpdateTexture = hwSym("UpdateTexture",NULL); *(void**)&HWD.pfnDeleteTexture = hwSym("DeleteTexture",NULL); - *(void**)&HWD.pfnReadRect = hwSym("ReadRect",NULL); + *(void**)&HWD.pfnReadScreenFinalTexture=hwSym("ReadScreenFinalTexture",NULL); *(void**)&HWD.pfnGClipRect = hwSym("GClipRect",NULL); *(void**)&HWD.pfnClearMipMapCache = hwSym("ClearMipMapCache",NULL); *(void**)&HWD.pfnSetSpecialState = hwSym("SetSpecialState",NULL); From 54bb823b31d208d3e00fd0733b6bfe8b68375372 Mon Sep 17 00:00:00 2001 From: GenericHeroGuy Date: Thu, 11 Sep 2025 16:46:23 +0200 Subject: [PATCH 2/3] Remove GetTexImage stuff --- src/hardware/r_opengl/r_opengl.c | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/src/hardware/r_opengl/r_opengl.c b/src/hardware/r_opengl/r_opengl.c index ea5f95987..92af4e127 100644 --- a/src/hardware/r_opengl/r_opengl.c +++ b/src/hardware/r_opengl/r_opengl.c @@ -268,7 +268,6 @@ static void GL_MSG_Error(const char *format, ...) 
/* Raster functions */ #define pglPixelStorei glPixelStorei #define pglReadPixels glReadPixels -#define pglGetTexImage glGetTexImage /* Texture mapping */ #define pglTexEnvi glTexEnvi @@ -381,8 +380,6 @@ typedef void (APIENTRY * PFNglPixelStorei) (GLenum pname, GLint param); static PFNglPixelStorei pglPixelStorei; typedef void (APIENTRY * PFNglReadPixels) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels); static PFNglReadPixels pglReadPixels; -typedef void (APIENTRY * PFNglGetTexImage) (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels); -static PFNglGetTexImage pglGetTexImage; /* Texture mapping */ typedef void (APIENTRY * PFNglTexEnvi) (GLenum target, GLenum pname, GLint param); @@ -526,7 +523,6 @@ boolean SetupGLfunc(void) GETOPENGLFUNC(pglPixelStorei, glPixelStorei) GETOPENGLFUNC(pglReadPixels, glReadPixels) - GETOPENGLFUNC(pglGetTexImage, glGetTexImage) GETOPENGLFUNC(pglTexEnvi, glTexEnvi) GETOPENGLFUNC(pglTexParameteri, glTexParameteri) @@ -1267,7 +1263,6 @@ static void GLPerspective(GLfloat fovy, GLfloat aspect) pglMultMatrixf(&m[0][0]); } -#if 0 static void GLProject(GLfloat objX, GLfloat objY, GLfloat objZ, GLfloat* winX, GLfloat* winY, GLfloat* winZ) { @@ -1307,7 +1302,6 @@ static void GLProject(GLfloat objX, GLfloat objY, GLfloat objZ, *winY=in[1]; *winZ=in[2]; } -#endif // -----------------+ // SetModelView : @@ -1586,15 +1580,6 @@ EXPORT void HWRAPI(ClearMipMapCache) (void) Flush(); } -// nope, definitely not faster. 
software has the same flickering/z-fighting bugs with laggy GIF recording anyway -//#define GETTEXIMAGE - -#ifdef GETTEXIMAGE -#define TEXWIDTH texsize -#else -#define TEXWIDTH screen_width -#endif - // -----------------------+ // ReadScreenFinalTexture : Reads out the final screen texture // Returns : 24bit RGB pixel array stored in dest @@ -1605,24 +1590,17 @@ EXPORT void HWRAPI(ReadScreenFinalTexture) (UINT8 * restrict dest, INT32 scale) INT32 scanlines = screen_height; GLubyte * restrict image; -#ifdef GETTEXIMAGE - image = malloc(texsize*texsize*3); // oof - pglBindTexture(GL_TEXTURE_2D, finalScreenTexture); - pglGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, image); - tex_downloaded = finalScreenTexture; -#else image = malloc(screen_width*screen_height*3); pglPixelStorei(GL_PACK_ALIGNMENT, 1); pglReadPixels(0, 0, screen_width, screen_height, GL_RGB, GL_UNSIGNED_BYTE, image); -#endif // TODO the downscaling happens in the screen capture code now, // yet we're still doing this on the CPU? sheesh... 
// this is where actual knowledge of OpenGL would've come in handy - image += scanlines*TEXWIDTH*3; + image += scanlines*screen_width*3; while ((scanlines -= scale) >= 0) { - image -= TEXWIDTH*scale*3; + image -= screen_width*scale*3; if (scale == 1) memcpy(dest, image, stride); else for (size_t i = 0; i < stride; i += 3) @@ -1631,7 +1609,7 @@ EXPORT void HWRAPI(ReadScreenFinalTexture) (UINT8 * restrict dest, INT32 scale) } // ...yet still, restrict doesn't make the inner loop any faster - free(image - ((screen_height % scale) * TEXWIDTH*3)); + free(image - ((screen_height % scale) * screen_width*3)); } // -----------------+ @@ -2414,7 +2392,6 @@ static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD // this test is added for new coronas' code (without depth buffer) // I think I should do a separate function for drawing coronas, so it will be a little faster -#if 0 if (PolyFlags & PF_Corona) // check to see if we need to draw the corona { FUINT i; @@ -2475,7 +2452,6 @@ static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD c[3] = (unsigned char)(alpha * 255); pglColor4ubv(c); } -#endif Shader_SetUniforms(pSurf, &poly, &tint, &fade); } From aa71181c5a3760ef03126f0c00e3388b64585868 Mon Sep 17 00:00:00 2001 From: GenericHeroGuy Date: Thu, 11 Sep 2025 17:50:28 +0200 Subject: [PATCH 3/3] Fix GIF recording at 1x resolution, and stop recording on resolution change Simplifies the framebuffer management and eliminates the memcpy per frame --- src/m_anigif.c | 76 ++++++++++---------------------------------------- src/screen.c | 4 +++ 2 files changed, 18 insertions(+), 62 deletions(-) diff --git a/src/m_anigif.c b/src/m_anigif.c index ab145f8e3..7d1287ccc 100644 --- a/src/m_anigif.c +++ b/src/m_anigif.c @@ -57,14 +57,6 @@ static precise_t gif_prevframetime = 0; static UINT32 gif_delayus = 0; // "us" is microseconds static UINT8 gif_writeover = 0; -typedef struct -{ - UINT8 *pixels; - size_t size; - boolean owns_pixels; -} 
gif_screen_t; -static gif_screen_t gif_screens[2]; - // SCReen BUFfer (obviously) // --- static UINT8 *scrbuf_pos; @@ -73,6 +65,7 @@ static UINT8 *scrbuf_lineend; static UINT8 *scrbuf_writeend; static INT16 scrbuf_downscaleamt = 1; static UINT16 scrbuf_width, scrbuf_height; +static UINT8 *scrbuf_screens; // OPTIMIZE gif output // --- @@ -531,8 +524,8 @@ static void GIF_rgbconvert(UINT8 * restrict linear, UINT8 * restrict scr) static void GIF_framewrite(void) { UINT8 *p; - UINT8 *base_screen = gif_screens[0].pixels; - UINT8 *movie_screen = gif_screens[1].pixels; + UINT8 *base_screen = scrbuf_screens; + UINT8 *movie_screen = scrbuf_screens; INT32 blitx, blity, blitw, blith; boolean palchanged; @@ -552,15 +545,18 @@ static void GIF_framewrite(void) else palchanged = false; + // select your framebuffer + if (gif_frames & 1) + base_screen += scrbuf_width*scrbuf_height; + else + movie_screen += scrbuf_width*scrbuf_height; + // blit to temp screen if (rendermode == render_soft) I_ReadScreen(movie_screen, scrbuf_downscaleamt); #ifdef HWRENDER else if (rendermode == render_opengl) { - // save previous frame (software already does this elsewhere...?) 
- memcpy(base_screen, movie_screen, scrbuf_width * scrbuf_height); - UINT8 *linear = HWR_GetScreenshot(scrbuf_downscaleamt); GIF_rgbconvert(linear, movie_screen); //free(linear); // Allocated 'statically', no need to free now @@ -718,55 +714,11 @@ INT32 GIF_open(const char *filename) static void GIF_checkscreens(void) { - for (size_t i = 0; i < sizeof(gif_screens) / sizeof(gif_screens[0]); i++) - { - if (rendermode == render_soft) - { - if (gif_screens[i].owns_pixels) - { - Z_Free(gif_screens[i].pixels); - gif_screens[i].owns_pixels = false; - } + if (scrbuf_screens == NULL) + Z_Malloc(scrbuf_width * scrbuf_height * 2, PU_STATIC, &scrbuf_screens); - gif_screens[i].size = 0; - - if (i == 1) - gif_screens[i].pixels = screens[2]; - else - gif_screens[i].pixels = screens[0]; - } - else - { - size_t sz = scrbuf_width * scrbuf_height; - - if (!gif_screens[i].owns_pixels) - { - gif_screens[i].size = sz; - gif_screens[i].pixels = Z_Malloc(gif_screens[i].size, PU_STATIC, NULL); - gif_screens[i].owns_pixels = true; - } - else if (gif_screens[i].size != sz) - { - gif_screens[i].size = sz; - gif_screens[i].pixels = Z_Realloc(gif_screens[i].pixels, gif_screens[i].size, PU_STATIC, NULL); - } - } - } -} - -static void GIF_freescreens(void) -{ - for (size_t i = 0; i < sizeof(gif_screens) / sizeof(gif_screens[0]); i++) - { - if (gif_screens[i].owns_pixels) - { - Z_Free(gif_screens[i].pixels); - gif_screens[i].owns_pixels = false; - } - - gif_screens[i].size = 0; - gif_screens[i].pixels = NULL; - } + I_Assert(scrbuf_width == vid.width / scrbuf_downscaleamt); + I_Assert(scrbuf_height == vid.height / scrbuf_downscaleamt); } // @@ -805,7 +757,7 @@ INT32 GIF_close(void) Z_Free(giflzw_hashTable); giflzw_hashTable = NULL; - GIF_freescreens(); + Z_Free(scrbuf_screens); CONS_Printf(M_GetText("Animated gif closed; wrote %d frames\n"), gif_frames); return 1; diff --git a/src/screen.c b/src/screen.c index 1973b6f58..f7bb409f5 100644 --- a/src/screen.c +++ b/src/screen.c @@ -322,7 +322,11 @@ 
void SCR_SetMode(void) // Set the video mode in the video interface. if (setmodeneeded) + { + if (vid.modenum != setmodeneeded - 1) + M_StopMovie(); // GIF capture buffers are sized for the old resolution, so stop recording before switching modes VID_SetMode(setmodeneeded - 1); + } V_SetPalette(0);