Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gpu: add make option GPU_SW_COMPILE_FAST=1 to perform fast compile of Software GPU #794

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Makefile
Expand Up @@ -9,6 +9,7 @@ HAVE_LIGHTREC = 1
LINK_STATIC_LIBCPLUSPLUS = 1
THREADED_RECOMPILER = 1
LIGHTREC_DEBUG = 0
GPU_SW_COMPILE_FAST = 0

CORE_DIR := .
HAVE_GRIFFIN = 0
Expand Down Expand Up @@ -85,6 +86,10 @@ ifneq ($(LIGHTREC_DEBUG), 0)
endif
endif

ifneq ($(GPU_SW_COMPILE_FAST), 0)
FLAGS += -DGPU_SW_COMPILE_FAST=1
endif

# Unix
ifneq (,$(findstring unix,$(platform)))
TARGET := $(TARGET_NAME)_libretro.so
Expand Down
51 changes: 30 additions & 21 deletions mednafen/psx/gpu.cpp
Expand Up @@ -85,7 +85,7 @@ static FastFIFO<uint32, 0x20> GPU_BlitterFIFO; // 0x10 on an actual PS1 GPU, 0x2

struct CTEntry
{
void (*func[4][4])(PS_GPU* g, const uint32 *cb, bool MaskEval_TA);
void (*func[4][8])(PS_GPU* g, const uint32 *cb);
uint8_t len;
uint8_t fifo_fb_len;
bool ss_cmd;
Expand Down Expand Up @@ -141,28 +141,37 @@ static void SetTPage(PS_GPU *gpu, const uint32_t cmdw)

/* C-style function wrappers so our command table isn't so ginormous (in memory usage). */
template<int numvertices, bool shaded, bool textured,
int BlendMode, bool TexMult, uint32 TexMode_TA>
static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
int BlendMode, bool TexMult, uint32 TexMode_TA, bool MaskEval_TA>
static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb)
{
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA>(g, cb, PGXP_enabled(), MaskEval_TA);
#if GPU_SW_COMPILE_FAST
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, true>(g, cb, PGXP_enabled());
#else
if (PGXP_enabled())
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, true>(g, cb, 1);
else
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, false>(g, cb, 0);
#endif
}


static void Command_ClearCache(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_ClearCache(PS_GPU* g, const uint32 *cb)
{
InvalidateCache(g);
}

static void Command_IRQ(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_IRQ(PS_GPU* g, const uint32 *cb)
{
g->IRQPending = true;
IRQ_Assert(IRQ_GPU, g->IRQPending);
}

// Special RAM write mode (16 pixels at a time),
// does *not* appear to use mask drawing environment settings.
static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA)
static void Command_FBFill(PS_GPU* gpu, const uint32 *cb)
{
unsigned y;
int32_t r = cb[0] & 0xFF;
Expand Down Expand Up @@ -198,7 +207,7 @@ static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA)
rsx_intf_fill_rect(cb[0], destX, destY, width, height);
}

static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBCopy(PS_GPU* g, const uint32 *cb)
{
unsigned y;
int32_t sourceX = (cb[1] >> 0) & 0x3FF;
Expand Down Expand Up @@ -251,7 +260,7 @@ static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_copy_rect(sourceX, sourceY, destX, destY, width, height, g->MaskEvalAND, g->MaskSetOR);
}

static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBWrite(PS_GPU* g, const uint32 *cb)
{
//assert(InCmd == INCMD_NONE);

Expand Down Expand Up @@ -280,7 +289,7 @@ static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
* raw_height == 0, or raw_height != 0x200 && (raw_height & 0x1FF) == 0
*/

static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBRead(PS_GPU* g, const uint32 *cb)
{
//assert(g->InCmd == INCMD_NONE);

Expand Down Expand Up @@ -318,7 +327,7 @@ static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
}
}

static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_DrawMode(PS_GPU* g, const uint32 *cb)
{
const uint32 cmdw = *cb;

Expand All @@ -337,7 +346,7 @@ static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
//printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline);
}

static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_TexWindow(PS_GPU* g, const uint32 *cb)
{
g->tww = (*cb & 0x1F);
g->twh = ((*cb >> 5) & 0x1F);
Expand All @@ -348,31 +357,31 @@ static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_set_tex_window(g->tww, g->twh, g->twx, g->twy);
}

static void Command_Clip0(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_Clip0(PS_GPU* g, const uint32 *cb)
{
g->ClipX0 = *cb & 1023;
g->ClipY0 = (*cb >> 10) & 1023;
rsx_intf_set_draw_area(g->ClipX0, g->ClipY0,
g->ClipX1, g->ClipY1);
}

static void Command_Clip1(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_Clip1(PS_GPU* g, const uint32 *cb)
{
g->ClipX1 = *cb & 1023;
g->ClipY1 = (*cb >> 10) & 1023;
rsx_intf_set_draw_area(g->ClipX0, g->ClipY0,
g->ClipX1, g->ClipY1);
}

static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb)
{
g->OffsX = sign_x_to_s32(11, (*cb & 2047));
g->OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047));

//fprintf(stderr, "[GPU] Drawing offset: %d(raw=%d) %d(raw=%d) -- %d\n", OffsX, *cb, OffsY, *cb >> 11, scanline);
}

static void Command_MaskSetting(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_MaskSetting(PS_GPU* g, const uint32 *cb)
{
//printf("Mask setting: %08x\n", *cb);
g->MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000;
Expand Down Expand Up @@ -1085,15 +1094,15 @@ static void ProcessFIFO(uint32_t in_count)
}

if ((cc >= 0x80) && (cc <= 0x9F))
Command_FBCopy(&GPU, CB, GPU.MaskEvalAND);
Command_FBCopy(&GPU, CB);
else if ((cc >= 0xA0) && (cc <= 0xBF))
Command_FBWrite(&GPU, CB, GPU.MaskEvalAND);
Command_FBWrite(&GPU, CB);
else if ((cc >= 0xC0) && (cc <= 0xDF))
Command_FBRead(&GPU, CB, GPU.MaskEvalAND);
Command_FBRead(&GPU, CB);
else
{
if (command->func[GPU.abr][GPU.TexMode])
command->func[GPU.abr][GPU.TexMode](&GPU, CB, GPU.MaskEvalAND);
command->func[GPU.abr][GPU.TexMode | (GPU.MaskEvalAND ? 0x4 : 0x0)](&GPU, CB);
}
}

Expand Down
95 changes: 64 additions & 31 deletions mednafen/psx/gpu_common.h
@@ -1,3 +1,18 @@

// Enable this to speed up build & link times quite a bit, at the cost of some performance in the sw renderer.
// Disables template optimization of MaskEval_TA and pgxp within software rendering.
#if !defined(GPU_SW_COMPILE_FAST)
# define GPU_SW_COMPILE_FAST 0
#endif

#if GPU_SW_COMPILE_FAST
# define GPU_MaskEvalAND (gpu->MaskEvalAND)
#else
// MaskEvalAND can only be either 0 or 0x8000, so if MaskEval_TA=1, then GPU_MaskEvalAND can be specified
// as the literal value 0x8000.
# define GPU_MaskEvalAND (0x8000)
#endif

extern enum dither_mode psx_gpu_dither_mode;

/* Return a pixel from VRAM */
Expand Down Expand Up @@ -68,8 +83,8 @@ static INLINE void PlotPixelBlend(uint16_t bg_pix, uint16_t *fore_pix)

}

template<int BlendMode, bool textured>
static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA)
template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{
// More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
y &= (512 << gpu->upscale_shift) - 1;
Expand All @@ -81,7 +96,7 @@ static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pi
PlotPixelBlend<BlendMode>(bg_pix, &fore_pix);
}

if(!MaskEval_TA || !(vram_fetch(gpu, x, y) & 0x8000))
if(!MaskEval_TA || !(vram_fetch(gpu, x, y) & GPU_MaskEvalAND))
{
if (textured)
vram_put(gpu, x, y, fore_pix | gpu->MaskSetOR);
Expand All @@ -91,8 +106,8 @@ static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pi
}

/// Copy of PlotPixel without internal upscaling, used to draw lines and sprites
template<int BlendMode, bool textured>
static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA)
template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{
uint16_t output;
y &= 511; // More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
Expand All @@ -103,7 +118,7 @@ static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t f
PlotPixelBlend<BlendMode>(bg_pix, &fore_pix);
}

if(!MaskEval_TA || !(texel_fetch(gpu, x, y) & 0x8000))
if(!MaskEval_TA || !(texel_fetch(gpu, x, y) & GPU_MaskEvalAND))
texel_put(x, y, (textured ? fore_pix : (fore_pix & 0x7FFF)) | gpu->MaskSetOR);
}

Expand Down Expand Up @@ -251,15 +266,24 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)

//#define BM_HELPER(fg) { fg(0), fg(1), fg(2), fg(3) }

#define POLY_HELPER_SUB(bm, cv, tm) \
G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm>

#define POLY_HELPER_FG(bm, cv) \
{ \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
#define POLY_HELPER_SUB(bm, cv, tm, mam) \
G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam >

// When GPU_SW_COMPILE_FAST is set, force MaskEval_TA on for every table entry, so both constants below are 1.
// With MaskEval_TA=1, the mask test is then decided at runtime from the gpu's MaskEvalAND setting.
static bool const MaskEvalAND_TA_Enabled = GPU_SW_COMPILE_FAST ? 1 : 1;
static bool const MaskEvalAND_TA_Disabled = GPU_SW_COMPILE_FAST ? 1 : 0;

#define POLY_HELPER_FG(bm, cv) \
{ \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), MaskEvalAND_TA_Disabled), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), MaskEvalAND_TA_Disabled), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Disabled), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Disabled), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), MaskEvalAND_TA_Enabled ), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), MaskEvalAND_TA_Enabled ), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Enabled ), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Enabled ), \
}

#define POLY_HELPER(cv) \
Expand All @@ -270,17 +294,20 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}

#define SPR_HELPER_SUB(bm, cv, tm) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm>

#define SPR_HELPER_FG(bm, cv) \
{ \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
#define SPR_HELPER_SUB(bm, cv, tm, mam) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam>

#define SPR_HELPER_FG(bm, cv) \
{ \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), MaskEvalAND_TA_Disabled), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), MaskEvalAND_TA_Disabled), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Disabled), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Disabled), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), MaskEvalAND_TA_Enabled ), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), MaskEvalAND_TA_Enabled ), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Enabled ), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), MaskEvalAND_TA_Enabled ), \
}


#define SPR_HELPER(cv) \
{ \
{ SPR_HELPER_FG(0, cv), SPR_HELPER_FG(1, cv), SPR_HELPER_FG(2, cv), SPR_HELPER_FG(3, cv) }, \
Expand All @@ -289,14 +316,18 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}

#define LINE_HELPER_SUB(bm, cv) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1>
#define LINE_HELPER_SUB(bm, cv, mam) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1, mam>

#define LINE_HELPER_FG(bm, cv) \
{ \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Disabled), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Disabled), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Disabled), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Disabled), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Enabled ), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Enabled ), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Enabled ), \
LINE_HELPER_SUB(bm, cv, MaskEvalAND_TA_Enabled ) \
}

#define LINE_HELPER(cv) \
Expand All @@ -307,13 +338,15 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}

#define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr }
#define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr }
#define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }

#define OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr) { { OTHER_HELPER_FG(0, arg_ptr), OTHER_HELPER_FG(1, arg_ptr), OTHER_HELPER_FG(2, arg_ptr), OTHER_HELPER_FG(3, arg_ptr) }, arg_cs, arg_fbcs, arg_ss }
#define OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X32(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr)

#define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL }
#define NULLCMD() { { NULLCMD_FG(0), NULLCMD_FG(1), NULLCMD_FG(2), NULLCMD_FG(3) }, 1, 1, true }

12 changes: 6 additions & 6 deletions mednafen/psx/gpu_line.cpp
Expand Up @@ -101,8 +101,8 @@ static INLINE void AddLineStep(line_fxp_coord *point, const line_fxp_step *step)
}
}

template<bool gouraud, int BlendMode>
static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA)
template<bool gouraud, int BlendMode, bool MaskEval_TA>
static void DrawLine(PS_GPU *gpu, line_point *points)
{
line_fxp_coord cur_point;
line_fxp_step step;
Expand Down Expand Up @@ -155,15 +155,15 @@ static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA)

// FIXME: There has to be a faster way than checking for being inside the drawing area for each pixel.
if(x >= gpu->ClipX0 && x <= gpu->ClipX1 && y >= gpu->ClipY0 && y <= gpu->ClipY1)
PlotNativePixel<BlendMode, false>(gpu, x, y, pix, MaskEval_TA);
PlotNativePixel<BlendMode, MaskEval_TA, false>(gpu, x, y, pix);
}

AddLineStep<gouraud>(&cur_point, &step);
}
}

template<bool polyline, bool gouraud, int BlendMode>
static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
template<bool polyline, bool gouraud, int BlendMode, bool MaskEval_TA>
static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb)
{
line_point points[2];
const uint8_t cc = cb[0] >> 24; // For pline handling later.
Expand Down Expand Up @@ -240,5 +240,5 @@ static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
#endif

if (rsx_intf_has_software_renderer())
DrawLine<gouraud, BlendMode>(gpu, points, MaskEval_TA);
DrawLine<gouraud, BlendMode, MaskEval_TA>(gpu, points);
}