Skip to content

Commit

Permalink
Dreamcast: Simplify gldc code a bit, to hopefully alleviate the minor…
Browse files Browse the repository at this point in the history
… performance reduction from disabling LTO
  • Loading branch information
UnknownShadow200 committed Sep 30, 2023
1 parent f839d61 commit 152f30a
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 574 deletions.
2 changes: 1 addition & 1 deletion src/Graphics_Dreamcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ static void Gfx_RestoreState(void) {
gfx_format = -1;

glAlphaFunc(GL_GREATER, 0.5f);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glBlendFunc(PVR_BLEND_SRCALPHA, PVR_BLEND_INVSRCALPHA);
glDepthFunc(GL_LEQUAL);
}

Expand Down
4 changes: 2 additions & 2 deletions src/Window_Dreamcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void Window_Init(void) {

void Window_Create2D(int width, int height) {
launcherMode = true;
vid_set_mode(DEFAULT_VID_MODE, PM_RGB0888);
vid_set_mode(DEFAULT_VID_MODE, PM_RGB888);
vid_flip(0);
}

Expand All @@ -60,7 +60,7 @@ void Window_Show(void) { }
void Window_SetSize(int width, int height) { }

void Window_Close(void) {
/* TODO implement */
Event_RaiseVoid(&WindowEvents.Closing);
}

/*########################################################################################################################*
Expand Down
2 changes: 1 addition & 1 deletion third_party/gldc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ SOURCE_DIRS := src src/yalloc
C_FILES := $(foreach dir,$(SOURCE_DIRS),$(wildcard $(dir)/*.c))
OBJS := $(notdir $(C_FILES:%.c=%.o))

C_FLAGS = -O3 -DNDEBUG -mfsrra -mfsca -ffp-contract=fast -ffast-math -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions -flto -fno-fat-lto-objects -ml -m4-single-only -ffunction-sections -fdata-sections -std=gnu99
C_FLAGS = -O3 -DNDEBUG -mfsrra -mfsca -ffp-contract=fast -ffast-math -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions -ml -m4-single-only -ffunction-sections -fdata-sections -std=gnu99

C_DEFINES = -DDREAMCAST -DNDEBUG -D__DREAMCAST__ -D__arch_dreamcast -D_arch_dreamcast -D_arch_sub_pristine

Expand Down
57 changes: 3 additions & 54 deletions third_party/gldc/include/gldc.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,6 @@ __BEGIN_DECLS
#define GL_TRIANGLE_STRIP 0x0005
#define GL_QUADS 0x0007

/* FrontFaceDirection */
#define GL_CW 0x0900
#define GL_CCW 0x0901

#define GL_NONE 0
#define GL_FRONT_LEFT 0x0400
#define GL_FRONT_RIGHT 0x0401
#define GL_BACK_LEFT 0x0402
#define GL_BACK_RIGHT 0x0403
#define GL_FRONT 0x0404
#define GL_BACK 0x0405
#define GL_LEFT 0x0406
#define GL_RIGHT 0x0407
#define GL_FRONT_AND_BACK 0x0408
#define GL_CULL_FACE 0x0B44
#define GL_CULL_FACE_MODE 0x0B45
#define GL_FRONT_FACE 0x0B46

/* Scissor box */
#define GL_SCISSOR_TEST 0x0008 /* capability bit */
#define GL_SCISSOR_BOX 0x0C10
Expand All @@ -62,24 +44,9 @@ __BEGIN_DECLS
#define GL_DEPTH_BITS 0x0D56
#define GL_DEPTH_FUNC 0x0B74
#define GL_DEPTH_WRITEMASK 0x0B72
#define GL_DEPTH_COMPONENT 0x1902

/* Blending: Simply Need to Map GL constants to PVR constants */
#define GL_BLEND_DST 0x0BE0
#define GL_BLEND_SRC 0x0BE1
#define GL_BLEND 0x0BE2 /* capability bit */

#define GL_ZERO 0x0
#define GL_ONE 0x1
#define GL_SRC_COLOR 0x0300
#define GL_ONE_MINUS_SRC_COLOR 0x0301
#define GL_SRC_ALPHA 0x0302
#define GL_ONE_MINUS_SRC_ALPHA 0x0303
#define GL_DST_ALPHA 0x0304
#define GL_ONE_MINUS_DST_ALPHA 0x0305
#define GL_DST_COLOR 0x0306
#define GL_ONE_MINUS_DST_COLOR 0x0307
#define GL_SRC_ALPHA_SATURATE 0x0308

/* Blending */
#define GL_BLEND 0x0BE2 /* capability bit */

/* Misc texture constants */
#define GL_TEXTURE_2D 0x0001 /* capability bit */
Expand Down Expand Up @@ -134,21 +101,7 @@ __BEGIN_DECLS

#define GL_RGBA 0x1908

/* Polygons */
#define GL_POINT 0x1B00
#define GL_LINE 0x1B01
#define GL_FILL 0x1B02
#define GL_CW 0x0900
#define GL_CCW 0x0901
#define GL_FRONT 0x0404
#define GL_BACK 0x0405
#define GL_POLYGON_MODE 0x0B40
#define GL_POLYGON_SMOOTH 0x0B41
#define GL_POLYGON_STIPPLE 0x0B42
#define GL_EDGE_FLAG 0x0B43
#define GL_CULL_FACE 0x0B44
#define GL_CULL_FACE_MODE 0x0B45
#define GL_FRONT_FACE 0x0B46

#define GLbyte char
#define GLshort short
Expand Down Expand Up @@ -207,10 +160,6 @@ GLAPI void glDepthFunc(GLenum func);
GLAPI void glDepthRange(GLclampf n, GLclampf f);
GLAPI void glDepthRangef(GLclampf n, GLclampf f);

/* Culling */
GLAPI void glFrontFace(GLenum mode);
GLAPI void glCullFace(GLenum mode);

/* Shading - Flat or Gouraud */
GLAPI void glShadeModel(GLenum mode);

Expand Down
28 changes: 0 additions & 28 deletions third_party/gldc/src/aligned_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,3 @@ void aligned_vector_init(AlignedVector* vector, uint32_t element_size) {
assert(ptr);
(void) ptr;
}

/*
 * Shrinks the vector's storage so that capacity equals the current size.
 *
 * - If the vector holds no elements, its buffer is freed, `data` is set to
 *   NULL and capacity becomes 0. `element_size` is left untouched.
 * - Otherwise a new buffer of exactly size * element_size bytes is requested
 *   from memalign() with an alignment of 0x20, the existing contents are
 *   copied across and the old buffer is freed.
 *
 * If the new allocation fails the vector is left exactly as it was (same
 * buffer, same capacity); shrinking is only an optimisation, so keeping the
 * larger buffer is always a valid outcome.
 */
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
    AlignedVectorHeader* const hdr = &vector->hdr;

    if(hdr->size == 0) {
        /* Nothing stored: release the buffer entirely */
        free(vector->data);
        vector->data = NULL;
        hdr->capacity = 0;
        return;
    }

    const uint32_t new_byte_size = (hdr->size * hdr->element_size);
    uint8_t* const original_data = vector->data;
    unsigned char* const shrunk = (unsigned char*) memalign(0x20, new_byte_size);

    if(!shrunk) {
        /* Out of memory: keep using the existing buffer rather than
         * losing the data or copying through a NULL pointer */
        return;
    }

    if(original_data) {
        FASTCPY(shrunk, original_data, new_byte_size);
        free(original_data);
    }

    vector->data = shrunk;
    hdr->capacity = hdr->size;
}

/* Empties the vector and then gives back its storage: first calls
 * aligned_vector_clear(), then aligned_vector_shrink_to_fit() on the
 * now-empty vector. */
void aligned_vector_cleanup(AlignedVector* vector) {
aligned_vector_clear(vector);
aligned_vector_shrink_to_fit(vector);
}
3 changes: 0 additions & 3 deletions third_party/gldc/src/aligned_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@ AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
hdr->size = 0;
}

void aligned_vector_shrink_to_fit(AlignedVector* vector);
void aligned_vector_cleanup(AlignedVector* vector);

#ifdef __cplusplus
}
#endif
67 changes: 19 additions & 48 deletions third_party/gldc/src/draw.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) {
/* Perform perspective divide on each vertex */
Vertex* vertex = _glSubmissionTargetStart(target);

const float h = GetVideoMode()->height;
const float h = vid_mode->height;

ITERATE(target->count) {
const float f = MATH_Fast_Invert(vertex->w);
Expand All @@ -135,23 +135,19 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) {
}

GL_FORCE_INLINE int _calc_pvr_face_culling() {
if(!_glIsCullingEnabled()) {
if(!CULLING_ENABLED) {
return GPU_CULLING_SMALL;
} else {
if(_glGetCullFace() == GL_BACK) {
return (_glGetFrontFace() == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
} else {
return (_glGetFrontFace() == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
}
return GPU_CULLING_CW;
}
}

GL_FORCE_INLINE int _calc_pvr_depth_test() {
if(!_glIsDepthTestEnabled()) {
if(!DEPTH_TEST_ENABLED) {
return GPU_DEPTHCMP_ALWAYS;
}

switch(_glGetDepthFunc()) {
switch(DEPTH_FUNC) {
case GL_NEVER:
return GPU_DEPTHCMP_NEVER;
case GL_LESS:
Expand All @@ -173,40 +169,15 @@ GL_FORCE_INLINE int _calc_pvr_depth_test() {
}
}

/*
 * Translates an OpenGL blend factor enum into the matching GPU_BLEND_*
 * value. Factors without a mapping are reported on stderr and treated
 * as GPU_BLEND_ONE.
 */
GL_FORCE_INLINE int _calcPVRBlendFactor(GLenum factor) {
    /* GPU_BLEND_ONE doubles as the result for GL_ONE and the fallback */
    int pvr_factor = GPU_BLEND_ONE;

    switch(factor) {
        case GL_ONE:
            break;
        case GL_ZERO:
            pvr_factor = GPU_BLEND_ZERO;
            break;
        case GL_SRC_ALPHA:
            pvr_factor = GPU_BLEND_SRCALPHA;
            break;
        case GL_ONE_MINUS_SRC_ALPHA:
            pvr_factor = GPU_BLEND_INVSRCALPHA;
            break;
        case GL_DST_ALPHA:
            pvr_factor = GPU_BLEND_DESTALPHA;
            break;
        case GL_ONE_MINUS_DST_ALPHA:
            pvr_factor = GPU_BLEND_INVDESTALPHA;
            break;
        case GL_DST_COLOR:
            pvr_factor = GPU_BLEND_DESTCOLOR;
            break;
        case GL_ONE_MINUS_DST_COLOR:
            pvr_factor = GPU_BLEND_INVDESTCOLOR;
            break;
        default:
            fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor);
            break;
    }

    return pvr_factor;
}


GL_FORCE_INLINE void _updatePVRBlend(PolyContext* context) {
if(_glIsBlendingEnabled() || _glIsAlphaTestEnabled()) {
if(BLEND_ENABLED || ALPHA_TEST_ENABLED) {
context->gen.alpha = GPU_ALPHA_ENABLE;
} else {
context->gen.alpha = GPU_ALPHA_DISABLE;
}

context->blend.src = _calcPVRBlendFactor(_glGetBlendSourceFactor());
context->blend.dst = _calcPVRBlendFactor(_glGetBlendDestFactor());
context->blend.src = BLEND_SRC_FACTOR;
context->blend.dst = BLEND_DST_FACTOR;
}

GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, PolyList* activePolyList) {
Expand All @@ -223,17 +194,17 @@ GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, PolyList* activePolyL

ctx.gen.culling = _calc_pvr_face_culling();
ctx.depth.comparison = _calc_pvr_depth_test();
ctx.depth.write = _glIsDepthWriteEnabled() ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;
ctx.depth.write = DEPTH_MASK_ENABLED ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;

ctx.gen.shading = (_glGetShadeModel() == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;
ctx.gen.shading = (SHADE_MODEL == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;

if(_glIsScissorTestEnabled()) {
if(SCISSOR_TEST_ENABLED) {
ctx.gen.clip_mode = GPU_USERCLIP_INSIDE;
} else {
ctx.gen.clip_mode = GPU_USERCLIP_DISABLE;
}

if(_glIsFogEnabled()) {
if(FOG_ENABLED) {
ctx.gen.fog_type = GPU_FOG_TABLE;
} else {
ctx.gen.fog_type = GPU_FOG_DISABLE;
Expand All @@ -243,12 +214,12 @@ GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, PolyList* activePolyL

if(ctx.list_type == GPU_LIST_OP_POLY) {
/* Opaque polys are always one/zero */
ctx.blend.src = GPU_BLEND_ONE;
ctx.blend.dst = GPU_BLEND_ZERO;
ctx.blend.src = PVR_BLEND_ONE;
ctx.blend.dst = PVR_BLEND_ZERO;
} else if(ctx.list_type == GPU_LIST_PT_POLY) {
/* Punch-through polys require fixed blending and depth modes */
ctx.blend.src = GPU_BLEND_SRCALPHA;
ctx.blend.dst = GPU_BLEND_INVSRCALPHA;
ctx.blend.src = PVR_BLEND_SRCALPHA;
ctx.blend.dst = PVR_BLEND_INVSRCALPHA;
ctx.depth.comparison = GPU_DEPTHCMP_LEQUAL;
} else if(ctx.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) {
/* Autosort mode requires this mode for transparent polys */
Expand Down Expand Up @@ -305,7 +276,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count) {

uint32_t vector_size = aligned_vector_size(&target->output->vector);

GLboolean header_required = (vector_size == 0) || _glGPUStateIsDirty();
GLboolean header_required = (vector_size == 0) || STATE_DIRTY;

target->count = count * 6 / 4; // quads -> triangles
target->header_offset = vector_size;
Expand All @@ -320,7 +291,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count) {

if(header_required) {
apply_poly_header(_glSubmissionTargetHeader(target), target->output);
_glGPUStateMarkClean();
STATE_DIRTY = GL_FALSE;
}

generateQuads(target, first, count);
Expand All @@ -334,4 +305,4 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) {
void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
VERTEX_PTR = pointer;
VERTEX_STRIDE = stride;
}
}
37 changes: 12 additions & 25 deletions third_party/gldc/src/flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,6 @@ PolyList TR_LIST;

GLboolean AUTOSORT_ENABLED = GL_FALSE;

/* Returns the address of OP_LIST, the list that glKosSwapBuffers submits
 * as GPU_LIST_OP_POLY. */
PolyList* _glOpaquePolyList() {
return &OP_LIST;
}

/* Returns the address of PT_LIST, the list that glKosSwapBuffers submits
 * as GPU_LIST_PT_POLY. */
PolyList* _glPunchThruPolyList() {
return &PT_LIST;
}

/* Returns the address of TR_LIST, the list that glKosSwapBuffers submits
 * as GPU_LIST_TR_POLY. */
PolyList *_glTransparentPolyList() {
return &TR_LIST;
}


void APIENTRY glKosInitConfig(GLdcConfig* config) {
config->autosort_enabled = GL_FALSE;
Expand All @@ -41,8 +29,6 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
void APIENTRY glKosInitEx(GLdcConfig* config) {
TRACE();

puts("\nWelcome to GLdc!\n");

InitGPU(config->autosort_enabled, config->fsaa_enabled);

AUTOSORT_ENABLED = config->autosort_enabled;
Expand Down Expand Up @@ -74,27 +60,28 @@ void APIENTRY glKosInit() {

void APIENTRY glKosSwapBuffers() {
TRACE();

SceneBegin();
pvr_wait_ready();

pvr_scene_begin();
if(aligned_vector_header(&OP_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_OP_POLY);
pvr_list_begin(GPU_LIST_OP_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
SceneListFinish();
pvr_list_finish();
}

if(aligned_vector_header(&PT_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_PT_POLY);
pvr_list_begin(GPU_LIST_PT_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
SceneListFinish();
pvr_list_finish();
}

if(aligned_vector_header(&TR_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_TR_POLY);
pvr_list_begin(GPU_LIST_TR_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
SceneListFinish();
}
SceneFinish();

pvr_list_finish();
}
pvr_scene_finish();
aligned_vector_clear(&OP_LIST.vector);
aligned_vector_clear(&PT_LIST.vector);
aligned_vector_clear(&TR_LIST.vector);
Expand Down
Loading

0 comments on commit 152f30a

Please sign in to comment.