Implement a bizarre register that NVIDIA did not even document internally. WTF?

This commit is contained in:
starfrost013
2025-04-18 19:05:28 +01:00
parent 102d47690d
commit 52c36d5644
5 changed files with 84 additions and 113 deletions

View File

@@ -18,7 +18,7 @@
#pragma once
/* Core */
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check);
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check, bool use_destination_buffer);
void nv3_render_current_bpp_dfb_8(uint32_t address);
void nv3_render_current_bpp_dfb_16(uint32_t address);
void nv3_render_current_bpp_dfb_32(uint32_t address);

View File

@@ -79,9 +79,9 @@ extern const device_config_t nv3t_config[]; // Confi
#define NV3_VBIOS_DEFAULT NV3_VBIOS_ERAZOR_V15403
// Temporary, will be loaded from settings
#define NV3_VRAM_SIZE_2MB 0x200000 // 2MB
#define NV3_VRAM_SIZE_4MB 0x400000 // 4MB
#define NV3_VRAM_SIZE_8MB 0x800000 // NV3T only
#define NV3_VRAM_SIZE_2MB 0x200000 // 2MB
#define NV3_VRAM_SIZE_4MB 0x400000 // 4MB
#define NV3_VRAM_SIZE_8MB 0x800000 // NV3T only
// The card also supports 1MB, but that configuration was never used
// PCI config
@@ -676,10 +676,7 @@ extern const device_config_t nv3t_config[]; // Confi
#define NV3_PRMCIO_START 0x601000
#define NV3_PRMCIO_CRTC_REGISTER_CUR_INDEX_MONO 0x6013B4 // Current CRTC Register Index - Monochrome
#define NV3_PRMCIO_CRTC_REGISTER_CUR_MONO 0x6013B5 // Currently Selected CRTC Register - Monochrome
#define NV3_PRMCIO_CRTC_REGISTER_CUR_INDEX_COLOR 0x6013D4 // Current CRTC Register Index - Colour
#define NV3_PRMCIO_CRTC_REGISTER_CUR_COLOR 0x6013D5
#define NV3_PRMCIO_END 0x601FFF
#define NV3_PDAC_START 0x680000 // OPTIONAL external DAC
@@ -787,6 +784,9 @@ extern const device_config_t nv3t_config[]; // Confi
// CRTC/CIO (0x3b0-0x3df)
#define NV3_CRTC_REGISTER_INDEX_MONO 0x3B4
#define NV3_CRTC_REGISTER_MONO 0x3B5 // Currently Selected CRTC Register - Monochrome
#define NV3_CRTC_DATA_OUT 0x3C0
#define NV3_CRTC_MISCOUT 0x3C2
@@ -796,6 +796,8 @@ extern const device_config_t nv3t_config[]; // Confi
#define NV3_CRTC_REGISTER_INDEX 0x3D4
#define NV3_CRTC_REGISTER_CURRENT 0x3D5
#define NV3_CRTC_REGISTER_WTF 0x3D8
// These are standard (0-18h)
#define NV3_CRTC_REGISTER_HTOTAL 0x00
#define NV3_CRTC_REGISTER_HDISPEND 0x01
@@ -1076,8 +1078,8 @@ typedef struct nv3_pramdac_s
uint32_t hserr_width; // horizontal sync error width
uint8_t user_pixel_mask; // pixel mask for DAC lookup
uint32_t user_read_mode_address; // user read mode address
uint32_t user_write_mode_address; // user write mode address
uint32_t user_read_mode_address; // user read mode address
uint32_t user_write_mode_address; // user write mode address
uint8_t palette[NV3_USER_DAC_PALETTE_SIZE]; // Palette Info/CLUT - 256 entriesxr,g,b = 768 bytes
} nv3_pramdac_t;

View File

@@ -196,7 +196,6 @@ void nv3_mmio_write16(uint32_t addr, uint16_t val, void* priv)
nv_log_verbose_only("Redirected MMIO write16 to SVGA: addr=0x%04x val=0x%02x\n", addr, val);
nv3_svga_write(real_address, val & 0xFF, nv3);
nv3_svga_write(real_address + 1, (val >> 8) & 0xFF, nv3);
@@ -632,6 +631,9 @@ uint8_t nv3_svga_read(uint16_t addr, void* priv)
case NV3_CRTC_REGISTER_INDEX:
ret = nv3->nvbase.svga.crtcreg;
break;
case NV3_CRTC_REGISTER_WTF:
ret = 0x08; // Required to not freeze in certain situations on v3.xx drivers
break;
case NV3_CRTC_REGISTER_CURRENT:
// Support the extended NVIDIA CRTC register range
switch (nv3->nvbase.svga.crtcreg)

View File

@@ -131,10 +131,7 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
memset(&nv3_s2sb_line_buffer, 0x00, (sizeof(uint32_t) * nv3->pgraph.blit.size.h) * (sizeof(uint32_t) * nv3->pgraph.blit.size.w));
/* First calculate our source and destination buffer */
uint32_t src_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03;
bool wtf_nvidia = false;
uint32_t src_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03;
uint32_t dst_buffer = 0; // 5 = just use the source buffer
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER0_ENABLED) & 0x01) dst_buffer = 0;
@@ -142,22 +139,10 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) dst_buffer = 2;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) dst_buffer = 3;
nv3_position_16_t old_position = {0};
nv3_position_16_t new_position = {0};
bool cross_buffer_blit = (nv3->pgraph.boffset[src_buffer] != nv3->pgraph.boffset[dst_buffer]);
/* If src_buffer != dst_buffer, the positions and src/dst buffers seem to be swapped.
Possibly a hardware erratum (?); otherwise, I have no explanation for this behaviour. */
if (nv3->pgraph.boffset[src_buffer] == nv3->pgraph.boffset[dst_buffer])
{
old_position = nv3->pgraph.blit.point_in;
new_position = nv3->pgraph.blit.point_out;
}
else
{
old_position = nv3->pgraph.blit.point_out;
new_position = nv3->pgraph.blit.point_in;
wtf_nvidia = true;
}
nv3_position_16_t old_position = nv3->pgraph.blit.point_in;
nv3_position_16_t new_position = nv3->pgraph.blit.point_out;
/* Coordinates for copying an entire line at a time */
uint32_t buf_position = 0, vram_position = 0, size_x = nv3->pgraph.blit.size.w;
@@ -175,13 +160,12 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
size_x <<= 1;
else if (nv3->nvbase.svga.bpp == 32)
size_x <<= 2;
for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++)
{
buf_position = (nv3->pgraph.blit.size.w * y);
/* shouldn't matter in non-wtf mode */
vram_position = nv3_render_get_vram_address_for_buffer(old_position, grobj, dst_buffer);
vram_position = nv3_render_get_vram_address_for_buffer(old_position, grobj, src_buffer);
memcpy(&nv3_s2sb_line_buffer[buf_position], &nv3->nvbase.svga.vram[vram_position], size_x);
old_position.y++;
@@ -192,16 +176,7 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++)
{
buf_position = (nv3->pgraph.blit.size.w * y);
/* Trying to avoid making the above function more complex. It seems src is used most of the time... But this is bad... */
if (wtf_nvidia)
{
/* Use the parameters of our dst buffer with the position of our source buffer, seriously, who was thinking of this */
vram_position = nv3_render_get_vram_address_for_buffer(new_position, grobj, src_buffer);
//vram_position = vram_position - nv3->pgraph.boffset[dst_buffer] + nv3->pgraph.boffset[src_buffer];
}
else
vram_position = nv3_render_get_vram_address(new_position, grobj);
vram_position = nv3_render_get_vram_address_for_buffer(new_position, grobj, dst_buffer);
memcpy(&nv3->nvbase.svga.vram[vram_position], &nv3_s2sb_line_buffer[buf_position], size_x);
new_position.y++;
@@ -216,50 +191,43 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
nv3_size_16_t blit_size = {0};
/* Change the smallest area of the screen that moved */
if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x)
blit_size.w = (nv3->pgraph.blit.point_out.x - nv3->pgraph.blit.point_in.x) + nv3->pgraph.blit.size.w;
else if (nv3->pgraph.blit.point_out.x < nv3->pgraph.blit.point_in.x)
blit_size.w = (nv3->pgraph.blit.point_in.x - nv3->pgraph.blit.point_out.x) + nv3->pgraph.blit.size.w;
if (cross_buffer_blit)
{
blit_position = nv3->pgraph.blit.point_out;
blit_size = nv3->pgraph.blit.size;
}
else
blit_size.w = nv3->pgraph.blit.size.w;
{
if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x)
blit_size.w = (nv3->pgraph.blit.point_out.x - nv3->pgraph.blit.point_in.x) + nv3->pgraph.blit.size.w;
else if (nv3->pgraph.blit.point_out.x < nv3->pgraph.blit.point_in.x)
blit_size.w = (nv3->pgraph.blit.point_in.x - nv3->pgraph.blit.point_out.x) + nv3->pgraph.blit.size.w;
else
blit_size.w = nv3->pgraph.blit.size.w;
if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y)
blit_size.h = (nv3->pgraph.blit.point_out.y - nv3->pgraph.blit.point_in.y) + nv3->pgraph.blit.size.h;
else if (nv3->pgraph.blit.point_out.y < nv3->pgraph.blit.point_in.y)
blit_size.h = (nv3->pgraph.blit.point_in.y - nv3->pgraph.blit.point_out.y) + nv3->pgraph.blit.size.h;
else
blit_size.h = nv3->pgraph.blit.size.h;
if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y)
blit_size.h = (nv3->pgraph.blit.point_out.y - nv3->pgraph.blit.point_in.y) + nv3->pgraph.blit.size.h;
else if (nv3->pgraph.blit.point_out.y < nv3->pgraph.blit.point_in.y)
blit_size.h = (nv3->pgraph.blit.point_in.y - nv3->pgraph.blit.point_out.y) + nv3->pgraph.blit.size.h;
else
blit_size.h = nv3->pgraph.blit.size.h;
if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x)
blit_position.x = nv3->pgraph.blit.point_in.x;
else if (nv3->pgraph.blit.point_out.x <= nv3->pgraph.blit.point_in.x) // equals case, just use out
blit_position.x = nv3->pgraph.blit.point_out.x;
if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x)
blit_position.x = nv3->pgraph.blit.point_in.x;
else if (nv3->pgraph.blit.point_out.x <= nv3->pgraph.blit.point_in.x) // equals case, just use out
blit_position.x = nv3->pgraph.blit.point_out.x;
if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y)
blit_position.y = nv3->pgraph.blit.point_in.y;
else if (nv3->pgraph.blit.point_out.y <= nv3->pgraph.blit.point_in.y) // equals case, just use out
blit_position.y = nv3->pgraph.blit.point_out.y;
if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y)
blit_position.y = nv3->pgraph.blit.point_in.y;
else if (nv3->pgraph.blit.point_out.y <= nv3->pgraph.blit.point_in.y) // equals case, just use out
blit_position.y = nv3->pgraph.blit.point_out.y;
/* Figure out the Display Buffer Address from the CRTCs */
uint32_t dba = ((nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_RPC0] & 0x1F) << 16)
+ (nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_HIGH] << 8)
+ nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_LOW];
}
/* If the BUFFER_ADDRESS of the last buffer is not the DBA, we don't *actually* want to draw this, so let's not
Apply stupid hack */
if (wtf_nvidia)
{
if (nv3->pgraph.boffset[src_buffer] != dba)
return;
}
else
{
if (nv3->pgraph.boffset[dst_buffer] != dba)
return;
}
nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj, false);
nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj, false, true);
}

View File

@@ -31,10 +31,10 @@
#include <86box/utils/video_stdlib.h>
/* Functions only used in this translation unit */
void nv3_render_8bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj);
void nv3_render_15bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj);
void nv3_render_16bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj);
void nv3_render_32bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj);
void nv3_render_8bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer);
void nv3_render_15bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer);
void nv3_render_16bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer);
void nv3_render_32bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer);
/* Expand a colour.
NOTE: THE GPU INTERNALLY OPERATES ON RGB10!!!!!!!!!!!
@@ -235,19 +235,6 @@ uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t gro
uint32_t vram_y = position.y;
uint32_t current_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03;
/*
uint32_t destination_buffer = 5; // 5 = just use the source buffer
// src is hardcoded to 1, dst to 0. Hmm...
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER0_ENABLED) & 0x01) destination_buffer = 0;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER1_ENABLED) & 0x01) destination_buffer = 1;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) destination_buffer = 2;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) destination_buffer = 3;
if (destination_buffer != current_buffer
&& destination_buffer != 5)
current_buffer = destination_buffer;
*/
uint32_t framebuffer_bpp = nv3->nvbase.svga.bpp;
// we have to multiply the x position by the number of bytes per pixel
@@ -504,7 +491,7 @@ void nv3_render_write_pixel(nv3_position_16_t position, uint32_t color, nv3_grob
/* Go write the pixel */
nv3_size_16_t size = {0};
size.w = size.h = 1;
nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj, true);
nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj, true, false);
}
/* Ensure the correct monitor size */
@@ -605,7 +592,7 @@ void nv3_render_current_bpp_dfb_32(uint32_t address)
/* Blit to the monitor from GPU, current bpp */
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check)
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check, bool use_destination_buffer)
{
/* Ensure that we are in the correct mode. Modified SVGA core code */
nv3_render_ensure_screen_size();
@@ -641,16 +628,16 @@ void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t s
fatal("NV3 - 4bpp not implemented (not even sure if it's SVGA only)");
break;
case 8:
nv3_render_8bpp(pos, size, grobj);
nv3_render_8bpp(pos, size, grobj, use_destination_buffer);
break;
case 15:
nv3_render_15bpp(pos, size, grobj);
nv3_render_15bpp(pos, size, grobj, use_destination_buffer);
break;
case 16:
nv3_render_16bpp(pos, size, grobj);
nv3_render_16bpp(pos, size, grobj, use_destination_buffer);
break;
case 32:
nv3_render_32bpp(pos, size, grobj);
nv3_render_32bpp(pos, size, grobj, use_destination_buffer);
break;
}
@@ -660,7 +647,7 @@ void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t s
Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, indexed 8 bits per pixel format
*/
void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj)
void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer)
{
if (!nv3)
return;
@@ -675,7 +662,10 @@ void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grob
for (uint32_t y = 0; y < size.h; y++)
{
/* re-set the vram address because we are basically "jumping" halfway across a line here */
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
if (use_destination_buffer)
vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now
else
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
for (uint32_t x = 0; x < size.w; x++)
{
@@ -698,7 +688,7 @@ void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grob
Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 15 bits per pixel format
*/
void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj)
void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer)
{
if (!nv3)
return;
@@ -713,7 +703,10 @@ void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro
for (uint32_t y = 0; y < size.h; y++)
{
/* re-set the vram address because we are basically "jumping" halfway across a line here */
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
if (use_destination_buffer)
vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now
else
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
for (uint32_t x = 0; x < size.w; x++)
{
@@ -736,7 +729,7 @@ void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro
Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 16 bits per pixel format
*/
void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj)
void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer)
{
if (!nv3)
return;
@@ -750,8 +743,11 @@ void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro
for (uint32_t y = 0; y < size.h; y++)
{
/* re-get the vram address because we are basically "jumping" halfway across a line here */
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
/* re-set the vram address because we are basically "jumping" halfway across a line here */
if (use_destination_buffer)
vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now
else
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
for (uint32_t x = 0; x < size.w; x++)
{
@@ -774,7 +770,7 @@ void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro
Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 32 bits per pixel format
*/
void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj)
void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer)
{
if (!nv3)
return;
@@ -788,9 +784,12 @@ void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro
for (uint32_t y = 0; y < size.h; y++)
{
/* re-get the vram address because we are basically "jumping" halfway across a line here */
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
/* re-set the vram address because we are basically "jumping" halfway across a line here */
if (use_destination_buffer)
vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now
else
vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask;
for (uint32_t x = 0; x < size.w; x++)
{
p = &nv3->nvbase.svga.monitor->target_buffer->line[pos.y][pos.x];