even more fixes to hash lookup

2026-02-24 10:28:19 -07:00 · 2025-02-26 23:55:49 +00:00
parent 8cf57fdc14
commit 71bdf8d081
5 changed files with 30 additions and 17 deletions
--- a/src/include/86box/nv/vid_nv3.h
+++ b/src/include/86box/nv/vid_nv3.h
@@ -14,7 +14,7 @@
 *          Also check the doc folder for some more notres
 * 
 *          vid_nv3.h:      NV3 Architecture Hardware Reference (open-source)
- *          Last updated:   13 February 2025 (STILL WORKING ON IT!!!)
+ *          Last updated:   26 February 2025 (STILL WORKING ON IT!!!)
 *  
 * Authors: Connor Hyde <mario64crashed@gmail.com>
 *
@@ -41,6 +41,8 @@ extern const device_config_t nv3_config[];
 #define NV3_DMA_CHANNELS                                8
 #define NV3_DMA_SUBCHANNELS_PER_CHANNEL                 8

+#define NV3_DMA_CHANNELS_TOTAL                          0x7F            // Used as the channel mask in nv3_ramht_hash (0x7F = 7 bits), despite there only being 8*8 = 64 channel/subchannel combinations
+
 #define NV3_86BOX_TIMER_SYSTEM_FIX_QUOTIENT             1               // The amount by which we have to ration out the memory clock because it's not fast enough...
                                                                        // Multiply by this value to get the real clock speed.
 #define NV3_LAST_VALID_GRAPHICS_OBJECT_ID               0x1F
--- a/src/video/nv/nv3/subsystems/nv3_pbus.c
+++ b/src/video/nv/nv3/subsystems/nv3_pbus.c
@@ -127,8 +127,8 @@ void nv3_pbus_write(uint32_t address, uint32_t value)
 uint8_t nv3_pbus_rma_read(uint16_t addr)
 {
    addr &= 0xFF;
-    uint32_t real_final_address;
-    uint8_t ret;
+    uint32_t real_final_address = 0x0;
+    uint8_t ret = 0x0;

    switch (addr)
    {
--- a/src/video/nv/nv3/subsystems/nv3_pfifo.c
+++ b/src/video/nv/nv3/subsystems/nv3_pfifo.c
@@ -290,6 +290,11 @@ uint32_t nv3_pfifo_read(uint32_t address)
    return ret; 
 }

+void nv3_pfifo_trigger_dma_if_required()
+{
+
+}
+
 void nv3_pfifo_write(uint32_t address, uint32_t value) 
 {
    // before doing anything, check the subsystem enablement
@@ -481,6 +486,9 @@ void nv3_pfifo_write(uint32_t address, uint32_t value)

        nv_log("PFIFO Cache1 CTX Write Entry=%d value=0x%04x", ctx_entry_id, value);
    }
+
+    /* Trigger DMA for notifications if we need to (nv3_pfifo_trigger_dma_if_required is currently an empty stub) */
+    nv3_pfifo_trigger_dma_if_required();
 }

 /* 
@@ -690,7 +698,7 @@ void nv3_pfifo_cache1_pull()
    // NV_ROOT
    if (!current_method)
    {
-        if (!nv3_ramin_find_object(current_name, 0, current_channel, current_subchannel))
+        if (!nv3_ramin_find_object(current_name, 1, current_channel, current_subchannel))
            return; // interrupt was fired, and we went to ramro
    }

--- a/src/video/nv/nv3/subsystems/nv3_pramin.c
+++ b/src/video/nv/nv3/subsystems/nv3_pramin.c
@@ -114,7 +114,7 @@ uint32_t nv3_ramin_read32(uint32_t addr, void* priv)
    {
        val = vram_32bit[addr];

-        nv_log("Read dword from PRAMIN addr=0x%08x (raw address=0x%08x)\n", addr, raw_addr);
+        nv_log("Read dword from PRAMIN 0x%08x <- 0x%08x (raw address=0x%08x)\n", val, addr, raw_addr);
    }

    return val;
@@ -134,7 +134,7 @@ void nv3_ramin_write8(uint32_t addr, uint8_t val, void* priv)
    // reversal unit size in this case is 16 bytes, vram size is 2-8mb (but 8mb is zx/nv3t only and 2mb...i haven't found a 22mb card)
    addr ^= (nv3->nvbase.svga.vram_max - 0x10);

-    uint32_t val32 = 0x00;
+    uint32_t val32 = (uint32_t)val;

    if (!nv3_ramin_arbitrate_write(addr, val32))
    {
@@ -160,7 +160,7 @@ void nv3_ramin_write16(uint32_t addr, uint16_t val, void* priv)
    addr ^= (nv3->nvbase.svga.vram_max - 0x10);
    addr >>= 1; // what

-    uint32_t val32 = 0x00;
+    uint32_t val32 = (uint32_t)val;

    if (!nv3_ramin_arbitrate_write(addr, val32))
    {
@@ -186,9 +186,7 @@ void nv3_ramin_write32(uint32_t addr, uint32_t val, void* priv)
    addr ^= (nv3->nvbase.svga.vram_max - 0x10);
    addr >>= 2; // what

-    uint32_t val32 = 0x00;
-
-    if (!nv3_ramin_arbitrate_write(addr, val32))
+    if (!nv3_ramin_arbitrate_write(addr, val))
    {
        vram_32bit[addr] = val;
        nv_log("Write dword to PRAMIN addr=0x%08x val=0x%08x (raw address=0x%08x)\n", addr, val, raw_addr);
@@ -354,8 +352,9 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
    // Not a switch statement in case newer gpus have larger ramins

    uint32_t bucket_entries = 2;
+    uint8_t ramht_size = (nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_SIZE) & 0x03;

-    switch (nv3->pfifo.ramht_config)
+    switch (ramht_size)
    {
        case NV3_PFIFO_CONFIG_RAMHT_SIZE_4K:
            // stays as is
@@ -374,7 +373,10 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
    
    // Calculate the address in the hashtable
    uint32_t ramht_base = ((nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS) & 0x0F) << NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS;
-    uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel)) * bucket_entries * 8;
+
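+    // FIXME: This is certainly wrong. The objects seem to be written to 4600? That suggested multiplying the hash by 80, which multiplies the final address by 10.
+    // NOTE: the code below multiplies by bucket_entries * 8, not 80, so the line above may be stale. Why does this work?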
+    uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel) * bucket_entries * 8);

    nv_log("Beginning search for graphics object at RAMHT base=0x%04x, name=0x%08x, Cache%d, channel=%d.%d)\n",
        ramht_cur_address, name, cache_num, channel, subchannel);
@@ -382,8 +384,8 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
    bool found_object = false;
    
    // set up some variables
-    uint32_t found_obj_name;
-    nv3_ramin_context_t obj_context_struct;
+    uint32_t found_obj_name = 0x00;
+    nv3_ramin_context_t obj_context_struct = {0};

    for (uint32_t bucket_entry = 0; bucket_entry < bucket_entries; bucket_entry++)
    {
--- a/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c
+++ b/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c
@@ -32,13 +32,14 @@
 It is used to get the offset within RAMHT of a graphics object.
 */

+
 uint32_t nv3_ramht_hash(uint32_t name, uint32_t channel)
 {
-    // convert the name to an array of bytes
-    uint8_t* hash_bytes = (uint8_t*)&name;
+    // the official nvidia hash algorithm, tweaked for readability
+    uint32_t hash = ((name ^ (name >> 8) ^ (name >> 16) ^ (name >> 24)) & 0xFF) ^ (channel & NV3_DMA_CHANNELS_TOTAL); 
+

    // is this the right endianness?
-    uint32_t hash = (hash_bytes[0] ^ hash_bytes[1] ^ hash_bytes[2] ^ hash_bytes[3] ^ (uint8_t)channel);
    nv_log("Generated RAMHT hash 0x%04x (RAMHT slot=0x%04x (from name 0x%08x for DMA channel 0x%04x)\n)\n", hash, (hash/8), name, channel);
    return hash;
 }