nesemu/ppu/ppu.c

577 lines
20 KiB
C

//
// Created by william on 12/30/23.
// https://www.reddit.com/r/EmuDev/comments/evu3u2/comment/fgr03ms/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button
//
// 1. Make sure you have NMI implemented on CPU (pretty straightforward)
// 2. Implement PPUSTATUS vblank flag (simple) and PPUCTRL NMI flag + background address flag (simple)
// 3. Implement PPUADDR/PPUDATA so that the nametables are filled out
// 4. Now you have some data your PPU can actually read for rendering background. Render it scanline by scanline - just follow the wiki on this. Maybe the timing will be bad, it doesn't matter for this game. Start off with rendering tiles based on the pattern table ID, don't try and fetch patterns.
// 5. Fix the inevitable bugs with your PPUDATA implementation until you see a blocky version of the Donkey Kong screen.
// 6. Now fetch pattern table data using the tile_id data. If it looks "wrong" make sure you are consuming the background address flag. Start off with black and white, then pick two colors to mix for the two bits. Now you should have something like https://i.imgur.com/7OIpHgd.png
// 7. (Optional) implement palette reads (I'm skipping this for now).
// 8. Implement OAMDMA (and OAMDATA I guess, I implemented one on top of the other)
// 9. Now you should have sprite data to render. Implement the logic for copying from primary OAM to scanline OAM. I'm doing it all as one step (not smearing it over up to 256 cycles like the actual hardware). Skip the confusing sprite overflow junk.
// 10. This is where I'm stuck. I think I need to read the "sprites" section of https://wiki.nesdev.com/w/index.php/PPU_rendering very carefully.
#include <assert.h>
#include <string.h>
#include "ppu.h"
#include "../include/ppu.h"
#include "../cpu/cpu.h"
#include "../include/rom.h"
#include "colors.h"
#include "tile_debugger.h"
#include "log.h"
// Scanlines strictly below this value are handled by ppu_visible_frame().
#define PPU_SCANLINE_VISIBLE_MAX 240
// Inclusive range of scanlines handled by ppu_post_render().
#define PPU_SCANLINE_POST_RENDER_MIN PPU_SCANLINE_VISIBLE_MAX
#define PPU_SCANLINE_POST_RENDER_MAX 242
// Scanline handled by ppu_pre_render(); it is also the last scanline of a frame.
#define PPU_SCANLINE_PRE_RENDER 261
#define PPU_SCANLINE_MAX PPU_SCANLINE_PRE_RENDER
// Upper limit of the per-scanline cycle counter (see ppu_cycle()).
#define PPU_CYCLE_MAX 340
// Number of pixels in one row of the output pixel buffer (see ppu_draw_tile()).
#define PPU_CYCLE_VISIBLE_MAX 256
// Width and height, in pixels, of one nametable tile.
#define NAMETABLE_TILE_SIZE 8
// The single PPU instance; also exposed through ppu_get_state().
PPU ppu_state;
// Lookup table mapping a color byte read from palette memory to an output pixel
// (see ppu_pixel_set_color()).
color_list colors = COLOR_LIST;
/**
 * Resets the global PPU state by filling it with zero bytes.
 */
void ppu_init() {
    memset(&ppu_state, 0, sizeof ppu_state);
}
PPU *ppu_get_state() {
return &ppu_state;
}
/**
 * Sets or clears bits of the PPU status register.
 *
 * @param mask    bits of the status register to modify
 * @param enabled true to set the masked bits, false to clear them
 */
void ppu_status_set(byte mask, bool enabled) {
    if (!enabled) {
        ppu_state.registers[PPU_REGISTER_STATUS] &= ~mask;
        return;
    }

    ppu_state.registers[PPU_REGISTER_STATUS] |= mask;
}
// Number of VBlank NMIs that have been forwarded to the CPU.
int hits = 0;

/**
 * Raises an NMI on the CPU, but only when VBlank NMI generation is enabled
 * in the control register. Does nothing otherwise.
 */
void ppu_trigger_vbl_nmi() {
    if (ppu_read_flag(PPU_REGISTER_CTRL, PPU_CTRL_GEN_VBLANK_NMI)) {
        hits++;
        cpu_trigger_nmi();
    }
}
/*
d888888b d888888b db d88888b
`~~88~~' `88' 88 88'
88 88 88 88ooooo
88 88 88 88~~~~~
88 .88. 88booo. 88.
YP Y888888P Y88888P Y88888P
d8888b. d88888b d8b db d8888b. d88888b d8888b. d888888b d8b db d888b
88 `8D 88' 888o 88 88 `8D 88' 88 `8D `88' 888o 88 88' Y8b
88oobY' 88ooooo 88V8o 88 88 88 88ooooo 88oobY' 88 88V8o 88 88
88`8b 88~~~~~ 88 V8o88 88 88 88~~~~~ 88`8b 88 88 V8o88 88 ooo
88 `88. 88. 88 V888 88 .8D 88. 88 `88. .88. 88 V888 88. ~8~
88 YD Y88888P VP V8P Y8888D' Y88888P 88 YD Y888888P VP V8P Y888P
*/
/**
 * Extracts the 2-bit palette number that applies to the tile located at the
 * current cycle/scanline from an attribute byte.
 *
 * Attribute byte layout:
 *   7654 3210
 *   |||| ||++- palette for the top left quadrant
 *   |||| ++--- palette for the top right quadrant
 *   ||++------ palette for the bottom left quadrant
 *   ++-------- palette for the bottom right quadrant
 *
 * @param attribute attribute byte covering a 4x4 tile area
 * @return palette number in the range 0..3
 */
static inline byte ppu_pixel_get_palette(byte attribute) {
    unsigned int tile_col = ppu_state.cycle / NAMETABLE_TILE_SIZE;
    unsigned int tile_row = ppu_state.scanline / NAMETABLE_TILE_SIZE;

    // Within each group of 4 tiles, tiles 2 and 3 belong to the right/bottom half.
    unsigned int shift = 0;
    if ((tile_row & 2) != 0) {
        shift += 4;
    }
    if ((tile_col & 2) != 0) {
        shift += 2;
    }

    return (attribute >> shift) & 0x3;
}
/**
 * Decodes one 8-pixel tile row and writes the resulting colors into 8
 * consecutive pixels.
 *
 * @param pixel     first of the 8 destination pixels
 * @param pt_low    low bit plane of the tile row (bit 7 is the leftmost pixel)
 * @param pt_high   high bit plane of the tile row (bit 7 is the leftmost pixel)
 * @param attribute attribute byte used to select the palette
 */
static inline void ppu_pixel_set_color(pixel *pixel, byte pt_low, byte pt_high, byte attribute) {
    for (int bit = 7; bit >= 0; bit--) {
        byte low_bit = (pt_low >> bit) & 1;
        byte high_bit = (pt_high >> bit) & 1;
        byte color_offset = (high_bit << 1) | low_bit;

        // Color 0 always comes from the universal color at 0x3f00; only the
        // other colors are looked up in the palette chosen by the attribute.
        address color_addr = 0x3f00 + color_offset;
        if (color_offset != 0) {
            color_addr += ppu_pixel_get_palette(attribute) * 4;
        }

        pixel[7 - bit] = colors[ppu_read(color_addr)];
    }
}
/**
 * Draws the tile row held in the fetch latches into the pixel buffer, starting
 * at the position given by the current scanline and cycle.
 */
void ppu_draw_tile() {
    const unsigned int frame_pixels = 240 * 256;
    unsigned int row = ppu_state.scanline;
    unsigned int column = ppu_state.cycle;

    // The index wraps around so that a tile starting past the end of the frame
    // lands back at the beginning of the buffer.
    unsigned int pixel_index = (row * PPU_CYCLE_VISIBLE_MAX + column) % frame_pixels;
    assert(pixel_index < frame_pixels); // If this goes over, the PPU registers will be overridden

    ppu_pixel_set_color(&ppu_state.pixels[pixel_index],
                        ppu_state.fetch.pattern_table_tile_low,
                        ppu_state.fetch.pattern_table_tile_high,
                        ppu_state.fetch.attribute_table);
}
/**
 * Reads one bit plane of the current row of a background tile.
 *
 * In DEBUG builds with the tile debugger enabled, the pattern is replaced by a
 * rendering of the tile index, and one of the two bit planes may be blanked.
 *
 * @param tile_index index of the tile in the background pattern table
 * @param high       0 to read the low bit plane, 1 to read the high bit plane
 * @return the 8 pixels bits of the requested plane
 */
byte ppu_get_pattern(byte tile_index, byte high) {
#if DEBUG
    if (ppu_state.debug.flags.tile_debugger) {
        bool hide_high_plane = ppu_state.debug.flags.tile_debugger_pattern_half == 1 && high;
        bool hide_low_plane = ppu_state.debug.flags.tile_debugger_pattern_half == 2 && !high;
        if (hide_high_plane || hide_low_plane) {
            return 0;
        }

        return tile_debugger_encode_number_as_pattern(tile_index, ppu_state.scanline % 8);
    }
#endif

    byte fine_row = (ppu_state.scanline + ppu_state.y_scroll) % 8;

    // Pattern address: table base | tile index * 16 | bit plane * 8 | row in tile
    address pattern_addr = ppu_state.bg_pattern_table_addr;
    pattern_addr |= tile_index << 4;
    pattern_addr |= high << 3;
    pattern_addr |= fine_row;

    return ppu_read(pattern_addr);
}
/**
 * Runs one step of the 8-cycle background tile fetch sequence, selected by the
 * current cycle: tile id, attribute byte, pattern low plane, pattern high plane.
 * After the last step the horizontal position in the PPU address moves to the
 * next tile.
 *
 * PPU address layout:
 *   yyy NN YYYYY XXXXX
 *   ||| || ||||| +++++-- coarse X scroll
 *   ||| || +++++-------- coarse Y scroll
 *   ||| ++-------------- nametable select
 *   +++----------------- fine Y scroll
 *
 * @param render true to draw the tile once its two pattern planes are fetched
 */
void ppu_fetch_tile(bool render) {
    byte step = (ppu_state.cycle - 1) % 8;

    switch (step) {
        case 1: {
            address nametable_addr = 0x2000 | (ppu_state.ppu_address & 0xfff);
            ppu_state.fetch.tile_id = ppu_read(nametable_addr);
            break;
        }
        case 3: {
            // The attribute table sits at the end of each nametable and holds
            // 64 bytes, each one assigning palettes to a 4x4 tiles area.
            byte coarse_x = ppu_state.ppu_address & 0x1f;
            byte coarse_y = (ppu_state.ppu_address & 0x3e0) >> 5;
            byte attr_col = (coarse_x >> 2) & 0x7;
            byte attr_row = (coarse_y >> 2) & 0x7;

            // 0x23c0 is the base address of the first attribute table
            address attr_addr = 0x23c0
                                | (ppu_state.ppu_address & 0x0c00)
                                | (attr_row << 3)
                                | attr_col;
            ppu_state.fetch.attribute_table = ppu_read(attr_addr);
            break;
        }
        case 5:
            ppu_state.fetch.pattern_table_tile_low = ppu_get_pattern(ppu_state.fetch.tile_id, 0);
            break;
        case 7:
            ppu_state.fetch.pattern_table_tile_high = ppu_get_pattern(ppu_state.fetch.tile_id, 1);
            if (render) {
                ppu_draw_tile();
            }

            if ((ppu_state.ppu_address & 0x1f) != 0x1f) {
                ppu_state.ppu_address++;
            } else {
                // Last column: wrap coarse X to 0 and flip the horizontal nametable bit
                ppu_state.ppu_address &= ~0x1f;
                ppu_state.ppu_address ^= 0x0400;
            }
            break;
        default:
            // Even steps do not fetch anything
            break;
    }
}
/**
 * Handles one cycle of a visible scanline. Nothing happens while background
 * rendering is disabled in the mask register.
 *
 * @param cycle current cycle within the scanline
 */
void ppu_visible_frame(unsigned int cycle) {
    bool background_enabled = ppu_read_flag(PPU_REGISTER_MASK, PPU_MASK_SHOW_BG);
    if (!background_enabled || cycle == 0) {
        // Background rendering is off, or idle cycle
        return;
    }

    if (cycle <= 256) {
        ppu_fetch_tile(true);
        if (cycle != 256) {
            return;
        }

        // End of the visible part of the line: move the PPU address one pixel row down
        if ((ppu_state.ppu_address & 0x7000) != 0x7000) {
            // Still inside the same tile row: increment fine Y
            ppu_state.ppu_address += 0x1000;
            return;
        }

        ppu_state.ppu_address &= ~0x7000;
        if ((ppu_state.ppu_address & 0x3e0) != 0x3a0) {
            // Next tile row
            ppu_state.ppu_address += 0x20;
        } else {
            // Coarse Y was on row 29: wrap to row 0 and flip the vertical nametable bit
            ppu_state.ppu_address &= ~0x3e0;
            ppu_state.ppu_address ^= 0x0800;
        }
        return;
    }

    if (cycle <= 320) {
        // OAMADDR is cleared on sprite loading for pre-render and visible lines
        ppu_write_reg(PPU_REGISTER_OAM_ADDR, 0);
        if (cycle == 257) {
            // Reload coarse X and the horizontal nametable bit (mask 0x041f) from the temporary address
            ppu_state.ppu_address = (ppu_state.ppu_address & 0xfbe0) | (ppu_state.temp_ppu_addr & ~0xfbe0);
            ppu_state.x_scroll = 0;
        }
        return;
    }

    if (cycle <= 328) {
        // Fetch a tile for the next scanline without drawing it
        ppu_fetch_tile(false);
    }
}
/**
 * Handles one cycle of the pre-render scanline.
 *
 * @param x current cycle within the scanline
 */
void ppu_pre_render(unsigned int x) {
    bool vblank_end = x == 1;
    bool sprite_loading = x >= 257 && x <= 320;

    if (vblank_end) {
        // VBlank clear
        ppu_status_set(PPU_STATUS_VBLANK, false);
    }

    if (sprite_loading) {
        // OAMADDR is cleared on sprite loading for pre-render and visible lines
        ppu_write_reg(PPU_REGISTER_OAM_ADDR, 0);
    }
}
/**
 * Handles one cycle of the post-render scanlines: raises the VBlank flag, and
 * possibly the VBlank NMI, on cycle 1 of scanline 241.
 *
 * @param x current cycle within the scanline
 * @param y current scanline
 */
void ppu_post_render(unsigned int x, unsigned int y) {
    if (x != 1 || y != 241) {
        return;
    }

    // VBlank start
    ppu_status_set(PPU_STATUS_VBLANK, true);
    ppu_trigger_vbl_nmi();
}
// Total number of PPU cycles executed since start-up.
int cycles = 0;

/**
 * Advances the PPU by one cycle: runs the handler matching the current
 * scanline, then moves the cycle/scanline/frame counters forward.
 *
 * A scanline spans cycles 0..PPU_CYCLE_MAX inclusive (341 cycles) and a frame
 * spans scanlines 0..PPU_SCANLINE_MAX inclusive (262 scanlines).
 */
void ppu_cycle() {
    if (ppu_state.scanline < PPU_SCANLINE_VISIBLE_MAX) {
        ppu_visible_frame(ppu_state.cycle);
    } else if (ppu_state.scanline >= PPU_SCANLINE_POST_RENDER_MIN &&
               ppu_state.scanline <= PPU_SCANLINE_POST_RENDER_MAX) {
        ppu_post_render(ppu_state.cycle, ppu_state.scanline);
    } else if (ppu_state.scanline == PPU_SCANLINE_PRE_RENDER) {
        ppu_pre_render(ppu_state.cycle);
        // The rendering address is reloaded from the temporary address on every
        // cycle of the pre-render line.
        // NOTE(review): this also happens while rendering is disabled - confirm this is intended.
        ppu_state.ppu_address = ppu_state.temp_ppu_addr;
    }

    ppu_state.cycle++;
    // PPU_CYCLE_MAX is the last valid cycle of a scanline, exactly like
    // PPU_SCANLINE_MAX is the last valid scanline of a frame, so the counter
    // only wraps once it goes past it.
    if (ppu_state.cycle > PPU_CYCLE_MAX) {
        ppu_state.cycle = 0;
        ppu_state.scanline++;
    }

    if (ppu_state.scanline > PPU_SCANLINE_MAX) {
        ppu_state.scanline = 0;
        ppu_state.frame++;
        ppu_state.odd_frame = !ppu_state.odd_frame;
    }

    cycles++;
}
/**
 * Writes a byte into the PPU address space.
 *
 * - 0x0000-0x1fff (pattern tables): not supported, the write is ignored.
 * - 0x2000-0x2fff (nametables): stored in one of the two nametables, selected
 *   according to the mirroring flag of the loaded ROM.
 * - 0x3000-0x3eff: mirror of 0x2000-0x2eff.
 * - 0x3f00 and above (palette): stored in the palette table. The first entry of
 *   each sprite palette (0x3f10/0x3f14/0x3f18/0x3f1c) shares its storage with
 *   the first entry of the matching background palette
 *   (0x3f00/0x3f04/0x3f08/0x3f0c), so both entries are updated together.
 *
 * @param addr address to write to, must be lower than PPU_VRAM_SIZE
 * @param data byte to store
 */
void ppu_write(address addr, byte data) {
    assert(addr < PPU_VRAM_SIZE);

    if (addr < 0x2000) {
        // TODO Unsupported ?
        return;
    }

    if (addr >= 0x3f00) {
        address palette_index = (addr - 0x3f00) % PALETTE_TABLE_BYTES_SIZE;
        ppu_state.memory.palette[palette_index] = data;

        if ((palette_index & 0x3) == 0) {
            // Keep the aliased entry in sync. The modulo keeps the index inside
            // the table whatever its size is.
            address alias_index = (palette_index ^ 0x10) % PALETTE_TABLE_BYTES_SIZE;
            ppu_state.memory.palette[alias_index] = data;
        }
        return;
    }

    if (addr >= 0x3000) {
        // 0x3000-0x3eff mirrors 0x2000-0x2eff
        addr -= 0x1000;
    }

    address relative_addr = addr & 0x3ff;
    byte *nametable;
    switch ((addr >> 10) & 0x3) {
        case 0: // 0x2000-0x23ff
            nametable = ppu_state.memory.nametable_0;
            break;
        case 1: // 0x2400-0x27ff
            nametable = rom_get()->nametable_mirrored ? ppu_state.memory.nametable_1
                                                      : ppu_state.memory.nametable_0;
            break;
        case 2: // 0x2800-0x2bff
            nametable = rom_get()->nametable_mirrored ? ppu_state.memory.nametable_0
                                                      : ppu_state.memory.nametable_1;
            break;
        default: // 0x2c00-0x2fff
            nametable = ppu_state.memory.nametable_1;
            break;
    }

    nametable[relative_addr] = data;
}
/**
 * Reads a byte from the PPU address space.
 *
 * - 0x0000-0x1fff (pattern tables): read through the current mapper.
 * - 0x2000-0x2fff (nametables): read from one of the two nametables, selected
 *   according to the mirroring flag of the loaded ROM.
 * - 0x3000-0x3eff: mirror of 0x2000-0x2eff.
 * - 0x3f00 and above: read from the palette table.
 *
 * @param addr address to read from, must be lower than PPU_VRAM_SIZE
 * @return the byte stored at the address
 */
byte ppu_read(address addr) {
    assert(addr < PPU_VRAM_SIZE);

    if (addr < 0x2000) {
        return *system_get_mapper()->ppu_read(addr);
    }

    if (addr >= 0x3f00) {
        address palette_index = (addr - 0x3f00) % PALETTE_TABLE_BYTES_SIZE;
        return ppu_state.memory.palette[palette_index];
    }

    if (addr >= 0x3000) {
        // 0x3000-0x3eff mirrors 0x2000-0x2eff
        addr -= 0x1000;
    }

    address relative_addr = addr & 0x3ff;
    byte *nametable;
    switch ((addr >> 10) & 0x3) {
        case 0: // 0x2000-0x23ff
            nametable = ppu_state.memory.nametable_0;
            break;
        case 1: // 0x2400-0x27ff
            nametable = rom_get()->nametable_mirrored ? ppu_state.memory.nametable_1
                                                      : ppu_state.memory.nametable_0;
            break;
        case 2: // 0x2800-0x2bff
            nametable = rom_get()->nametable_mirrored ? ppu_state.memory.nametable_0
                                                      : ppu_state.memory.nametable_1;
            break;
        default: // 0x2c00-0x2fff
            nametable = ppu_state.memory.nametable_1;
            break;
    }

    return nametable[relative_addr];
}
/**
 * Tests bits of a PPU register without triggering any read side effect.
 *
 * @param reg  index of the register
 * @param mask bits to test
 * @return true when at least one of the masked bits is set
 */
bool ppu_read_flag(size_t reg, byte mask) {
    return (ppu_state.registers[reg] & mask) != 0;
}
/*
* d8888b. d88888b d888b d888888b .d8888. d888888b d88888b d8888b. .d8888.
* 88 `8D 88' 88' Y8b `88' 88' YP `~~88~~' 88' 88 `8D 88' YP
* 88oobY' 88ooooo 88 88 `8bo. 88 88ooooo 88oobY' `8bo.
* 88`8b 88~~~~~ 88 ooo 88 `Y8b. 88 88~~~~~ 88`8b `Y8b.
* 88 `88. 88. 88. ~8~ .88. db 8D 88 88. 88 `88. db 8D
* 88 YD Y88888P Y888P Y888888P `8888Y' YP Y88888P 88 YD `8888Y'
*/
/**
 * Compares one bit of a value about to be written with the same bit of the
 * value currently held by a register.
 *
 * @param reg     index of the register
 * @param data    value about to be written
 * @param bit     position of the bit to compare (0 is the least significant)
 * @param new_val receives the value (0 or 1) of the bit in data
 * @return true when the bit differs between the register and data
 */
static inline bool ppu_reg_bit_changed(byte reg, byte data, byte bit, byte *new_val) {
    byte current_bit = (ppu_state.registers[reg] >> bit) & 1;
    byte incoming_bit = (data >> bit) & 1;

    *new_val = incoming_bit;
    return incoming_bit != current_bit;
}
/**
 * Handles a write to the control register: logs every bit that changes,
 * updates the state derived from the register (nametable select bits of the
 * temporary address, background pattern table address, VRAM address increment)
 * and raises an NMI when NMI generation gets enabled while the VBlank flag is
 * still set.
 *
 * The register itself still holds its previous value while this function runs.
 *
 * @param data value written to the control register
 */
void ppu_write_ctrl(byte data) {
    // Logging
    log_debug("PPU Ctrl - %#02x", data);
    byte new_ctrl;
    for (int i = 0; i < 8; i++) {
        if (!ppu_reg_bit_changed(PPU_REGISTER_CTRL, data, i, &new_ctrl)) {
            continue;
        }

        switch (i) {
            case 0:
            case 1:
                // The two low bits select one of the four 0x400 bytes nametables.
                // The mask must be applied before the multiplication.
                log_debug("PPU Ctrl - Base tile_id address = %#04x", 0x2000 + 0x400 * (data & 3));
                break;
            case 2:
                log_debug("PPU Ctrl - VRAM address increment = %d", new_ctrl ? 32 : 1);
                break;
            case 3:
                log_debug("PPU Ctrl - Sprite pattern table address = %#04x", new_ctrl ? 0x1000 : 0);
                break;
            case 4:
                log_debug("PPU Ctrl - Background pattern table address = %#04x", new_ctrl ? 0x1000 : 0);
                break;
            case 5:
                log_debug("PPU Ctrl - Sprite size = %s", new_ctrl ? "8x16" : "8x8");
                break;
            case 6:
                log_debug("PPU Ctrl - PPU master/slave select = %s", new_ctrl ? "output color" : "backdrop");
                break;
            case 7:
                log_debug("PPU Ctrl - Generate NMI at VBlanks = %s", new_ctrl ? "yes" : "no");
                break;
            default:
                assert(false);
        }
    }

    // The nametable select bits go to bits 10-11 of the temporary address
    ppu_state.temp_ppu_addr = (ppu_state.temp_ppu_addr & 0xf3ff) | ((data & PPU_CTRL_BASE_NAMETABLE_ADDR) << 10);
    ppu_state.bg_pattern_table_addr = (data & PPU_CTRL_BG_PATTERN_TABLE_ADDR) << 0x8; // 0x0000 or 0x1000
    ppu_state.ppu_addr_increment = (data & PPU_CTRL_VRAM_ADDR_INCREMENT) ? 0x20 : 1;

    if (ppu_read_flag(PPU_REGISTER_STATUS, PPU_STATUS_VBLANK) &&
        !ppu_read_flag(PPU_REGISTER_CTRL, PPU_CTRL_GEN_VBLANK_NMI) &&
        (data & PPU_CTRL_GEN_VBLANK_NMI)) {
        // The VBlank flag is still set, and the GEN_VBLANK_NMI was set from 0 to 1
        cpu_trigger_nmi();
    }
}
/**
 * Handles a write to the mask register. The write has no side effect besides
 * logging every bit that changes.
 *
 * The register itself still holds its previous value while this function runs.
 *
 * @param data value written to the mask register
 */
void ppu_write_mask(byte data) {
    // Logging
    byte new_mask;
    for (int i = 0; i < 8; i++) {
        if (!ppu_reg_bit_changed(PPU_REGISTER_MASK, data, i, &new_mask)) {
            continue;
        }

        switch (i) {
            case 0:
                // Bit 0 set means greyscale. The argument is a string, so it needs %s.
                log_debug("PPU Mask - Greyscale = %s", new_mask ? "greyscale" : "normal");
                break;
            case 1:
                log_debug("PPU Mask - Render background in first vertical tile = %s", new_mask ? "yes" : "no");
                break;
            case 2:
                log_debug("PPU Mask - Render sprites in first vertical tile = %s", new_mask ? "yes" : "no");
                break;
            case 3:
                log_debug("PPU Mask - Render background = %s", new_mask ? "yes" : "no");
                break;
            case 4:
                log_debug("PPU Mask - Render sprites = %s", new_mask ? "yes" : "no");
                break;
            case 5:
                log_debug("PPU Mask - Emphasize red = %s", new_mask ? "yes" : "no");
                break;
            case 6:
                log_debug("PPU Mask - Emphasize green = %s", new_mask ? "yes" : "no");
                break;
            case 7:
                log_debug("PPU Mask - Emphasize blue = %s", new_mask ? "yes" : "no");
                break;
            default:
                assert(false);
        }
    }
}
/**
 * Handles a write to the scroll register. Writes alternate between the
 * horizontal scroll (first write) and the vertical scroll (second write),
 * tracked by the shared write toggle.
 *
 * @param data scroll value in pixels
 */
void ppu_write_scroll(byte data) {
    ppu_state.w = !ppu_state.w;

    // TODO: Understand and fix with a game using scrolling
    if (!ppu_state.w) {
        // Second write: coarse Y goes to bits 5-9 and fine Y to bits 12-14 of
        // the temporary address
        ppu_state.temp_ppu_addr = (ppu_state.temp_ppu_addr & 0xc1f)
                                  | ((data & 0xf8) << 2)
                                  | ((data & 0x7) << 12);
        ppu_state.y_scroll = data;
        return;
    }

    // First write: coarse X goes to bits 0-4 of the temporary address, fine X is
    // kept apart
    ppu_state.temp_ppu_addr = (ppu_state.temp_ppu_addr & 0xffe0) | (data >> 3);
    ppu_state.fine_x_scroll = data & 0x7;
}
/**
 * Handles a write to the address register. The first write sets the high byte
 * (limited to 6 bits) of the temporary address, the second write sets its low
 * byte and copies the result to the current PPU address.
 *
 * @param data half of the address
 */
void ppu_write_addr(byte data) {
    ppu_state.w = !ppu_state.w;

    if (!ppu_state.w) {
        // Second write: low byte, then the address becomes effective
        ppu_state.temp_ppu_addr = (ppu_state.temp_ppu_addr & 0xff00) | data;
        ppu_state.ppu_address = ppu_state.temp_ppu_addr;
        return;
    }

    // First write: high byte
    ppu_state.temp_ppu_addr = (ppu_state.temp_ppu_addr & 0x00ff) | (data & 0x3f) << 8;
}
/**
 * Handles a write to the data register: stores the byte at the current PPU
 * address, then moves the address forward by the configured increment.
 *
 * The PPU address can hold values past the end of the PPU address space (it is
 * incremented without bound and also carries scroll bits), so it is wrapped
 * into the valid range before being handed to ppu_write().
 *
 * @param data byte to store
 */
void ppu_write_data(byte data) {
    address current = ppu_state.ppu_address;

    ppu_write(current % (PPU_VRAM_SIZE), data);
    ppu_state.ppu_address = current + ppu_state.ppu_addr_increment;
}
/**
 * Handles a write to the OAM data register by moving the OAM address to the
 * next byte.
 *
 * NOTE(review): data is not stored anywhere by this function - presumably OAM
 * storage is not implemented yet, confirm.
 *
 * @param data value written to the register (currently unused)
 */
void ppu_write_oamdata(byte data) {
    (void) data;

    byte next_oam_addr = ppu_state.registers[PPU_REGISTER_OAM_ADDR] + 1;
    ppu_write_reg(PPU_REGISTER_OAM_ADDR, next_oam_addr);
}
/**
 * Stores the given value in the OAM DMA register of the PPU state.
 *
 * NOTE(review): the function is named after OAMADDR but writes to
 * oam_dma_register - confirm which register is meant.
 *
 * @param data value to store
 */
void ppu_write_oamaddr(byte data) {
    PPU *state = ppu_get_state();
    state->oam_dma_register = data;
}
/**
 * Writes to a PPU register: runs the side effects attached to the register, if
 * any, then stores the value in the register.
 *
 * The side effects run before the store, so the handlers still see the
 * previous value of the register.
 *
 * @param reg  index of the register, must be lower than PPU_REGISTER_SIZE
 * @param data value to write
 */
void ppu_write_reg(byte reg, byte data) {
    assert(reg >= 0);
    // reg is used as an index, so PPU_REGISTER_SIZE itself is out of range.
    // NOTE(review): assumes the registers array holds PPU_REGISTER_SIZE entries - confirm.
    assert(reg < PPU_REGISTER_SIZE);

    switch (reg) {
        case PPU_REGISTER_CTRL:
            ppu_write_ctrl(data);
            break;
        case PPU_REGISTER_MASK:
            ppu_write_mask(data);
            break;
        case PPU_REGISTER_SCROLL:
            ppu_write_scroll(data);
            break;
        case PPU_REGISTER_ADDR:
            ppu_write_addr(data);
            break;
        case PPU_REGISTER_DATA:
            ppu_write_data(data);
            break;
        case PPU_REGISTER_OAM_DATA:
            ppu_write_oamdata(data);
            break;
        default:
            // No side effect for the other registers
            break;
    }

    ppu_state.registers[reg] = data;
}
/**
 * Handles a read of the status register: the VBlank flag is cleared and the
 * write toggle shared by the scroll and address registers is reset.
 *
 * @return the status as it was before the VBlank flag got cleared
 */
byte ppu_read_status() {
    byte snapshot = ppu_state.registers[PPU_REGISTER_STATUS];

    ppu_state.registers[PPU_REGISTER_STATUS] = snapshot & ~PPU_STATUS_VBLANK;
    ppu_state.w = false;

    return snapshot;
}
byte ppu_read_data() {
// Access to VRAM memory is slow, so reading it a first time generally return the memory at the previous address.
// So we get the data first, then update the register.
byte data = ppu_state.registers[PPU_REGISTER_DATA];
ppu_state.registers[PPU_REGISTER_DATA] = ppu_read(ppu_state.ppu_address);
if (ppu_state.ppu_address > 0x3eff) {
// But the palette data is returned immediately
data = ppu_state.registers[PPU_REGISTER_DATA];
}
ppu_state.ppu_address = ppu_state.ppu_address + ppu_state.ppu_addr_increment;
return data;
}
/**
 * Reads a PPU register. Reads of the status and data registers go through
 * their dedicated handlers, which have side effects; the other registers are
 * returned as stored.
 *
 * @param reg index of the register, must be lower than PPU_REGISTER_SIZE
 * @return the value read
 */
byte ppu_read_reg(byte reg) {
    assert(reg >= 0);
    // reg is used as an index, so PPU_REGISTER_SIZE itself is out of range.
    // NOTE(review): assumes the registers array holds PPU_REGISTER_SIZE entries - confirm.
    assert(reg < PPU_REGISTER_SIZE);

    switch (reg) {
        case PPU_REGISTER_STATUS:
            return ppu_read_status();
        case PPU_REGISTER_DATA:
            return ppu_read_data();
        default:
            return ppu_state.registers[reg];
    }
}