From 77468702bc5bc54dd21c4bcd976fd3d4e0927cc1 Mon Sep 17 00:00:00 2001 From: Forest Belton <65484+forestbelton@users.noreply.github.com> Date: Sun, 11 Jul 2021 17:50:06 -0400 Subject: [PATCH] Enqueue map row updates instead of writing immediately --- inc/util.inc | 1 + src/main.s | 20 +++++ src/map.s | 211 +++++++++++++++++++++------------------------------ 3 files changed, 109 insertions(+), 123 deletions(-) diff --git a/inc/util.inc b/inc/util.inc index 63b07a5..4eebde5 100644 --- a/inc/util.inc +++ b/inc/util.inc @@ -33,6 +33,7 @@ ENDM ; Adds A to a 16-bit register ; \1 Destination register +; 5 * 4 = 20 cycles MACRO ADD16 add LOW(\1) ld LOW(\1), a diff --git a/src/main.s b/src/main.s index dd5019b..6930237 100644 --- a/src/main.s +++ b/src/main.s @@ -1,5 +1,6 @@ INCLUDE "hardware.inc" INCLUDE "input.inc" +INCLUDE "util.inc" SECTION "Header", ROM0[$100] @@ -56,6 +57,11 @@ start: call Keys_Update + ; Clear pending row state + xor a + ld [PENDING_ROW_PTR], a + ld [PENDING_ROW_PTR + 1], a + ; TODO: remove once scrolling is implemented ; scroll map with arrow keys ld a, [keys] @@ -147,7 +153,21 @@ start: .vbl: halt + ; BC = *PENDING_ROW_PTR + ld a, [PENDING_ROW_PTR] + ld c, a + ld a, [PENDING_ROW_PTR + 1] + ld b, a + + ; Write pending row if enqueued + or c + jr z, .dma_oam + ld hl, PENDING_ROW_DATA + ld d, SCRN_VX_B + MEMCPY bc, hl, d + ; ~160 cycles +.dma_oam: ld a, HIGH(_OAM) call DMA_Start diff --git a/src/map.s b/src/map.s index 859f8f0..b0aabe1 100644 --- a/src/map.s +++ b/src/map.s @@ -9,6 +9,9 @@ PAGEY:: DB LAST_SCX:: DB LAST_SCY:: DB +PENDING_ROW_PTR:: DW +PENDING_ROW_DATA:: DS SCRN_VX_B + CURRENT_DATA_START:: CURRENT_TILE_PTR:: DW CURRENT_TILE_SIZE:: DB @@ -35,24 +38,27 @@ DEF STACK_OFFSET_LEFT EQU 0 ; Increments a 16-bit value located on the stack ; \1 Stack offset +; if carry, 14 * 4 = 56 cycles +; otherwise, 8 * 4 = 32 cycles MACRO INC_STACK16 - ld hl, sp + \1 - inc [hl] - jr nz, .no_carry\@ + ld hl, sp + \1 ; 3 + inc [hl] ; 3 + jr nz, .no_carry\@ ; 3/2 
- inc hl - inc [hl] + inc hl ; 2 + inc [hl] ; 3 .no_carry\@: ENDM +; 11 * 4 = 44 cycles MACRO ZERO_ROW_TILE - ld hl, sp + STACK_OFFSET_ROW - ld a, [hl+] - ld e, a - ld d, [hl] - xor a - ld [de], a + ld hl, sp + STACK_OFFSET_ROW ; 3 + ld a, [hl+] ; 2 + ld e, a ; 1 + ld d, [hl] ; 2 + xor a ; 1 + ld [de], a ; 2 ENDM ; Loads a map @@ -114,6 +120,7 @@ Map_Scroll:: ; If SCY = PAGEY, write map row ; map coords = CURRENT_CAMERA_X, CURRENT_CAMERA_Y - 2 ; HL = _SCRN0 + 32 * (32 - page_y/8 - 2) + ; TODO: WTF does this shit even do... ld hl, PAGEY ld a, [rSCY] cp [hl] @@ -164,8 +171,9 @@ Map_Scroll:: jr .loop .write_up: - halt - call write_map_row + ; halt + ; call write_map_row + call enqueue_row_write ld a, [PAGEY] sub 8 @@ -189,155 +197,112 @@ Map_Scroll:: ret -; Write a row of map data into map RAM +; Write a row of map data into row buffer ; @param b Map X coordinate (signed) ; @param c Map Y coordinate (signed) -; @param hl Start of the row to write -write_map_row: - push bc - push de - - ; If Y < 0, write a row of 0s +; @param hl Where to write the row in map VRAM +enqueue_row_write: + ; PENDING_ROW_PTR = HL + ld a, l + ld [PENDING_ROW_PTR], a + ld a, h + ld [PENDING_ROW_PTR + 1], a + + ; If Y < 0, write 0s bit 7, c jr nz, .zero_row - ; If Y >= MAP_HEIGHT, write a row of 0s + ; If Y >= MAP_HEIGHT, write 0s ld a, [CURRENT_MAP_HEIGHT] dec a cp c - jr nc, .begin_writing + jr c, .zero_row -.zero_row: - ld d, SCRN_VX_B - xor a -.zero_row_loop: - ld a, [hl+] - dec d - jr nz, .zero_row_loop -.zero_row_done: - pop de - pop bc - ret - -.begin_writing: - push hl - - ; Allocate 2 bytes for map pointer - ; Allocate 1 byte for number of tiles left - add sp, -3 - - ; left = number of tiles left to write - ld hl, sp + STACK_OFFSET_LEFT - ld [hl], SCRN_VX_B - - ; HL = CURRENT_MAP_PTR + Y * CURRENT_MAP_HEIGHT + ; HL = CURRENT_MAP_PTR ld a, [CURRENT_MAP_PTR] ld l, a ld a, [CURRENT_MAP_PTR + 1] ld h, a -.compute_map_row: + ; HL = CURRENT_MAP_PTR + Y * MAP_WIDTH + ld d, 0 + ld a, 
[CURRENT_MAP_WIDTH] + ld e, a ld a, c - or a - jr z, .compute_map_row_done - ld a, [CURRENT_MAP_HEIGHT] - ADD16 HL - dec c - jr .compute_map_row - - ; map = HL -.compute_map_row_done: - LD16 de, hl - ld hl, sp + STACK_OFFSET_MAP - ld a, e - ld [hl+], a - ld a, d - ld [hl], a +.get_map_row_ptr: + or a + jr z, .copy_map_row + add hl, de + dec a + jr .get_map_row_ptr - ld hl, sp + STACK_OFFSET_LEFT +.copy_map_row: + ; C = BYTES_LEFT + ld c, SCRN_VX_B + ld de, PENDING_ROW_DATA - ; While X < 0 and left > 0, write 0 to the row + ; Note: Can skip checking BYTES_LEFT > 0 in this loop. If there were + ; SCRN_VX_B zeros to write, then Y would be 1 greater and we would have + ; jumped into .zero_row before reaching this code .pad_left: + ; Check X < 0 bit 7, b - jr z, .copy_center - - ld a, [hl] - or a - jr z, .done - - ; *row = 0 - ZERO_ROW_TILE + jr z, .copy_middle - ; row++ - INC_STACK16 STACK_OFFSET_ROW + ; *ROW++ = 0 + ld [de], a + inc de - ; X++, left-- + ; X++, BYTES_LEFT-- inc b - ld hl, sp + STACK_OFFSET_LEFT - dec [hl] + dec c jr .pad_left - ; While X < MAP_WIDTH and [SP] > 0, copy from map -.copy_center: +.copy_middle: + ; Check X < MAP_WIDTH ld a, [CURRENT_MAP_WIDTH] dec a cp b - jr z, .pad_right + jr c, .pad_right - ld a, [hl] + ; Check BYTES_LEFT > 0 + ld a, c or a - jr z, .done + ret z - ; A = *map_ptr - ld hl, sp + STACK_OFFSET_MAP + ; *ROW++ = *MAP++ ld a, [hl+] - ld e, a - ld d, [hl] - ld a, [de] - - ; map_ptr++ - INC_STACK16 STACK_OFFSET_MAP - - ; *row = A - ld hl, sp + STACK_OFFSET_ROW - ld e, [hl] - inc hl - ld d, [hl] ld [de], a + inc de - ; row++ - INC_STACK16 3 - - ; X++, [SP]-- -.copy_center_inc: + ; X++, BYTES_LEFT-- inc b - ld hl, sp + STACK_OFFSET_LEFT - dec [hl] - jr .copy_center + dec c + jr .copy_middle - ; While [SP] > 0, write 0 to the row .pad_right: - ld a, [hl] + ; Check BYTES_LEFT > 0 + ld a, c or a - jr z, .done - - ; *row = 0 - ZERO_ROW_TILE + ret z - ; row++ - INC_STACK16 STACK_OFFSET_ROW - - ; [SP]-- -.pad_right_inc: - ld hl, sp + 
STACK_OFFSET_ROW - dec [hl] - jr .pad_right + ; *ROW++ = 0 + xor a + ld [de], a + inc de -.done: - add sp, 3 - pop hl - pop de - pop bc + ; X++, BYTES_LEFT-- + inc b + dec c + jr .pad_right ; keep padding: X is already past the last map column, so the X tests need not be repeated +.zero_row: + ld hl, PENDING_ROW_DATA + xor a + ld c, SCRN_VX_B +.zero_row_loop: + ld [hl+], a ; store the zero held in A into the row buffer, then HL++ + dec c + jr nz, .zero_row_loop ret