@ vim: set ft=armv4 noet:

@ Address of the display control register; held in r6 by the startup code
@ and used as the target of the video-mode store.
#define DISPCNT	0x04000000
@ Base address of the frame buffer; held in r7 as the running write pointer.
#define VRAMBG0	0x06000000
@ LCD width and height in pixels.
#define LCD_W	240
#define LCD_H	160
@ Pack an 8-bit-per-channel colour into 15 bits by keeping the top 5 bits of
@ each channel: red in bits 0-4, green in bits 5-9, blue in bits 10-14.
#define RGB888(r,g,b) ( ((((r)>>3)&0x1f)<<0) | ((((g)>>3)&0x1f)<<5) | ((((b)>>3)&0x1f)<<10) )

.arch armv4
.cpu  arm7tdmi

.section .text.crt0,"x"

@ Image entry point, assembled as ARM code.  The very first word is a branch
@ that skips the 156-byte logo area and continues at .Lrom_hdr_mid, where
@ the startup code is stored inside the remaining header fields.
.arm
.globl _start
_start:
	b .Lrom_hdr_mid @ turns out this one *NEEDS* to be a branch >__>

@ Start of the header data that follows the 4-byte entry branch.
.globl rom_hdr
rom_hdr:
	@ 156 one-byte zero placeholders reserved for the logo.
	@ NOTE(review): the marker on the next line looks like a hook for an
	@ external tool that patches in the real logo bytes -- confirm against
	@ the build scripts before changing it.
	@!logo
	.fill 156,1,0

@ First byte after the logo area (4 + 156 = 160 bytes from _start).
.globl logo_end
logo_end:

	@ title, game code
	@.fill 16,1,0
	@ The 16 bytes named above are occupied by the four 4-byte ARM
	@ instructions below instead of by data, so this block must stay
	@ exactly 16 bytes long.
.Lrom_hdr_mid:
	@ load huge constants here -- avoids putting stuff in the constant pool
	@ (done while still in ARM state, before the switch to Thumb below)
	mov r6, #DISPCNT
	mov r7, #VRAMBG0
	@ change to thumb, costs 8 bytes
	@ In ARM state pc reads as this instruction + 8, which is the address
	@ just past the bx; adding 1 sets bit 0 so that bx enters Thumb state
	@ at that address.
	add r0, pc, #1
	bx r0
@ Everything from here on is assembled as 16-bit Thumb code; the preceding
@ bx lands on the first instruction below.
.thumb

	@ maker code
	@.byte 0,0
	@ The 2-byte maker-code slot holds a single Thumb branch that hops over
	@ the two fixed data bytes that follow.
	b .Lrom_hdr_mid2

	@!magic @ can be used to store address for Channel 3 Wave Pattern RAM
.globl rom_magic
rom_magic:
	@ Fixed header byte; must remain data, hence the branch around it.
	.byte 0x96
	@ main unit type
	.byte 0

	@.byte 0,0x04  @ aaaa

	@ device type
	@.byte 0
	@ reserved
	@.fill 7,1,0
@ Thumb continuation stored in the header bytes that the commented-out
@ device-type and reserved entries above would otherwise fill: 1 + 7 = 8
@ bytes, i.e. exactly four 16-bit Thumb instructions.  Do not add or remove
@ instructions here without re-checking the header layout.
@
@ In:  r6 = DISPCNT address (set up by the ARM code earlier)
@ Out: r1 = colour pair, first band colour in the low halfword and second
@           band colour in the high halfword; control passes to rom_hdr_end
.Lrom_hdr_mid2:
	@ Thumb only has a word-sized pc-relative load, so this fetches 32 bits:
	@ the low halfword is the video mode, the high halfword is whatever
	@ follows videomode in memory (the first opcode of rom_hdr_end).
	ldr r1, videomode
	@ Store only the low halfword.  A word store would also write the stray
	@ upper half into the next 16-bit I/O register at DISPCNT + 2, making
	@ the result depend on the encoding of an unrelated instruction.
	@ strh has the same 2-byte size, so the header layout is unchanged.
	strh r1, [r6]
	@ Both band colours in one literal; the literal itself is emitted at the
	@ .pool directive at the end of the code.
	ldr r1, =( (RGB888(214,  2, 112) << 0) | (RGB888(  0, 56, 168) << 16) )
	b rom_hdr_end

	@ sw version, checksum
	@ NOTE(review): both are left at zero here; presumably the checksum is
	@ patched by an external tool -- confirm against the build scripts.
	.byte 0, 0x00

	@ reserved
	.byte 0,0

@.arm
@.globl mb$_start
@mb$_start:
@	b .Lrom_hdr_mid
@
@	@ multiboot stuff: boot method, slave number
@	.byte 0 @ set by bios, 0->cart 3->mb
@	.byte 0 @ set by bios, 0-3
@
@	.align 4

@.thumb
@ const stuff needs to be word-aligned, even though it is only a halfword:
@ it is read with a Thumb pc-relative ldr, which always loads a whole word
@ from a word-aligned address.  There is no alignment directive here, so the
@ alignment relies on the size of everything emitted before this label.
@ The upper half of the loaded word is the first opcode of rom_hdr_end.
videomode:
	.short 0x0403  @ mode 3: only bg2, 32k direct color

@ First code after the header: paint the first colour band.
@
@ In:  r1 = colour pair (low halfword = first band colour,
@           high halfword = second band colour)
@      r7 = frame-buffer write pointer
@ Out: r0 = 0
@      r2 = 0x5a00, the band length in pixels (reused by the next fill)
@      r4 = second band colour in its low halfword
@      r7 = advanced by 0x5a00 halfwords
.globl rom_hdr_end
.thumb_func
rom_hdr_end:
	@ split the second colour out of the packed pair
	mov r4, r1
	lsr r4, r4, #16

	@ 0x5a00 = 0x5a << 8; built by shifting because a Thumb mov immediate
	@ is limited to 8 bits
	mov r2, #0x5a
	lsl r2, r2, #8
	mov r0, r2

	@ halfword fill: strh writes only the low 16 bits of r1
.Lfill_first:
	strh r1, [r7, #0]
	add  r7, r7, #2
	sub  r0, r0, #1
	bne  .Lfill_first

	@ Second band: another 0x5a00 halfwords, this time of the colour held
	@ in r4, written from wherever r7 currently points (directly after the
	@ first band).  r2 still holds the pixel count saved by the first fill.
	@ NOTE(review): 0x5a00 pixels is 96 rows of 240, so the two bands cover
	@ 192 rows in total, more than LCD_H; the surplus rows are presumably
	@ off-screen but still inside video memory -- confirm.
	mov r0, r2
.Lfill_second:
	strh r4, [r7, #0]
	add r7, r7, #2
	sub r0, r0, #1
	bne .Lfill_second

	@ Rewind the write pointer by 0x7800 pixels (0x78 << 9 = 0xf000 bytes).
	@ After the two 0x5a00-pixel fills this leaves r7 0x3c00 pixels, i.e.
	@ 64 rows of 240, past the start of the frame buffer.
	mov r0, #0x78
	lsl r0, r0, #9
	sub r7, r7, r0

	@ Checkerboard the next 32 rows: wherever (column counter XOR row
	@ counter) is odd, overwrite the pixel with the second colour from r4;
	@ every other pixel keeps the colour already painted there.
	mov r6, r7		@ r6 = start of the band, kept for the hang loop
	mov r5, #1		@ r5 = parity mask
	mov r0, #32		@ r0 = rows left
.thumb_func
loopy:
	mov r2, #LCD_W		@ r2 = columns left in this row

.Lnext_column:
	mov r3, r2
	eor r3, r3, r0
	tst r3, r5
	beq .Lkeep_pixel	@ even parity: leave the pixel untouched

	strh r4, [r7, #0]
.Lkeep_pixel:
	add r7, r7, #2
	sub r2, r2, #1
	bne .Lnext_column

	sub r0, r0, #1
	bne loopy

	@ r4 holds the second colour in its low halfword, so after this XOR the
	@ low halfword of r1 is (first colour XOR second colour).  XORing a
	@ pixel of either colour with that mask turns it into the other one.
	eor r1, r1, r4

@ Endless loop: every pass swaps the two colours across the checkerboard
@ band that starts at r6, then starts over.  Never returns.
.thumb_func
hang:
	mov r7, r6		@ back to the first pixel of the band
	mov r2, #15
	lsl r2, r2, #9		@ 15 << 9 = 7680 pixels = 32 rows of 240

.Lflip_pixel:
	ldrh r3, [r7, #0]
	eor  r3, r3, r1
	strh r3, [r7, #0]	@ only the low halfword of r3 is written back

	add r7, r7, #2
	sub r2, r2, #1
	bne .Lflip_pixel

	b hang

@ Literal pool: receives the 32-bit colour-pair constant requested by the
@ ldr r1, = pseudo-instruction in the header code.
.pool

