/
mips_stub.S
3402 lines (2697 loc) · 110 KB
/
mips_stub.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# gameplaySP
#
# Copyright (C) 2006 Exophase <exophase@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# On MIPS gas, .align takes a power-of-two exponent, so this requests
# 2^4 = 16 byte alignment for what follows.
.align 4
# Symbols made visible to the linker. Several of them are defined further
# down in the file than the handlers documented here.
.global mips_update_gba
.global mips_indirect_branch_arm
.global mips_indirect_branch_thumb
.global mips_indirect_branch_dual
.global execute_load_u8
.global execute_load_u16
.global execute_load_u32
.global execute_load_s8
.global execute_load_s16
.global execute_store_u8
.global execute_store_u16
.global execute_store_u32
.global execute_aligned_load32
.global execute_aligned_store32
.global execute_read_cpsr
.global execute_read_spsr
.global execute_swi
.global execute_spsr_restore
.global execute_store_cpsr
.global execute_store_spsr
.global execute_lsl_flags_reg
.global execute_lsr_flags_reg
.global execute_asr_flags_reg
.global execute_ror_flags_reg
.global execute_arm_translate
.global invalidate_icache_region
.global step_debug_mips
.global reg_check
.global memory_map_read
.global memory_map_write
.global reg
# Symbols expected to be provided by another object file.
# NOTE(review): `reg` is listed under both .global and .extern -- confirm
# which translation unit is meant to define it.
.extern reg
.extern spsr
# MIPS register layout:
# $0 - constant zero
# $1 - temporary
# $2 - temporary / return value
# $3 - ARM r0 (not saved)
# $4 - temporary / function argument 0
# $5 - temporary / function argument 1
# $6 - temporary / function argument 2
# $7 - ARM r1 (not saved)
# $8 - ARM r2 (not saved)
# $9 - ARM r3 (not saved)
# $10 - ARM r4 (not saved)
# $11 - ARM r5 (not saved)
# $12 - ARM r6 (not saved)
# $13 - ARM r7 (not saved)
# $14 - ARM r8 (not saved)
# $15 - ARM r9 (not saved)
# $16 - ARM machine state pointer (saved)
# $17 - cycle counter (saved)
# $18 - ARM r10 (saved)
# $19 - block start address (roughly r15) (saved)
# $20 - ARM negative register (saved)
# $21 - ARM zero register (saved)
# $22 - ARM carry register (saved)
# $23 - ARM overflow register (saved)
# $24 - ARM r11 (not saved)
# $25 - ARM r12 (not saved)
# $26 - kernel temporary 0
# $27 - kernel temporary 1
# $28 - ARM r13 (saved)
# $29 - stack pointer
# $30 - ARM r14 (saved)
# $31 - return address
# Byte offsets of the emulated ARM registers, relative to the register
# base that the code below keeps in $16 (each slot is one 32-bit word).
.equ REG_R0, (0 * 4)
.equ REG_R1, (1 * 4)
.equ REG_R2, (2 * 4)
.equ REG_R3, (3 * 4)
.equ REG_R4, (4 * 4)
.equ REG_R5, (5 * 4)
.equ REG_R6, (6 * 4)
.equ REG_R7, (7 * 4)
.equ REG_R8, (8 * 4)
.equ REG_R9, (9 * 4)
.equ REG_R10, (10 * 4)
.equ REG_R11, (11 * 4)
.equ REG_R12, (12 * 4)
.equ REG_R13, (13 * 4)
.equ REG_R14, (14 * 4)
.equ REG_PC, (15 * 4)
# REG_LR is an alias for REG_R14 (same offset).
.equ REG_LR, (14 * 4)
# Individual condition flags and the packed CPSR word.
.equ REG_N_FLAG, (16 * 4)
.equ REG_Z_FLAG, (17 * 4)
.equ REG_C_FLAG, (18 * 4)
.equ REG_V_FLAG, (19 * 4)
.equ REG_CPSR, (20 * 4)
# Scratch slots; the handlers below use them to spill $ra, $6 and
# address offsets around calls into other code.
.equ REG_SAVE, (21 * 4)
.equ REG_SAVE2, (22 * 4)
.equ REG_SAVE3, (23 * 4)
# Further state words in the same block.
.equ CPU_MODE, (29 * 4)
.equ CPU_HALT_STATE, (30 * 4)
# Cleared before update_gba is called and tested afterwards
# (see mips_update_gba).
.equ CHANGED_PC_STATUS, (31 * 4)
# Slot that save_registers reloads into $28 after $28 (ARM r13) has been
# stored.
.equ GP_SAVE, (32 * 4)
# Absolute addresses computed from the external symbols reg_mode and spsr.
# NOTE(review): presumably index 3 selects the supervisor bank and each
# reg_mode bank holds 7 words -- confirm against the C definitions.
.equ SUPERVISOR_LR, (reg_mode + (3 * (7 * 4)) + (6 * 4))
.equ SUPERVISOR_SPSR, (spsr + (3 * 4))
# noat: the assembler may not use $1 ($at) behind our back; this file uses
# $1 explicitly as a temporary.
.set noat
# noreorder: the assembler may not reorder instructions or fill delay
# slots; the instruction written after every branch/jump IS its delay slot.
.set noreorder
# All of the flag/register macros below reach the emulated CPU state
# through $16, so $16 must hold the register base when they are used.

# collapse_flag flag_reg, shift
#   Copies bit 0 of MIPS register $<flag_reg> into bit <shift> of $2.
.macro collapse_flag flag_reg, shift
ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
.endm
# collapse_flags
#   Rebuilds the stored CPSR word from the flag registers: keeps only the
#   low 8 bits of REG_CPSR, inserts bit 0 of $20/$21/$22/$23 at bits
#   31/30/29/28, and stores the result back to REG_CPSR.
#   Clobbers: $2.
.macro collapse_flags
lw $2, REG_CPSR($16) # load CPSR
andi $2, $2, 0xFF # isolate lower 8bits
collapse_flag 20, 31 # store flags
collapse_flag 21, 30
collapse_flag 22, 29
collapse_flag 23, 28
sw $2, REG_CPSR($16) # store CPSR
.endm
# extract_flag shift, flag_reg
#   Sets MIPS register $<flag_reg> to bit <shift> of $1 (result is 0 or 1).
.macro extract_flag shift, flag_reg
ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
.endm
# extract_flags_body
#   Unpacks bits 31/30/29/28 of the CPSR value held in $1 into
#   $20/$21/$22/$23 respectively. $1 is left unchanged.
.macro extract_flags_body # extract flags from $1
extract_flag 31, 20 # load flags
extract_flag 30, 21
extract_flag 29, 22
extract_flag 28, 23
.endm
# extract_flags
#   Loads REG_CPSR into $1 and unpacks it with extract_flags_body.
#   Clobbers: $1, $20-$23.
.macro extract_flags
lw $1, REG_CPSR($16) # load CPSR
extract_flags_body
.endm
# save_registers
#   Writes every MIPS register that caches an ARM register (r0-r14) back to
#   its slot in the register block addressed by $16, then reloads $28 from
#   GP_SAVE. The $28 reload has to come after the store of $28 (ARM r13).
.macro save_registers
  sw $3,  REG_R0($16)             # r0
  sw $7,  REG_R1($16)             # r1
  sw $8,  REG_R2($16)             # r2
  sw $9,  REG_R3($16)             # r3
  sw $10, REG_R4($16)             # r4
  sw $11, REG_R5($16)             # r5
  sw $12, REG_R6($16)             # r6
  sw $13, REG_R7($16)             # r7
  sw $14, REG_R8($16)             # r8
  sw $15, REG_R9($16)             # r9
  sw $18, REG_R10($16)            # r10
  sw $24, REG_R11($16)            # r11
  sw $25, REG_R12($16)            # r12
  sw $28, REG_R13($16)            # r13
  sw $30, REG_R14($16)            # r14
  lw $28, GP_SAVE($16)            # $28 <- saved value from GP_SAVE
.endm
# restore_registers
#   Reloads every MIPS register that caches an ARM register (r0-r14) from
#   the register block addressed by $16. This overwrites $28 with ARM r13.
.macro restore_registers
  lw $3,  REG_R0($16)             # r0
  lw $7,  REG_R1($16)             # r1
  lw $8,  REG_R2($16)             # r2
  lw $9,  REG_R3($16)             # r3
  lw $10, REG_R4($16)             # r4
  lw $11, REG_R5($16)             # r5
  lw $12, REG_R6($16)             # r6
  lw $13, REG_R7($16)             # r7
  lw $14, REG_R8($16)             # r8
  lw $15, REG_R9($16)             # r9
  lw $18, REG_R10($16)            # r10
  lw $24, REG_R11($16)            # r11
  lw $25, REG_R12($16)            # r12
  lw $28, REG_R13($16)            # r13
  lw $30, REG_R14($16)            # r14
.endm
# Process a hardware event. Since an interrupt might be
# raised we have to check if the PC has changed.
# In:
#   $4: next address (stored to REG_PC before the call)
#   $16: register base
#   $17: cycle counter
# Out:
#   $17: value returned by update_gba in $2
# Control flow:
#   If CHANGED_PC_STATUS is non-zero after update_gba returns, control
#   transfers to lookup_pc (defined elsewhere) WITHOUT reloading the cached
#   ARM registers here; otherwise the registers are reloaded and the
#   routine returns to its caller.
# Clobbers: $1, $2 (plus whatever update_gba clobbers).
# NOTE(review): only 4 bytes are pushed before calling update_gba --
# confirm this keeps the stack alignment the target ABI requires.
.balign 64
mips_update_gba:
sw $4, REG_PC($16) # current PC = $4
addiu $sp, $sp, -4 # make room on the stack
sw $ra,($sp) # save return address
collapse_flags # update cpsr
save_registers # save registers
jal update_gba # process the next event
sw $0, CHANGED_PC_STATUS($16) # clear the flag before the callee runs (delay slot)
lw $ra, ($sp) # restore return address
addiu $sp, $sp, 4 # fix stack
lw $1, CHANGED_PC_STATUS($16) # did update_gba flag a PC change?
bne $1, $0, lookup_pc # if so, go look up the new PC
addu $17, $2, $0 # $17 = new cycle count (delay slot)
restore_registers
jr $ra # if not, go back to caller
nop
# Perform an indirect branch.
# In:  $4: GBA address to branch to
# Spills the cached ARM registers, calls block_lookup_address_arm, reloads
# the registers, then jumps to the address it returned in $2. Does not
# return to the caller; $ra is overwritten by the jal.
mips_indirect_branch_arm:
save_registers
jal block_lookup_address_arm # $2 = MIPS address to jump to
nop
restore_registers
jr $2 # jump to it
nop
# Same sequence as mips_indirect_branch_arm, but the lookup is done by
# block_lookup_address_thumb.
# In:  $4: GBA address to branch to
mips_indirect_branch_thumb:
save_registers
jal block_lookup_address_thumb # $2 = MIPS address to jump to
nop
restore_registers
jr $2 # jump to it
nop
# Same sequence as mips_indirect_branch_arm, but the lookup is done by
# block_lookup_address_dual.
# In:  $4: GBA address to branch to
mips_indirect_branch_dual:
save_registers
jal block_lookup_address_dual # $2 = MIPS address to jump to
nop
restore_registers
jr $2 # jump to it
nop
# patch_handler ftable, force_open
# In:
#   $4: address being accessed
#   $5: current PC (not used by the patch itself)
#   $ra: return address of the call that reached the wrong handler
# Will patch the return address with a call to the correct handler as
# listed in the given table: the instruction 8 bytes before $ra (the jal
# that precedes its delay slot) is overwritten with `jal <new handler>`,
# and control then jumps to that patched instruction so the access is
# retried through the new handler.
# The table is indexed by the top byte of the address; if that byte is not
# below 0x0F the index \force_open is used instead.
# Clobbers: $1, $2, $ra (rewound by 8).
.macro patch_handler ftable, force_open
srl $1, $4, 24 # $1 = address region
sltu $2, $1, 0x0F # check if the value is open
bne $2, $0, 1f
sll $1, $1, 2 # make address word indexed (delay)
addiu $1, $0, (\force_open * 4)
1:
lui $2, %hi(\ftable)
addu $2, $2, $1
lw $2, %lo(\ftable)($2) # new function handler is in $2
srl $2, $2, 2 # remove lower two bits
lui $1, %hi(3 << 26) # $1 = 3 (JAL opcode)
ins $1, $2, 0, 26 # insert offset into jal
addiu $ra, $ra, -8 # rewind return address to function call
sw $1, ($ra) # modify to call new handler
cache 0x1a, ($ra) # writeback dcache line
cache 0x8, ($ra) # invalidate icache line
cache 0x1a, ($ra) # do it again for good luck :P
cache 0x8, ($ra)
jr $ra # return
nop # wary of putting cache here
.endm
# patch_handler_align ftable, alignment
# Like the above, but will use the table of the proper alignment,
# The tables should be ordered by alignment
# Here \alignment is a bit COUNT: that many low bits of the address are
# inserted at bit 6 of the table offset, so consecutive alignment values
# select sub-tables 64 bytes (16 words) apart. Addresses whose top byte is
# not below 0x0F use entry 1 of the selected sub-table.
# In:  $4: address being accessed, $ra: return address of the call to patch
# Clobbers: $1, $2, $ra (rewound by 8).
.macro patch_handler_align ftable, alignment
srl $1, $4, 24 # $1 = address region
sltu $2, $1, 0x0F # check if the value is open
bne $2, $0, 1f
sll $1, $1, 2 # make address word indexed (delay)
addiu $1, $0, 4 # force address to 0x1 (open)
1:
ins $1, $4, 6, \alignment # place alignment bits into offset
lui $2, %hi(\ftable)
addu $2, $2, $1
lw $2, %lo(\ftable)($2) # new function handler is in $2
srl $2, $2, 2 # remove lower two bits
lui $1, %hi(3 << 26) # $1 = 3 (JAL opcode)
ins $1, $2, 0, 26 # insert offset into jal
addiu $ra, $ra, -8 # rewind return address to function call
sw $1, ($ra) # modify to call new handler
cache 0x1a, ($ra) # writeback dcache line
cache 0x8, ($ra) # invalidate icache line
cache 0x1a, ($ra) # do it again for good luck :P
cache 0x8, ($ra)
jr $ra # return
nop # wary of putting cache here
.endm
# region_check region, patch_handler
#   Branches to \patch_handler unless the top byte of the address in $4
#   equals \region.
#   The macro ENDS on the branch: whatever instruction follows the macro
#   invocation is the branch delay slot and executes on both paths.
#   Clobbers: $1.
.macro region_check region, patch_handler
srl $1, $4, 24 # check upper 8bits of address
xor $1, $1, \region # see if it is the given region
bne $1, $0, \patch_handler # if not repatch/try again
.endm
# region_check_open patch_handler
#   Branches to \patch_handler when the top byte of the address in $4 is
#   below 0x0F and is not 1; i.e. falls through only for region 1 or for
#   regions 0x0F and above.
#   Ends on the branch: the next instruction is its delay slot.
#   Clobbers: $1, $2.
.macro region_check_open patch_handler
srl $1, $4, 24 # check upper 8bits of address
sltiu $2, $1, 0x0F # true if it is a low address
addiu $1, $1, -1 # non-zero if it is not a low open
sltu $1, $0, $1 # true if region != 1
and $1, $1, $2 # true if low address and not open
bne $1, $0, \patch_handler # if above is true, patch
.endm
# region_check_align region, align_bits, alignment, patch_handler
#   Branches to \patch_handler unless the top byte of the address in $4
#   equals \region AND its low \align_bits bits equal \alignment.
#   The low address bits are packed above the region byte (at bit 8) so a
#   single xori tests both at once.
#   Ends on the branch: the next instruction is its delay slot.
#   Clobbers: $1.
.macro region_check_align region, align_bits, alignment, patch_handler
srl $1, $4, 24 # check upper 8bits of address
ins $1, $4, 8, \align_bits # look at lower bits of address too
# See if it is the given region and alignment
xori $1, $1, (\region | (\alignment << 8))
bne $1, $0, \patch_handler # if not repatch/try again
.endm
# region_check_open_align align_bits, alignment, patch_handler
#   Same region test as region_check_open, and additionally branches to
#   \patch_handler when the low \align_bits bits of the address in $4
#   differ from \alignment.
#   Ends on the branch: the next instruction is its delay slot.
#   Clobbers: $1, $2.
.macro region_check_open_align align_bits, alignment, patch_handler
srl $1, $4, 24 # check upper 8bits of address
sltiu $2, $1, 0x0F # true if it is a low address
addiu $1, $1, -1 # non-zero if it is not a low open
sltu $1, $0, $1 # true if $1 != 0
and $1, $1, $2 # true if low address and not open
ext $2, $4, 0, \align_bits # $2 = low bits of $4
xori $2, $2, \alignment # non-zero if alignment doesn't match
or $1, $1, $2 # align failure will trigger too
bne $1, $0, \patch_handler # if above is true, patch
.endm
# ignore_region region, patch_handler
#   Handler body for a region whose accesses are dropped: if the address
#   in $4 is in \region, return immediately; otherwise go to
#   \patch_handler. Clobbers: $1.
.macro ignore_region region, patch_handler
region_check \region, \patch_handler
nop # delay slot of the region_check branch
jr $ra
nop
.endm
# ignore_high patch_handler
#   Handler body that drops accesses whose top address byte is 0x0F or
#   above: such accesses return immediately, anything lower goes to
#   \patch_handler. Clobbers: $1.
.macro ignore_high patch_handler
srl $1, $4, 24 # check upper 8bits of address
sltiu $1, $1, 0x0F # see if it is not high
bne $1, $0, \patch_handler # if not repatch/try again
nop
jr $ra
nop
.endm
# translate_region_core base, size
#   $2 = (%hi(\base) << 16) + ($4 & \size). \size is used as an AND mask.
#   The result is meant to be dereferenced with a %lo(\base) displacement
#   (see the load_*/store_* macros). Overwrites $4 with the masked offset.
.macro translate_region_core base, size
lui $2, %hi(\base) # generate upper address
andi $4, $4, \size # generate offset
addu $2, $2, $4 # add ptr upper and offset
.endm
# translate_region region, patch_handler, base, size
#   region_check followed by translate_region_core. The first instruction
#   of the core (the lui) lands in the region_check branch delay slot.
#   Clobbers: $1, $2, $4.
.macro translate_region region, patch_handler, base, size
region_check \region, \patch_handler
translate_region_core \base, \size
.endm
# I refuse to have > 80 char lines, and GAS has a problem with the param
# list spilling over (grumble)
# translate_region_align region, a_b, alignment, p_h, base, size
#   Like translate_region, but uses region_check_align with
#   a_b = number of low address bits to test, alignment = required value of
#   those bits, p_h = patch handler to branch to on mismatch.
.macro translate_region_align region, a_b, alignment, p_h, base, size
region_check_align \region, \a_b, \alignment, \p_h
translate_region_core \base, \size
.endm
# translate_region_ewram_core mask
#   Computes in $2 the upper part of the host address for an EWRAM access:
#     $2 = (%hi(ewram + 0x8000) << 16)
#          + ($4 & \mask)                  low 15 bits (minus alignment bits)
#          + (($4 >> 15) & 7) << 16        32KB page index, moved up one bit
#   so consecutive 32KB pages of the address end up 64KB apart.
#   To be dereferenced with a %lo(ewram + 0x8000) displacement.
#   Clobbers: $1, $4.
.macro translate_region_ewram_core mask
lui $2, %hi(ewram + 0x8000) # generate upper address (delay)
andi $1, $4, \mask # generate 15bit offset
ext $4, $4, 15, 3 # $4 = address bits 15-17 (32KB page index)
ins $1, $4, 16, 3 # place page index at bits 16-18 of the offset
addu $2, $2, $1
.endm
# EWRAM (region 2) translators. Each one checks the region (and, for the
# load_align variants, the low address bits), branching to \patch_handler
# on mismatch, then runs translate_region_ewram_core. The mask passed to
# the core drops 0, 1 or 2 low address bits (0x7FFF / 0x7FFE / 0x7FFC).

# Any alignment, no low-bit masking.
.macro translate_region_ewram patch_handler
region_check 2, \patch_handler
translate_region_ewram_core 0x7FFF
.endm
# Load with a specific alignment, no low-bit masking.
.macro translate_region_ewram_load_align align_bits, alignment, patch_handler
region_check_align 2, \align_bits, \alignment, \patch_handler
translate_region_ewram_core 0x7FFF
.endm
# Load with a specific alignment, address forced to a 16-bit boundary.
.macro translate_region_ewram_load_align16 align_bits, alignment, patch_handler
region_check_align 2, \align_bits, \alignment, \patch_handler
translate_region_ewram_core 0x7FFE
.endm
# Load with a specific alignment, address forced to a 32-bit boundary.
.macro translate_region_ewram_load_align32 align_bits, alignment, patch_handler
region_check_align 2, \align_bits, \alignment, \patch_handler
translate_region_ewram_core 0x7FFC
.endm
# Store, address forced to a 16-bit boundary.
.macro translate_region_ewram_store_align16 patch_handler
region_check 2, \patch_handler
translate_region_ewram_core 0x7FFE
.endm
# Store, address forced to a 32-bit boundary.
.macro translate_region_ewram_store_align32 patch_handler
region_check 2, \patch_handler
translate_region_ewram_core 0x7FFC
.endm
# translate_region_vram_core
#   In:  $4 = address, $2 = address bits 15-16 (set up by the caller).
#   Out: $2 = (%hi(vram) << 16) + offset, where offset is the low 17 bits
#        of the address, reduced by 0x8000 when bits 15-16 are both set
#        (so 0x18000-0x1FFFF maps onto 0x10000-0x17FFF).
#   Clobbers: $1, $4.
.macro translate_region_vram_core
addiu $2, $2, -3 # see if it's 3
ext $4, $4, 0, 17 # generate 17bit offset
bne $2, $0, 1f
lui $1, %hi(vram) # start loading vram address (delay)
addiu $4, $4, -0x8000 # move address into VRAM region
1:
addu $2, $1, $4 # $2 = (hi)vram + address
.endm
# VRAM (region 6) translators. Each one checks the region (and, for the
# load_align variants, the low address bits), branching to \patch_handler
# on mismatch. The `ext` that extracts address bits 15-16 sits in the
# branch delay slot. The align16/align32 variants additionally clear the
# low 1 or 2 address bits before translate_region_vram_core runs.

# Any alignment, no low-bit masking.
.macro translate_region_vram patch_handler
region_check 6, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
translate_region_vram_core
.endm
# Load with a specific alignment, no low-bit masking.
.macro translate_region_vram_load_align align_bits, alignment, patch_handler
region_check_align 6, \align_bits, \alignment, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
translate_region_vram_core
.endm
# Load with a specific alignment, address forced to a 16-bit boundary.
.macro translate_region_vram_load_align16 align_bits, alignment, patch_handler
region_check_align 6, \align_bits, \alignment, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
ins $4, $0, 0, 1 # mask out lower bit of address
translate_region_vram_core
.endm
# Load with a specific alignment, address forced to a 32-bit boundary.
.macro translate_region_vram_load_align32 align_bits, alignment, patch_handler
region_check_align 6, \align_bits, \alignment, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
ins $4, $0, 0, 2 # mask out lower two bits of address
translate_region_vram_core
.endm
# Store, address forced to a 16-bit boundary.
.macro translate_region_vram_store_align16 patch_handler
region_check 6, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
ins $4, $0, 0, 1 # mask out lower bit of address
translate_region_vram_core
.endm
# Store, address forced to a 32-bit boundary.
.macro translate_region_vram_store_align32 patch_handler
region_check 6, \patch_handler
ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
ins $4, $0, 0, 2 # mask out lower two bits of address
translate_region_vram_core
.endm
# translate_region_gamepak_core mask
#   In:  $4 = address, $16 = register base.
#   Out: $2 = page pointer + ($4 & \mask).
#   Looks up the 32KB page pointer in the word table located 32768 bytes
#   below $16, indexed by (address >> 15). If the entry is zero, calls
#   load_gamepak_page with $4 = (address >> 15) & 0x3FF and uses the
#   pointer it returns in $2.
#   The slow path spills $ra to REG_SAVE2 and the offset to REG_SAVE, so
#   neither slot may hold live data when this macro is used.
#   Clobbers: $1, $2 (and $4 on the slow path).
.macro translate_region_gamepak_core mask
srl $2, $4, 15 # $2 = page number of address (delay)
sll $2, $2, 2 # adjust to word index
addu $2, $2, $16 # $2 = memory_map_read[address >> 15]
lw $2, -32768($2)
bne $2, $0, 1f # if it's non-NULL continue
andi $1, $4, \mask # $1 = low 15bits of address (delay slot)
sw $ra, REG_SAVE2($16) # save return address
save_registers # save the registers
ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF
jal load_gamepak_page # get page in $2
sw $1, REG_SAVE($16) # save offset (delay)
lw $1, REG_SAVE($16) # restore offset
restore_registers # restore the other registers
lw $ra, REG_SAVE2($16) # restore return address
1:
addu $2, $2, $1 # add the memory map offset
.endm
# Gamepak translators. Each one checks \region (and, for the align
# variants, the low a_b address bits against \alignment), branching to
# \patch_handler on mismatch, then runs translate_region_gamepak_core with
# a mask that drops 0, 1 or 2 low address bits.

# Any alignment, no low-bit masking.
.macro translate_region_gamepak region, patch_handler
region_check \region, \patch_handler
translate_region_gamepak_core 0x7FFF
.endm
# Specific alignment, no low-bit masking.
.macro translate_region_gamepak_align region, a_b, alignment, patch_handler
region_check_align \region, \a_b, \alignment, \patch_handler
translate_region_gamepak_core 0x7FFF
.endm
# Specific alignment, offset forced to a 16-bit boundary.
.macro translate_region_gamepak_align16 region, a_b, alignment, patch_handler
region_check_align \region, \a_b, \alignment, \patch_handler
translate_region_gamepak_core 0x7FFE
.endm
# Specific alignment, offset forced to a 32-bit boundary.
.macro translate_region_gamepak_align32 region, a_b, alignment, patch_handler
region_check_align \region, \a_b, \alignment, \patch_handler
translate_region_gamepak_core 0x7FFC
.endm
# translate_region_gamepak_a region, patch_handler
#   Same lookup as translate_region_gamepak (mask 0x7FFF), but the slow
#   path also preserves $6 (a2) across the call to load_gamepak_page by
#   spilling it to REG_SAVE3.
#   Uses REG_SAVE (offset), REG_SAVE2 ($ra) and REG_SAVE3 ($6) as scratch.
#   Out: $2 = page pointer + low 15 bits of the address.
#   Clobbers: $1, $2 (and $4 on the slow path).
.macro translate_region_gamepak_a region, patch_handler
region_check \region, \patch_handler
srl $2, $4, 15 # $2 = page number of address (delay)
sll $2, $2, 2 # adjust to word index
addu $2, $2, $16 # $2 = memory_map_read[address >> 15]
lw $2, -32768($2)
bne $2, $0, 1f # if it's non-NULL continue
andi $1, $4, 0x7FFF # $1 = low 15bits of address (delay slot)
sw $ra, REG_SAVE2($16) # save return address
sw $6, REG_SAVE3($16) # save a2
save_registers # save the registers
ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF
jal load_gamepak_page # get page in $2
sw $1, REG_SAVE($16) # save offset (delay)
lw $1, REG_SAVE($16) # restore offset
restore_registers # restore the other registers
lw $ra, REG_SAVE2($16) # restore return address
lw $6, REG_SAVE3($16) # restore a2
1:
addu $2, $2, $1 # add the memory map offset
.endm
# eeprom_load_a patch_handler
#   Complete handler body for a region 0xD load that must preserve $6 (a2):
#   calls read_eeprom and returns its result in $2.
#   $ra is spilled to REG_SAVE (in the region_check delay slot, so the
#   store also happens when the branch to \patch_handler is taken) and $6
#   to REG_SAVE2; $6 is reloaded in the delay slot of the return.
.macro eeprom_load_a patch_handler
region_check 0xD, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
sw $6, REG_SAVE2($16) # save a2
save_registers # save the registers
jal read_eeprom # get eeprom value in $2
nop
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
lw $6, REG_SAVE2($16) # restore a2
.endm
# eeprom_load_core
#   Calls read_eeprom with the cached ARM registers spilled, then returns
#   to the caller with the result in $2. $ra is kept in REG_SAVE across the
#   call. Intended to directly follow a region check, so its first
#   instruction executes in that branch's delay slot.
.macro eeprom_load_core
sw $ra, REG_SAVE($16) # save the return address (delay)
save_registers # save the registers
jal read_eeprom # get eeprom value in $2
nop
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
nop
.endm
# EEPROM (region 0xD) load handlers built on eeprom_load_core.

# Any alignment.
.macro eeprom_load patch_handler
region_check 0xD, \patch_handler
eeprom_load_core
.endm
# Requires the low \align_bits address bits to equal \alignment.
.macro eeprom_load_align align_bits, alignment, patch_handler
region_check_align 0xD, \align_bits, \alignment, \patch_handler
eeprom_load_core
.endm
# The 16- and 32-bit variants are plain aliases of eeprom_load_align.
.macro eeprom_load_align16 align_bits, alignment, patch_handler
eeprom_load_align \align_bits, \alignment, \patch_handler
.endm
.macro eeprom_load_align32 align_bits, alignment, patch_handler
eeprom_load_align \align_bits, \alignment, \patch_handler
.endm
# backup_load_core
#   Calls read_backup with $4 = address & 0xFFFF and returns to the caller
#   with the result in $2.
#   Preconditions: the invoking macro has already stored $ra in REG_SAVE.
#   The macro ENDS on `jr $ra` with no delay-slot instruction; whoever
#   expands it must place exactly one instruction (e.g. a nop) right after
#   it.
.macro backup_load_core
save_registers # save the registers
jal read_backup # get backup value in $2
ext $4, $4, 0, 16 # address &= 0xFFFF (delay)
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
.endm
# backup_load_a patch_handler
#   Complete handler body for a region 0xE load that must preserve $6 (a2):
#   calls read_backup with $4 = address & 0xFFFF and returns its result in
#   $2. $ra is kept in REG_SAVE and $6 in REG_SAVE2 across the call; $6 is
#   reloaded in the delay slot of the return.
.macro backup_load_a patch_handler
region_check 0xE, \patch_handler
sw $ra, REG_SAVE($16) # save return address (delay)
sw $6, REG_SAVE2($16) # save a2
save_registers # save the registers
jal read_backup # get backup value in $2
ext $4, $4, 0, 16 # address &= 0xFFFF
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
lw $6, REG_SAVE2($16) # restore a2
.endm
# Backup (region 0xE) load handlers built on backup_load_core. Each saves
# $ra to REG_SAVE in the region-check delay slot. Like the core, they all
# end on `jr $ra`, so the expansion site must supply the delay-slot
# instruction.

# Any alignment.
.macro backup_load patch_handler
region_check 0xE, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
backup_load_core
.endm
# Requires the low \align_bits address bits to equal \alignment.
.macro backup_load_align align_bits, alignment, patch_handler
region_check_align 0xE, \align_bits, \alignment, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
backup_load_core
.endm
# As backup_load_align, with the address forced to a 16-bit boundary.
.macro backup_load_align16 align_bits, alignment, patch_handler
region_check_align 0xE, \align_bits, \alignment, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
ins $4, $0, 0, 1 # mask out lower bit
backup_load_core
.endm
# As backup_load_align, with the address forced to a 32-bit boundary.
.macro backup_load_align32 align_bits, alignment, patch_handler
region_check_align 0xE, \align_bits, \alignment, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
ins $4, $0, 0, 2 # mask out lower two bits
backup_load_core
.endm
# open_load8_core
#   8-bit read from an unmapped ("open") address.
#   In:  $4 = address, $5 = current PC.
#   Calls read_memory8 with a0 = PC + (address & 3) when CPSR bit 5 (T) is
#   clear, or PC + (address & 1) when it is set, and returns the result
#   in $2. $ra is kept in REG_SAVE across the call.
#   The macro ENDS on `jr $ra` with no delay-slot instruction; the
#   expansion site must supply it.
.macro open_load8_core
lw $2, REG_CPSR($16) # $2 = CPSR (delay)
andi $2, $2, 0x20 # test T bit
beq $2, $0, 1f # branch if ARM mode
andi $4, $4, 0x03 # isolate low 2 bits of the address (delay)
andi $4, $4, 0x01 # in Thumb mode, keep only bit 0
1:
sw $ra, REG_SAVE($16) # save the return address
save_registers # save the registers
jal read_memory8 # get instruction at PC
addu $4, $5, $4 # a0 = PC + low bits of address (delay)
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
.endm
# open_load8 patch_handler
#   region_check_open followed by open_load8_core. Ends on `jr $ra`; the
#   expansion site must supply the delay-slot instruction.
.macro open_load8 patch_handler
region_check_open \patch_handler
open_load8_core
.endm
# open_load16_core
#   16-bit read from an unmapped ("open") address.
#   In:  $4 = address, $5 = current PC.
#   Calls read_memory16 with a0 = PC + (address & 2) when CPSR bit 5 (T) is
#   clear, or a0 = PC when it is set, and returns the result in $2.
#   $ra is kept in REG_SAVE across the call.
#   The macro ENDS on `jr $ra` with no delay-slot instruction; the
#   expansion site must supply it.
.macro open_load16_core
lw $2, REG_CPSR($16) # $2 = CPSR (delay)
andi $2, $2, 0x20 # test T bit
beq $2, $0, 1f # branch if ARM mode
andi $4, $4, 0x02 # isolate bit 1 from address (delay)
addu $4, $0, $0 # Thumb mode: no address bits contribute
1:
sw $ra, REG_SAVE($16) # save the return address
save_registers # save the registers
jal read_memory16 # get instruction at PC
addu $4, $5, $4 # a0 = PC + low bits of address (delay)
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
.endm
# open_load16_align align_bits, alignment, patch_handler
#   region_check_open_align followed by open_load16_core. Ends on `jr $ra`;
#   the expansion site must supply the delay-slot instruction.
.macro open_load16_align align_bits, alignment, patch_handler
region_check_open_align \align_bits, \alignment, \patch_handler
open_load16_core
.endm
# Plain alias of open_load16_align.
.macro open_load16_align16 align_bits, alignment, patch_handler
open_load16_align \align_bits, \alignment, \patch_handler
.endm
# open_load32_core
#   32-bit read from an unmapped ("open") address.
#   In:  $5 = current PC, $16 = register base.
#   When CPSR bit 5 (T) is clear: returns read_memory32(PC) in $2.
#   When it is set: returns the halfword read_memory16(PC) duplicated into
#   both halves of $2.
#   $ra is kept in REG_SAVE across the call.
#   The macro ENDS on `jr $ra` with no delay-slot instruction; the
#   expansion site must supply it.
#
#   Fix: the ARM path used to pass PC + $4 (the whole, unmasked faulting
#   address) to read_memory32, contradicting its own "a0 = PC" comment and
#   the Thumb path. The address does not take part in a 32-bit open read.
.macro open_load32_core
lw $2, REG_CPSR($16) # $2 = CPSR (delay)
andi $2, $2, 0x20 # test T bit
save_registers # save the registers
beq $2, $0, 1f # branch if ARM mode
sw $ra, REG_SAVE($16) # save the return address (delay)
jal read_memory16 # get instruction at PC
addu $4, $5, $0 # a0 = PC (delay)
j 2f
ins $2, $2, 16, 16 # result = (result << 16) | result (delay)
1:
jal read_memory32 # get instruction at PC
addu $4, $5, $0 # a0 = PC (delay)
2: # join point
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
.endm
# open_load32_a patch_handler
#   Complete handler body for a 32-bit open read that must preserve
#   $6 (a2).
#   In:  $4 = address (only used for the region check), $5 = current PC.
#   When CPSR bit 5 (T) is clear: returns read_memory32(PC) in $2.
#   When it is set: returns the halfword read_memory16(PC) duplicated into
#   both halves of $2.
#   $ra is kept in REG_SAVE and $6 in REG_SAVE2 across the call; $6 is
#   reloaded in the delay slot of the return.
#
#   Fix: the ARM path used to pass PC + $4 (the whole faulting address) to
#   read_memory32, contradicting its own "a0 = PC" comment and the Thumb
#   path. It now passes PC alone.
.macro open_load32_a patch_handler
region_check_open \patch_handler
lw $2, REG_CPSR($16) # $2 = CPSR (delay)
andi $2, $2, 0x20 # test T bit
save_registers # save the registers
sw $6, REG_SAVE2($16) # save a2
beq $2, $0, 1f # branch if ARM mode
sw $ra, REG_SAVE($16) # save the return address (delay)
jal read_memory16 # get instruction at PC
addu $4, $5, $0 # a0 = PC (delay)
j 2f
ins $2, $2, 16, 16 # result = (result << 16) | result (delay)
1:
jal read_memory32 # get instruction at PC
addu $4, $5, $0 # a0 = PC (delay)
2:
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
lw $6, REG_SAVE2($16) # restore a2 (delay)
.endm
# open_load32_align align_bits, alignment, patch_handler
#   region_check_open_align followed by open_load32_core. Ends on `jr $ra`;
#   the expansion site must supply the delay-slot instruction.
.macro open_load32_align align_bits, alignment, patch_handler
region_check_open_align \align_bits, \alignment, \patch_handler
open_load32_core
.endm
# Plain alias of open_load32_align.
.macro open_load32_align32 align_bits, alignment, patch_handler
open_load32_align \align_bits, \alignment, \patch_handler
.endm
# store_function function, region, patch_handler, mask
#   Complete handler body for a store that is carried out by a called
#   routine: if the address in $4 is in \region, calls \function with
#   $4 = address & \mask (other argument registers are passed through
#   untouched), then returns. Otherwise branches to \patch_handler.
#   $ra is kept in REG_SAVE across the call. Any value \function returns
#   is ignored. $6 is NOT preserved across the call.
.macro store_function function, region, patch_handler, mask
region_check \region, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
save_registers # save the registers
jal \function # store value out
andi $4, $4, \mask # mask address (delay)
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
nop
.endm
# store_function_a function, region, patch_handler, mask
#   Variant of store_function for handlers that must preserve $6 (a2):
#   if the address in $4 is in \region, calls \function with
#   $4 = address & \mask, then returns. Otherwise branches to
#   \patch_handler.
#   $ra is kept in REG_SAVE and $6 in REG_SAVE2 across the call; $6 is
#   reloaded in the delay slot of the return. Any value \function returns
#   is ignored.
#
#   Change: this macro used to be byte-identical to store_function and was
#   the only "_a" handler that let the called routine clobber $6. It now
#   follows the same save/restore pattern as eeprom_load_a, backup_load_a
#   and open_load32_a.
.macro store_function_a function, region, patch_handler, mask
region_check \region, \patch_handler
sw $ra, REG_SAVE($16) # save the return address (delay)
sw $6, REG_SAVE2($16) # save a2
save_registers # save the registers
jal \function # store value out
andi $4, $4, \mask # mask address (delay)
restore_registers # restore the other registers
lw $ra, REG_SAVE($16) # restore return address
jr $ra # return
lw $6, REG_SAVE2($16) # restore a2 (delay)
.endm
# Final step of a direct-memory load handler. On entry $2 holds the upper
# part of the host address plus the offset (as produced by the
# translate_region* macros); the %lo(\base) displacement completes the
# address. The load itself sits in the delay slot of the return, and the
# result is left in $2.

# Zero-extended byte.
.macro load_u8 base
jr $ra # return
lbu $2, %lo(\base)($2) # return base[offset]
.endm
# Sign-extended byte.
.macro load_s8 base
jr $ra # return
lb $2, %lo(\base)($2) # return base[offset]
.endm
# Zero-extended halfword.
.macro load_u16 base
jr $ra # return
lhu $2, %lo(\base)($2) # return base[offset]
.endm
# Sign-extended halfword.
.macro load_s16 base
jr $ra # return
lh $2, %lo(\base)($2) # return base[offset]
.endm
# Word.
.macro load_u32 base
jr $ra # return
lw $2, %lo(\base)($2) # return base[offset]
.endm
# 16bit unaligned load will always have a 1 in the LSB;
# should have already been taken care of in indexing.
# Loads the halfword at base[offset] zero-extended, then rotates the full
# 32-bit register right by 8 (in the delay slot of the return).
.macro load_u16_unaligned base
lhu $2, %lo(\base)($2) # load base[offset]
jr $ra # return
ror $2, $2, 8 # rotate value by 8bits
.endm
# This is technically the same as load_s8, but kept to
# avoid confusion.
# Returns the sign-extended byte at base[offset].
.macro load_s16_unaligned base
jr $ra # return
lb $2, %lo(\base)($2) # return base[offset]
.endm
# Unalignment must be known statically (use the tables to
# patch correctly)
# Loads the word at base[offset] and rotates it right by \alignment bytes.
.macro load_u32_unaligned base, alignment
lw $2, %lo(\base)($2) # load base[offset]
jr $ra # return
ror $2, $2, (\alignment * 8) # rotate value by (alignment * 8) bits
.endm
# Final step of a direct-memory store handler. On entry $2 holds the upper
# part of the host address plus the offset and $5 holds the value to
# store; the store sits in the delay slot of the return.

# Byte store.
.macro store_u8 base
jr $ra # return
sb $5, %lo(\base)($2) # store value at base[offset]
.endm
# Halfword store.
.macro store_u16 base
jr $ra # return
sh $5, %lo(\base)($2) # store value at base[offset]
.endm
# Word store.
.macro store_u32 base
jr $ra # return
sw $5, %lo(\base)($2) # store value at base[offset]
.endm
# Store the value double mirrored (u16)
# Copies the low byte of $5 into bits 8-15 of $5 and stores the resulting
# halfword at base[offset]. Modifies $5.
.macro store_u8_double base
ins $5, $5, 8, 8 # value = (value << 8) | value
jr $ra # return
sh $5, %lo(\base)($2) # store value at base[offset]
.endm
# Store the values and check if it overwrote code there
# Each variant forms the full host address in $2, reads the same-sized
# value located 32768 bytes below it (the SMC status for that location)
# and, if that is non-zero, branches to smc_write (defined elsewhere).
# The store of $5 sits in the branch delay slot, so it is performed on
# both paths. Clobbers: $1, $2.

# Byte store.
.macro store_u8_smc base
addiu $2, $2, %lo(\base) # offset the address
lb $1, -32768($2) # load the SMC status
bne $1, $0, smc_write # is there code there?
sb $5, ($2) # store value at base[offset] (delay)
jr $ra # return
nop
.endm
# Halfword store.
.macro store_u16_smc base
addiu $2, $2, %lo(\base) # offset the address
lh $1, -32768($2) # load the SMC status
bne $1, $0, smc_write # is there code there?
sh $5, ($2) # store value at base[offset] (delay)
jr $ra # return
nop
.endm
# Word store.
.macro store_u32_smc base
addiu $2, $2, %lo(\base) # offset the address
lw $1, -32768($2) # load the SMC status
bne $1, $0, smc_write # is there code there?
sw $5, ($2) # store value at base[offset] (delay)
jr $ra # return
nop
.endm
# Unsigned 8bit load handlers
# Common interface: $4 = address, $5 = current PC, result returned in $2.
# A handler reached with an address outside its region branches to
# patch_load_u8.

# Region 0 (BIOS):
#   - address >= 0x4000: handled as an open read (open_load8_core).
#   - PC >= 0x4000: reads bios_read_protect, indexed by the low 2 address
#     bits.
#   - otherwise: reads bios_rom[address & 0x3FFF].
execute_load_bios_u8:
region_check 0, patch_load_u8
srl $2, $4, 14 # check if address is in BIOS region
bne $2, $0, 2f # if not, perform open read
srl $1, $5, 14 # check if PC is in BIOS region
bne $1, $0, 1f # if not, perform BIOS protected read
lui $2, %hi(bios_rom) # generate upper address (delay)
andi $4, $4, 0x3FFF # generate offset
addu $2, $2, $4
load_u8 bios_rom
1:
lui $2, %hi(bios_read_protect) # generate upper address
ins $2, $4, 0, 2 # lower 2 bits address contributes
load_u8 bios_read_protect
2:
open_load8_core
nop # delay slot for the jr $ra that ends open_load8_core
# Region 2 (EWRAM) unsigned byte load: $4 = address, result in $2.
execute_load_ewram_u8:
translate_region_ewram patch_load_u8
load_u8 (ewram + 0x8000)
# Put the generic address over the handler you want to be default
# IWRAM is typically the most frequently read and written to.
# execute_load_u8 (the exported generic entry point) is therefore an alias
# for the region 3 (IWRAM) handler: $4 = address, result in $2.
execute_load_u8:
execute_load_iwram_u8:
translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF
load_u8 (iwram + 0x8000)
# Region 4 (I/O registers) unsigned byte load: reads
# io_registers[address & 0x3FF]. $4 = address, result in $2.
execute_load_io_u8:
translate_region 4, patch_load_u8, io_registers, 0x3FF
load_u8 io_registers
# Region 5 (palette RAM) unsigned byte load: reads
# palette_ram[address & 0x3FF]. $4 = address, result in $2.
execute_load_palette_u8:
translate_region 5, patch_load_u8, palette_ram, 0x3FF
load_u8 palette_ram
execute_load_vram_u8: