/
vm.c
6166 lines (5810 loc) · 303 KB
/
vm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright (c) 2021-2024 Gabriel Campbell
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// vm.c
// Gabriel Campbell (github.com/gabecampb)
// Created 2020-03-28
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <ctype.h>
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image.h"
#include "stb_image_write.h"
// Full path to some directory.
// NOTE(review): the name suggests this is the root for guest file access — confirm against the file I/O code.
// A string literal has type char[]; the explicit cast makes the conversion to uint8_t* intentional
// (the variable's type is unchanged, so existing users are unaffected).
uint8_t* root_path = (uint8_t*)"/tmp";
// for file I/O functions
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <unistd.h>
#include <errno.h>
#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>
#include <GLFW/glfw3.h>
typedef struct dirent dirent;
// Display/behaviour switches held in file-scope globals (zero until something sets them).
// NOTE(review): where these are written and read is not visible in this part of the file.
uint8_t show_program_info, show_about, enable_vsync;
// Compile-time configuration switches; each is a 0/1 flag unless its comment says otherwise.
#define SHOW_FPS 1 /* show FPS counter in window title */
#define SHOW_INS_OUT_OF_RANGE 0 /* print when a thread is killed due to fetching instruction out of instruction range */
#define SHOW_NEW_THREAD 0 /* print init PC and thread ID for any newly created threads */
#define SHOW_SEGFAULT 0 /* print message at segfault */
#define SHOW_SHADERS 0 /* print GLSL shaders */
#define BUILD_VER 1 /* current build version */
#define SLEEP_AT_SWAP 0 /* force CPU sleep at buffer swap; only for testing */
#define SLEEP_SWAP_MS 16 /* how long to (force) sleep at buffer swap, in ms */
#define THR_0_RESTRICT_INS_RANGE 0 /* force thread 0 to have instruction memory range that spans only the boot-loaded program instead of entire main memory; only for testing */
#define STD_OUTPUT 1 /* whether or not to allow using the standard output register to print to console */
// Host window and input state.
// update_hwinfo() copies window_width/window_height, cursor_x/cursor_y, mouse_buttons,
// scroll_x/scroll_y and kbd_states into the hardware-information region of system memory.
const char* WINDOW_TITLE = "Piculet VM";
uint32_t window_width = 500;
uint32_t window_height = 340;
int32_t cursor_x = 0;
int32_t cursor_y = 0;
uint8_t mouse_buttons = 0;
double scroll_x = 0, scroll_y = 0; // stored as doubles here; update_hwinfo() writes them as int32_t
uint8_t kbd_states[9]; // 9 bytes, copied verbatim to hardware-information offset 800
// Guest memory layout: main memory occupies [0, SIZE_MAIN_MEM), system memory follows it.
#define SIZE_MAIN_MEM (512*1000000) /* 512 MB */
#define SIZE_SYS_MEM (25*1000000) /* 25 MB */
// NOTE(review): the two offsets below are 18*1000 and 20*1000 bytes (18,000 / 20,000), not 18 MB / 20 MB
// as the first comment states. Because mappings_low starts at HW_INFORMATION and grows downwards, this also
// bounds the space available below it for mappings. Confirm which of code or comment is intended.
#define HW_INFORMATION (SIZE_MAIN_MEM+18*1000) /* HARDWARE INFORMATION STARTS 18 MB INTO SYSTEM MEMORY */
#define HW_INFO_HIGH (SIZE_MAIN_MEM+20*1000) /* LAST ADDRESS OF HARDWARE INFORMATION */
uint32_t max_texture_size = 1024; // max texture dimensions (overwritten from GL_MAX_TEXTURE_SIZE in update_hwinfo())
uint8_t gl_finish; // whether or not to call glFinish() after all threads have cycled; set back to 0 after all threads have finished a cycle
uint8_t gl_swap; // whether or not to swap the buffers after all threads have cycled; set back to 0 after all threads have finished a cycle
struct timespec start_tm;
#define NS_PER_SEC 1000000000
// Backing store for the whole guest address space: main memory followed by system memory.
uint8_t memory[SIZE_MAIN_MEM+SIZE_SYS_MEM];
uint64_t mappings_low = HW_INFORMATION; // the lowest address for current buffer mappings; starts at beginning of HW information and is subtracted as buffers are mapped
// One mapped region of system memory: start address, size in bytes, and the object privacy key it was created with.
typedef struct map_t { uint64_t address, size, privacy_key; } map_t;
map_t* mappings; // mapping regions (heap array, resized by new_mapping()/delete_mapping())
uint64_t n_mappings; // number of mapping regions
// Forward declarations and global tables for threads and objects.
typedef struct thread_t thread_t;
thread_t* threads; // heap array of all threads; index 0 is created by init_threads()
uint32_t n_threads; // number of entries in 'threads'
typedef struct object_t object_t;
object_t* objects;
uint64_t n_objects = 0;
#define MAX_NUMBER_BOUND_SETS 4 /* maximum number of descriptor sets */
uint32_t max_number_ubos = 100; // maximum number of uniform buffers accessible by a pipeline
uint32_t max_number_sbos = 100; // maximum number of storage buffers accessible by a pipeline
uint32_t max_number_samplers = 8; // maximum number of samplers accessible by a pipeline
uint32_t max_number_images = 8; // maximum number of images accessible by a pipeline
uint32_t max_number_as = 0; // maximum number of acceleration structures accessible by a pipeline (0; RT is unsupported currently)
typedef struct segtable_t segtable_t;
typedef struct object_t object_t; // repeats the forward declaration of object_t made a few lines above
// Object type codes, consecutive from 0x00 to 0x23.
// NOTE(review): the code that stores and compares these values is outside this part of the file.
#define TYPE_CBO 0x00
#define TYPE_VAO 0x01
#define TYPE_VBO 0x02
#define TYPE_IBO 0x03
#define TYPE_TBO 0x04
#define TYPE_FBO 0x05
#define TYPE_UBO 0x06
#define TYPE_SBO 0x07
#define TYPE_TLAS 0x08
#define TYPE_BLAS 0x09
#define TYPE_DBO 0x0A
#define TYPE_SBT 0x0B
#define TYPE_SAMPLER_DESC 0x0C
#define TYPE_IMAGE_DESC 0x0D
#define TYPE_UNIFORM_DESC 0x0E
#define TYPE_STORAGE_DESC 0x0F
#define TYPE_AS_DESC 0x10
#define TYPE_DSET 0x11
#define TYPE_SET_LAYOUT 0x12
#define TYPE_VSH 0x13
#define TYPE_PSH 0x14
#define TYPE_RGENSH 0x15
#define TYPE_AHITSH 0x16
#define TYPE_CHITSH 0x17
#define TYPE_MISSSH 0x18
#define TYPE_CSH 0x19
#define TYPE_RASTER_PIPE 0x1A
#define TYPE_RT_PIPE 0x1B
#define TYPE_COMPUTE_PIPE 0x1C
#define TYPE_AUD_DATA 0x1D
#define TYPE_AUD_SRC 0x1E
#define TYPE_AUD_LIS 0x1F
#define TYPE_AUD_OCC 0x20
#define TYPE_VID_DATA 0x21
#define TYPE_SCKT 0x22
#define TYPE_SEGTABLE 0x23
// Binding point identifiers. 0x00-0x19 are consecutive and follow the same order as the
// fields of object_bindings_t.
#define UNIFORM_DESC_BINDING 0x00
#define STORAGE_DESC_BINDING 0x01
#define SAMPLER_DESC_BINDING 0x02
#define IMAGE_DESC_BINDING 0x03
#define AS_DESC_BINDING 0x04
#define DESC_SET_BINDING 0x05
#define SET_LAYOUT_BINDING 0x06
#define VAO_BINDING 0x07
#define VBO_BINDING 0x08
#define IBO_BINDING 0x09
#define TBO_BINDING 0x0A
#define CBO_BINDING 0x0B
#define UBO_BINDING 0x0C
#define SBO_BINDING 0x0D
#define TLAS_BINDING 0x0E
#define BLAS_BINDING 0x0F
#define DBO_BINDING 0x10
#define SBT_BINDING 0x11
#define SHADER_BINDING 0x12
#define PIPELINE_BINDING 0x13
#define FBO_BINDING 0x14
#define AUD_DATA_BINDING 0x15
#define AUD_SRC_BINDING 0x16
#define AUD_LIS_BINDING 0x17
#define AUD_OCC_BINDING 0x18
#define VID_DATA_BINDING 0x19
// NOTE(review): 0x20 is 32 decimal; it skips 0x1A-0x1F and is >= N_BINDINGS (27). If binding IDs are ever
// range-checked against or indexed by N_BINDINGS this value is out of range — possibly 0x1A was intended.
// Confirm against the instruction-set definition before changing.
#define SEGTABLE_BINDING 0x20
#define N_BINDINGS 27 // equals the number of fields in object_bindings_t
// Status-register flag masks: V is bit 43, C bit 44, Z bit 45, N bit 46, SEGFAULT bit 47.
// NOTE(review): N/Z/C/V presumably stand for negative/zero/carry/overflow, and kill_thread() ORs a flag
// into regs[13], which suggests regs[13] is the status register — confirm both.
#define SR_BIT_N 0x400000000000
#define SR_BIT_Z 0x200000000000
#define SR_BIT_C 0x100000000000
#define SR_BIT_V 0x80000000000
#define SR_BIT_SEGFAULT 0x800000000000
// A thread's current object bindings: one 64-bit slot per binding point.
// The fields appear in the same order as the *_BINDING identifiers (27 fields, matching N_BINDINGS).
// init_threads()/new_thread() clear the whole structure to 0.
// NOTE(review): presumably each slot holds the ID of the bound object with 0 meaning "nothing bound" — confirm.
typedef struct object_bindings_t {
uint64_t uniform_desc_binding;
uint64_t storage_desc_binding;
uint64_t sampler_desc_binding;
uint64_t image_desc_binding;
uint64_t as_desc_binding;
uint64_t desc_set_binding;
uint64_t set_layout_binding;
uint64_t vao_binding;
uint64_t vbo_binding;
uint64_t ibo_binding;
uint64_t tbo_binding;
uint64_t cbo_binding;
uint64_t ubo_binding;
uint64_t sbo_binding;
uint64_t tlas_binding;
uint64_t blas_binding;
uint64_t dbo_binding;
uint64_t sbt_binding;
uint64_t shader_binding;
uint64_t pipeline_binding;
uint64_t fbo_binding;
uint64_t aud_data_binding;
uint64_t aud_src_binding;
uint64_t aud_lis_binding;
uint64_t aud_occ_binding;
uint64_t vid_data_binding;
uint64_t segtable_binding;
} object_bindings_t;
// State of one VM thread. Threads live in the global 'threads' array, which is resized with realloc(),
// so a thread_t* obtained before a call to new_thread() may be invalid afterwards.
typedef struct thread_t {
uint64_t id; // new_thread() sets this to the thread's index in 'threads'; thread 0 keeps 0 from calloc
uint64_t* primary; // the next three point into 'regs'; init_threads()/new_thread() start them at regs[0]
uint64_t* secondary;
uint64_t* output;
uint64_t* regs; // 16 64-bit registers, allocated with calloc(16, ...)
uint64_t instruction_max, instruction_min; // range for executable instructions in main memory
uint8_t end_cyc; // used in cycle execution
uint64_t parent, n_descendants; // used in determining where ...
uint64_t* descendants; // this thread sits in the hierarchy; lists only direct descendants
uint8_t killed; // whether or not this thread was killed
uint8_t detached; // whether or not this thread is detached
uint64_t joining; // what thread this thread is waiting for to be killed (0 if none)
uint8_t perm_screenshot, perm_camera, perm_microphones, perm_networking, perm_file_io, perm_thread_creation; // whether or not this thread has these permissions
uint8_t* highest_dir; // the highest accessible path for this thread
// NOTE(review): init_threads() stores the single byte '/' with length 1 and no terminator, which does not
// match "incl. null character" below — confirm which convention the file I/O code expects.
uint8_t highest_dir_length; // the length of this thread's highest accessible path string (in bytes, incl. null character)
object_bindings_t bindings; // this thread's object bindings
uint8_t object_privacy; // whether or not this thread has object privacy enabled
uint64_t privacy_key; // this thread's object privacy key
uint64_t* created_threads; // the IDs of threads created by this thread during a cycle (they will exist but be in killed state until this thread cycles again)
uint32_t n_created_threads; // the count of threads created by this thread during a cycle
uint64_t sleep_start_ns; // time that the thread was put to sleep
uint64_t sleep_duration_ns; // time that the thread was put to sleep for
uint64_t segtable_id;
// NOTE(review): IDs 1-65535 are 65535 distinct values but the array has 65534 slots — check the indexing
// used by the file I/O code for an off-by-one.
FILE* file_streams[65534]; // open file streams (ID 1-65535)
} thread_t;
// create new mapping region in system memory with specified size and object privacy key, then return address
uint64_t new_mapping(uint64_t privacy_key, uint64_t size) {
mappings_low -= size;
mappings = realloc(mappings, sizeof(map_t)*(n_mappings+1));
mappings[n_mappings].address = mappings_low;
mappings[n_mappings].size = size;
mappings[n_mappings].privacy_key = privacy_key;
n_mappings++;
return mappings_low;
}
// delete a mapping region in system memory that starts at specified address
void delete_mapping(uint64_t address) {
for(uint32_t i = 0; i < n_mappings; i++)
if(address == mappings[i].address) {
if(address == mappings_low) mappings_low += mappings[i].size;
if(i!=n_mappings-1) mappings[i] = mappings[n_mappings-1];
mappings = realloc(mappings, sizeof(map_t)*(n_mappings-1));
n_mappings--;
if(!n_mappings) mappings_low = HW_INFORMATION;
return;
}
}
// One entry of a segment table.
// NOTE(review): by the field names this maps 'length' bytes at virtual address v_address to physical
// address p_address — the translation code is not visible here, confirm.
typedef struct segment_t {
uint64_t v_address;
uint64_t p_address;
uint64_t length;
uint8_t deleted; // non-zero marks the slot as free; add_segment() reuses such slots
} segment_t;
// A growable table of segments; filled by add_segment() and cleared by reset_segtable().
typedef struct segtable_t {
segment_t* segments; // heap array of n_segments entries (0 when empty)
uint32_t n_segments; // number of slots in 'segments', including slots marked deleted
} segtable_t;
// Store new_segment in a segment table and return the index of the slot used.
// The first slot marked 'deleted' is reused; if there is none, the table grows by one entry.
uint64_t add_segment(segtable_t* segtable, segment_t new_segment) {
	uint32_t slot = 0;

	// look for the first reusable (deleted) slot
	while(slot < segtable->n_segments && !segtable->segments[slot].deleted)
		slot++;

	if(slot == segtable->n_segments) { // none found: append a new slot at the end
		segtable->segments = realloc(segtable->segments, (segtable->n_segments+1)*sizeof(segment_t));
		segtable->n_segments++;
	}
	segtable->segments[slot] = new_segment;
	return slot;
}
// Release every segment of a segment table and leave it empty (no storage, zero entries).
void reset_segtable(segtable_t* segtable) {
	free(segtable->segments); // free(NULL) is a no-op, so an empty table needs no special case
	segtable->n_segments = 0;
	segtable->segments = 0;
}
// Return 1 if the 'size'-byte access starting at 'address' lies entirely within the hardware
// information region [HW_INFORMATION, HW_INFO_HIGH], 0 otherwise.
// The check is written with subtractions only: the previous form 'address + size - 1' wrapped around
// for very large address/size values and could accept accesses far outside the region.
// For inputs that do not wrap, the result is the same as before (including size == 0).
uint8_t check_hwinfo(uint64_t address, uint64_t size) {
	const uint64_t low = HW_INFORMATION;
	const uint64_t region_len = (uint64_t)HW_INFO_HIGH - low + 1;

	if(address < low) return 0;
	const uint64_t offset = address - low; // cannot wrap: address >= low
	return offset <= region_len && size <= region_len - offset;
}
uint8_t check_mapped_region(uint64_t privacy_key, uint64_t address, uint64_t size) {
for(uint32_t i = 0; i < n_mappings; i++)
if(privacy_key == mappings[i].privacy_key && address >= mappings[i].address && address + size - 1 < mappings[i].address + mappings[i].size) return 1;
return 0; // not part of mapped region
}
// Rewrite the hardware-information block at memory[HW_INFORMATION] from the current host state.
// Fixed fields occupy byte offsets 0-253; variable data referenced by guest addresses is stored
// further into the block: display dimensions at +500, cursor coordinates at +600, extra cursor
// inputs at +700, keyboard state at +800 and the supported image format list at +900.
// Calls glGetIntegerv(), so it uses the OpenGL API on every invocation.
// NOTE(review): multi-byte fields are written through casted pointers at odd offsets (e.g. hwi+5);
// this relies on the platform tolerating unaligned, type-punned stores.
void update_hwinfo() {
uint8_t* hwi = &memory[HW_INFORMATION];
*(uint32_t*)hwi = 0x180; // hw support info; texture filtering + hw accel gfx
hwi[4] = 0; // 1 display
*(uint64_t*)(hwi+5) = HW_INFORMATION+500; // address to dimensions of each display
*(uint64_t*)(hwi+13) = 0; // address to 16-bit touch count for each display
*(uint64_t*)(hwi+21) = 0; // address to 16-bit current touch count for each display
*(uint64_t*)(hwi+29) = 0; // address to touch coordinates
*(uint32_t*)(hwi+37) = 1; // current # of cursors
*(uint64_t*)(hwi+41) = HW_INFORMATION+600; // address to 32-bit cursor coordinates
*(uint64_t*)(hwi+49) = HW_INFORMATION+700; // address to 8-bit additional cursor inputs
hwi[57] = 1; // number of connected keyboards
*(uint64_t*)(hwi+58) = HW_INFORMATION+800; // address to keyboard info
hwi[66] = 0; // number of connected controllers
hwi[67] = 0; // number of controller buttons
hwi[68] = 0; // number of controller axes
hwi[69] = 0; // number of controller positions
hwi[70] = 0; // number of controller orientations
*(uint64_t*)(hwi+71) = 0; // address to controller info
hwi[79] = 0; // number of microphones
hwi[80] = 0; // number of available cameras
*(uint64_t*)(hwi+81) = 0; // address of camera image dimensions
*(uint32_t*)(hwi+89) = 0; // min cpu mhz
*(uint32_t*)(hwi+93) = 0; // max cpu mhz
*(uint64_t*)(hwi+97) = 0; // address to current clock speed for each core
*(uint16_t*)(hwi+105) = 0; // number of cpu cores
*(uint64_t*)(hwi+107) = SIZE_MAIN_MEM; // capacity of main memory
*(uint64_t*)(hwi+115) = 0; // gpu mem capacity
*(uint64_t*)(hwi+123) = 0; // gpu mem available
// NOTE(review): 100.f is converted to the integer 100 by this store; if the guest expects an
// IEEE-754 float here the bit pattern is wrong — confirm the field's format.
*(uint32_t*)(hwi+131) = 100.f; // battery percent
*(uint16_t*)(hwi+135) = 0; // num of graphics queues (0 corresponds to 1)
*(uint16_t*)(hwi+137) = 0; // num of compute queues (0 corresponds to 1)
*(uint32_t*)(hwi+139) = UINT32_MAX; // max desc binding points per descriptor set
*(uint32_t*)(hwi+143) = 16; // num of audio occlusion geometry binding points per audio listener
// NOTE(review): max_texture_size is a uint32_t but glGetIntegerv() takes a GLint*; same width, different signedness.
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
*(uint32_t*)(hwi+147) = max_texture_size; // max texture size
*(uint32_t*)(hwi+151) = 1024; // max local work-group size X
*(uint32_t*)(hwi+155) = 1024; // max local work-group size Y
*(uint32_t*)(hwi+159) = 64; // max local work-group size Z
*(uint32_t*)(hwi+163) = 1024; // max local work-group size
*(uint32_t*)(hwi+167) = 65535; // max global work-group size X
*(uint32_t*)(hwi+171) = 65535; // max global work-group size Y
*(uint32_t*)(hwi+175) = 65535; // max global work-group size Z
*(uint32_t*)(hwi+179) = 0; // max RT recursion
*(uint32_t*)(hwi+183) = 0; // max geometries in BLAS
*(uint32_t*)(hwi+187) = 0; // max count of geom instances in TLAS
*(uint32_t*)(hwi+191) = 0; // max total count of triangles in TLAS
hwi[195] = MAX_NUMBER_BOUND_SETS-1; // max number of accessible desc sets in a pipeline
*(uint64_t*)(hwi+196) = max_number_as; // max AS descriptors in a pipeline
*(uint64_t*)(hwi+204) = max_number_samplers; // max sampler count (global max_number_samplers)
*(uint64_t*)(hwi+212) = max_number_images; // max image count (global max_number_images)
*(uint64_t*)(hwi+220) = max_number_ubos; // max uniform buffer count (global max_number_ubos)
*(uint64_t*)(hwi+228) = max_number_sbos; // max storage buffer count (global max_number_sbos)
*(uint64_t*)(hwi+236) = 0; // address to supported audio formats
*(uint64_t*)(hwi+244) = HW_INFORMATION+900; // address to supported video/image formats
*(uint16_t*)(hwi+252) = 0; // max num of audio channels
// display dimensions (referenced by the address stored at offset 5)
*(uint32_t*)(hwi+500) = window_width;
*(uint32_t*)(hwi+504) = window_height;
// cursor coordinates (referenced by the address stored at offset 41)
*(int32_t*)(hwi+600) = cursor_x;
*(int32_t*)(hwi+604) = cursor_y;
// additional cursor inputs (referenced by the address stored at offset 49); the double scroll values are truncated to int32_t
hwi[700] = mouse_buttons;
*(int32_t*)(hwi+701) = scroll_x;
*(int32_t*)(hwi+705) = scroll_y;
// keyboard state (referenced by the address stored at offset 58)
memmove(&hwi[800], &kbd_states, 9);
// supported image formats as a comma-separated, null-terminated list (referenced by the address stored at offset 244)
strcpy(&hwi[900], "png,jpg,jpeg");
}
// Return 1 if the 'size'-byte access at 'address' targets system memory that the holder of
// 'privacy_key' may touch: either the hardware-information block or one of its mapped regions.
// When the access hits the hardware-information block, pending window events are polled and the
// block is refreshed first, so the data reflects the current host state.
uint8_t check_sys_region(uint64_t privacy_key, uint64_t address, uint64_t size) {
	const uint8_t in_hwinfo = check_hwinfo(address, size);

	if(!in_hwinfo)
		return check_mapped_region(privacy_key, address, size);

	glfwPollEvents();
	update_hwinfo();
	return 1;
}
void init_threads() { // creates thread 0
threads = calloc(1, sizeof(thread_t)); // initialize the thread hierarchy (calloc to init all bits to 0)
threads[0].regs = calloc(16, sizeof(uint64_t));
threads[0].instruction_max = SIZE_MAIN_MEM - 1;
threads[0].perm_screenshot = 1;
threads[0].perm_camera = 1;
threads[0].perm_microphones = 1;
threads[0].perm_networking = 1;
threads[0].perm_file_io = 1;
threads[0].perm_thread_creation = 1;
threads[0].highest_dir = malloc(1);
threads[0].highest_dir[0] = '/';
threads[0].highest_dir_length = 1;
memset(&threads[0].bindings, 0, sizeof(object_bindings_t));
threads[0].primary = &threads[0].regs[0];
threads[0].secondary = &threads[0].regs[0];
threads[0].output = &threads[0].regs[0];
n_threads = 1;
}
// Create a new thread as a child of thread 'parent_id' and return the new thread's ID
// (its index in 'threads').
// The new thread is fully zero-initialized, then given its register file, parent and ID; it is
// created in the killed state and registered in the parent's created_threads and descendants arrays.
// Everything else (instruction range, permissions, highest_dir, ...) is left at zero for the caller to set.
// The 'threads' array is reallocated, so any thread_t* obtained before this call may be invalid afterwards.
uint64_t new_thread(uint64_t parent_id) {
	threads = realloc(threads, sizeof(thread_t)*(n_threads+1)); // add thread to the thread hierarchy
	thread_t* parent = &threads[parent_id];
	thread_t* thread = &threads[n_threads];

	// realloc() does not clear the new slot. Without this, pointer fields such as descendants,
	// created_threads and highest_dir hold garbage that is later passed to realloc()/free().
	memset(thread, 0, sizeof(thread_t));

	thread->regs = calloc(16, sizeof(uint64_t));
	thread->parent = parent->id;
	thread->id = n_threads;
	thread->killed = 1; // this is set to 0 at the next cycle of parent (the thread is created as killed in order to treat the thread as non-existent until then)
	thread->primary = &thread->regs[0];
	thread->secondary = &thread->regs[0];
	thread->output = &thread->regs[0];

	// push the created thread's ID to the back of the parent's created_threads array
	parent->created_threads = realloc(parent->created_threads, sizeof(uint64_t)*(parent->n_created_threads+1));
	parent->created_threads[parent->n_created_threads] = thread->id;
	parent->n_created_threads++;

	// push the created thread's ID to the back of the parent's descendants array
	parent->descendants = realloc(parent->descendants, sizeof(uint64_t)*(parent->n_descendants+1));
	parent->descendants[parent->n_descendants] = thread->id;
	parent->n_descendants++;

	n_threads++;
	return thread->id;
}
// Mark a thread as killed, flag each of its direct descendants, and release its highest_dir string.
// Bit 0x10000 is ORed into register 13 of each direct descendant to tell it that its parent was killed.
void kill_thread(thread_t* thread) {
	thread->killed = 1;
	for(uint64_t i = 0; i < thread->n_descendants; i++) // 64-bit index: n_descendants is a uint64_t
		threads[thread->descendants[i]].regs[13] |= 0x10000; // set the parent thread killed SR bit for this descendant
	free(thread->highest_dir);
	thread->highest_dir = NULL; // avoid a dangling pointer / double free if the thread is killed again
	// to do: free the memory allocated for all threads whose IDs are named in the thread->created_threads array
}
// Return 1 if 'child' is a (direct or indirect) descendant of 'parent', 0 otherwise.
// Every thread other than thread 0 counts as a descendant of thread 0.
// For any other parent the chain of parents is followed upwards from 'child' until 'parent' is
// reached (1) or a thread whose parent is thread 0 is reached (0).
// 'parent' is compared by address, so it must point into the global 'threads' array.
uint8_t check_descendant(thread_t* parent, thread_t* child) {
	if(parent->id == 0)
		return child->id != 0; // thread 0 is not its own descendant

	for(;;) {
		thread_t* ancestor = &threads[child->parent];
		if(ancestor == parent) return 1;
		if(ancestor->parent == 0) return 0; // reached the top of the hierarchy without meeting 'parent'
		child = ancestor;
	}
}
// Return the absolute value of a 64-bit signed integer.
// (The previous code returned x + x*2, i.e. 3*x, for negative inputs.)
// The negation is done in unsigned arithmetic so that INT64_MIN does not cause signed overflow;
// INT64_MIN has no positive counterpart and is returned unchanged on two's-complement targets.
int64_t abs64(int64_t x) {
	if(x >= 0) return x;
	return (int64_t)(0 - (uint64_t)x);
}
// Return 1 if adding the two 32-bit signed integers a and b overflows the int32_t range, 0 otherwise.
// The sum is formed in 64 bits, so the check itself never overflows. The previous version added in
// 32 bits (undefined behaviour on overflow) and missed INT32_MIN + INT32_MIN, whose wrapped result is 0.
uint8_t check_overflow32(int32_t a, int32_t b) {
	const int64_t sum = (int64_t)a + (int64_t)b;
	return sum > INT32_MAX || sum < INT32_MIN;
}
// Return 1 if adding the two 64-bit signed integers a and b overflows the int64_t range, 0 otherwise.
// The test compares against the remaining headroom instead of performing the addition, so it never
// overflows itself. The previous version computed a+b first (undefined behaviour on overflow) and
// missed INT64_MIN + INT64_MIN, whose wrapped result is 0.
uint8_t check_overflow64(int64_t a, int64_t b) {
	if(b > 0) return a > INT64_MAX - b; // would exceed the maximum
	if(b < 0) return a < INT64_MIN - b; // would fall below the minimum
	return 0;
}
// A block of data with its size.
// NOTE(review): presumably the payload of a storage buffer object (TYPE_SBO) — confirm against the object code.
typedef struct sbo_t {
void* data; // the buffer contents
uint64_t size; // size of 'data' (presumably in bytes — confirm)
} sbo_t;
// One binding inside a descriptor set: a binding number, the descriptor type and the descriptors bound there.
// The four sampler arrays are parallel arrays with one entry per sampler descriptor.
typedef struct desc_binding_t {
uint32_t binding_number;// the binding number of this descriptor binding
uint8_t binding_type; // the type of this descriptor binding (0=uniform, 1=storage, 2=sampler, 3=image, 4=AS)
uint32_t* object_ids; // IDs of objects referenced in this binding (only sampler bindings can have multiple)
uint8_t* min_filters; // one for each sampler descriptor
uint8_t* mag_filters; // one for each sampler descriptor
uint8_t* s_modes; // one for each sampler descriptor
uint8_t* t_modes; // one for each sampler descriptor
uint16_t n_descs; // number of descriptors at this binding
} desc_binding_t;
// Layout of a descriptor set: three parallel arrays describing each binding point
// (its number, its descriptor type and how many descriptors it holds).
typedef struct set_layout_t {
uint32_t* binding_numbers; // contains the binding point number for each descriptor binding
uint8_t* binding_types; // contains binding types for each of the binding points named in 'binding_numbers' array (desc type; 0=uniform, 1=storage, 2=sampler, 3=image, 4=AS)
uint16_t* n_descs; // number of descriptors in each binding point
uint32_t n_binding_points; // number of descriptor binding points, 0 corresponds to 1
} set_layout_t;
// A descriptor set: its bindings plus the ID of the layout it refers to.
typedef struct desc_set_t {
desc_binding_t* bindings; // bindings for this descriptor set
uint32_t n_bindings; // number of bindings in this descriptor set, 0 corresponds to 1
uint32_t layout_id; // presumably the object ID of the set layout (set_layout_t) this set was created with — confirm
} desc_set_t;
// structure for shader bytecode (in shader objects), or translated GLSL source code (in create_pipeline)
// Which of the two 'src' holds depends on where the structure is used; the structure itself does not record it.
typedef struct shader_t {
char* src; // shader source bytecode, or GLSL source code
uint64_t size; // length in bytes (incl. null character)
uint8_t type; // 0 = vertex, 1 = pixel, 2 = compute
} shader_t;
// Command buffer object: the object bindings that apply to recorded commands plus the recorded command stream.
typedef struct cbo_t { // command buffer structure
uint64_t bindings[4]; // the current bindings for the command buffer (arranged in order specified under Graphics States; these also affect recorded commands)
// bindings are bound object IDs for the command buffer: bindings[0] = pipeline object, bindings[1] = FBO, bindings[2] = VBO, bindings[3] = IBO
uint32_t dset_ids[MAX_NUMBER_BOUND_SETS]; // the descriptor sets bound to this command buffer
uint8_t pipeline_type; // set after initialization or after command buffer reset at first pipeline bound to CBO; the type of pipeline this CBO uses. initialized to 2 (none bound).
void* cmds; // the command opcodes, alongside the information affecting the commands execution as they were when the command was issued. see record_command() for more information
uint64_t size; // size of cmds
} cbo_t;
// definition_t is only forward-declared here; pipeline_t and shader_data_t hold pointers to it.
typedef struct definition_t definition_t;
// Pipeline object: the GL program, the descriptor set layouts it uses, information about the
// definitions found in its shaders, push-constant storage and (for rasterization pipelines) the fixed-function state.
typedef struct pipeline_t {
GLint gl_program; // this pipeline's GL program
uint64_t vao_id; // the ID of the VAO object for this pipeline (rasterization pipeline only)
uint32_t dset_layout_ids[MAX_NUMBER_BOUND_SETS]; // IDs of descriptor set layout objects referenced for each set binding
uint16_t n_desc_sets; // number of enabled descriptor sets (0-4 for rasterization and compute, 0-1 for RT pipelines)
uint8_t type; // 0 = rasterization, 1 = ray tracing, 2 = compute
definition_t* defs_1; // information about definitions present in the vertex/compute shader
definition_t* defs_2; // information about definitions present in the pixel shader
uint32_t n_defs_1; // number of entries in defs_1
uint32_t n_defs_2; // number of entries in defs_2
uint8_t* push_constant_data;
// NOTE(review): this count is 8 bits wide while shader_data_t.n_push_constant_bytes is 32 bits — check for truncation where one is copied to the other.
uint8_t n_push_constant_bytes;
/* RASTERIZATION PIPELINE STATES */
uint8_t culled_winding; // 0=no culling, 1=cw, 2=ccw, 3=cw+ccw
uint8_t primitive_type; // 0=triangles, 1=lines, 2=points
uint8_t depth_pass; // condition for depth test pass
uint8_t depth_enabled; // whether or not writing depth is enabled
uint8_t cw_stencil_ref; // stencil test func reference for cw faces
uint8_t cw_stencil_pass;// stencil test func pass condition for cw faces
uint8_t cw_stencil_op_sfail; // cw face stencil operation if stencil test fails
uint8_t cw_stencil_op_spass_dfail; // cw face stencil operation if stencil test passes but depth test fails
uint8_t cw_stencil_op_sfail_dfail; // cw face stencil operation if both stencil and depth test fail
uint8_t cw_stencil_func_mask; // stencil func mask for cw faces
uint8_t cw_stencil_write_mask; // stencil write mask for cw faces
uint8_t ccw_stencil_ref; // stencil test func reference for ccw faces
uint8_t ccw_stencil_pass;// stencil test func pass condition for ccw faces
uint8_t ccw_stencil_op_sfail; // ccw face stencil operation if stencil test fails
uint8_t ccw_stencil_op_spass_dfail; // ccw face stencil operation if stencil test passes but depth test fails
uint8_t ccw_stencil_op_sfail_dfail; // ccw face stencil operation if both stencil and depth test fail
uint8_t ccw_stencil_func_mask; // stencil func mask for ccw faces
uint8_t ccw_stencil_write_mask; // stencil write mask for ccw faces
uint8_t color_write_mask; // color write mask (which color components can be written to; RGBA bits w/ A at LSB)
uint8_t n_enabled_attachments; // number of enabled color attachments (0-7, 0 corresponds to 1)
uint8_t color_blend_op; // the RGB blending operation
uint8_t src_color_blend_fac; // the source RGB blending factor
uint8_t dst_color_blend_fac; // the destination RGB blending factor
uint8_t alpha_blend_op; // the alpha blending operation
uint8_t src_alpha_blend_fac; // the source alpha blending factor
uint8_t dst_alpha_blend_fac; // the destination alpha blending factor
} pipeline_t;
// Append the null-terminated string str2 to the heap-allocated, null-terminated string *str1.
// *str1 is grown with realloc() and may therefore move; the caller keeps ownership of it.
void str_add(char** str1, const char* str2) {
	uint32_t head = strlen(*str1);
	uint32_t tail = strlen(str2);
	char* grown = realloc(*str1, head+tail+1);

	memcpy(grown + head, str2, tail);
	grown[head+tail] = '\0';
	*str1 = grown;
}
// Insert the null-terminated string str2 at index pos of the heap-allocated, null-terminated
// string *str1. *str1 is grown with realloc() and may therefore move.
// A pos beyond the end of *str1 is clamped to the end, i.e. str2 is appended.
void str_insert(char** str1, const char* str2, uint32_t pos) {
	uint32_t len1 = strlen(*str1), len2 = strlen(str2);
	if(pos > len1) pos = len1; // previously len1-pos underflowed for an out-of-range position
	*str1 = realloc(*str1, len1+len2+1);
	// the tail is shifted within the same buffer, so source and destination can overlap:
	// memmove is required here (memcpy on overlapping ranges is undefined behaviour)
	memmove(*str1 + pos + len2, *str1 + pos, len1-pos);
	memcpy(*str1 + pos, str2, len2);
	(*str1)[len1+len2] = '\0';
}
// Append the name of the shader data type numbered 'type' (0-23) to the null-terminated string *str.
// Type numbers outside 0-23 append nothing.
// NOTE(review): entries 22 and 23 are "isampler"/"usampler" while entry 21 is "sampler2D"; GLSL's integer
// sampler types are spelled isampler2D/usampler2D — confirm whether a suffix is added elsewhere.
void str_add_type(char** str, uint8_t type) {
	static const char* const type_names[] = {
		"vec2", "vec3", "vec4",
		"ivec2", "ivec3", "ivec4",
		"uvec2", "uvec3", "uvec4",
		"mat2", "mat2x3", "mat2x4",
		"mat3x2", "mat3", "mat3x4",
		"mat4x2", "mat4x3", "mat4",
		"float", "int", "uint",
		"sampler2D", "isampler", "usampler"
	};

	if(type < sizeof type_names / sizeof type_names[0])
		str_add(str, type_names[type]);
}
// Append the decimal representation of the unsigned integer x to the heap-allocated,
// null-terminated string *str. *str is grown with realloc() and may therefore move.
// If the formatting or the allocation fails, *str is left unchanged.
// The previous version only scanned the first 1000 bytes for the terminator (longer strings were
// truncated and overwritten at offset 999) and sized the buffer from x/10+1 instead of the digit count.
void str_add_ui(char** str, uint32_t x) {
	const size_t len = strlen(*str);
	const int n_digits = snprintf(NULL, 0, "%u", (unsigned)x); // measure only
	if(n_digits < 0) return;

	char* grown = realloc(*str, len + (size_t)n_digits + 1);
	if(!grown) return;
	snprintf(grown + len, (size_t)n_digits + 1, "%u", (unsigned)x);
	*str = grown;
}
// Append the decimal representation of the signed integer x (with a leading '-' when negative) to
// the heap-allocated, null-terminated string *str. *str is grown with realloc() and may therefore move.
// If the formatting or the allocation fails, *str is left unchanged.
// The previous version only scanned the first 1000 bytes for the terminator (longer strings were
// truncated and overwritten), sized the buffer from abs(x)/10+1 instead of the digit count, and
// evaluated abs(INT32_MIN), which is undefined.
void str_add_i(char** str, int32_t x) {
	const size_t len = strlen(*str);
	const int n_chars = snprintf(NULL, 0, "%i", (int)x); // measure only
	if(n_chars < 0) return;

	char* grown = realloc(*str, len + (size_t)n_chars + 1);
	if(!grown) return;
	snprintf(grown + len, (size_t)n_chars + 1, "%i", (int)x);
	*str = grown;
}
// Append the "%f" representation of x (six digits after the decimal point) to the heap-allocated,
// null-terminated string *str. *str is grown with realloc() and may therefore move.
// If the formatting or the allocation fails, *str is left unchanged.
// The text is measured first and written straight into the grown string: the previous fixed
// 32-byte buffer overflowed for large magnitudes (FLT_MAX needs 47 bytes with "%f").
void str_add_f(char** str, float x) {
	const size_t len = strlen(*str);
	const int n_chars = snprintf(NULL, 0, "%f", x); // measure only
	if(n_chars < 0) return;

	char* grown = realloc(*str, len + (size_t)n_chars + 1);
	if(!grown) return;
	snprintf(grown + len, (size_t)n_chars + 1, "%f", x);
	*str = grown;
}
// Append the vector component selector for index idx (0 -> ".x", 1 -> ".y", 2 -> ".z", 3 -> ".w")
// to the null-terminated string *str. Any other index appends nothing.
void str_add_vec_idx(char** str, uint32_t idx) {
	static const char* const selectors[4] = { ".x", ".y", ".z", ".w" };

	if(idx < 4)
		str_add(str, selectors[idx]);
}
// Append a matrix element selector of the form "[a][b]" to the null-terminated string *str,
// where a = idx / height and b = idx % width.
// NOTE(review): for a [col][row] selector of a column-major flat index one would expect the second
// subscript to be idx % height; the two formulas agree only for square matrices. The intended
// layout cannot be determined from this function alone — confirm against the callers.
// height and width must be non-zero (they are used as divisors).
void str_add_mat_idx(char** str, uint8_t width, uint8_t height, uint32_t idx) {
	const uint32_t first = idx / height;
	const uint32_t second = idx % width;

	str_add(str, "[");
	str_add_ui(str, first);
	str_add(str, "][");
	str_add_ui(str, second);
	str_add(str, "]");
}
// Append the one-letter loop counter name for a shader scope level to the null-terminated string
// *str: level 1 -> "i", 2 -> "j", ... 8 -> "p". Level 0 and levels above 8 append nothing.
void str_add_iterator(char** str, uint8_t scope_level) {
	static const char counters[] = "ijklmnop";

	if(scope_level >= 1 && scope_level <= 8) {
		const char name[2] = { counters[scope_level-1], '\0' };
		str_add(str, name);
	}
}
// Bit flags naming the kinds of shader definition; ALL_DEF_BIT is the union of all seven.
// NOTE(review): the code that tests these flags is outside this part of the file.
#define VAR_DEF_BIT 0x1
#define UNIF_DEF_BIT 0x2
#define IN_ATTR_DEF_BIT 0x4
#define OUT_ATTR_DEF_BIT 0x8
#define RAY_ATTR_DEF_BIT 0x10
#define INCOMING_RAY_ATTR_DEF_BIT 0x20
#define FUNC_DEF_BIT 0x40
#define ALL_DEF_BIT (VAR_DEF_BIT|UNIF_DEF_BIT|IN_ATTR_DEF_BIT|OUT_ATTR_DEF_BIT|RAY_ATTR_DEF_BIT|INCOMING_RAY_ATTR_DEF_BIT|FUNC_DEF_BIT)
typedef struct func_def_t func_def_t; // forward declaration; the structure is defined elsewhere
// Information collected about a shader: the set/binding pairs and locations it uses, its
// push-constant size, the vertex-output / pixel-input attributes, and its definitions.
// The arrays are grown one element at a time by add_set_binding(), add_location() and add_vertex_output().
typedef struct shader_data_t {
// USED FOR CHECKING SET/BINDING AND LOCATION EXISTENCES:
uint8_t* sets; // the set number(s) for each data block within the shader
uint32_t* bindings; // parallel to sets array; the binding numbers for each data block within the shader
uint8_t* set_binding_types; // parallel to sets + bindings arrays; the types for each set/binding pair (0=uniform, 1=storage, 2=sampler, 3=image, 4=AS)
uint16_t* locations; // the location IDs occupied within the shader
uint32_t n_set_binding_pairs; // number of entries in sets / bindings / set_binding_types
uint32_t n_locations; // number of entries in locations
uint32_t n_push_constant_bytes;
uint16_t* vertex_output_ids; // the identifier for each vertex shader attribute output
uint8_t* vertex_output_types; // the data type for each vertex shader attribute output
uint8_t* vertex_output_modes; // the interpolation mode for each vertex shader attribute output
uint16_t* pixel_input_ids; // the identifier for each pixel shader attribute input
uint8_t* pixel_input_types; // the data type for each pixel shader attribute input
uint32_t n_vertex_outputs;
uint32_t n_pixel_inputs;
definition_t* defs;
uint32_t n_defs;
} shader_data_t;
// add a set+binding pair to shader data (type; 0=uniform, 1=storage, 2=sampler, 3=image, 4=AS)
// the three parallel arrays grow by one element each
// if any of them cannot be grown the pair is not recorded and n_set_binding_pairs is left unchanged;
// arrays that were already grown stay valid (they just carry one spare element)
void add_set_binding(shader_data_t* data, uint8_t set, uint32_t binding, uint8_t type) {
    size_t n = (size_t)data->n_set_binding_pairs + 1;
    uint8_t* sets = realloc(data->sets, n * sizeof *sets);
    if(!sets) return;
    data->sets = sets;
    uint32_t* bindings = realloc(data->bindings, n * sizeof *bindings);
    if(!bindings) return;
    data->bindings = bindings;
    uint8_t* types = realloc(data->set_binding_types, n * sizeof *types);
    if(!types) return;
    data->set_binding_types = types;
    sets[n-1] = set;
    bindings[n-1] = binding;
    types[n-1] = type;
    data->n_set_binding_pairs++;
}
// add a location to shader data
// the locations array grows by one element; if it cannot be grown the location is not recorded
// and the array and n_locations are left unchanged
void add_location(shader_data_t* data, uint16_t location) {
    size_t n = (size_t)data->n_locations + 1;
    uint16_t* locations = realloc(data->locations, n * sizeof *locations);
    if(!locations) return;
    data->locations = locations;
    locations[n-1] = location;
    data->n_locations++;
}
// add a vertex shader attribute output (identifier, data type and interpolation mode) to shader data
// the three parallel arrays grow by one element each, sized by their element type (not by the size of a pointer)
// if any of them cannot be grown the output is not recorded and n_vertex_outputs is left unchanged
void add_vertex_output(shader_data_t* data, uint16_t id, uint8_t type, uint8_t mode) {
    size_t n = (size_t)data->n_vertex_outputs + 1;
    uint16_t* ids = realloc(data->vertex_output_ids, n * sizeof *ids);
    if(!ids) return;
    data->vertex_output_ids = ids;
    uint8_t* types = realloc(data->vertex_output_types, n * sizeof *types);
    if(!types) return;
    data->vertex_output_types = types;
    uint8_t* modes = realloc(data->vertex_output_modes, n * sizeof *modes);
    if(!modes) return;
    data->vertex_output_modes = modes;
    ids[n-1] = id;
    types[n-1] = type;
    modes[n-1] = mode;
    data->n_vertex_outputs++;
}
// add a pixel shader attribute input (identifier and data type) to shader data
// the two parallel arrays grow by one element each, sized by their element type (not by the size of a pointer)
// if either of them cannot be grown the input is not recorded and n_pixel_inputs is left unchanged
void add_pixel_input(shader_data_t* data, uint16_t id, uint8_t type) {
    size_t n = (size_t)data->n_pixel_inputs + 1;
    uint16_t* ids = realloc(data->pixel_input_ids, n * sizeof *ids);
    if(!ids) return;
    data->pixel_input_ids = ids;
    uint8_t* types = realloc(data->pixel_input_types, n * sizeof *types);
    if(!types) return;
    data->pixel_input_types = types;
    ids[n-1] = id;
    types[n-1] = type;
    data->n_pixel_inputs++;
}
// returns 1 if the given set+binding pair has already been recorded in the shader data, 0 otherwise
// the pair's type is not taken into account
// NOTE(review): binding is 16 bits wide here while the recorded bindings are 32 bits wide - confirm callers never need larger values
uint8_t check_set_binding_existence(shader_data_t* data, uint8_t set, uint16_t binding) {
    uint32_t i = 0;
    while(i < data->n_set_binding_pairs) {
        if(data->sets[i] == set && data->bindings[i] == binding) return 1;
        i++;
    }
    return 0;
}
// returns 1 if the given location has already been recorded in the shader data, 0 otherwise
uint8_t check_location_existence(shader_data_t* data, uint16_t location) {
    uint32_t i = 0;
    while(i < data->n_locations) {
        if(data->locations[i] == location) return 1;
        i++;
    }
    return 0;
}
// created for each defined identifier; function, variable, uniform, in/out attrib, ray attribute, and incoming ray attrib identifier
// instances are appended to a growing array by add_definition and looked up by check_identifier_existence
struct definition_t {
uint16_t id; // the identifier this definition belongs to
uint8_t def_type; // function, variable, uniform, in/out attrib, ray attrib; set to the *_DEF_BIT macro definitions
uint8_t data_type; // data type (0=vec2, 1=vec3, 2=vec4, 3=ivec2, 4=ivec3, 5=ivec4, 6=uvec2, 7=uvec3, 8=uvec4, 9=mat2x2, 10=mat2x3, 11=mat2x4,
// 12=mat3x2, 13=mat3x3, 14=mat3x4, 15=mat4x2, 16=mat4x3, 17=mat4x4, 18=float, 19=signed integer, 20=unsigned integer,
// 21=sampler, 22=isampler, 23=usampler, 24=image, 25=acceleration structure) - N/A if function definition
uint16_t elcount; // how many data elements defined at this identifier, 0 corresponds to 1 - N/A if function definition
uint16_t location_id; // for payload/incoming payload blocks, attribute definitions; 0 for uniforms, 1 for push constant uniforms
uint8_t within_block; // whether or not this was defined within a uniform/storage block
uint8_t set; // uniform block set number
uint32_t binding; // uniform block binding number
func_def_t* func_def; // pointer to function defined under this identifier
};
// created for each defined function; information about parameters
// param_ids, param_elcounts and param_types are parallel arrays indexed by parameter
struct func_def_t {
uint32_t n_params; // how many parameters there are
uint16_t* param_ids; // identifiers for each parameter (defined locally as variables at beginning of the function body scope)
uint16_t* param_elcounts; // how many data elements defined for this parameter, 0 corresponds to 1
uint8_t* param_types; // data type of each parameter (0=vec2, 1=vec3, 2=vec4, 3=ivec2, 4=ivec3, 5=ivec4, 6=uvec2, 7=uvec3, 8=uvec4, 9=mat2x2, 10=mat2x3, 11=mat2x4,
// 12=mat3x2, 13=mat3x3, 14=mat3x4, 15=mat4x2, 16=mat4x3, 17=mat4x4, 18=float, 19=signed integer, 20=unsigned integer,
// 21=sampler, 22=isampler, 23=usampler, 24=image, 25=acceleration structure)
};
// looks up an identifier among the existing definitions
// filter is the *_DEF_BIT values OR'd together; only definitions whose kind is in the filter are considered
// returns 0 if no such definition exists (or defs is null), otherwise the address of the first matching definition_t
definition_t* check_identifier_existence(uint16_t id, definition_t* defs, uint32_t n_defs, uint32_t filter) {
    if(!defs) return 0;
    for(definition_t* def = defs; def != defs + n_defs; def++) {
        if(!(def->def_type & filter)) continue; // kind not searched for
        if(def->id == id) return def;
    }
    return 0;
}
// shorthand for check_identifier_existence that searches every definition kind except the ones set in excl_filter
definition_t* check_identifier_existence_excl(uint16_t id, definition_t* defs, uint32_t n_defs, uint32_t excl_filter) {
    const uint32_t filter = ALL_DEF_BIT & (~excl_filter);
    return check_identifier_existence(id, defs, n_defs, filter);
}
// defines a new identifier
// pushes its data structure to the back of the defs array, which is grown by one element (and may be moved)
// if the array cannot be grown the definition is not added and *defs / *n_defs are left unchanged
void add_definition(definition_t** defs, uint32_t* n_defs, uint16_t id, uint8_t def_type, uint8_t data_type, uint16_t elcount,
uint16_t location_id, uint8_t within_block, uint8_t set, uint32_t binding, func_def_t* func_def) {
    definition_t* grown = realloc(*defs, sizeof *grown * ((size_t)*n_defs + 1));
    if(!grown) return; // the old array is still valid
    *defs = grown;
    definition_t* def = &grown[*n_defs];
    def->id = id;
    def->def_type = def_type;
    def->data_type = data_type;
    def->elcount = elcount;
    def->location_id = location_id;
    def->within_block = within_block;
    def->set = set;
    def->binding = binding;
    def->func_def = func_def;
    (*n_defs)++;
}
// tells whether a definition kind can be an array: variables, uniforms, ray attributes and incoming ray attributes can
// returns 1 if def_type contains at least one of those kinds, and 0 otherwise
uint8_t check_def_type_array(uint32_t def_type) {
    const uint32_t array_capable = VAR_DEF_BIT | UNIF_DEF_BIT | RAY_ATTR_DEF_BIT | INCOMING_RAY_ATTR_DEF_BIT;
    return (def_type & array_capable) ? 1 : 0;
}
// appends the opening of a GLSL constructor/typecast ("uint(", "ivec3(", "vec4(", ...) to the end of a null-terminated string
// types: 0 selects the unsigned form, 1 the signed form and anything larger the floating-point form
// full_vector == 0 selects the scalar form; otherwise n_vector_elements (2, 3 or 4) selects the vector size
// and any other element count appends nothing
void str_add_typecast(char** str, uint8_t full_vector, uint8_t n_vector_elements, uint8_t types) {
    // rows: scalar, 2-, 3- and 4-component vector; columns: unsigned, signed, floating-point
    static const char* const cast[4][3] = {
        { "uint(",  "int(",   "float(" },
        { "uvec2(", "ivec2(", "vec2("  },
        { "uvec3(", "ivec3(", "vec3("  },
        { "uvec4(", "ivec4(", "vec4("  },
    };
    uint8_t column = types > 1 ? 2 : types;
    uint8_t row = 0;
    if(full_vector) {
        if(n_vector_elements < 2 || n_vector_elements > 4) return;
        row = (uint8_t)(n_vector_elements - 1);
    }
    str_add(str, cast[row][column]);
}
// classifies a shader data type number by its underlying scalar kind
// returns 0 for unsigned types (6..8, 20, 23), 1 for signed integer types (3..5, 19, 22)
// and 2 for everything else, which is treated as floating-point
uint8_t base_type(uint8_t type) {
    switch(type) {
        case 6: case 7: case 8: case 20: case 23:
            return 0;
        case 3: case 4: case 5: case 19: case 22:
            return 1;
        default:
            return 2;
    }
}
// appends a 32-bit constant to the end of a null-terminated string, interpreting its bit pattern according to type
// type 0: unsigned, written as "uint(<value>)"; type 1: signed integer; anything larger: float (written with "%f")
void str_add_constant(char** str, uint32_t constant, uint8_t type) {
    // reinterpret the bits through a union; dereferencing a casted pointer (*(float*)&constant) violates strict aliasing
    union { uint32_t u; int32_t i; float f; } bits;
    bits.u = constant;
    if(type == 0) {
        str_add(str, "uint(");
        str_add_ui(str, bits.u);
        str_add(str, ")");
    }
    else if(type == 1) str_add_i(str, bits.i);
    else str_add_f(str, bits.f);
}
// appends the text of a binary operation to the end of a null-terminated string
// operation: 0 = " + ", 1 = " * ", 2 = " / ", 3 = " - ", 4 = ", "; any other value appends nothing
void str_add_operation(char** str, uint8_t operation) {
    static const char* const symbol[] = { " + ", " * ", " / ", " - ", ", " };
    if(operation >= sizeof symbol / sizeof symbol[0]) return;
    str_add(str, symbol[operation]);
}
#define IDX_TYPE_LOOP -1
#define IDX_TYPE_VAR -2
#define IDX_TYPE_UNIFORM -3
#define IDX_TYPE_INSTANCE -4
// copies n bytes from the shader into dst; used instead of dereferencing casted pointers because the
// shader bytes are not guaranteed to be aligned for 16/32-bit loads (values keep the host byte order)
static void read_array_idx_bytes(void* dst, const uint8_t* src, uint32_t n) {
    uint8_t* out = dst;
    for(uint32_t i = 0; i < n; i++) out[i] = src[i];
}
// decodes the array index that starts at array_index inside a shader; the caller's pointer is not advanced
// max_addr is the address of the last valid byte of the shader
// encodings (16-bit words unless noted):
//   65533, multiplier-1, offset (32-bit signed)             -> IDX_TYPE_INSTANCE; fills *multiplier and *offset
//   65534, identifier, multiplier-1, offset (32-bit signed) -> IDX_TYPE_UNIFORM; fills *identifier, *multiplier and *offset
//   65535, 65535                                            -> IDX_TYPE_LOOP (current loop iteration)
//   65535, identifier                                       -> IDX_TYPE_VAR; fills *identifier
//   anything else                                           -> that value is the constant index
// returns a value >= 0 if constant array index, -5 if the encoding would run past the end of the shader,
// and an IDX_TYPE_* value otherwise; outputs that an encoding does not fill are left untouched
int32_t read_array_idx(uint16_t* array_index, uint16_t* identifier, uint16_t* multiplier, int32_t* offset, uint8_t* max_addr) {
    const uint8_t* p = (const uint8_t*)array_index;
    uint16_t first = 0, word = 0;
    if(max_addr - p < 1) return -5; // even the first 16-bit word must lie inside the shader
    read_array_idx_bytes(&first, p, 2);
    if(first == 65533) { // use instance ID, with multiplier, offset
        if(max_addr - p < 7) return -5;
        read_array_idx_bytes(&word, p + 2, 2);
        *multiplier = (uint16_t)(word + 1);
        read_array_idx_bytes(offset, p + 4, 4);
        return IDX_TYPE_INSTANCE;
    }
    if(first == 65534) { // use uint uniform, with multiplier, offset
        if(max_addr - p < 9) return -5;
        read_array_idx_bytes(identifier, p + 2, 2);
        read_array_idx_bytes(&word, p + 4, 2);
        *multiplier = (uint16_t)(word + 1);
        read_array_idx_bytes(offset, p + 6, 4);
        return IDX_TYPE_UNIFORM;
    }
    if(first == 65535) { // 65535 65535 - current loop iteration
        if(max_addr - p < 3) return -5;
        read_array_idx_bytes(&word, p + 2, 2);
        if(word == 65535) return IDX_TYPE_LOOP; // current loop iteration as index
        *identifier = word;
        return IDX_TYPE_VAR; // uint variable as index
    }
    return first;
}
// reads the common form of {identifier | index} without the 8 bits for vector/matrix element; puts its data into newly defined variables
// (def_ptrN, idN, is_arrN, typeN, elcountN, idx_idN, multiplierN, offsetN, arr_idxN) and makes the enclosing function return 1 if invalid
// opcode is assumed to be at the identifier; on success it is advanced to the address after the index, on failure it is not moved
// expects defs, n_defs, end, opcode, shader_type, scope_level, level_status and level_iterations to be in scope where the macro is used
#define READ_ID(n,exclude_filter) definition_t *def_ptr##n; \
    uint16_t id##n = READ(opcode,2); \
    if(!(def_ptr##n=check_identifier_existence_excl(id##n,defs,n_defs,exclude_filter))) return 1; /* identifier does not exist */ \
    uint8_t is_arr##n = 0, type##n = def_ptr##n->data_type; \
    uint16_t elcount##n = def_ptr##n->elcount; /* same width as definition_t.elcount so counts above 255 are not truncated */ \
    is_arr##n = check_def_type_array(def_ptr##n->def_type); \
    uint16_t idx_id##n = 0, multiplier##n = 0; /* zeroed: ADD_IDX passes these on even when the index form does not fill them */ \
    int32_t offset##n = 0, arr_idx##n = is_arr##n ? read_array_idx((uint16_t*)(opcode+2), &idx_id##n, &multiplier##n, &offset##n, end) : 0; \
    if(is_arr##n && arr_idx##n < IDX_TYPE_INSTANCE) return 1; /* index runs past the end of the shader */ \
    if(arr_idx##n >= 0 && arr_idx##n > elcount##n) return 1; /* using constant as index and the index does not exist */ \
    if(arr_idx##n == IDX_TYPE_LOOP && (scope_level==0||level_status[scope_level-1]!=2||level_iterations[scope_level-1]-1>elcount##n)) return 1; /* using current loop iteration and this level is not in loop/has too many iterations; scope_level 0 has no loop and must not index level_status[-1] */ \
    if(arr_idx##n == IDX_TYPE_VAR && (shader_type != 0 || def_ptr##n->def_type != UNIF_DEF_BIT)) return 1; /* using uint variable as index and not vertex shader with uniform arrays */ \
    if(arr_idx##n == IDX_TYPE_VAR && !check_identifier_existence(idx_id##n,defs,n_defs,VAR_DEF_BIT)) return 1; \
    if(arr_idx##n == IDX_TYPE_VAR && check_identifier_existence(idx_id##n,defs,n_defs,VAR_DEF_BIT)->elcount != 1) return 1; \
    if(arr_idx##n == IDX_TYPE_UNIFORM && !check_identifier_existence(idx_id##n,defs,n_defs,UNIF_DEF_BIT)) return 1; \
    if(arr_idx##n == IDX_TYPE_UNIFORM && check_identifier_existence(idx_id##n,defs,n_defs,UNIF_DEF_BIT)->elcount != 1) return 1; \
    if(arr_idx##n == IDX_TYPE_INSTANCE && shader_type != 0) return 1; /* instance ID can only be used as index in vertex shaders */ \
    opcode += 2; \
    if(is_arr##n && arr_idx##n >= 0) opcode += 2; /* array index provided by constant index */ \
    else if(is_arr##n && arr_idx##n == IDX_TYPE_LOOP) opcode += 4; /* array index provided by current loop iteration (index is 2 16-bit uints equal to 65535) */ \
    else if(is_arr##n && arr_idx##n == IDX_TYPE_VAR) opcode += 4; /* array index provided by uint variable */ \
    else if(is_arr##n && arr_idx##n == IDX_TYPE_UNIFORM) opcode += 10; /* array index provided by uint uniform */ \
    else if(is_arr##n && arr_idx##n == IDX_TYPE_INSTANCE) opcode += 8; /* array index provided by instance ID */
// reads the common form of {identifier | index | 8 bits for vec/mat element}; puts its data into newly defined variables, returns if invalid
// opcode is assumed to be at the beginning of the id
// expands READ_ID(n, exclude_filter) first, then defines matvec_idxN: for types below 18 (vectors and matrices) it is the byte
// read at opcode and opcode is advanced by one; for every other type it is 0 and opcode is not moved
// note that READ yields 1 (and sets err) instead of the byte when the read would pass the end of the shader
#define READ_ID_WITH_MATVEC_ELEMENT(n,exclude_filter) READ_ID(n,exclude_filter);\
uint8_t matvec_idx##n = type##n<18 ? READ(opcode,1) : 0; if(type##n<18) opcode++; /* matrix/vector element will follow array index if type < 18 (is matrix or vector) */
// used to add index [] to shader; n should be the same as in READ_ID
// appends the array subscript of identifier n to glsl_shader->src via add_idx, but only when READ_ID marked the
// identifier as array-capable (is_arrN); relies on the variables READ_ID(n, ...) defined plus glsl_shader and scope_level
#define ADD_IDX(n) { \
if(is_arr##n) add_idx(&glsl_shader->src, idx_id##n, arr_idx##n, multiplier##n, offset##n, scope_level); \
}
// implementation for ADD_IDX(n) macro: appends a bracketed array subscript to the end of a null-terminated string
// arr_idx >= 0          -> the constant itself
// IDX_TYPE_LOOP         -> the loop counter of scope_level-1
// IDX_TYPE_VAR          -> int(_<idx_id>[0])
// IDX_TYPE_UNIFORM      -> <multiplier>*int(_<idx_id>[0]) followed by the signed offset
// IDX_TYPE_INSTANCE     -> <multiplier>*gl_InstanceID followed by the signed offset
// any other negative value produces empty brackets
void add_idx(char** str, uint16_t idx_id, int32_t arr_idx, uint16_t multiplier, int32_t offset, uint8_t scope_level) {
    str_add(str, "[");
    if(arr_idx >= 0) {
        str_add_ui(str, arr_idx);
    } else {
        switch(arr_idx) {
            case IDX_TYPE_LOOP:
                str_add_iterator(str, scope_level-1);
                break;
            case IDX_TYPE_VAR:
                str_add(str, "int(_");
                str_add_ui(str, idx_id);
                str_add(str, "[0])");
                break;
            case IDX_TYPE_UNIFORM:
                str_add_ui(str, multiplier);
                str_add(str, "*int(_");
                str_add_ui(str, idx_id);
                str_add(str, "[0])");
                if(offset >= 0) str_add(str, "+"); // negative offsets bring their own sign
                str_add_i(str, offset);
                break;
            case IDX_TYPE_INSTANCE:
                str_add_ui(str, multiplier);
                str_add(str, "*gl_InstanceID");
                if(offset >= 0) str_add(str, "+"); // negative offsets bring their own sign
                str_add_i(str, offset);
                break;
            default:
                break;
        }
    }
    str_add(str, "]");
}
// returns 1 on fail, returns 0 and fills glsl_shader on success
uint8_t build_shader(uint8_t* src, uint32_t length, uint8_t shader_type, shader_t* glsl_shader, shader_data_t* shader_data) {
if(length == 0) return 1;
// shader_type: 0=vertex, 1=pixel, 2=compute, 3=other (RT shader; unsupported + bytecode exclusive to RT shaders will be treated as invalid)
uint64_t loadval(uint8_t* a, uint8_t n);
uint8_t err = 0; // checked at the beginning of instruction processing loop
#define READ(ptr,n_bytes) (ptr+n_bytes-1 > end ? (err=1) : loadval(ptr,n_bytes)) /* shorthand to read a value from the shader safely */
// check if identifier exists, return 1 if it does
#define CHECK_ID_DEFINED(id) if(check_identifier_existence(id,defs,n_defs,ALL_DEF_BIT)) return 1
#define MAT_WIDTH(type) (((type-9)/3)+2) /* gets the matrix width given a matrix type # */
#define MAT_HEIGHT(type) (((type-9)%3)+2) /* gets the matrix height given a matrix type # */
#define MAT_SIZE(type) (MAT_WIDTH(type)*MAT_HEIGHT(type)) /* gets the number of matrix elements given a matrix type */
#define VEC_SIZE(type) ((type%3)+2) /* gets the number of vector elements given a vector type */
// previously globally defined identifiers, their element counts, and their types
// there are functions, uniforms, variables, in/out attributes, ray attributes, and incoming ray attributes
definition_t* defs = 0; // array of all defined identifiers + their information
uint32_t n_defs = 0; // total number of identifier definitions
uint32_t n_local_defs = 0; // number of variables defined within a function; remove this number from back of definitions array when the function is exited
// scope information
uint8_t scope_type = 0; // current scope type (0=global, 1=main func, 2=func, 3=uniform block, 4=push constant block, 5=storage block, 6=ray payload block,
// 7=incoming ray payload block, 8=func definition)
uint8_t scope_level = 0; // 0=not in function, 1=level 1, 2=level 2, ..., 9=level 8