-
Notifications
You must be signed in to change notification settings - Fork 9
/
vec.h
126 lines (112 loc) · 3.64 KB
/
vec.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#ifndef VEC_H_
#define VEC_H_
// 2-component vector with signed 16-bit members.
struct Vec216 {
  int16_t x;
  int16_t y;

  // Default construction leaves x and y uninitialized.
  Vec216() {}

  // Builds a vector from its two components.
  Vec216(int16_t x0, int16_t y0) : x(x0), y(y0) {}
};
// Component-wise difference a - b.
// Each subtraction is carried out in int and then narrowed back to int16_t,
// so results outside the int16_t range are not preserved.
static Vec216 operator-(const Vec216 &a, const Vec216 &b) {
  const int16_t dx = a.x - b.x;
  const int16_t dy = a.y - b.y;
  return Vec216(dx, dy);
}
// 8 bit fixed point 3-vector, all magnitude < 1
struct Vec38 {
  int8_t x, y, z;

  // Default construction leaves x, y and z uninitialized.
  Vec38() {}

  Vec38(int8_t _x, int8_t _y, int8_t _z):
    x(_x), y(_y), z(_z) {}

  // Dot product of this vector with b.
  //
  // The three component products are formed with the AVR fractional signed
  // multiply (FMULS), summed into 16 bits, and the high byte of that sum is
  // returned. Does not modify either vector.
  int8_t dot(const Vec38 &b) const {
#if 1
    int16_t result16 = __builtin_avr_fmuls(x, b.x)
      + __builtin_avr_fmuls(y, b.y)
      + __builtin_avr_fmuls(z, b.z);
    return result16 >> 8;
#else
    /*
     * Hand-written version of the code above (eliminating redundant
     * loads/stores/clr r1s). It accumulates the three FMULS products in
     * r25:r24 and returns the high byte (r25).
     *
     * NOTE(review): the original author reported that this worked in
     * isolation but not in the main loop. The original clobber list omitted
     * r24 (written by "movw r24, r0") and did not tell the compiler that the
     * asm reads memory through Y and Z; both are declared below. This path is
     * still disabled -- verify on hardware before switching "#if 1" to 0.
     */
    uint8_t result;
    asm("ld r18, Y\n\t"
        "ld r19, Z\n\t"
        "fmuls r18, r19\n\t"
        "movw r24, r0\n\t"
        "ldd r18, Y+1\n\t"
        "ldd r19, Z+1\n\t"
        "fmuls r18, r19\n\t"
        "add r24, r0\n\t"
        "adc r25, r1\n\t"
        "ldd r18, Y+2\n\t"
        "ldd r19, Z+2\n\t"
        "fmuls r18, r19\n\t"
        "add r24, r0\n\t"
        "adc r25, r1\n\t"
        "mov %0, r25\n\t"
        "clr r1\n\t"  // restore the compiler's zero register after FMULS
        : "=r" (result)
        : "y" (&b), "z" (this)
        : "r0", "r1", "r18", "r19", "r24", "r25", "memory");
    return result;
#endif
  }

  // Perspective-project this vector and write the 2D result to *out.
  //
  // "ooz" stands for one over z = 256 * scale / (dist - z), where dist is
  // fixed at 1024 and z ranges from -128 to 127. Since scale is O(1024) this
  // is approximately scale*1024/(1024-z), which is replaced here by its
  // linear Taylor expansion: ooz = scale + z.
  //
  // Another term (z*z >> 10) adds a bit of accuracy but you can't see it
  // anyway on this tiny screen.
  //
  // x is added to the offset 64*16 and y is subtracted from the offset 32*16
  // (presumably the screen centre in 1/16-pixel units -- confirm against the
  // renderer).
  void project(int16_t scale, Vec216 *out) const {
    const int16_t ooz = scale + z;
    // Widen to 32 bits before multiplying so ooz * component cannot overflow.
    out->x = 64*16 + (((int32_t) ooz * x) >> 8);
    out->y = 32*16 - (((int32_t) ooz * y) >> 8);
  }
};
// 3x3 matrix of 8-bit fixed point values, stored as three row vectors.
// Multiplying a vertex v by the matrix yields (v . x, v . y, v . z).
struct Mat338 {
  Vec38 x, y, z;

  // Default construction leaves all rows uninitialized.
  Mat338() {}

  // Builds the matrix from its three rows (the rows are copied).
  Mat338(Vec38 &&_x, Vec38 &&_y, Vec38 &&_z):
    x(_x), y(_y), z(_z) {}

  // optimized combined rotation and projection of n vertices from progmem to SRAM
  //
  // vertices: n packed (x, y, z) int8_t triples, read with pgm_read_byte_near.
  // n:        number of vertices; n == 0 does nothing.
  // scale:    added to the rotated z to form the approximate 1/z factor.
  // out:      receives n projected points; must have room for n entries.
  void RotateAndProject(const int8_t* PROGMEM vertices, uint8_t n,
      int16_t scale, Vec216 *out) const {
    // The do/while below tests n only after decrementing it, so without this
    // guard n == 0 would wrap to 255 and process 256 vertices.
    if (n == 0) {
      return;
    }
    do {
      int8_t v;
      // Accumulate the matrix * vertex product one vertex component at a
      // time, so each component is read from program memory only once.
      v = pgm_read_byte_near(vertices++);
      int16_t px = __builtin_avr_fmuls(v, x.x);
      int16_t py = __builtin_avr_fmuls(v, y.x);
      int16_t pz = __builtin_avr_fmuls(v, z.x);
      v = pgm_read_byte_near(vertices++);
      px += __builtin_avr_fmuls(v, x.y);
      py += __builtin_avr_fmuls(v, y.y);
      pz += __builtin_avr_fmuls(v, z.y);
      v = pgm_read_byte_near(vertices++);
      px += __builtin_avr_fmuls(v, x.z);
      py += __builtin_avr_fmuls(v, y.z);
      pz += __builtin_avr_fmuls(v, z.z);
      pz = scale + (pz >> 8);  // approximate 1/z
      // The product is formed in 32 bits; only its low 16 bits (after the
      // shift) are kept when the result is stored in the int16_t outputs.
      out->x = 64*16 + ((uint32_t) pz * (px>>8) >> 8);
      out->y = 32*16 - ((uint32_t) pz * (py>>8) >> 8);
      ++out;
    } while (--n);
  }

  // use fixed light source [1, 1, 4] for now... TODO: light source vector arg?
  //
  // normal: one (x, y, z) int8_t triple, read with pgm_read_byte_near.
  // Returns the rotated normal weighted by the light vector: the x and y
  // terms are shifted right by 8+5 and the z term by 8+3, which gives z four
  // times the weight of x and y.
  int8_t CalcIllumination(const int8_t* PROGMEM normal) const {
    // read and rotate normal vector
    int8_t v = pgm_read_byte_near(normal++);
    int16_t px = __builtin_avr_fmuls(v, x.x);
    int16_t py = __builtin_avr_fmuls(v, y.x);
    int16_t pz = __builtin_avr_fmuls(v, z.x);
    v = pgm_read_byte_near(normal++);
    px += __builtin_avr_fmuls(v, x.y);
    py += __builtin_avr_fmuls(v, y.y);
    pz += __builtin_avr_fmuls(v, z.y);
    v = pgm_read_byte_near(normal);
    px += __builtin_avr_fmuls(v, x.z);
    py += __builtin_avr_fmuls(v, y.z);
    pz += __builtin_avr_fmuls(v, z.z);
    return (px >> (8+5)) + (py >> (8+5)) + (pz >> (8+3));
  }
};
#endif // VEC_H_