/
fastfunctions.asm
98 lines (96 loc) · 2.76 KB
/
fastfunctions.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
.386
.XMM
.MODEL FLAT, C
.FARDATA
.CONST
; Divisor that normalize applies to every signed 16-bit sample.
; 32768.0 = 2^15, the magnitude of the most negative 16-bit value, so the
; scaled results lie in [-1.0, 1.0).
; The earlier hex literal 040dfe98000000000r actually decoded to 32678.0
; (and was annotated as 37268.0); both read as transpositions of 32768.
; A decimal REAL8 literal is used so the value is visible at a glance.
dbl_val REAL8 32768.0
; swap - exchange two doubles in place
; Frameless on purpose: no EBP frame is built, the arguments are read
; directly relative to ESP.
; Arguments (stack, C convention):
;   [ESP+4] = pointer to the first double
;   [ESP+8] = pointer to the second double
; Result:
;   The two pointed-to 64-bit values have traded places.
; Clobbers:
;   EAX, ECX, XMM0, XMM1
; Note:
;   XCHG only takes integer register/memory operands, so the values are
;   routed through XMM registers instead.
.CODE
swap PROC
        mov     ecx, dword ptr [esp+4]      ; ecx = address of the first double
        mov     eax, dword ptr [esp+8]      ; eax = address of the second double
        ; Read both values before writing either one, so the exchange is
        ; still correct when both arguments name the same location.
        movsd   xmm1, qword ptr [ecx]       ; xmm1 = first value
        movsd   xmm0, qword ptr [eax]       ; xmm0 = second value
        movsd   qword ptr [ecx], xmm0       ; first slot  <- second value
        movsd   qword ptr [eax], xmm1       ; second slot <- first value
        ret
swap ENDP
; max2 - signed maximum of two 32-bit integers
; Frameless on purpose: both arguments are read directly relative to ESP.
; Arguments (stack, C convention):
;   [ESP+4] = first value
;   [ESP+8] = second value
; Result:
;   EAX = the larger of the two values (signed comparison)
max2 PROC
        mov     eax, [esp+8]                ; tentatively pick the second value
        cmp     eax, [esp+4]
        jge     m_done                      ; second >= first: EAX already holds the max
        mov     eax, [esp+4]                ; otherwise the first value is the larger one
m_done:
        ret
max2 ENDP
; normalize - convert signed 16-bit samples to scaled doubles
; For each i in [0, count):  out[i] = (double)in[i] / dbl_val
; Arguments (stack, C convention; offsets are relative to EBP after the prologue):
;   [EBP+8]  = pointer to the output array of doubles
;   [EBP+12] = pointer to the input array of signed 16-bit values
;   [EBP+16] = element count (compared as signed; a count <= 0 processes nothing)
; Register roles:
;   EAX = input base, EDX = output base, EBX = index i, ECX = current sample
; Clobbers:
;   EAX, ECX, EDX, XMM0 (EBX is saved on entry and restored before returning)
normalize PROC
        ; A conventional EBP frame is kept so the arguments stay at fixed offsets
        push    ebp
        mov     ebp, esp
        ; EBX is callee-saved in the C convention: save it here, restore at l_done
        push    ebx

        ; The array bases do not change inside the loop, so load them once
        mov     edx, dword ptr [ebp+8]      ; edx = output (double) array
        mov     eax, dword ptr [ebp+12]     ; eax = input (16-bit) array
        xor     ebx, ebx                    ; i = 0
l_body:
        ; Leave as soon as i >= count
        cmp     ebx, dword ptr [ebp+16]
        jge     l_done
        ; Sign-extend the 16-bit sample and convert it to a double
        movsx   ecx, word ptr [eax+ebx*2]
        cvtsi2sd xmm0, ecx
        ; Scale by the shared divisor constant
        divsd   xmm0, qword ptr dbl_val
        ; Store the result in the matching output slot
        movsd   qword ptr [edx+ebx*8], xmm0
        add     ebx, 1
        jmp     short l_body
l_done:
        ; ESP points at the saved EBX here (nothing else was pushed), so pop
        ; in reverse order of the prologue pushes
        pop     ebx
        pop     ebp
        ret
normalize ENDP
END