562
|
1 /*
|
|
2 decode_MMX.s: MMX optimized synth
|
|
3
|
|
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
6 initially written by the mysterious higway (apparently)
|
|
7
|
|
8 Thomas' words about a note:
|
|
9 Initially, I found the note "this code comes under GPL" in this file.
|
|
10 After asking Michael about legal status of the MMX files, he said that he got them without any comment and thus I believe that the GPL comment was made by Michael, since he made mpg123 GPL at some time - and marked some files that way, but not all.
|
|
11 Based on that thought, I now consider this file along with the other parts of higway's MMX optimization to be licensed under LGPL 2.1 by Michael's decision.
|
|
12 */
|
|
13
|
|
14 #include "mangle.h"
|
|
15
|
|
16 .text
|
|
17
|
|
.globl ASM_NAME(synth_1to1_MMX)
/*
	int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);

	Target: 32-bit x86, GNU as AT&T syntax, all arguments passed on the stack.

	What the code does:
	  1. Picks a working base inside buffs and a wrapping index taken from *bo
	     (the index is decremented modulo 16 and written back unless channel == 1).
	  2. Calls dct64_MMX(ptrA, ptrB, bandPtr) with two pointers into buffs.
	  3. Produces 17 + 15 = 32 output samples. Each one is a 16-term dot product
	     of signed 16-bit words (pmaddwd) from the window and from buffs,
	     arithmetically shifted right by 13 and saturated to 16 bits.
	     The last 15 samples are negated (saturating) before being stored.
	     One 16-bit sample is stored every 4 bytes, starting at out + 2*channel
	     bytes (presumably interleaved stereo output - confirm with the caller).

	NOTE(review): the prototype names decwins as float*, but this routine reads
	it as packed signed 16-bit words - confirm the table format with the caller.

	Returns:  0 in eax.
	Saved:    ebx, esi, edi, ebp (pushed on entry, restored on exit).
	Clobbers: ecx, edx, mm0-mm3, flags; MMX state is cleared with emms on exit.
	Relies on dct64_MMX preserving ebx, ebp, esi and edi, which are read after
	the call without being reloaded.
*/
ASM_NAME(synth_1to1_MMX):
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16=back, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
	movl	24(%esp),%ecx		/* ecx = channel */
	movl	28(%esp),%edi		/* edi = out */
	movl	$15,%ebx		/* ebx = 15: mask for the modulo-16 index */
	movl	36(%esp),%edx		/* edx = bo (pointer) */
	leal	(%edi,%ecx,2),%edi	/* edi = out + 2*channel bytes */
	decl	%ecx
	movl	32(%esp),%esi		/* esi = buffs */
	movl	(%edx),%eax		/* eax = *bo */
	jecxz	.Lsynth_index_ready	/* channel == 1: keep buffs base and *bo as they are */
	decl	%eax
	andl	%ebx,%eax		/* eax = (*bo - 1) & 15 */
	leal	1088(%esi),%esi		/* esi = buffs + 1088 bytes (2 * 544); presumably the other channel - confirm */
	movl	%eax,(%edx)		/* write the stepped index back to *bo */
.Lsynth_index_ready:
	leal	(%esi,%eax,2),%edx	/* edx = base + 2*index */
	movl	%eax,%ebp		/* ebp = index, used below for the window offset */
	incl	%eax
	pushl	20(%esp)		/* dct64_MMX argument 3: bandPtr (esp not yet moved, so offset 20 is valid) */
	andl	%ebx,%eax		/* eax = (index + 1) & 15 */
	leal	544(%esi,%eax,2),%ecx	/* ecx = base + 544 + 2*((index + 1) & 15) */
	incl	%ebx			/* ebx = 16 */
	testl	$1,%eax
	jnz	.Lsynth_call_dct	/* (index + 1) & 15 is odd: use the pointers as computed */
	xchgl	%edx,%ecx		/* otherwise swap the two dct64_MMX pointers, ... */
	incl	%ebp			/* ... use index + 1 for the window offset, ... */
	leal	544(%esi),%esi		/* ... and read the products from base + 544 bytes */
.Lsynth_call_dct:
	pushl	%edx			/* dct64_MMX argument 2 */
	pushl	%ecx			/* dct64_MMX argument 1 */
	call	ASM_NAME(dct64_MMX)
	addl	$12,%esp
/* stack like before, pushed 3, incremented again */
	leal	1(%ebx),%ecx		/* ecx = 17: iteration count of the first loop */
	subl	%ebp,%ebx		/* ebx = 16 - ebp */
	movl	40(%esp),%edx		/* edx = decwins */
	leal	(%edx,%ebx,2),%edx	/* edx = decwins + 2*(16 - ebp) bytes */

	/* First 17 samples: walk buffs forward by 32 bytes, the window by 64 bytes. */
.Lsynth_forward:
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0		/* mm0 = two partial sums (one per dword) */
	movq	%mm0,%mm1
	psrlq	$32,%mm1
	paddd	%mm1,%mm0		/* low dword of mm0 = sum of all 16 products */
	psrad	$13,%mm0
	packssdw %mm0,%mm0		/* saturate to signed 16 bits */
	movd	%mm0,%eax
	movw	%ax,(%edi)		/* store one sample */

	leal	32(%esi),%esi
	leal	64(%edx),%edx
	leal	4(%edi),%edi		/* next output slot is 4 bytes further */
	decl	%ecx
	jnz	.Lsynth_forward

	/* Last 15 samples: step back two rows in buffs, then walk it backwards
	   while the window keeps moving forward; each result is negated. */
	subl	$64,%esi
	movl	$15,%ecx
.Lsynth_backward:
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0
	movq	%mm0,%mm1
	psrlq	$32,%mm1
	paddd	%mm0,%mm1		/* low dword of mm1 = sum of all 16 products */
	psrad	$13,%mm1
	packssdw %mm1,%mm1		/* saturate to signed 16 bits */
	psubd	%mm0,%mm0		/* mm0 = 0 */
	psubsw	%mm1,%mm0		/* mm0 = -mm1 with signed saturation */
	movd	%mm0,%eax
	movw	%ax,(%edi)		/* store one sample */

	subl	$32,%esi
	addl	$64,%edx
	leal	4(%edi),%edi
	decl	%ecx
	jnz	.Lsynth_backward

	emms				/* leave the FPU/MMX state clean for the caller */
	xorl	%eax,%eax		/* return 0 instead of leftover sample bits */
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
|
|
124
|
|
125 /* Mark non-executable stack. */
|
|
126 #if defined(__linux__) && defined(__ELF__)
|
|
127 .section .note.GNU-stack,"",%progbits
|
|
128 #endif
|
|
129
|