Mercurial > sdl-ios-xcode
comparison src/hermes/x86p_16.asm @ 0:74212992fb08
Initial revision
author | Sam Lantinga <slouken@lokigames.com> |
---|---|
date | Thu, 26 Apr 2001 16:45:43 +0000 |
parents | |
children | da33b7e6d181 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:74212992fb08 |
---|---|
1 ; | |
2 ; x86 format converters for HERMES | |
3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at) | |
10 ; Used with permission. | |
11 ; | |
12 | |
13 | |
14 BITS 32 | |
15 | |
16 GLOBAL _ConvertX86p16_32RGB888 | |
17 GLOBAL _ConvertX86p16_32BGR888 | |
18 GLOBAL _ConvertX86p16_32RGBA888 | |
19 GLOBAL _ConvertX86p16_32BGRA888 | |
20 GLOBAL _ConvertX86p16_24RGB888 | |
21 GLOBAL _ConvertX86p16_24BGR888 | |
22 GLOBAL _ConvertX86p16_16BGR565 | |
23 GLOBAL _ConvertX86p16_16RGB555 | |
24 GLOBAL _ConvertX86p16_16BGR555 | |
25 GLOBAL _ConvertX86p16_8RGB332 | |
26 | |
27 EXTERN _ConvertX86 | |
28 EXTERN _x86return | |
29 | |
30 | |
31 SECTION .text | |
32 | |
33 | |
34 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;
; Converts 16-bit pixels to 16-bit pixels by exchanging the 5-bit field
; in bits 15..11 with the 5-bit field in bits 4..0; the 6-bit field in
; bits 10..5 stays where it is.
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels (0 is accepted and converts nothing)
; Out:   esi/edi advanced past the converted pixels;
;        control leaves through  jmp _x86return
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] into [edi] and step both pointers.
; Only al/ah are loaded, so the upper half of eax is garbage; that is
; harmless because only al/ah are stored.
%macro BGR565_SWAP_PIXEL 0
        mov     al, [esi]
        mov     ah, [esi+1]
        mov     ebx, eax
        mov     edx, eax
        shr     eax, 11
        and     eax, BYTE 1Fh           ; bits 15..11 -> bits 4..0
        and     ebx, 07E0h              ; bits 10..5 unchanged
        shl     edx, 11                 ; bits 4..0  -> bits 15..11
        add     eax, ebx
        add     eax, edx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 2
        add     edi, BYTE 2
%endmacro

_ConvertX86p16_16BGR565:

        ; An empty run must not enter the do-while loop below: the
        ; dec/jnz pair would wrap ecx and convert ~2^32 pixels.
        test    ecx, ecx
        jz      .short_done

        ; 16 pixels or fewer: plain pixel-at-a-time loop
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        BGR565_SWAP_PIXEL
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:
        ; If the low two bits of edi are not clear, convert one pixel
        ; first (presumably to dword-align the destination for the
        ; 32-bit stores below).
        mov     eax, edi
        and     eax, BYTE 11b
        jz      .pairs
        BGR565_SWAP_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep full count for the tail test

        shr     ecx, 1                  ; ecx = number of pixel pairs (>= 8 here)

        ; Point both arrays at the end of the paired region and walk a
        ; negative index up to zero.
        lea     esi, [esi+ecx*4]
        lea     edi, [edi+ecx*4]
        neg     ecx
        jmp     SHORT .pair_load

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair computed last pass
.pair_load:
        ; Two pixels per dword; each mask picks the same field in both halves.
        mov     eax, [esi+ecx*4]
        mov     ebx, [esi+ecx*4]
        and     eax, 07E007E0h          ; bits 10..5 of each pixel

        mov     edx, [esi+ecx*4]
        and     ebx, 0F800F800h         ; bits 15..11 of each pixel

        shr     ebx, 11
        and     edx, 001F001Fh          ; bits 4..0 of each pixel

        shl     edx, 11
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .pair_store

        mov     [edi-4], eax            ; ecx == 0: flush the final pair

        ; tail: one leftover pixel when the remaining count was odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .done
        BGR565_SWAP_PIXEL

.done:
        jmp     _x86return
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;
; Converts 16-bit pixels to 16-bit pixels: bits 15..6 of each source
; pixel are moved down one position (to bits 14..5), bits 4..0 are kept,
; and source bit 5 is dropped.
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels (0 is accepted and converts nothing)
; Out:   esi/edi advanced past the converted pixels;
;        control leaves through  jmp _x86return
; Clobb: eax, ebx, ecx, edx, flags   (ebp is saved and restored)
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] into [edi] and step both pointers.
; The upper half of eax is garbage; the 7FE0h mask and the al/ah-only
; store keep it out of the result.
%macro RGB555_SQUEEZE_PIXEL 0
        mov     al, [esi]
        mov     ah, [esi+1]
        mov     ebx, eax
        shr     ebx, 1
        and     ebx, 7FE0h              ; source bits 15..6 -> bits 14..5
        and     eax, BYTE 1Fh           ; bits 4..0 unchanged
        add     eax, ebx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 2
        add     edi, BYTE 2
%endmacro

_ConvertX86p16_16RGB555:

        ; An empty run must not enter the do-while loop below: the
        ; dec/jnz pair would wrap ecx and convert ~2^32 pixels.
        test    ecx, ecx
        jz      .short_done

        ; 32 pixels or fewer: plain pixel-at-a-time loop
        cmp     ecx, BYTE 32
        ja      .align_head

.short_loop:
        RGB555_SQUEEZE_PIXEL
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:
        ; If the low two bits of edi are not clear, convert one pixel
        ; first (presumably to dword-align the destination for the
        ; 32-bit stores below).
        mov     eax, edi
        and     eax, BYTE 11b
        jz      .quads
        RGB555_SQUEEZE_PIXEL
        dec     ecx

.quads:
        push    ebp                     ; ebp serves as the loop index below
        push    ecx                     ; keep full count for the tail

        shr     ecx, 2                  ; ecx = groups of four pixels (>= 8 here)

        ; Point both arrays at the end of the grouped region and walk a
        ; negative index (ebp) up to zero; ecx is reused for pixel data.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*8]
        xor     ebp, ebp
        sub     ebp, ecx

.quad_loop:
        mov     eax, [esi+ebp*8]        ; pixels 0,1 of the group
        mov     ecx, [esi+ebp*8+4]      ; pixels 2,3 of the group

        mov     ebx, eax
        mov     edx, ecx

        and     eax, 0FFC0FFC0h         ; bits 15..6 of each pixel
        and     ecx, 0FFC0FFC0h

        shr     eax, 1                  ; -> bits 14..5
        and     ebx, 001F001Fh          ; bits 4..0 of each pixel

        shr     ecx, 1
        and     edx, 001F001Fh

        add     eax, ebx
        add     ecx, edx

        mov     [edi+ebp*8], eax
        mov     [edi+ebp*8+4], ecx

        inc     ebp
        jnz     .quad_loop

        ; tail: 0..3 leftover pixels; only the low two bits of the saved
        ; count matter, so the mask is simply re-applied each pass.
        pop     ecx
.tail_loop:
        and     ecx, BYTE 11b
        jz      .done
        RGB555_SQUEEZE_PIXEL
        dec     ecx
        jmp     SHORT .tail_loop

.done:
        pop     ebp
        jmp     _x86return
247 | |
248 | |
249 | |
250 | |
251 | |
252 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;
; Converts 16-bit pixels to 16-bit pixels:
;   source bits 15..11 -> destination bits  4..0
;   source bits 10..6  -> destination bits  9..5   (source bit 5 dropped)
;   source bits  4..0  -> destination bits 14..10
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels (0 is accepted and converts nothing)
; Out:   esi/edi advanced past the converted pixels;
;        control leaves through  jmp _x86return
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] into [edi] and step both pointers.
; The upper half of eax is garbage; every field is masked after its
; shift, so none of it reaches the stored al/ah.
%macro BGR555_SWAP_PIXEL 0
        mov     al, [esi]
        mov     ah, [esi+1]
        mov     ebx, eax
        mov     edx, eax
        shr     eax, 11
        and     eax, BYTE 1Fh           ; bits 15..11 -> bits 4..0
        shr     ebx, 1
        and     ebx, 03E0h              ; bits 10..6  -> bits 9..5
        shl     edx, 10
        and     edx, 7C00h              ; bits 4..0   -> bits 14..10
        add     eax, ebx
        add     eax, edx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 2
        add     edi, BYTE 2
%endmacro

_ConvertX86p16_16BGR555:

        ; An empty run must not enter the do-while loop below: the
        ; dec/jnz pair would wrap ecx and convert ~2^32 pixels.
        test    ecx, ecx
        jz      .short_done

        ; 16 pixels or fewer: plain pixel-at-a-time loop
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        BGR555_SWAP_PIXEL
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:
        ; If the low two bits of edi are not clear, convert one pixel
        ; first (presumably to dword-align the destination for the
        ; 32-bit stores below).
        mov     eax, edi
        and     eax, BYTE 11b
        jz      .pairs
        BGR555_SWAP_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep full count for the tail test

        shr     ecx, 1                  ; ecx = number of pixel pairs (>= 8 here)

        ; Point both arrays at the end of the paired region and walk a
        ; negative index up to zero.
        lea     esi, [esi+ecx*4]
        lea     edi, [edi+ecx*4]
        neg     ecx
        jmp     SHORT .pair_load

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair computed last pass
.pair_load:
        ; Two pixels per dword; each mask picks the same field in both halves.
        mov     eax, [esi+ecx*4]

        shr     eax, 1
        mov     ebx, [esi+ecx*4]

        and     eax, 03E003E0h          ; source bits 10..6 -> bits 9..5
        mov     edx, [esi+ecx*4]

        and     ebx, 0F800F800h         ; source bits 15..11

        shr     ebx, 11                 ; -> bits 4..0
        and     edx, 001F001Fh          ; source bits 4..0

        shl     edx, 10                 ; -> bits 14..10
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .pair_store

        mov     [edi-4], eax            ; ecx == 0: flush the final pair

        ; tail: one leftover pixel when the remaining count was odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .done
        BGR555_SWAP_PIXEL

.done:
        jmp     _x86return
365 | |
366 | |
367 | |
368 | |
369 | |
370 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;
; Converts 16-bit pixels to 8-bit pixels:
;   source bits 15..13 (red)   -> destination bits 7..5
;   source bits 10..8  (green) -> destination bits 4..2
;   source bits  4..3  (blue)  -> destination bits 1..0
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (1 byte per pixel)
;        ecx = number of pixels (0 is accepted and converts nothing)
; Out:   esi/edi advanced past the converted pixels;
;        control leaves through  jmp _x86return
; Clobb: eax, ebx, ecx, edx, flags
;
; Changes from the previous revision:
;  * The four-pixel loop loads every source byte at the top of the
;    iteration that uses it.  It used to preload [esi+0..2] for the
;    *next* group at the bottom of each pass, which on the last pass
;    read up to three bytes beyond the source data.
;  * The unused push/pop of ebp was dropped (ebp is never written here).
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] into [edi] and step both pointers.
; The upper half of eax is garbage; every field is masked before it is
; shifted, and only al is stored.
%macro RGB332_PACK_PIXEL 0
        mov     al, [esi+0]
        mov     ah, [esi+1]
        mov     ebx, eax
        mov     edx, eax
        and     eax, BYTE 18h           ; blue:  bits 4..3
        shr     eax, 3
        and     ebx, 0700h              ; green: bits 10..8
        shr     ebx, 6
        and     edx, 0E000h             ; red:   bits 15..13
        shr     edx, 8
        add     eax, ebx
        add     eax, edx
        mov     [edi], al
        add     esi, BYTE 2
        inc     edi
%endmacro

_ConvertX86p16_8RGB332:

        ; An empty run must not enter the do-while loop below: the
        ; dec/jnz pair would wrap ecx and convert ~2^32 pixels.
        test    ecx, ecx
        jz      .short_done

        ; 16 pixels or fewer: plain pixel-at-a-time loop
        cmp     ecx, BYTE 16
        ja      .align_head

.short_loop:
        RGB332_PACK_PIXEL
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:
        ; Convert single pixels until the low two bits of edi are clear,
        ; so the dword stores below are 4-byte aligned.  At most three
        ; pixels are consumed, so at least 14 remain afterwards.
        mov     eax, edi
        and     eax, BYTE 11b
        jz      .quads
        RGB332_PACK_PIXEL
        dec     ecx
        jmp     SHORT .align_head

.quads:
        push    ecx                     ; keep full count for the tail

        shr     ecx, 2                  ; ecx = groups of four pixels (>= 3 here)

.quad_loop:
        ; Gather the four low bytes into edx and the four high bytes into
        ; ebx.  After the shifts the byte order in each register is
        ;   [pixel1][pixel0][pixel3][pixel2]   (most -> least significant)
        ; and the rotates below restore pixel0..pixel3 order.
        mov     dl, [esi+0]
        mov     bl, [esi+1]
        mov     dh, [esi+2]

        shl     edx, 16
        mov     bh, [esi+3]

        shl     ebx, 16
        mov     dl, [esi+4]

        mov     dh, [esi+6]
        mov     bl, [esi+5]

        and     edx, 18181818h          ; blues: bits 4..3 of each low byte
        mov     bh, [esi+7]

        ror     edx, 16+3               ; reorder bytes, blue -> bits 1..0
        mov     eax, ebx                ; setup eax for reds

        and     ebx, 07070707h          ; greens: bits 2..0 of each high byte
        and     eax, 0E0E0E0E0h         ; reds:   bits 7..5 of each high byte

        ror     ebx, 16-2               ; reorder bytes, green -> bits 4..2
        add     esi, BYTE 8

        ror     eax, 16                 ; reorder bytes, red stays in bits 7..5
        add     edi, BYTE 4

        add     eax, ebx
        add     eax, edx

        mov     [edi-4], eax            ; four packed 8-bit pixels

        dec     ecx
        jnz     .quad_loop

        ; tail: 0..3 leftover pixels
        pop     ecx
        and     ecx, BYTE 11b
        jz      .done

.tail_loop:
        RGB332_PACK_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return
496 |