comparison src/hermes/x86p_16.asm @ 0:74212992fb08

Initial revision
author Sam Lantinga <slouken@lokigames.com>
date Thu, 26 Apr 2001 16:45:43 +0000
parents
children da33b7e6d181
comparison
equal deleted inserted replaced
-1:000000000000 0:74212992fb08
1 ;
2 ; x86 format converters for HERMES
3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
4 ; This source code is licensed under the GNU LGPL
5 ;
6 ; Please refer to the file COPYING.LIB contained in the distribution for
7 ; licensing conditions
8 ;
9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
10 ; Used with permission.
11 ;
12
13
14 BITS 32
15
16 GLOBAL _ConvertX86p16_32RGB888
17 GLOBAL _ConvertX86p16_32BGR888
18 GLOBAL _ConvertX86p16_32RGBA888
19 GLOBAL _ConvertX86p16_32BGRA888
20 GLOBAL _ConvertX86p16_24RGB888
21 GLOBAL _ConvertX86p16_24BGR888
22 GLOBAL _ConvertX86p16_16BGR565
23 GLOBAL _ConvertX86p16_16RGB555
24 GLOBAL _ConvertX86p16_16BGR555
25 GLOBAL _ConvertX86p16_8RGB332
26
27 EXTERN _ConvertX86
28 EXTERN _x86return
29
30
31 SECTION .text
32
33
34
35 _ConvertX86p16_16BGR565: ; per 16-bit pixel: swap the 5-bit fields at bits 15..11 and 4..0, keep bits 10..5
36 ; uses esi = source pointer, edi = destination pointer, ecx = pixel count; overwrites eax, ebx, edx
37 ; check short: 16 pixels or fewer are handled entirely by the one-pixel loop
38 cmp ecx,BYTE 16
39 ja .L3 ; unsigned compare: more than 16 -> head / dword loop / tail path
40 ; NOTE(review): ecx = 0 on entry would wrap at the dec in .L1 and run ~2^32 passes - assumes callers pass ecx >= 1, confirm
41
42 .L1 ; short loop: one pixel per pass
43 mov al,[esi] ; low byte of the pixel
44 mov ah,[esi+1] ; high byte; bits 31..16 of eax are not initialised here
45 mov ebx,eax
46 mov edx,eax
47 shr eax,11
48 and eax,BYTE 11111b ; eax = old bits 15..11, now at 4..0 (mask also drops the stale upper bits)
49 and ebx,11111100000b ; ebx = bits 10..5, left in place
50 shl edx,11 ; old bits 4..0 now at 15..11; bits 10..0 are zero
51 add eax,ebx ; the three fields do not overlap in bits 15..0, so add behaves like or
52 add eax,edx
53 mov [edi],al ; only the low 16 bits are stored, so anything above bit 15 is ignored
54 mov [edi+1],ah
55 add esi,BYTE 2
56 add edi,BYTE 2
57 dec ecx
58 jnz .L1
59 .L2 ; exit of the short path
60 jmp _x86return ; external label (declared EXTERN above); presumably the shared epilogue - confirm
61
62 .L3 ; head: convert one pixel first when edi is not dword aligned
63 mov eax,edi
64 and eax,BYTE 11b ; low two address bits of the destination
65 jz .L4 ; already aligned: no head pixel (assumes edi is at least 2-byte aligned - TODO confirm)
66 mov al,[esi] ; same one-pixel conversion as .L1
67 mov ah,[esi+1]
68 mov ebx,eax
69 mov edx,eax
70 shr eax,11
71 and eax,BYTE 11111b
72 and ebx,11111100000b
73 shl edx,11
74 add eax,ebx
75 add eax,edx
76 mov [edi],al
77 mov [edi+1],ah
78 add esi,BYTE 2
79 add edi,BYTE 2
80 dec ecx ; one pixel consumed by the head
81
82 .L4 ; save count (the tail needs its low bit)
83 push ecx
84
85 ; unroll twice: two pixels (one dword) per pass
86 shr ecx,1
87
88 ; point arrays to end of the dword-processed region
89 lea esi,[esi+ecx*4]
90 lea edi,[edi+ecx*4]
91
92 ; negative counter: ecx runs from -(dword count) up to 0 and indexes both arrays
93 neg ecx
94 jmp SHORT .L6 ; enter below the store: nothing has been computed yet
95
96 .L5 mov [edi+ecx*4-4],eax ; store the previous pass's result (ecx was already incremented, hence -4)
97 .L6 mov eax,[esi+ecx*4] ; eax: copy used for bits 10..5 of both pixels
98
99 mov ebx,[esi+ecx*4] ; ebx: copy used for bits 15..11
100 and eax,07E007E0h
101
102 mov edx,[esi+ecx*4] ; edx: copy used for bits 4..0
103 and ebx,0F800F800h
104
105 shr ebx,11 ; masked first, so the upper pixel's field cannot spill into the lower pixel
106 and edx,001F001Fh
107
108 shl edx,11 ; masked first, so the lower pixel's field cannot spill into the upper pixel
109 add eax,ebx
110
111 add eax,edx ; eax = both converted pixels
112 inc ecx
113
114 jnz .L5 ; flags come from inc ecx (the mov at .L5 does not change them)
115
116 mov [edi+ecx*4-4],eax ; ecx = 0 here: write the last dword computed
117
118 ; tail: one pixel is left when the saved count is odd
119 pop ecx
120 and ecx,BYTE 1
121 jz .L7
122 mov al,[esi] ; same one-pixel conversion as .L1
123 mov ah,[esi+1]
124 mov ebx,eax
125 mov edx,eax
126 shr eax,11
127 and eax,BYTE 11111b
128 and ebx,11111100000b
129 shl edx,11
130 add eax,ebx
131 add eax,edx
132 mov [edi],al
133 mov [edi+1],ah
134 add esi,BYTE 2
135 add edi,BYTE 2
136
137 .L7 ; exit of the long path (stack is balanced: the saved count was popped above)
138 jmp _x86return
139
140
141
142
143
144
145 _ConvertX86p16_16RGB555: ; per 16-bit pixel: bits 15..6 move down one place, bits 4..0 stay, old bit 5 is dropped
146 ; uses esi = source pointer, edi = destination pointer, ecx = pixel count; overwrites eax, ebx, edx (ebp is saved and restored)
147 ; check short: 32 pixels or fewer are handled entirely by the one-pixel loop
148 cmp ecx,BYTE 32
149 ja .L3 ; unsigned compare: more than 32 -> head / unrolled loop / tail path
150 ; NOTE(review): ecx = 0 on entry would wrap at the dec in .L1 and run ~2^32 passes - assumes callers pass ecx >= 1, confirm
151
152 .L1 ; short loop: one pixel per pass
153 mov al,[esi] ; low byte of the pixel
154 mov ah,[esi+1] ; high byte; bits 31..16 of eax are not initialised here
155 mov ebx,eax
156 shr ebx,1
157 and ebx, 0111111111100000b ; ebx = old bits 15..6, now at 14..5; result bit 15 is zero
158 and eax,BYTE 0000000000011111b ; eax = bits 4..0 unchanged
159 add eax,ebx ; fields do not overlap, so add behaves like or
160 mov [edi],al
161 mov [edi+1],ah
162 add esi,BYTE 2
163 add edi,BYTE 2
164 dec ecx
165 jnz .L1
166 .L2 ; exit of the short path
167 jmp _x86return ; external label (declared EXTERN above); presumably the shared epilogue - confirm
168
169 .L3 ; head: convert one pixel first when edi is not dword aligned
170 mov eax,edi
171 and eax,BYTE 11b ; low two address bits of the destination
172 jz .L4 ; already aligned: no head pixel (assumes edi is at least 2-byte aligned - TODO confirm)
173 mov al,[esi] ; same one-pixel conversion as .L1
174 mov ah,[esi+1]
175 mov ebx,eax
176 shr ebx,1
177 and ebx, 0111111111100000b
178 and eax,BYTE 0000000000011111b
179 add eax,ebx
180 mov [edi],al
181 mov [edi+1],ah
182 add esi,BYTE 2
183 add edi,BYTE 2
184 dec ecx ; one pixel consumed by the head
185
186 .L4 ; save ebp: it serves as the loop index below and is restored at .L7
187 push ebp
188
189 ; save count (ecx is reused as a data register inside the loop)
190 push ecx
191
192 ; unroll four times: four pixels (two dwords, 8 bytes) per pass
193 shr ecx,2
194
195 ; point arrays to end of the region covered by the unrolled loop
196 lea esi,[esi+ecx*8]
197 lea edi,[edi+ecx*8]
198
199 ; negative counter: ebp = -(pass count), counts up to 0
200 xor ebp,ebp
201 sub ebp,ecx
202
203 .L5 mov eax,[esi+ebp*8] ; agi? (original author's question; ebp is written just before this load on entry)
204 mov ecx,[esi+ebp*8+4] ; second dword: pixels 2 and 3 of this group
205
206 mov ebx,eax ; ebx/edx keep copies for the bits 4..0 fields
207 mov edx,ecx
208
209 and eax,0FFC0FFC0h ; keep bits 15..6 of each pixel
210 and ecx,0FFC0FFC0h
211
212 shr eax,1 ; masked first, so the upper pixel's bit 0 cannot fall into the lower pixel's bit 15
213 and ebx,001F001Fh
214
215 shr ecx,1
216 and edx,001F001Fh
217
218 add eax,ebx
219 add ecx,edx
220
221 mov [edi+ebp*8],eax
222 mov [edi+ebp*8+4],ecx
223
224 inc ebp
225 jnz .L5
226
227 ; tail: 0..3 pixels remain (low two bits of the saved count)
228 pop ecx
229 .L6 and ecx,BYTE 11b ; on later passes ecx is already below 4, so the mask only sets the flags
230 jz .L7
231 mov al,[esi] ; same one-pixel conversion as .L1
232 mov ah,[esi+1]
233 mov ebx,eax
234 shr ebx,1
235 and ebx, 0111111111100000b
236 and eax,BYTE 0000000000011111b
237 add eax,ebx
238 mov [edi],al
239 mov [edi+1],ah
240 add esi,BYTE 2
241 add edi,BYTE 2
242 dec ecx
243 jmp SHORT .L6
244
245 .L7 pop ebp ; undo the push at .L4 before leaving
246 jmp _x86return
247
248
249
250
251
252
253 _ConvertX86p16_16BGR555: ; per 16-bit pixel: bits 15..11 -> 4..0, bits 10..6 -> 9..5, bits 4..0 -> 14..10; old bit 5 dropped, result bit 15 zero
254 ; uses esi = source pointer, edi = destination pointer, ecx = pixel count; overwrites eax, ebx, edx
255 ; check short: 16 pixels or fewer are handled entirely by the one-pixel loop
256 cmp ecx,BYTE 16
257 ja .L3 ; unsigned compare: more than 16 -> head / dword loop / tail path
258 ; NOTE(review): ecx = 0 on entry would wrap at the dec in .L1 and run ~2^32 passes - assumes callers pass ecx >= 1, confirm
259
260 .L1 ; short loop: one pixel per pass
261 mov al,[esi] ; low byte of the pixel
262 mov ah,[esi+1] ; high byte; bits 31..16 of eax are not initialised here
263 mov ebx,eax
264 mov edx,eax
265 shr eax,11
266 and eax,BYTE 11111b ; eax = old bits 15..11, now at 4..0
267 shr ebx,1
268 and ebx,1111100000b ; ebx = old bits 10..6, now at 9..5
269 shl edx,10
270 and edx,0111110000000000b ; edx = old bits 4..0, now at 14..10
271 add eax,ebx ; all three operands are masked and disjoint, so add behaves like or
272 add eax,edx
273 mov [edi],al
274 mov [edi+1],ah
275 add esi,BYTE 2
276 add edi,BYTE 2
277 dec ecx
278 jnz .L1
279 .L2 ; exit of the short path
280 jmp _x86return ; external label (declared EXTERN above); presumably the shared epilogue - confirm
281
282 .L3 ; head: convert one pixel first when edi is not dword aligned
283 mov eax,edi
284 and eax,BYTE 11b ; low two address bits of the destination
285 jz .L4 ; already aligned: no head pixel (assumes edi is at least 2-byte aligned - TODO confirm)
286 mov al,[esi] ; same one-pixel conversion as .L1
287 mov ah,[esi+1]
288 mov ebx,eax
289 mov edx,eax
290 shr eax,11
291 and eax,BYTE 11111b
292 shr ebx,1
293 and ebx,1111100000b
294 shl edx,10
295 and edx,0111110000000000b
296 add eax,ebx
297 add eax,edx
298 mov [edi],al
299 mov [edi+1],ah
300 add esi,BYTE 2
301 add edi,BYTE 2
302 dec ecx ; one pixel consumed by the head
303
304 .L4 ; save count (the tail needs its low bit)
305 push ecx
306
307 ; unroll twice: two pixels (one dword) per pass
308 shr ecx,1
309
310 ; point arrays to end of the dword-processed region
311 lea esi,[esi+ecx*4]
312 lea edi,[edi+ecx*4]
313
314 ; negative counter: ecx runs from -(dword count) up to 0 and indexes both arrays
315 neg ecx
316 jmp SHORT .L6 ; enter below the store: nothing has been computed yet
317
318 .L5 mov [edi+ecx*4-4],eax ; store the previous pass's result (ecx was already incremented, hence -4)
319 .L6 mov eax,[esi+ecx*4] ; eax: copy used for old bits 10..6 of both pixels
320
321 shr eax,1 ; shifted before masking; the mask below discards whatever crossed between pixels
322 mov ebx,[esi+ecx*4] ; ebx: copy used for bits 15..11
323
324 and eax,03E003E0h ; old bits 10..6 now at 9..5 in each half
325 mov edx,[esi+ecx*4] ; edx: copy used for bits 4..0
326
327 and ebx,0F800F800h
328
329 shr ebx,11 ; old bits 15..11 now at 4..0 in each half
330 and edx,001F001Fh
331
332 shl edx,10 ; old bits 4..0 now at 14..10 in each half
333 add eax,ebx
334
335 add eax,edx ; eax = both converted pixels
336 inc ecx
337
338 jnz .L5 ; flags come from inc ecx (the mov at .L5 does not change them)
339
340 mov [edi+ecx*4-4],eax ; ecx = 0 here: write the last dword computed
341
342 ; tail: one pixel is left when the saved count is odd
343 pop ecx
344 and ecx,BYTE 1
345 jz .L7
346 mov al,[esi] ; same one-pixel conversion as .L1
347 mov ah,[esi+1]
348 mov ebx,eax
349 mov edx,eax
350 shr eax,11
351 and eax,BYTE 11111b
352 shr ebx,1
353 and ebx,1111100000b
354 shl edx,10
355 and edx,0111110000000000b
356 add eax,ebx
357 add eax,edx
358 mov [edi],al
359 mov [edi+1],ah
360 add esi,BYTE 2
361 add edi,BYTE 2
362
363 .L7 ; exit of the long path (stack is balanced: the saved count was popped above)
364 jmp _x86return
365
366
367
368
369
370
371 _ConvertX86p16_8RGB332: ; 16-bit pixel -> one byte: bits 15..13 -> 7..5, bits 10..8 -> 4..2, bits 4..3 -> 1..0
372 ; uses esi = source pointer (2 bytes per pixel), edi = destination pointer (1 byte per pixel), ecx = pixel count; overwrites eax, ebx, edx
373 ; check short: 16 pixels or fewer are handled entirely by the one-pixel loop
374 cmp ecx,BYTE 16
375 ja .L3 ; unsigned compare: more than 16 -> align / unrolled loop / tail path
376 ; NOTE(review): ecx = 0 on entry would wrap at the dec in .L1 and run ~2^32 passes - assumes callers pass ecx >= 1, confirm
377
378 .L1 ; short loop: one pixel per pass
379 mov al,[esi+0] ; low byte of the pixel
380 mov ah,[esi+1] ; high byte; bits 31..16 of eax are not initialised here
381 mov ebx,eax
382 mov edx,eax
383 and eax,BYTE 11000b ; blue: bits 4..3 of the source pixel
384 shr eax,3 ; -> result bits 1..0
385 and ebx,11100000000b ; green: bits 10..8 of the source pixel
386 shr ebx,6 ; -> result bits 4..2
387 and edx,1110000000000000b ; red: bits 15..13 of the source pixel
388 shr edx,8 ; -> result bits 7..5
389 add eax,ebx ; fields are disjoint, so add behaves like or
390 add eax,edx
391 mov [edi],al
392 add esi,BYTE 2
393 inc edi
394 dec ecx
395 jnz .L1
396 .L2 ; exit of the short path
397 jmp _x86return ; external label (declared EXTERN above); presumably the shared epilogue - confirm
398
399 .L3 mov eax,edi ; align loop: convert single pixels until edi is dword aligned
400 and eax,BYTE 11b ; low two address bits of the destination
401 jz .L4
402 mov al,[esi+0] ; same one-pixel conversion as .L1
403 mov ah,[esi+1]
404 mov ebx,eax
405 mov edx,eax
406 and eax,BYTE 11000b ; blue
407 shr eax,3
408 and ebx,11100000000b ; green
409 shr ebx,6
410 and edx,1110000000000000b ; red
411 shr edx,8
412 add eax,ebx
413 add eax,edx
414 mov [edi],al
415 add esi,BYTE 2
416 inc edi
417 dec ecx
418 jmp SHORT .L3 ; re-test alignment; edi advances one byte per pixel, so at most three passes
419
420 .L4 ; save ebp (restored at .L7; no other instruction in this routine touches ebp)
421 push ebp
422
423 ; save count (the tail needs its low two bits)
424 push ecx
425
426 ; unroll 4 times: four source pixels produce one destination dword per pass
427 shr ecx,2
428
429 ; prestep: preload the first bytes of the first group (edx gathers low bytes, ebx gathers high bytes)
430 mov dl,[esi+0] ; pixel 0 low byte
431 mov bl,[esi+1] ; pixel 0 high byte
432 mov dh,[esi+2] ; pixel 1 low byte
433
434 .L5 shl edx,16 ; low bytes of pixels 0,1 move to the upper half; stale upper bits are shifted out
435 mov bh,[esi+3] ; pixel 1 high byte
436
437 shl ebx,16 ; high bytes of pixels 0,1 move to the upper half
438 mov dl,[esi+4] ; pixel 2 low byte
439
440 mov dh,[esi+6] ; pixel 3 low byte
441 mov bl,[esi+5] ; pixel 2 high byte
442
443 and edx,00011000000110000001100000011000b ; keep bits 4..3 of each low byte
444 mov bh,[esi+7] ; pixel 3 high byte
445
446 ror edx,16+3 ; 16: put pixels back in memory order; 3: move each field down to bits 1..0 of its byte
447 mov eax,ebx ; setup eax for reds
448
449 and ebx,00000111000001110000011100000111b ; keep bits 2..0 of each high byte (source bits 10..8)
450 and eax,11100000111000001110000011100000b ; reds: bits 7..5 of each high byte (source bits 15..13)
451
452 ror ebx,16-2 ; net effect: swap halves and move each field up to bits 4..2 of its byte
453 add esi,BYTE 8 ; four source pixels consumed
454
455 ror eax,16 ; swap halves only; the field is already at bits 7..5
456 add edi,BYTE 4
457
458 add eax,ebx
459 mov bl,[esi+1] ; greens: preload pixel 0 high byte of the next group
460
461 add eax,edx ; eax = four result bytes
462 mov dl,[esi+0] ; blues: preload pixel 0 low byte of the next group
463
464 mov [edi-4],eax ; edi was already advanced, hence -4
465 mov dh,[esi+2] ; preload pixel 1 low byte of the next group
466 ; NOTE(review): these three preloads also run after the final pass; with fewer than two tail pixels they read up to 3 bytes past the last source pixel - confirm the buffer tolerates it
467 dec ecx
468 jnz .L5
469
470 ; check tail: 0..3 pixels remain (low two bits of the saved count)
471 pop ecx
472 and ecx,BYTE 11b
473 jz .L7
474
475 .L6 ; tail: one pixel per pass
476 mov al,[esi+0] ; same one-pixel conversion as .L1
477 mov ah,[esi+1]
478 mov ebx,eax
479 mov edx,eax
480 and eax,BYTE 11000b ; blue
481 shr eax,3
482 and ebx,11100000000b ; green
483 shr ebx,6
484 and edx,1110000000000000b ; red
485 shr edx,8
486 add eax,ebx
487 add eax,edx
488 mov [edi],al
489 add esi,BYTE 2
490 inc edi
491 dec ecx
492 jnz .L6
493
494 .L7 pop ebp ; undo the push at .L4 before leaving
495 jmp _x86return
496