Mercurial > sdl-ios-xcode
annotate src/video/SDL_yuv_sw.c @ 880:9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
From: Tyler Montbriand
Subject: [SDL] Opteron MMX patches for SDL_blit.c and SDL_blit_A.c
The inline MMX assembly in SDL_blit.c and SDL_blit_A.c compiles and runs fine
unmodified under AMD Opteron. The inline assembly in SDL_yuv_mmx.c and
SDL_blit_N.c unfortunately isn't directly compatible.
I've included diffs from SDL_blit.c and SDL_blit_A.c that allow the MMX
assembly to be compiled when USE_ASMBLIT, __x86_64__, and __GNUC__ are all
defined. All I had to modify was typedefs, the inline assembly itself wasn't
touched.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Sun, 11 Apr 2004 19:47:28 +0000 |
parents | b8d311d90021 |
children | c9b51268668f |
rev | line source |
---|---|
0 | 1 /* |
2 SDL - Simple DirectMedia Layer | |
769
b8d311d90021
Updated copyright information for 2004 (Happy New Year!)
Sam Lantinga <slouken@libsdl.org>
parents:
739
diff
changeset
|
3 Copyright (C) 1997-2004 Sam Lantinga |
0 | 4 |
5 This library is free software; you can redistribute it and/or | |
6 modify it under the terms of the GNU Library General Public | |
7 License as published by the Free Software Foundation; either | |
8 version 2 of the License, or (at your option) any later version. | |
9 | |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 Library General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Library General Public | |
16 License along with this library; if not, write to the Free | |
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | |
19 Sam Lantinga | |
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
9
diff
changeset
|
20 slouken@libsdl.org |
0 | 21 */ |
22 | |
23 #ifdef SAVE_RCSID | |
24 static char rcsid = | |
25 "@(#) $Id$"; | |
26 #endif | |
27 | |
28 /* This is the software implementation of the YUV video overlay support */ | |
29 | |
30 /* This code was derived from code carrying the following copyright notices: | |
31 | |
32 * Copyright (c) 1995 The Regents of the University of California. | |
33 * All rights reserved. | |
34 * | |
35 * Permission to use, copy, modify, and distribute this software and its | |
36 * documentation for any purpose, without fee, and without written agreement is | |
37 * hereby granted, provided that the above copyright notice and the following | |
38 * two paragraphs appear in all copies of this software. | |
39 * | |
40 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR | |
41 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT | |
42 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF | |
43 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
44 * | |
45 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, | |
46 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
47 * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
48 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO | |
49 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
50 | |
51 * Copyright (c) 1995 Erik Corry | |
52 * All rights reserved. | |
53 * | |
54 * Permission to use, copy, modify, and distribute this software and its | |
55 * documentation for any purpose, without fee, and without written agreement is | |
56 * hereby granted, provided that the above copyright notice and the following | |
57 * two paragraphs appear in all copies of this software. | |
58 * | |
59 * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, | |
60 * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF | |
61 * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED | |
62 * OF THE POSSIBILITY OF SUCH DAMAGE. | |
63 * | |
64 * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT | |
65 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
66 * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" | |
67 * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, | |
68 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
69 | |
70 * Portions of this software Copyright (c) 1995 Brown University. | |
71 * All rights reserved. | |
72 * | |
73 * Permission to use, copy, modify, and distribute this software and its | |
74 * documentation for any purpose, without fee, and without written agreement | |
75 * is hereby granted, provided that the above copyright notice and the | |
76 * following two paragraphs appear in all copies of this software. | |
77 * | |
78 * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR | |
79 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT | |
80 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN | |
81 * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
82 * | |
83 * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT | |
84 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
85 * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" | |
86 * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, | |
87 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
88 */ | |
89 | |
90 #include <stdlib.h> | |
91 #include <string.h> | |
92 | |
93 #include "SDL_error.h" | |
94 #include "SDL_video.h" | |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
95 #include "SDL_cpuinfo.h" |
0 | 96 #include "SDL_stretch_c.h" |
97 #include "SDL_yuvfuncs.h" | |
98 #include "SDL_yuv_sw_c.h" | |
99 | |
/*
 * The functions used to manipulate software video overlays.
 *
 * SDL_CreateYUV_SW() stores the address of this table in overlay->hwfuncs.
 * The initializer is positional, so the order of the four entries
 * (lock, unlock, display, free) must match the member order of
 * struct private_yuvhwfuncs, which is declared outside this file.
 */
static struct private_yuvhwfuncs sw_yuvfuncs = {
	SDL_LockYUV_SW,
	SDL_UnlockYUV_SW,
	SDL_DisplayYUV_SW,
	SDL_FreeYUV_SW
};
107 | |
/*
 * Per-overlay private state of the software YUV implementation:
 * the pixel storage, the RGB conversion lookup tables and the converter
 * routines selected for the display format.
 */
struct private_yuvhwdata {
	SDL_Surface *stretch;	/* set to NULL at creation; presumably a scratch surface for scaled display - confirm in SDL_DisplayYUV_SW */
	SDL_Surface *display;	/* the display surface handed to SDL_CreateYUV_SW() */
	Uint8 *pixels;		/* overlay pixel storage, allocated as width*height*2 bytes */
	int *colortab;		/* four consecutive 256-entry tables: Cr->R, Cr->G, Cb->G, Cb->B */
	Uint32 *rgb_2_pix;	/* three consecutive 768-entry tables (R, G, B) giving pixel bits */
	/* Converter used for 1:1 output; same signature as the Color*Mod1X routines */
	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );
	/* Converter used for pixel-doubled output; same signature as the Color*Mod2X routines */
	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
	                  unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );

	/* These are just so we don't have to allocate them separately */
	Uint16 pitches[3];
	Uint8 *planes[3];
};
128 | |
129 | |
130 /* The colorspace conversion functions */ | |
131 | |
/*
 * Converters implemented outside this translation unit.  They take the same
 * argument list as the C converters below.  NOTE(review): judging by the
 * names these are the MMX 1:1 YV12 routines (16-bit 565 and 32-bit RGB);
 * confirm against their definition.
 */
extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
140 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into 16-bit pixels at 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination, written as unsigned shorts
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - pixels to skip at the end of every destination line
 */
static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned short *dst_top = (unsigned short *) out;
	unsigned short *dst_bot = dst_top + cols + mod;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* After one pass every pointer sits on the line that was just
	   handled through the other pointer, so skip that line as well. */
	const int skip = mod + (cols + mod);
	int lines_left, pairs_left;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Two pixels on the upper line ... */
			Y = lum[0];
			dst_top[0] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                               rgb_2_pix[ Y + g_base ] |
			                               rgb_2_pix[ Y + b_base ]);
			Y = lum[1];
			dst_top[1] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                               rgb_2_pix[ Y + g_base ] |
			                               rgb_2_pix[ Y + b_base ]);
			lum += 2;
			dst_top += 2;

			/* ... and the two right below, sharing the same chroma. */
			Y = lum_bot[0];
			dst_bot[0] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                               rgb_2_pix[ Y + g_base ] |
			                               rgb_2_pix[ Y + b_base ]);
			Y = lum_bot[1];
			dst_bot[1] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                               rgb_2_pix[ Y + g_base ] |
			                               rgb_2_pix[ Y + b_base ]);
			lum_bot += 2;
			dst_bot += 2;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
210 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into packed 24-bit pixels at 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination; each pixel is stored as three bytes, least
 *             significant byte of the table value first
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - pixels (not bytes) to skip at the end of every destination line
 */
static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned char *dst_top = out;
	unsigned char *dst_bot = dst_top + cols*3 + mod*3;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* Byte distance from the end of one handled line to the start of
	   the line two further down. */
	const int skip = (mod + (cols + mod)) * 3;
	unsigned int pix;
	int lines_left, pairs_left, b;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Upper line, two pixels. */
			Y = lum[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_top[b] = (pix >> (8*b)) & 0xFF;
			}
			Y = lum[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_top[3+b] = (pix >> (8*b)) & 0xFF;
			}
			lum += 2;
			dst_top += 2*3;

			/* Lower line, same chroma. */
			Y = lum_bot[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_bot[b] = (pix >> (8*b)) & 0xFF;
			}
			Y = lum_bot[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_bot[3+b] = (pix >> (8*b)) & 0xFF;
			}
			lum_bot += 2;
			dst_bot += 2*3;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
294 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into 32-bit pixels at 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination, written as unsigned ints
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - pixels to skip at the end of every destination line
 */
static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int *dst_top = (unsigned int *) out;
	unsigned int *dst_bot = dst_top + cols + mod;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* Jump over the line that the other pointer has just filled. */
	const int skip = mod + (cols + mod);
	int lines_left, pairs_left;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Upper line, two pixels. */
			Y = lum[0];
			dst_top[0] = (rgb_2_pix[ Y + r_base ] |
			              rgb_2_pix[ Y + g_base ] |
			              rgb_2_pix[ Y + b_base ]);
			Y = lum[1];
			dst_top[1] = (rgb_2_pix[ Y + r_base ] |
			              rgb_2_pix[ Y + g_base ] |
			              rgb_2_pix[ Y + b_base ]);
			lum += 2;
			dst_top += 2;

			/* Lower line, same chroma. */
			Y = lum_bot[0];
			dst_bot[0] = (rgb_2_pix[ Y + r_base ] |
			              rgb_2_pix[ Y + g_base ] |
			              rgb_2_pix[ Y + b_base ]);
			Y = lum_bot[1];
			dst_bot[1] = (rgb_2_pix[ Y + r_base ] |
			              rgb_2_pix[ Y + g_base ] |
			              rgb_2_pix[ Y + b_base ]);
			lum_bot += 2;
			dst_bot += 2;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
364 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into 16-bit pixels, doubling every pixel horizontally and
 * vertically.
 *
 * This relies on a trick: the lookup tables are expected to carry the
 * 16-bit pixel in both halves of each 32-bit entry, so a single unsigned
 * int store writes two adjacent identical pixels and the horizontal
 * doubling comes (almost) for free.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination, written as unsigned ints (two pixels each)
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - 16-bit pixels to skip at the end of every destination line
 */
static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int *dst_top = (unsigned int *) out;
	/* Length of one destination line, counted in unsigned ints. */
	const int next_row = cols+(mod/2);
	unsigned int *dst_bot = dst_top + 2*next_row;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* Each pointer fills two destination lines and has to leap over the
	   two lines filled through the other pointer. */
	const int skip = (next_row * 3) + (mod/2);
	unsigned int pix;
	int lines_left, pairs_left;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Upper source line -> destination lines 0 and 1. */
			Y = lum[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_top[next_row] = pix;
			dst_top[0] = pix;
			Y = lum[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_top[next_row+1] = pix;
			dst_top[1] = pix;
			lum += 2;
			dst_top += 2;

			/* Lower source line -> destination lines 2 and 3. */
			Y = lum_bot[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_bot[next_row] = pix;
			dst_bot[0] = pix;
			Y = lum_bot[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_bot[next_row+1] = pix;
			dst_bot[1] = pix;
			lum_bot += 2;
			dst_bot += 2;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
442 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into packed 24-bit pixels, doubling every pixel horizontally and
 * vertically (each source sample becomes a 2x2 block of output pixels).
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination; three bytes per pixel, least significant byte
 *             of the table value first
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - pixels (not bytes) to skip at the end of every destination line
 */
static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned char *dst_top = out;
	/* Length of one destination line in bytes. */
	const int next_row = (cols*2 + mod) * 3;
	unsigned char *dst_bot = dst_top + 2*next_row;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* Bytes from the end of a filled line to the start of the line four
	   destination lines below its start. */
	const int skip = next_row*3 + mod*3;
	unsigned int pix;
	int lines_left, pairs_left, b;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Upper source line -> destination lines 0 and 1. */
			Y = lum[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_top[0+b] = dst_top[3+b] =
				dst_top[next_row+b] = dst_top[next_row+3+b] =
					(pix >> (8*b)) & 0xFF;
			}
			dst_top += 2*3;

			Y = lum[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_top[0+b] = dst_top[3+b] =
				dst_top[next_row+b] = dst_top[next_row+3+b] =
					(pix >> (8*b)) & 0xFF;
			}
			dst_top += 2*3;
			lum += 2;

			/* Lower source line -> destination lines 2 and 3. */
			Y = lum_bot[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_bot[0+b] = dst_bot[3+b] =
				dst_bot[next_row+b] = dst_bot[next_row+3+b] =
					(pix >> (8*b)) & 0xFF;
			}
			dst_bot += 2*3;

			Y = lum_bot[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst_bot[0+b] = dst_bot[3+b] =
				dst_bot[next_row+b] = dst_bot[next_row+3+b] =
					(pix >> (8*b)) & 0xFF;
			}
			dst_bot += 2*3;
			lum_bot += 2;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
540 | |
/*
 * Convert a planar image with one cr/cb sample per 2x2 block of luma
 * samples into 32-bit pixels, doubling every pixel horizontally and
 * vertically (each source sample becomes a 2x2 block of output pixels).
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - source planes (lum is consumed two lines at a time)
 * out       - destination, written as unsigned ints
 * rows/cols - source size in luma samples; handled in 2x2 blocks
 * mod       - pixels to skip at the end of every destination line
 */
static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int *dst_top = (unsigned int *) out;
	/* Length of one destination line in pixels. */
	const int next_row = cols*2+mod;
	unsigned int *dst_bot = dst_top + 2*next_row;
	unsigned char *lum_bot = lum + cols;
	const int pairs_per_line = cols / 2;
	/* Each pointer fills two destination lines and has to leap over the
	   two lines filled through the other pointer. */
	const int skip = (next_row * 3) + mod;
	unsigned int pix;
	int lines_left, pairs_left;

	for ( lines_left = rows / 2; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			++cr;
			++cb;

			/* Upper source line -> destination lines 0 and 1. */
			Y = lum[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_top[0] = dst_top[1] =
			dst_top[next_row] = dst_top[next_row+1] = pix;
			dst_top += 2;

			Y = lum[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_top[0] = dst_top[1] =
			dst_top[next_row] = dst_top[next_row+1] = pix;
			dst_top += 2;
			lum += 2;

			/* Lower source line -> destination lines 2 and 3. */
			Y = lum_bot[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_bot[0] = dst_bot[1] =
			dst_bot[next_row] = dst_bot[next_row+1] = pix;
			dst_bot += 2;

			Y = lum_bot[1];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			dst_bot[0] = dst_bot[1] =
			dst_bot[next_row] = dst_bot[next_row+1] = pix;
			dst_bot += 2;
			lum_bot += 2;
		}

		lum += cols;
		lum_bot += cols;
		dst_top += skip;
		dst_bot += skip;
	}
}
617 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into 16-bit pixels at
 * 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination, written as unsigned shorts
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - pixels to skip at the end of every destination line
 */
static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned short *dst = (unsigned short *) out;
	const int pairs_per_line = cols / 2;
	int lines_left, pairs_left;

	for ( lines_left = rows; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			cr += 4;
			cb += 4;

			Y = lum[0];
			dst[0] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                           rgb_2_pix[ Y + g_base ] |
			                           rgb_2_pix[ Y + b_base ]);
			Y = lum[2];
			dst[1] = (unsigned short) (rgb_2_pix[ Y + r_base ] |
			                           rgb_2_pix[ Y + g_base ] |
			                           rgb_2_pix[ Y + b_base ]);
			lum += 4;
			dst += 2;
		}

		dst += mod;
	}
}
661 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into packed 24-bit
 * pixels at 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination; three bytes per pixel, least significant byte
 *             of the table value first
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - pixels (not bytes) to skip at the end of every destination line
 */
static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned char *dst = (unsigned char *) out;
	const int pairs_per_line = cols / 2;
	const int skip_bytes = mod * 3;
	unsigned int pix;
	int lines_left, pairs_left, b;

	for ( lines_left = rows; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			cr += 4;
			cb += 4;

			Y = lum[0];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst[b] = (pix >> (8*b)) & 0xFF;
			}

			Y = lum[2];
			pix = (rgb_2_pix[ Y + r_base ] |
			       rgb_2_pix[ Y + g_base ] |
			       rgb_2_pix[ Y + b_base ]);
			for ( b = 0; b < 3; ++b ) {
				dst[3+b] = (pix >> (8*b)) & 0xFF;
			}

			lum += 4;
			dst += 2*3;
		}

		dst += skip_bytes;
	}
}
711 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into 32-bit pixels at
 * 1:1 scale.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination, written as unsigned ints
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - pixels to skip at the end of every destination line
 */
static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int *dst = (unsigned int *) out;
	const int pairs_per_line = cols / 2;
	int lines_left, pairs_left;

	for ( lines_left = rows; lines_left != 0; --lines_left ) {
		for ( pairs_left = pairs_per_line; pairs_left != 0; --pairs_left ) {
			const int r_base = 0*768+256 + colortab[ *cr + 0*256 ];
			const int g_base = 1*768+256 + colortab[ *cr + 1*256 ]
			                             + colortab[ *cb + 2*256 ];
			const int b_base = 2*768+256 + colortab[ *cb + 3*256 ];
			int Y;

			cr += 4;
			cb += 4;

			Y = lum[0];
			dst[0] = (rgb_2_pix[ Y + r_base ] |
			          rgb_2_pix[ Y + g_base ] |
			          rgb_2_pix[ Y + b_base ]);
			Y = lum[2];
			dst[1] = (rgb_2_pix[ Y + r_base ] |
			          rgb_2_pix[ Y + g_base ] |
			          rgb_2_pix[ Y + b_base ]);
			lum += 4;
			dst += 2;
		}

		dst += mod;
	}
}
754 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into 16-bit pixels,
 * doubling every pixel horizontally and vertically.
 *
 * This relies on a trick: the lookup tables are expected to carry the
 * 16-bit pixel in both halves of each 32-bit entry, so a single unsigned
 * int store writes two adjacent identical pixels and the horizontal
 * doubling comes (almost) for free.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination, written as unsigned ints (two pixels each)
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - 16-bit pixels to skip at the end of every destination line
 */
static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int* row = (unsigned int*) out;
	/* Length of one destination line, counted in unsigned ints. */
	const int next_row = cols+(mod/2);
	/* A source line fills two destination lines.  The inner loop moves
	   `row` forward by `cols` words, so reaching the start of the line
	   two further down takes 2*next_row - cols = next_row + mod/2 more.
	   (Advancing by next_row alone drifts whenever mod is non-zero.) */
	const int line_skip = next_row + (mod/2);
	int x, y;
	int cr_r;
	int crb_g;
	int cb_b;
	int cols_2 = cols / 2;

	y = rows;
	while( y-- )
	{
		x = cols_2;
		while( x-- )
		{
			register int L;

			cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
			crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
			                   + colortab[ *cb + 2*256 ];
			cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
			cr += 4; cb += 4;

			L = *lum; lum += 2;
			row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
			                          rgb_2_pix[ L + crb_g ] |
			                          rgb_2_pix[ L + cb_b ]);
			row++;

			L = *lum; lum += 2;
			row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
			                          rgb_2_pix[ L + crb_g ] |
			                          rgb_2_pix[ L + cb_b ]);
			row++;
		}
		row += line_skip;
	}
}
803 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into packed 24-bit
 * pixels, doubling every pixel horizontally and vertically.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination; three bytes per pixel, least significant byte
 *             of the table value first
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - pixels (not bytes) to skip at the end of every destination line
 */
static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int value;
	unsigned char* row = out;
	/* Length of one destination line in bytes. */
	const int next_row = (cols*2 + mod) * 3;
	/* A source line fills two destination lines.  The inner loop moves
	   `row` forward by cols*2*3 bytes, so reaching the start of the line
	   two further down takes 2*next_row - cols*6 = next_row + mod*3 more.
	   (Advancing by next_row alone drifts whenever mod is non-zero.) */
	const int line_skip = next_row + mod*3;
	int x, y;
	int cr_r;
	int crb_g;
	int cb_b;
	int cols_2 = cols / 2;

	y = rows;
	while( y-- )
	{
		x = cols_2;
		while( x-- )
		{
			register int L;

			cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
			crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
			                   + colortab[ *cb + 2*256 ];
			cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
			cr += 4; cb += 4;

			L = *lum; lum += 2;
			value = (rgb_2_pix[ L + cr_r ] |
			         rgb_2_pix[ L + crb_g ] |
			         rgb_2_pix[ L + cb_b ]);
			row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
			     (value      ) & 0xFF;
			row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
			     (value >>  8) & 0xFF;
			row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
			     (value >> 16) & 0xFF;
			row += 2*3;

			L = *lum; lum += 2;
			value = (rgb_2_pix[ L + cr_r ] |
			         rgb_2_pix[ L + crb_g ] |
			         rgb_2_pix[ L + cb_b ]);
			row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
			     (value      ) & 0xFF;
			row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
			     (value >>  8) & 0xFF;
			row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
			     (value >> 16) & 0xFF;
			row += 2*3;
		}
		row += line_skip;
	}
}
859 | |
/*
 * Convert a packed image (luma samples two bytes apart, cr and cb samples
 * four bytes apart, one chroma pair per two pixels) into 32-bit pixels,
 * doubling every pixel horizontally and vertically.
 *
 * colortab  - four consecutive 256-entry tables; tables 0 and 1 are looked
 *             up with the cr sample, tables 2 and 3 with the cb sample
 * rgb_2_pix - three consecutive 768-entry tables, each indexed with
 *             256 + luma + chroma term; the three results are OR'ed
 * lum/cr/cb - pointers to the first luma, cr and cb byte of the packed data
 * out       - destination, written as unsigned ints
 * rows/cols - source size in pixels; columns are handled in pairs
 * mod       - pixels to skip at the end of every destination line
 */
static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
	unsigned int* row = (unsigned int*) out;
	/* Length of one destination line in pixels. */
	const int next_row = cols*2+mod;
	/* A source line fills two destination lines.  The inner loop moves
	   `row` forward by cols*2 pixels, so reaching the start of the line
	   two further down takes 2*next_row - cols*2 = next_row + mod more.
	   (Advancing by next_row alone drifts whenever mod is non-zero.) */
	const int line_skip = next_row + mod;
	int x, y;
	int cr_r;
	int crb_g;
	int cb_b;
	int cols_2 = cols / 2;

	y = rows;
	while( y-- )
	{
		x = cols_2;
		while( x-- )
		{
			register int L;

			cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
			crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
			                   + colortab[ *cb + 2*256 ];
			cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
			cr += 4; cb += 4;

			L = *lum; lum += 2;
			row[0] = row[1] = row[next_row] = row[next_row+1] =
			                   (rgb_2_pix[ L + cr_r ] |
			                    rgb_2_pix[ L + crb_g ] |
			                    rgb_2_pix[ L + cb_b ]);
			row += 2;

			L = *lum; lum += 2;
			row[0] = row[1] = row[next_row] = row[next_row+1] =
			                   (rgb_2_pix[ L + cr_r ] |
			                    rgb_2_pix[ L + crb_g ] |
			                    rgb_2_pix[ L + cb_b ]);
			row += 2;
		}

		row += line_skip;
	}
}
907 | |
/*
 * Count the 1 bits in a Uint32.
 * Walks the value one bit at a time; not meant for hot paths.
 */
static int number_of_bits_set( Uint32 a )
{
	int count = 0;

	while ( a ) {
		if ( a & 1 ) {
			++count;
		}
		a >>= 1;
	}
	return count;
}
918 | |
/*
 * Count the 0 bits below the lowest 1 bit of a Uint32.
 * A value of zero yields the full width of the type (char is assumed to
 * be 8 bits).  Walks the value one bit at a time; not meant for hot paths.
 */
static int free_bits_at_bottom( Uint32 a )
{
	int zeros = 0;

	if ( !a ) {
		return (int) (sizeof(Uint32) * 8);
	}
	while ( !(a & 1) ) {
		a >>= 1;
		++zeros;
	}
	return zeros;
}
930 | |
931 | |
932 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display) | |
933 { | |
934 SDL_Overlay *overlay; | |
935 struct private_yuvhwdata *swdata; | |
936 int *Cr_r_tab; | |
937 int *Cr_g_tab; | |
938 int *Cb_g_tab; | |
939 int *Cb_b_tab; | |
940 Uint32 *r_2_pix_alloc; | |
941 Uint32 *g_2_pix_alloc; | |
942 Uint32 *b_2_pix_alloc; | |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
943 int i; |
0 | 944 int CR, CB; |
945 Uint32 Rmask, Gmask, Bmask; | |
946 | |
947 /* Only RGB packed pixel conversion supported */ | |
948 if ( (display->format->BytesPerPixel != 2) && | |
949 (display->format->BytesPerPixel != 3) && | |
950 (display->format->BytesPerPixel != 4) ) { | |
951 SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces"); | |
952 return(NULL); | |
953 } | |
954 | |
955 /* Verify that we support the format */ | |
956 switch (format) { | |
957 case SDL_YV12_OVERLAY: | |
958 case SDL_IYUV_OVERLAY: | |
959 case SDL_YUY2_OVERLAY: | |
960 case SDL_UYVY_OVERLAY: | |
961 case SDL_YVYU_OVERLAY: | |
962 break; | |
963 default: | |
964 SDL_SetError("Unsupported YUV format"); | |
965 return(NULL); | |
966 } | |
967 | |
968 /* Create the overlay structure */ | |
969 overlay = (SDL_Overlay *)malloc(sizeof *overlay); | |
970 if ( overlay == NULL ) { | |
971 SDL_OutOfMemory(); | |
972 return(NULL); | |
973 } | |
974 memset(overlay, 0, (sizeof *overlay)); | |
975 | |
976 /* Fill in the basic members */ | |
977 overlay->format = format; | |
978 overlay->w = width; | |
979 overlay->h = height; | |
980 | |
981 /* Set up the YUV surface function structure */ | |
982 overlay->hwfuncs = &sw_yuvfuncs; | |
983 | |
984 /* Create the pixel data and lookup tables */ | |
985 swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata); | |
986 overlay->hwdata = swdata; | |
987 if ( swdata == NULL ) { | |
988 SDL_OutOfMemory(); | |
989 SDL_FreeYUVOverlay(overlay); | |
990 return(NULL); | |
991 } | |
992 swdata->stretch = NULL; | |
993 swdata->display = display; | |
994 swdata->pixels = (Uint8 *) malloc(width*height*2); | |
995 swdata->colortab = (int *)malloc(4*256*sizeof(int)); | |
996 Cr_r_tab = &swdata->colortab[0*256]; | |
997 Cr_g_tab = &swdata->colortab[1*256]; | |
998 Cb_g_tab = &swdata->colortab[2*256]; | |
999 Cb_b_tab = &swdata->colortab[3*256]; | |
1000 swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32)); | |
1001 r_2_pix_alloc = &swdata->rgb_2_pix[0*768]; | |
1002 g_2_pix_alloc = &swdata->rgb_2_pix[1*768]; | |
1003 b_2_pix_alloc = &swdata->rgb_2_pix[2*768]; | |
1004 if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) { | |
1005 SDL_OutOfMemory(); | |
1006 SDL_FreeYUVOverlay(overlay); | |
1007 return(NULL); | |
1008 } | |
1009 | |
1010 /* Generate the tables for the display surface */ | |
1011 for (i=0; i<256; i++) { | |
1012 /* Gamma correction (luminescence table) and chroma correction | |
1013 would be done here. See the Berkeley mpeg_play sources. | |
1014 */ | |
1015 CB = CR = (i-128); | |
1016 Cr_r_tab[i] = (int) ( (0.419/0.299) * CR); | |
1017 Cr_g_tab[i] = (int) (-(0.299/0.419) * CR); | |
1018 Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); | |
1019 Cb_b_tab[i] = (int) ( (0.587/0.331) * CB); | |
1020 } | |
1021 | |
1022 /* | |
1023 * Set up entries 0-255 in rgb-to-pixel value tables. | |
1024 */ | |
1025 Rmask = display->format->Rmask; | |
1026 Gmask = display->format->Gmask; | |
1027 Bmask = display->format->Bmask; | |
1028 for ( i=0; i<256; ++i ) { | |
1029 r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask)); | |
1030 r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask); | |
1031 g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask)); | |
1032 g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask); | |
1033 b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask)); | |
1034 b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask); | |
1035 } | |
1036 | |
1037 /* | |
1038 * If we have 16-bit output depth, then we double the value | |
1039 * in the top word. This means that we can write out both | |
1040 * pixels in the pixel doubling mode with one op. It is | |
1041 * harmless in the normal case as storing a 32-bit value | |
1042 * through a short pointer will lose the top bits anyway. | |
1043 */ | |
1044 if( display->format->BytesPerPixel == 2 ) { | |
1045 for ( i=0; i<256; ++i ) { | |
1046 r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16; | |
1047 g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16; | |
1048 b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16; | |
1049 } | |
1050 } | |
1051 | |
1052 /* | |
1053 * Spread out the values we have to the rest of the array so that | |
1054 * we do not need to check for overflow. | |
1055 */ | |
1056 for ( i=0; i<256; ++i ) { | |
1057 r_2_pix_alloc[i] = r_2_pix_alloc[256]; | |
1058 r_2_pix_alloc[i+512] = r_2_pix_alloc[511]; | |
1059 g_2_pix_alloc[i] = g_2_pix_alloc[256]; | |
1060 g_2_pix_alloc[i+512] = g_2_pix_alloc[511]; | |
1061 b_2_pix_alloc[i] = b_2_pix_alloc[256]; | |
1062 b_2_pix_alloc[i+512] = b_2_pix_alloc[511]; | |
1063 } | |
1064 | |
1065 /* You have chosen wisely... */ | |
1066 switch (format) { | |
1067 case SDL_YV12_OVERLAY: | |
1068 case SDL_IYUV_OVERLAY: | |
1069 if ( display->format->BytesPerPixel == 2 ) { | |
1070 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | |
1071 /* inline assembly functions */ | |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1072 if ( SDL_HasMMX() && (Rmask == 0xF800) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1073 (Gmask == 0x07E0) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1074 (Bmask == 0x001F) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1075 (width & 15) == 0) { |
0 | 1076 /*printf("Using MMX 16-bit 565 dither\n");*/ |
1077 swdata->Display1X = Color565DitherYV12MMX1X; | |
1078 } else { | |
1079 /*printf("Using C 16-bit dither\n");*/ | |
1080 swdata->Display1X = Color16DitherYV12Mod1X; | |
1081 } | |
1082 #else | |
1083 swdata->Display1X = Color16DitherYV12Mod1X; | |
1084 #endif | |
1085 swdata->Display2X = Color16DitherYV12Mod2X; | |
1086 } | |
1087 if ( display->format->BytesPerPixel == 3 ) { | |
1088 swdata->Display1X = Color24DitherYV12Mod1X; | |
1089 swdata->Display2X = Color24DitherYV12Mod2X; | |
1090 } | |
1091 if ( display->format->BytesPerPixel == 4 ) { | |
1092 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | |
1093 /* inline assembly functions */ | |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1094 if ( SDL_HasMMX() && (Rmask == 0x00FF0000) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1095 (Gmask == 0x0000FF00) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1096 (Bmask == 0x000000FF) && |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
366
diff
changeset
|
1097 (width & 15) == 0) { |
0 | 1098 /*printf("Using MMX 32-bit dither\n");*/ |
1099 swdata->Display1X = ColorRGBDitherYV12MMX1X; | |
1100 } else { | |
1101 /*printf("Using C 32-bit dither\n");*/ | |
1102 swdata->Display1X = Color32DitherYV12Mod1X; | |
1103 } | |
1104 #else | |
1105 swdata->Display1X = Color32DitherYV12Mod1X; | |
1106 #endif | |
1107 swdata->Display2X = Color32DitherYV12Mod2X; | |
1108 } | |
1109 break; | |
1110 case SDL_YUY2_OVERLAY: | |
1111 case SDL_UYVY_OVERLAY: | |
1112 case SDL_YVYU_OVERLAY: | |
1113 if ( display->format->BytesPerPixel == 2 ) { | |
1114 swdata->Display1X = Color16DitherYUY2Mod1X; | |
1115 swdata->Display2X = Color16DitherYUY2Mod2X; | |
1116 } | |
1117 if ( display->format->BytesPerPixel == 3 ) { | |
1118 swdata->Display1X = Color24DitherYUY2Mod1X; | |
1119 swdata->Display2X = Color24DitherYUY2Mod2X; | |
1120 } | |
1121 if ( display->format->BytesPerPixel == 4 ) { | |
1122 swdata->Display1X = Color32DitherYUY2Mod1X; | |
1123 swdata->Display2X = Color32DitherYUY2Mod2X; | |
1124 } | |
1125 break; | |
1126 default: | |
1127 /* We should never get here (caught above) */ | |
1128 break; | |
1129 } | |
1130 | |
1131 /* Find the pitch and offset values for the overlay */ | |
1132 overlay->pitches = swdata->pitches; | |
1133 overlay->pixels = swdata->planes; | |
1134 switch (format) { | |
1135 case SDL_YV12_OVERLAY: | |
1136 case SDL_IYUV_OVERLAY: | |
1137 overlay->pitches[0] = overlay->w; | |
1138 overlay->pitches[1] = overlay->pitches[0] / 2; | |
1139 overlay->pitches[2] = overlay->pitches[0] / 2; | |
1140 overlay->pixels[0] = swdata->pixels; | |
1141 overlay->pixels[1] = overlay->pixels[0] + | |
1142 overlay->pitches[0] * overlay->h; | |
1143 overlay->pixels[2] = overlay->pixels[1] + | |
1144 overlay->pitches[1] * overlay->h / 2; | |
1145 overlay->planes = 3; | |
1146 break; | |
1147 case SDL_YUY2_OVERLAY: | |
1148 case SDL_UYVY_OVERLAY: | |
1149 case SDL_YVYU_OVERLAY: | |
1150 overlay->pitches[0] = overlay->w*2; | |
1151 overlay->pixels[0] = swdata->pixels; | |
1152 overlay->planes = 1; | |
1153 break; | |
1154 default: | |
1155 /* We should never get here (caught above) */ | |
1156 break; | |
1157 } | |
1158 | |
1159 /* We're all done.. */ | |
1160 return(overlay); | |
1161 } | |
1162 | |
1163 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay) | |
1164 { | |
1165 return(0); | |
1166 } | |
1167 | |
1168 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay) | |
1169 { | |
1170 return; | |
1171 } | |
1172 | |
1173 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect) | |
1174 { | |
1175 struct private_yuvhwdata *swdata; | |
1176 SDL_Surface *stretch; | |
1177 SDL_Surface *display; | |
1178 int scale_2x; | |
1179 Uint8 *lum, *Cr, *Cb; | |
1180 Uint8 *dst; | |
1181 int mod; | |
1182 | |
1183 swdata = overlay->hwdata; | |
1184 scale_2x = 0; | |
1185 stretch = 0; | |
1186 if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) { | |
1187 if ( (dstrect->w == 2*overlay->w) && | |
1188 (dstrect->h == 2*overlay->h) ) { | |
1189 scale_2x = 1; | |
1190 } else { | |
1191 if ( ! swdata->stretch ) { | |
1192 display = swdata->display; | |
1193 swdata->stretch = SDL_CreateRGBSurface( | |
1194 SDL_SWSURFACE, | |
1195 overlay->w, overlay->h, | |
1196 display->format->BitsPerPixel, | |
1197 display->format->Rmask, | |
1198 display->format->Gmask, | |
1199 display->format->Bmask, 0); | |
1200 if ( ! swdata->stretch ) { | |
1201 return(-1); | |
1202 } | |
1203 } | |
1204 stretch = swdata->stretch; | |
1205 } | |
1206 } | |
1207 | |
1208 if ( stretch ) { | |
1209 display = stretch; | |
1210 } else { | |
1211 display = swdata->display; | |
1212 } | |
1213 switch (overlay->format) { | |
1214 case SDL_YV12_OVERLAY: | |
1215 lum = overlay->pixels[0]; | |
1216 Cr = overlay->pixels[1]; | |
1217 Cb = overlay->pixels[2]; | |
1218 break; | |
1219 case SDL_IYUV_OVERLAY: | |
1220 lum = overlay->pixels[0]; | |
1221 Cr = overlay->pixels[2]; | |
1222 Cb = overlay->pixels[1]; | |
1223 break; | |
1224 case SDL_YUY2_OVERLAY: | |
1225 lum = overlay->pixels[0]; | |
1226 Cr = lum + 3; | |
1227 Cb = lum + 1; | |
1228 break; | |
1229 case SDL_UYVY_OVERLAY: | |
1230 lum = overlay->pixels[0]+1; | |
1231 Cr = lum + 1; | |
1232 Cb = lum - 1; | |
1233 break; | |
1234 case SDL_YVYU_OVERLAY: | |
1235 lum = overlay->pixels[0]; | |
1236 Cr = lum + 1; | |
1237 Cb = lum + 3; | |
1238 break; | |
1239 default: | |
292
eadc0746dfaf
Added SDL_LockRect() and SDL_UnlockRect()
Sam Lantinga <slouken@libsdl.org>
parents:
252
diff
changeset
|
1240 SDL_SetError("Unsupported YUV format in blit"); |
0 | 1241 return(-1); |
1242 } | |
1243 if ( SDL_MUSTLOCK(display) ) { | |
1244 if ( SDL_LockSurface(display) < 0 ) { | |
1245 return(-1); | |
1246 } | |
1247 } | |
1248 if ( stretch ) { | |
1249 dst = (Uint8 *)stretch->pixels; | |
1250 } else { | |
1251 dst = (Uint8 *)display->pixels | |
1252 + dstrect->x * display->format->BytesPerPixel | |
1253 + dstrect->y * display->pitch; | |
1254 } | |
1255 mod = (display->pitch / display->format->BytesPerPixel); | |
1256 | |
1257 if ( scale_2x ) { | |
1258 mod -= (overlay->w * 2); | |
1259 swdata->Display2X(swdata->colortab, swdata->rgb_2_pix, | |
1260 lum, Cr, Cb, dst, overlay->h, overlay->w,mod); | |
1261 } else { | |
1262 mod -= overlay->w; | |
1263 swdata->Display1X(swdata->colortab, swdata->rgb_2_pix, | |
1264 lum, Cr, Cb, dst, overlay->h, overlay->w,mod); | |
1265 } | |
1266 if ( SDL_MUSTLOCK(display) ) { | |
1267 SDL_UnlockSurface(display); | |
1268 } | |
1269 if ( stretch ) { | |
1270 display = swdata->display; | |
1271 SDL_SoftStretch(stretch, NULL, display, dstrect); | |
1272 } | |
1273 SDL_UpdateRects(display, 1, dstrect); | |
1274 | |
1275 return(0); | |
1276 } | |
1277 | |
1278 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay) | |
1279 { | |
1280 struct private_yuvhwdata *swdata; | |
1281 | |
1282 swdata = overlay->hwdata; | |
1283 if ( swdata ) { | |
9
a1c15fa4abb9
Fixed memory leak in software YUV stretch code
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1284 if ( swdata->stretch ) { |
a1c15fa4abb9
Fixed memory leak in software YUV stretch code
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1285 SDL_FreeSurface(swdata->stretch); |
a1c15fa4abb9
Fixed memory leak in software YUV stretch code
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1286 } |
0 | 1287 if ( swdata->pixels ) { |
1288 free(swdata->pixels); | |
1289 } | |
1290 if ( swdata->colortab ) { | |
1291 free(swdata->colortab); | |
1292 } | |
1293 if ( swdata->rgb_2_pix ) { | |
1294 free(swdata->rgb_2_pix); | |
1295 } | |
1296 free(swdata); | |
1297 } | |
1298 } |