Mercurial > sdl-ios-xcode
diff src/render/SDL_yuv_sw.c @ 5159:307ccc9c135e
Made it possible to create a texture of any format, even if not supported by the renderer.
This allows me to reduce the set of formats supported by the renderers to the most optimal set, for a nice speed boost.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 03 Feb 2011 00:19:40 -0800 |
parents | src/video/SDL_yuv_sw.c@f7b03b6838cb |
children | b530ef003506 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/SDL_yuv_sw.c Thu Feb 03 00:19:40 2011 -0800 @@ -0,0 +1,1322 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2010 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +/* This is the software implementation of the YUV texture support */ + +/* This code was derived from code carrying the following copyright notices: + + * Copyright (c) 1995 The Regents of the University of California. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF + * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Copyright (c) 1995 Erik Corry + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, + * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, + * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Portions of this software Copyright (c) 1995 Brown University. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement + * is hereby granted, provided that the above copyright notice and the + * following two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN + * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "SDL_video.h" +#include "SDL_cpuinfo.h" +#include "SDL_yuv_sw_c.h" + + +/* The colorspace conversion functions */ + +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES +extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); +extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); +#endif + +static void +Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned short *row1; + unsigned short *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned short *) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum++; + *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum2++; + *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row1; + unsigned char *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = out; + row2 = row1 + cols * 3 + mod * 3; + lum2 = lum + cols; + + mod += cols + mod; + mod *= 3; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row1++ = (value) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row1++ = (value) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + + /* Now, do second row. */ + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row2++ = (value) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row2++ = (value) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1; + unsigned int *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned int *) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + *row1++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum++; + *row1++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum2++; + *row2++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ +static void +Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1 = (unsigned int *) out; + const int next_row = cols + (mod / 2); + unsigned int *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + (mod / 2); + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row1++; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row1++; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row2++; + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row2++; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row1 = out; + const int next_row = (cols * 2 + mod) * 3; + unsigned char *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = next_row * 3 + mod * 3; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = + row1[next_row + 3 + 0] = (value) & 0xFF; + row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = + row1[next_row + 3 + 1] = (value >> 8) & 0xFF; + row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = + row1[next_row + 3 + 2] = (value >> 16) & 0xFF; + row1 += 2 * 3; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = + row1[next_row + 3 + 0] = (value) & 0xFF; + row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = + row1[next_row + 3 + 1] = (value >> 8) & 0xFF; + row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = + row1[next_row + 3 + 2] = (value >> 16) & 0xFF; + row1 += 2 * 3; + + + /* Now, do second row. */ + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = + row2[next_row + 3 + 0] = (value) & 0xFF; + row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = + row2[next_row + 3 + 1] = (value >> 8) & 0xFF; + row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = + row2[next_row + 3 + 2] = (value >> 16) & 0xFF; + row2 += 2 * 3; + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = + row2[next_row + 3 + 0] = (value) & 0xFF; + row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = + row2[next_row + 3 + 1] = (value >> 8) & 0xFF; + row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = + row2[next_row + 3 + 2] = (value >> 16) & 0xFF; + row2 += 2 * 3; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1 = (unsigned int *) out; + const int next_row = cols * 2 + mod; + unsigned int *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1 += 2; + + L = *lum++; + row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1 += 2; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2 += 2; + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2 += 2; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned short *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned short *) out; + + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum; + lum += 2; + *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + } + + row += mod; + } +} + +static void +Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned char *) out; + mod *= 3; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row++ = (value) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row++ = (value) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + } + row += mod; + } +} + +static void +Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned int *) out; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + *row++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum; + lum += 2; + *row++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + + } + row += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ +static void +Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row = (unsigned int *) out; + const int next_row = cols + (mod / 2); + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row++; + + L = *lum; + lum += 2; + row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row++; + + } + row += next_row; + } +} + +static void +Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row = out; + const int next_row = (cols * 2 + mod) * 3; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row[0 + 0] = row[3 + 0] = row[next_row + 0] = + row[next_row + 3 + 0] = (value) & 0xFF; + row[0 + 1] = row[3 + 1] = row[next_row + 1] = + row[next_row + 3 + 1] = (value >> 8) & 0xFF; + row[0 + 2] = row[3 + 2] = row[next_row + 2] = + row[next_row + 3 + 2] = (value >> 16) & 0xFF; + row += 2 * 3; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row[0 + 0] = row[3 + 0] = row[next_row + 0] = + row[next_row + 3 + 0] = (value) & 0xFF; + row[0 + 1] = row[3 + 1] = row[next_row + 1] = + row[next_row + 3 + 1] = (value >> 8) & 0xFF; + row[0 + 2] = row[3 + 2] = row[next_row + 2] = + row[next_row + 3 + 2] = (value >> 16) & 0xFF; + row += 2 * 3; + + } + row += next_row; + } +} + +static void +Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row = (unsigned int *) out; + const int next_row = cols * 2 + mod; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + mod += mod; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + row[0] = row[1] = row[next_row] = row[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row += 2; + + L = *lum; + lum += 2; + row[0] = row[1] = row[next_row] = row[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row += 2; + + + } + + row += next_row; + } +} + +/* + * How many 1 bits are there in the Uint32. + * Low performance, do not call often. + */ +static int +number_of_bits_set(Uint32 a) +{ + if (!a) + return 0; + if (a & 1) + return 1 + number_of_bits_set(a >> 1); + return (number_of_bits_set(a >> 1)); +} + +/* + * How many 0 bits are there at least significant end of Uint32. + * Low performance, do not call often. + */ +static int +free_bits_at_bottom(Uint32 a) +{ + /* assume char is 8 bits */ + if (!a) + return sizeof(Uint32) * 8; + if (((Sint32) a) & 1l) + return 0; + return 1 + free_bits_at_bottom(a >> 1); +} + +static int +SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format) +{ + Uint32 *r_2_pix_alloc; + Uint32 *g_2_pix_alloc; + Uint32 *b_2_pix_alloc; + int i; + int bpp; + Uint32 Rmask, Gmask, Bmask, Amask; + + if (!SDL_PixelFormatEnumToMasks + (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) { + SDL_SetError("Unsupported YUV destination format"); + return -1; + } + + swdata->target_format = target_format; + r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768]; + g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768]; + b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768]; + + /* + * Set up entries 0-255 in rgb-to-pixel value tables. + */ + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask)); + r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask); + r_2_pix_alloc[i + 256] |= Amask; + g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask)); + g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask); + g_2_pix_alloc[i + 256] |= Amask; + b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask)); + b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask); + b_2_pix_alloc[i + 256] |= Amask; + } + + /* + * If we have 16-bit output depth, then we double the value + * in the top word. This means that we can write out both + * pixels in the pixel doubling mode with one op. It is + * harmless in the normal case as storing a 32-bit value + * through a short pointer will lose the top bits anyway. + */ + if (SDL_BYTESPERPIXEL(target_format) == 2) { + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16; + g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16; + b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16; + } + } + + /* + * Spread out the values we have to the rest of the array so that + * we do not need to check for overflow. + */ + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i] = r_2_pix_alloc[256]; + r_2_pix_alloc[i + 512] = r_2_pix_alloc[511]; + g_2_pix_alloc[i] = g_2_pix_alloc[256]; + g_2_pix_alloc[i + 512] = g_2_pix_alloc[511]; + b_2_pix_alloc[i] = b_2_pix_alloc[256]; + b_2_pix_alloc[i + 512] = b_2_pix_alloc[511]; + } + + /* You have chosen wisely... */ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (SDL_BYTESPERPIXEL(target_format) == 2) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if (SDL_HasMMX() && (Rmask == 0xF800) && + (Gmask == 0x07E0) && (Bmask == 0x001F) + && (swdata->w & 15) == 0) { +/*printf("Using MMX 16-bit 565 dither\n");*/ + swdata->Display1X = Color565DitherYV12MMX1X; + } else { +/*printf("Using C 16-bit dither\n");*/ + swdata->Display1X = Color16DitherYV12Mod1X; + } +#else + swdata->Display1X = Color16DitherYV12Mod1X; +#endif + swdata->Display2X = Color16DitherYV12Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 3) { + swdata->Display1X = Color24DitherYV12Mod1X; + swdata->Display2X = Color24DitherYV12Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 4) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if (SDL_HasMMX() && (Rmask == 0x00FF0000) && + (Gmask == 0x0000FF00) && + (Bmask == 0x000000FF) && (swdata->w & 15) == 0) { +/*printf("Using MMX 32-bit dither\n");*/ + swdata->Display1X = ColorRGBDitherYV12MMX1X; + } else { +/*printf("Using C 32-bit dither\n");*/ + swdata->Display1X = Color32DitherYV12Mod1X; + } +#else + swdata->Display1X = Color32DitherYV12Mod1X; +#endif + swdata->Display2X = Color32DitherYV12Mod2X; + } + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + if (SDL_BYTESPERPIXEL(target_format) == 2) { + swdata->Display1X = Color16DitherYUY2Mod1X; + swdata->Display2X = Color16DitherYUY2Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 3) { + swdata->Display1X = Color24DitherYUY2Mod1X; + swdata->Display2X = Color24DitherYUY2Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 4) { + swdata->Display1X = Color32DitherYUY2Mod1X; + swdata->Display2X = Color32DitherYUY2Mod2X; + } + break; + default: + /* We should never get here (caught above) */ + break; + } + + if (swdata->display) { + SDL_FreeSurface(swdata->display); + swdata->display = NULL; + } + return 0; +} + +SDL_SW_YUVTexture * +SDL_SW_CreateYUVTexture(Uint32 format, int w, int h) +{ + SDL_SW_YUVTexture *swdata; + int *Cr_r_tab; + int *Cr_g_tab; + int *Cb_g_tab; + int *Cb_b_tab; + int i; + int CR, CB; + + swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata)); + if (!swdata) { + SDL_OutOfMemory(); + return NULL; + } + + switch (format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + break; + default: + SDL_SetError("Unsupported YUV format"); + return NULL; + } + + swdata->format = format; + swdata->target_format = SDL_PIXELFORMAT_UNKNOWN; + swdata->w = w; + swdata->h = h; + swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2); + swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int)); + swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32)); + if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) { + SDL_OutOfMemory(); + SDL_SW_DestroyYUVTexture(swdata); + return NULL; + } + + /* Generate the tables for the display surface */ + Cr_r_tab = &swdata->colortab[0 * 256]; + Cr_g_tab = &swdata->colortab[1 * 256]; + Cb_g_tab = &swdata->colortab[2 * 256]; + Cb_b_tab = &swdata->colortab[3 * 256]; + for (i = 0; i < 256; i++) { + /* Gamma correction (luminescence table) and chroma correction + would be done here. See the Berkeley mpeg_play sources. + */ + CB = CR = (i - 128); + Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR); + Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR); + Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB); + Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB); + } + + /* Find the pitch and offset values for the overlay */ + switch (format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + swdata->pitches[0] = w; + swdata->pitches[1] = swdata->pitches[0] / 2; + swdata->pitches[2] = swdata->pitches[0] / 2; + swdata->planes[0] = swdata->pixels; + swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h; + swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2; + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + swdata->pitches[0] = w * 2; + swdata->planes[0] = swdata->pixels; + break; + default: + /* We should never get here (caught above) */ + break; + } + + /* We're all done.. */ + return (swdata); +} + +int +SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels, + int *pitch) +{ + *pixels = swdata->planes[0]; + *pitch = swdata->pitches[0]; + return 0; +} + +int +SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + const void *pixels, int pitch) +{ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (rect + && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w + || rect->h != swdata->h)) { + SDL_SetError + ("YV12 and IYUV textures only support full surface updates"); + return -1; + } + SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2); + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + { + Uint8 *src, *dst; + int row; + size_t length; + + src = (Uint8 *) pixels; + dst = + swdata->planes[0] + rect->y * swdata->pitches[0] + + rect->x * 2; + length = rect->w * 2; + for (row = 0; row < rect->h; ++row) { + SDL_memcpy(dst, src, length); + src += pitch; + dst += swdata->pitches[0]; + } + } + break; + } + return 0; +} + +int +SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + void **pixels, int *pitch) +{ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (rect + && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w + || rect->h != swdata->h)) { + SDL_SetError + ("YV12 and IYUV textures only support full surface locks"); + return -1; + } + break; + } + + *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2; + *pitch = swdata->pitches[0]; + return 0; +} + +void +SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata) +{ +} + +int +SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect, + Uint32 target_format, int w, int h, void *pixels, + int pitch) +{ + int stretch; + int scale_2x; + Uint8 *lum, *Cr, *Cb; + int mod; + + /* Make sure we're set up to display in the desired format */ + if (target_format != swdata->target_format) { + if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) { + return -1; + } + } + + stretch = 0; + scale_2x = 0; + if (srcrect->x || srcrect->y || srcrect->w < swdata->w + || srcrect->h < swdata->h) { + /* The source rectangle has been clipped. + Using a scratch surface is easier than adding clipped + source support to all the blitters, plus that would + slow them down in the general unclipped case. + */ + stretch = 1; + } else if ((srcrect->w != w) || (srcrect->h != h)) { + if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) { + scale_2x = 1; + } else { + stretch = 1; + } + } + if (stretch) { + int bpp; + Uint32 Rmask, Gmask, Bmask, Amask; + + if (swdata->display) { + swdata->display->w = w; + swdata->display->h = h; + swdata->display->pixels = pixels; + swdata->display->pitch = pitch; + } else { + /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ + SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, + &Bmask, &Amask); + swdata->display = + SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, + Gmask, Bmask, Amask); + if (!swdata->display) { + return (-1); + } + } + if (!swdata->stretch) { + /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ + SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, + &Bmask, &Amask); + swdata->stretch = + SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask, + Gmask, Bmask, Amask); + if (!swdata->stretch) { + return (-1); + } + } + pixels = swdata->stretch->pixels; + pitch = swdata->stretch->pitch; + } + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + lum = swdata->planes[0]; + Cr = swdata->planes[1]; + Cb = swdata->planes[2]; + break; + case SDL_PIXELFORMAT_IYUV: + lum = swdata->planes[0]; + Cr = swdata->planes[2]; + Cb = swdata->planes[1]; + break; + case SDL_PIXELFORMAT_YUY2: + lum = swdata->planes[0]; + Cr = lum + 3; + Cb = lum + 1; + break; + case SDL_PIXELFORMAT_UYVY: + lum = swdata->planes[0] + 1; + Cr = lum + 1; + Cb = lum - 1; + break; + case SDL_PIXELFORMAT_YVYU: + lum = swdata->planes[0]; + Cr = lum + 1; + Cb = lum + 3; + break; + default: + SDL_SetError("Unsupported YUV format in copy"); + return (-1); + } + mod = (pitch / SDL_BYTESPERPIXEL(target_format)); + + if (scale_2x) { + mod -= (swdata->w * 2); + swdata->Display2X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); + } else { + mod -= swdata->w; + swdata->Display1X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); + } + if (stretch) { + SDL_Rect rect = *srcrect; + SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL); + } + return 0; +} + +void +SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata) +{ + if (swdata) { + if (swdata->pixels) { + SDL_free(swdata->pixels); + } + if (swdata->colortab) { + SDL_free(swdata->colortab); + } + if (swdata->rgb_2_pix) { + SDL_free(swdata->rgb_2_pix); + } + if (swdata->stretch) { + SDL_FreeSurface(swdata->stretch); + } + if (swdata->display) { + SDL_FreeSurface(swdata->display); + } + SDL_free(swdata); + } +} + +/* vi: set ts=4 sw=4 expandtab: */