Mercurial > sdl-ios-xcode
changeset 3144:0d8d1f870964 gsoc2009_ps3
Moved SPE-functions to SDL_ps3spe.c.
Added ActivateRenderer() and PS3_QueryTexturePixels().
Added yuv2rgb_spu, but it is not yet in use.
author | Martin Lowinski <martin@goldtopf.org> |
---|---|
date | Wed, 10 Jun 2009 09:15:33 +0000 |
parents | 8fdabaa064c3 |
children | 7828eed2f31a |
files | configure.in src/video/SDL_yuv_sw.c src/video/SDL_yuv_sw_c.h src/video/ps3/SDL_ps3render.c src/video/ps3/SDL_ps3spe.c src/video/ps3/SDL_ps3spe_c.h src/video/ps3/SDL_ps3video.c src/video/ps3/SDL_ps3video.h src/video/ps3/spulibs/Makefile src/video/ps3/spulibs/yuv2rgb_converter.c |
diffstat | 10 files changed, 1017 insertions(+), 220 deletions(-) [+] |
line wrap: on
line diff
--- a/configure.in Sat Jun 06 06:40:23 2009 +0000 +++ b/configure.in Wed Jun 10 09:15:33 2009 +0000 @@ -1522,7 +1522,7 @@ AC_DEFINE(SDL_VIDEO_DRIVER_PS3) SOURCES="$SOURCES $srcdir/src/video/ps3/*.c" EXTRA_CFLAGS="$EXTRA_CFLAGS -I/opt/cell/sdk/usr/include" - EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu" + EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu -lyuv2rgb_spu" have_video=yes fi fi
--- a/src/video/SDL_yuv_sw.c Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/SDL_yuv_sw.c Wed Jun 10 09:15:33 2009 +0000 @@ -88,32 +88,6 @@ #include "SDL_yuv_sw_c.h" -struct SDL_SW_YUVTexture -{ - Uint32 format; - Uint32 target_format; - int w, h; - Uint8 *pixels; - int *colortab; - Uint32 *rgb_2_pix; - void (*Display1X) (int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); - void (*Display2X) (int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); - - /* These are just so we don't have to allocate them separately */ - Uint16 pitches[3]; - Uint8 *planes[3]; - - /* This is a temporary surface in case we have to stretch copy */ - SDL_Surface *stretch; - SDL_Surface *display; -}; - /* The colorspace conversion functions */ #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
--- a/src/video/SDL_yuv_sw_c.h Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/SDL_yuv_sw_c.h Wed Jun 10 09:15:33 2009 +0000 @@ -26,6 +26,32 @@ /* This is the software implementation of the YUV texture support */ +struct SDL_SW_YUVTexture +{ + Uint32 format; + Uint32 target_format; + int w, h; + Uint8 *pixels; + int *colortab; + Uint32 *rgb_2_pix; + void (*Display1X) (int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); + void (*Display2X) (int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); + + /* These are just so we don't have to allocate them separately */ + Uint16 pitches[3]; + Uint8 *planes[3]; + + /* This is a temporary surface in case we have to stretch copy */ + SDL_Surface *stretch; + SDL_Surface *display; +}; + typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture; SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
--- a/src/video/ps3/SDL_ps3render.c Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/ps3/SDL_ps3render.c Wed Jun 10 09:15:33 2009 +0000 @@ -27,7 +27,7 @@ #include "../SDL_renderer_sw.h" #include "SDL_ps3video.h" -#include "spulibs/spu_common.h" +#include "SDL_ps3spe_c.h" #include <fcntl.h> #include <stdlib.h> @@ -35,13 +35,17 @@ #include <linux/kd.h> #include <linux/fb.h> #include <sys/mman.h> +#include <asm/ps3fb.h> -#include <asm/ps3fb.h> + +/* Stores the executable name */ +extern spe_program_handle_t yuv2rgb_spu; /* SDL surface based renderer implementation */ static SDL_Renderer *SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags); +static int SDL_PS3_ActivateRenderer(SDL_Renderer * renderer); static int SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y); static int SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, int x2, int y2); @@ -56,6 +60,7 @@ /* Texture */ static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); +static int PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, void **pixels, int *pitch); static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture); @@ -95,13 +100,18 @@ /* Use two buffers in fb? 
res < 720p */ unsigned int double_buffering; + + /* SPE threading stuff */ + spu_data_t * converter_thread_data; + /* YUV converting transfer data */ + volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128))); } SDL_PS3_RenderData; typedef struct { - void *pixels; int pitch; int bpp; + volatile void *pixels __attribute__((aligned(128))); } PS3_TextureData; SDL_Renderer * @@ -138,6 +148,8 @@ //renderer->CreateTexture = PS3_CreateTexture; //renderer->DestroyTexture = PS3_DestroyTexture; + //renderer->QueryTexturePixels = PS3_QueryTexturePixels; + renderer->ActivateRenderer = SDL_PS3_ActivateRenderer; renderer->RenderPoint = SDL_PS3_RenderPoint; renderer->RenderLine = SDL_PS3_RenderLine; renderer->RenderFill = SDL_PS3_RenderFill; @@ -184,10 +196,42 @@ } data->current_screen = 0; + /* Create SPU parms structure */ + data->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t)); + if (data->converter_parms == NULL) { + SDL_PS3_DestroyRenderer(renderer); + SDL_OutOfMemory(); + return NULL; + } + + /* Set up the SPEs */ + data->converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); + if (data->converter_thread_data == NULL) { + SDL_PS3_DestroyRenderer(renderer); + SDL_OutOfMemory(); + return NULL; + } + + data->converter_thread_data->program = yuv2rgb_spu; + data->converter_thread_data->program_name = "yuv2rgb_spu"; + data->converter_thread_data->keepalive = 1; + data->converter_thread_data->booted = 0; + + SPE_Start(data->converter_thread_data); + return renderer; } static int +SDL_PS3_ActivateRenderer(SDL_Renderer * renderer) +{ + deprintf(1, "PS3_ActivateRenderer()\n"); + SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata; + + return 0; +} + +static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) { deprintf(1, "PS3_CreateTexture()\n"); PS3_TextureData *data; @@ -197,6 +241,7 @@ return -1; } + data->bpp = SDL_BYTESPERPIXEL(texture->format); data->pitch = 
(texture->w * SDL_BYTESPERPIXEL(texture->format)); data->pixels = NULL; @@ -211,16 +256,29 @@ return 0; } +static int +PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, + void **pixels, int *pitch) +{ + PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; + + *pixels = (void *)data->pixels; + *pitch = data->pitch; + + return 0; +} + static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture) { + deprintf(1, "PS3_DestroyTexture()\n"); PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; if (!data) { return; } - free(data->pixels); + free((void *)data->pixels); } static int @@ -302,7 +360,8 @@ (SDL_PS3_RenderData *) renderer->driverdata; SDL_Window *window = SDL_GetWindowFromID(renderer->window); SDL_VideoDisplay *display = SDL_GetDisplayFromWindow(window); - PS3_TextureData *txdata = (PS3_TextureData *) texture->driverdata; + //PS3_TextureData *txdata = (PS3_TextureData *) texture->driverdata; + SDL_SW_YUVTexture *txdata = (SDL_SW_YUVTexture *) texture->driverdata; SDL_VideoData *devdata = display->device->driverdata; if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { @@ -311,10 +370,41 @@ void *pixels = (Uint8 *) target->pixels + dstrect->y * target->pitch + dstrect->x * target->format->BytesPerPixel; +#if 0 + /* Not yet tested */ + Uint8 *lum, *Cr, *Cb; + SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) texture->driverdata; + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + lum = swdata->planes[0]; + Cr = swdata->planes[1]; + Cb = swdata->planes[2]; + break; + case SDL_PIXELFORMAT_IYUV: + lum = swdata->planes[0]; + Cr = swdata->planes[2]; + Cb = swdata->planes[1]; + break; + default: + return -1; + } + + data->converter_parms->src_pixel_width = dstrect->w; + data->converter_parms->src_pixel_height = dstrect->h; + data->converter_parms->dstBuffer = (Uint8 *)pixels; + data->converter_thread_data->argp = (void *)data->converter_parms; + + /* Convert YUV overlay to RGB */ + 
SPE_SendMsg(data->converter_thread_data, SPU_START); + SPE_SendMsg(data->converter_thread_data, (unsigned int)data->converter_thread_data->argp); + + return 0; +#else return SDL_SW_CopyYUVToRGB((SDL_SW_YUVTexture *) texture->driverdata, srcrect, display->current_mode.format, dstrect->w, dstrect->h, pixels, target->pitch); +#endif } else { deprintf(1, "SDL_ISPIXELFORMAT_FOURCC = false\n"); SDL_Surface *surface = (SDL_Surface *) texture->driverdata; @@ -330,12 +420,12 @@ deprintf(1, "dstrect->w = %u\n", dstrect->w); deprintf(1, "dstrect->h = %u\n", dstrect->h); - deprintf(1, "txdata->bpp = %u\n", txdata->bpp); + //deprintf(1, "txdata->bpp = %u\n", txdata->bpp); deprintf(1, "texture->format (bpp) = %u\n", SDL_BYTESPERPIXEL(texture->format)); /* For testing, align pixels */ - void *pixels = (void *)memalign(16, dstrect->h * data->screens[0]->pitch); - SDL_memcpy(pixels, surface->pixels, dstrect->h * data->screens[0]->pitch); + void *pixels = (void *)memalign(16, window->h * window->w * 4); + SDL_memcpy(pixels, surface->pixels, window->h * window->w * 4); /* Get screeninfo */ struct fb_fix_screeninfo fb_finfo; @@ -349,9 +439,9 @@ return -1; } /* 16 and 15 bpp is reported as 16 bpp */ - txdata->bpp = fb_vinfo.bits_per_pixel; - if (txdata->bpp == 16) - txdata->bpp = fb_vinfo.red.length + fb_vinfo.green.length + fb_vinfo.blue.length; + //txdata->bpp = fb_vinfo.bits_per_pixel; + //if (txdata->bpp == 16) + // txdata->bpp = fb_vinfo.red.length + fb_vinfo.green.length + fb_vinfo.blue.length; /* Adjust centering */ data->bounded_width = window->w < fb_vinfo.xres ? 
window->w : fb_vinfo.xres; @@ -372,7 +462,8 @@ devdata->fb_parms->in_line_stride = dstrect->w * /*txdata->bpp / 8*/4; devdata->fb_parms->bounded_input_height = data->bounded_height; devdata->fb_parms->bounded_input_width = data->bounded_width; - devdata->fb_parms->fb_pixel_size = txdata->bpp / 8; + //devdata->fb_parms->fb_pixel_size = txdata->bpp / 8; + devdata->fb_parms->fb_pixel_size = SDL_BYTESPERPIXEL(texture->format); deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", devdata->fb_thread_data->argp); @@ -438,6 +529,16 @@ SDL_FreeSurface(data->screens[i]); } } + + /* Shutdown SPE and related resources */ + if (data->converter_parms) { + free((void *)data->converter_parms); + } + if (data->converter_thread_data) { + SPE_Shutdown(data->converter_thread_data); + free((void *)data->converter_thread_data); + } + SDL_free(data); } SDL_free(renderer);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/ps3/SDL_ps3spe.c Wed Jun 10 09:15:33 2009 +0000 @@ -0,0 +1,167 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2009 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" +#include "SDL_ps3spe_c.h" + +#include "SDL_ps3video.h" +#include "SDL_ps3render_c.h" + + +/* Start the SPE thread */ +int SPE_Start(spu_data_t * spe_data) +{ + deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name); + if (!(spe_data->booted)) + SPE_Boot(spe_data); + + /* To allow re-running of context, spe_ctx_entry has to be set before each call */ + spe_data->entry = SPE_DEFAULT_ENTRY; + spe_data->error_code = 0; + + /* Create SPE thread and run */ + deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name); + if (pthread_create + (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) { + deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name); + SDL_SetError("[PS3->SPU] Could not create pthread for spe"); + return -1; + } + + if (spe_data->keepalive) + SPE_WaitForMsg(spe_data, SPU_READY); +} + +/* Stop the SPE thread */ +int SPE_Stop(spu_data_t * spe_data) +{ + deprintf(2, "[PS3->SPU] Stop 
SPE: %s\n", spe_data->program_name); + /* Wait for SPE thread to complete */ + deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name); + if (pthread_join(spe_data->thread, NULL)) { + deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name); + SDL_SetError("[PS3->SPU] Failed joining the thread"); + return -1; + } + + return 0; +} + +/* Create SPE context and load program */ +int SPE_Boot(spu_data_t * spe_data) +{ + /* Create SPE context */ + deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name); + spe_data->ctx = spe_context_create(0, NULL); + if (spe_data->ctx == NULL) { + deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name); + SDL_SetError("[PS3->SPU] Failed creating SPE context"); + return -1; + } + + /* Load SPE object into SPE local store */ + deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name); + if (spe_program_load(spe_data->ctx, &spe_data->program)) { + deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name); + SDL_SetError + ("[PS3->SPU] Failed loading program into SPE context"); + return -1; + } + spe_data->booted = 1; + deprintf(2, "[PS3->SPU] SPE boot successful\n"); + + return 0; +} + +/* (Stop and) shutdown the SPE */ +int SPE_Shutdown(spu_data_t * spe_data) +{ + if (spe_data->keepalive && spe_data->booted) { + SPE_SendMsg(spe_data, SPU_EXIT); + SPE_Stop(spe_data); + } + + /* Destroy SPE context */ + deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name); + if (spe_context_destroy(spe_data->ctx)) { + deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name); + SDL_SetError("[PS3->SPU] Failed destroying context"); + return -1; + } + deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name); + return 0; +} + +/* Send message to the SPE via mailboxe */ +int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg) +{ + 
deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name); + /* Send one message, block until message was sent */ + unsigned int spe_in_mbox_msgs[1]; + spe_in_mbox_msgs[0] = msg; + int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING); + + if (1 > in_mbox_write) { + deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name); + SDL_SetError("[PS3->SPU] No message could be written"); + return -1; + } + return 0; +} + + +/* Read 1 message from SPE, block until at least 1 message was received */ +int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg) +{ + deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name); + unsigned int out_messages[1]; + while (!spe_out_mbox_status(spe_data->ctx)); + int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1); + deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]); + if (out_messages[0] == msg) + return 0; + else + return -1; +} + +/* Re-runnable invocation of the spe_context_run call */ +void SPE_RunContext(void *thread_argp) +{ + /* argp is the pointer to argument to be passed to the SPE program */ + spu_data_t *args = (spu_data_t *) thread_argp; + deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp); + + /* Run it.. */ + deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name); + if (spe_context_run + (args->ctx, &args->entry, 0, (void *)args->argp, NULL, + NULL) < 0) { + deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name); + SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name); + exit(1); + } + + pthread_exit(NULL); +} + +/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/ps3/SDL_ps3spe_c.h Wed Jun 10 09:15:33 2009 +0000 @@ -0,0 +1,55 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2009 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "spulibs/spu_common.h" + +#include <libspe2.h> + +#ifndef _SDL_ps3spe_h +#define _SDL_ps3spe_h + +/* SPU thread data */ +typedef struct spu_data { + spe_context_ptr_t ctx; + spe_program_handle_t program; + pthread_t thread; + char * program_name; + unsigned int booted; + unsigned int keepalive; + unsigned int entry; + int error_code; + void * argp; +} spu_data_t; + +/* SPU specific functions */ +int SPE_Start(spu_data_t * spe_data); +int SPE_Stop(spu_data_t * spe_data); +int SPE_Boot(spu_data_t * spe_data); +int SPE_Shutdown(spu_data_t * spe_data); +int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg); +int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg); +void SPE_RunContext(void *thread_argp); + +#endif /* _SDL_ps3spe_h */ + +/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3video.c Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/ps3/SDL_ps3video.c Wed Jun 10 09:15:33 2009 +0000 @@ -33,16 +33,15 @@ #include "../SDL_sysvideo.h" #include "../SDL_pixels_c.h" #include "../../events/SDL_events_c.h" -#include "spulibs/spu_common.h" #include "SDL_ps3video.h" +#include "SDL_ps3spe_c.h" #include "SDL_ps3events_c.h" #include "SDL_ps3render_c.h" #include <fcntl.h> #include <linux/fb.h> #include <asm/ps3fb.h> -#include <libspe2.h> #include <sys/mman.h> #define PS3VID_DRIVER_NAME "ps3" @@ -52,15 +51,6 @@ static int PS3_SetDisplayMode(_THIS, SDL_DisplayMode * mode); static void PS3_VideoQuit(_THIS); -/* SPU specific functions */ -int SPE_Start(_THIS, spu_data_t * spe_data); -int SPE_Stop(_THIS, spu_data_t * spe_data); -int SPE_Boot(_THIS, spu_data_t * spe_data); -int SPE_Shutdown(_THIS, spu_data_t * spe_data); -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg); -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg); -void SPE_RunContext(void *thread_argp); - /* Stores the SPE executable name of fb_writer_spu */ extern spe_program_handle_t fb_writer_spu; @@ -167,7 +157,7 @@ data->fb_thread_data->keepalive = 1; data->fb_thread_data->booted = 0; - SPE_Start(_this, data->fb_thread_data); + SPE_Start(data->fb_thread_data); /* Open the device */ data->fbdev = open(PS3DEV, O_RDWR); @@ -198,7 +188,7 @@ SDL_SetError("[PS3] Can't mmap for %s", PS3DEV); return (0); } else { - //current->flags |= SDL_DOUBLEBUF; + /* Enable double buffering */ } /* Blank screen */ @@ -220,6 +210,8 @@ { deprintf(1, "PS3_VideoQuit()\n"); SDL_VideoData *data = (SDL_VideoData *) _this->driverdata; + + /* Unmap framebuffer */ if (data->frame_buffer) { struct fb_fix_screeninfo fb_finfo; if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo) != -1) { @@ -228,154 +220,21 @@ } } + /* Shutdown SPE and related resources */ if (data->fb_parms) free((void *)data->fb_parms); if (data->fb_thread_data) { - SPE_Shutdown(_this, data->fb_thread_data); + 
SPE_Shutdown(data->fb_thread_data); free((void *)data->fb_thread_data); } + + /* Close device */ + if (data->fbdev > 0) { + /* Give control of frame buffer back to kernel */ + ioctl(data->fbdev, PS3FB_IOCTL_OFF, 0); + close(data->fbdev); + data->fbdev = -1; + } } - -/* - * SPE handling - */ - -/* Start the SPE thread */ -int SPE_Start(_THIS, spu_data_t * spe_data) -{ - deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name); - if (!(spe_data->booted)) - SPE_Boot(_this, spe_data); - - /* To allow re-running of context, spe_ctx_entry has to be set before each call */ - spe_data->entry = SPE_DEFAULT_ENTRY; - spe_data->error_code = 0; - - /* Create SPE thread and run */ - deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name); - if (pthread_create - (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) { - deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Could not create pthread for spe"); - return -1; - } - - if (spe_data->keepalive) - SPE_WaitForMsg(spe_data, SPU_READY); -} - - -/* Stop the SPE thread */ -int SPE_Stop(_THIS, spu_data_t * spe_data) -{ - deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name); - /* Wait for SPE thread to complete */ - deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name); - if (pthread_join(spe_data->thread, NULL)) { - deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed joining the thread"); - return -1; - } - - return 0; -} - -/* Create SPE context and load program */ -int SPE_Boot(_THIS, spu_data_t * spe_data) -{ - /* Create SPE context */ - deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name); - spe_data->ctx = spe_context_create(0, NULL); - if (spe_data->ctx == NULL) { - deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed creating 
SPE context"); - return -1; - } - - /* Load SPE object into SPE local store */ - deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name); - if (spe_program_load(spe_data->ctx, &spe_data->program)) { - deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name); - SDL_SetError - ("[PS3->SPU] Failed loading program into SPE context"); - return -1; - } - spe_data->booted = 1; - deprintf(2, "[PS3->SPU] SPE boot successful\n"); - - return 0; -} - -/* (Stop and) shutdown the SPE */ -int SPE_Shutdown(_THIS, spu_data_t * spe_data) -{ - if (spe_data->keepalive && spe_data->booted) { - SPE_SendMsg(spe_data, SPU_EXIT); - SPE_Stop(_this, spe_data); - } - - /* Destroy SPE context */ - deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name); - if (spe_context_destroy(spe_data->ctx)) { - deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed destroying context"); - return -1; - } - deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name); - return 0; -} - -/* Send message to the SPE via mailboxe */ -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg) -{ - deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name); - /* Send one message, block until message was sent */ - unsigned int spe_in_mbox_msgs[1]; - spe_in_mbox_msgs[0] = msg; - int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING); - - if (1 > in_mbox_write) { - deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] No message could be written"); - return -1; - } - return 0; -} - - -/* Read 1 message from SPE, block until at least 1 message was received */ -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg) -{ - deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name); - unsigned int out_messages[1]; - 
while (!spe_out_mbox_status(spe_data->ctx)); - int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1); - deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]); - if (out_messages[0] == msg) - return 0; - else - return -1; -} - -/* Re-runnable invocation of the spe_context_run call */ -void SPE_RunContext(void *thread_argp) -{ - /* argp is the pointer to argument to be passed to the SPE program */ - spu_data_t *args = (spu_data_t *) thread_argp; - deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp); - - /* Run it.. */ - deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name); - if (spe_context_run - (args->ctx, &args->entry, 0, (void *)args->argp, NULL, - NULL) < 0) { - deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name); - SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name); - exit(1); - } - - pthread_exit(NULL); -} - /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3video.h Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/ps3/SDL_ps3video.h Wed Jun 10 09:15:33 2009 +0000 @@ -21,12 +21,11 @@ */ #include "SDL_config.h" -#include <libspe2.h> - #ifndef _SDL_ps3video_h #define _SDL_ps3video_h #include "../SDL_sysvideo.h" +#include "SDL_ps3spe_c.h" /* Debugging * 0: No debug messages @@ -53,19 +52,6 @@ /* Default framebuffer device on PS3 */ #define PS3DEV "/dev/fb0" -/* SPU thread data */ -typedef struct spu_data { - spe_context_ptr_t ctx; - spe_program_handle_t program; - pthread_t thread; - char * program_name; - unsigned int booted; - unsigned int keepalive; - unsigned int entry; - int error_code; - void * argp; -} spu_data_t; - /* Private display data */ typedef struct SDL_VideoData {
--- a/src/video/ps3/spulibs/Makefile Sat Jun 06 06:40:23 2009 +0000 +++ b/src/video/ps3/spulibs/Makefile Wed Jun 10 09:15:33 2009 +0000 @@ -15,8 +15,8 @@ PREFIX=/usr/lib -all: libfb_writer_spu.a libfb_writer_spu.so -# libyuv2rgb_spu.so libyuv2rgb_spu.a \ +all: libfb_writer_spu.a libfb_writer_spu.so \ + libyuv2rgb_spu.so libyuv2rgb_spu.a # libbilin_scaler_spu.so libbilin_scaler_spu.a @@ -55,24 +55,24 @@ libbilin_scaler_spu.so: bilin_scaler_spu-embed.o $(PPU_LD) -o libbilin_scaler_spu.so -shared -soname=libbilin_scaler_spu.so bilin_scaler_spu-embed.o -install: libfb_writer_spu.a libfb_writer_spu.so -# libyuv2rgb_spu.so libyuv2rgb_spu.a \ +install: libfb_writer_spu.a libfb_writer_spu.so \ + libyuv2rgb_spu.so libyuv2rgb_spu.a # libbilin_scaler_spu.so libbilin_scaler_spu.a $(INSTALL) -c -m 0755 libfb_writer_spu.so $(PREFIX)/. $(INSTALL) -c -m 0655 libfb_writer_spu.a $(PREFIX)/. -# $(INSTALL) -c -m 0755 libyuv2rgb_spu.so $(PREFIX)/. -# $(INSTALL) -c -m 0655 libyuv2rgb_spu.a $(PREFIX)/. + $(INSTALL) -c -m 0755 libyuv2rgb_spu.so $(PREFIX)/. + $(INSTALL) -c -m 0655 libyuv2rgb_spu.a $(PREFIX)/. # $(INSTALL) -c -m 0755 libbilin_scaler_spu.so $(PREFIX)/. # $(INSTALL) -c -m 0655 libbilin_scaler_spu.a $(PREFIX)/. -uninstall: $(PREFIX)/libfb_writer_spu.so $(PREFIX)/libfb_writer_spu.a -# $(PREFIX)/libyuv2rgb_spu.so $(PREFIX)/libyuv2rgb_spu.a \ +uninstall: $(PREFIX)/libfb_writer_spu.so $(PREFIX)/libfb_writer_spu.a \ + $(PREFIX)/libyuv2rgb_spu.so $(PREFIX)/libyuv2rgb_spu.a # $(PREFIX)/libbilin_scaler_spu.so $(PREFIX)/libbilin_scaler_spu.a rm -f $(PREFIX)/libfb_writer_spu.a rm -f $(PREFIX)/libfb_writer_spu.so -# rm -f $(PREFIX)/libyuv2rgb_spu.so -# rm -f $(PREFIX)/libyuv2rgb_spu.a + rm -f $(PREFIX)/libyuv2rgb_spu.so + rm -f $(PREFIX)/libyuv2rgb_spu.a # rm -f $(PREFIX)/libbilin_scaler_spu.so # rm -f $(PREFIX)/libbilin_scaler_spu.a
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/ps3/spulibs/yuv2rgb_converter.c Wed Jun 10 09:15:33 2009 +0000 @@ -0,0 +1,629 @@ +/* + * SDL - Simple DirectMedia Layer + * CELL BE Support for PS3 Framebuffer + * Copyright (C) 2008, 2009 International Business Machines Corporation + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + * + * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> + * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> + * SPE code based on research by: + * Rene Becker + * Thimo Emmerich + */ + +#include "spu_common.h" + +#include <spu_intrinsics.h> +#include <spu_mfcio.h> + +// Debugging +//#define DEBUG + +#ifdef DEBUG +#define deprintf(fmt, args... ) \ + fprintf( stdout, fmt, ##args ); \ + fflush( stdout ); +#else +#define deprintf( fmt, args... 
) +#endif + +struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128))); + +/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored + * there might be the need to retrieve misaligned data, adjust + * incoming v and u plane to be able to handle this (add 128) + */ +unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128))); +unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); +unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); + +/* A maximum of 4 lines BGRA are stored, 4 byte per pixel */ +unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128))); + +/* some vectors needed by the float to int conversion */ +static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; +static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; + +void yuv_to_rgb_w16(); +void yuv_to_rgb_w32(); + +void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width); +void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width); + + +int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused))) +{ + deprintf("[SPU] yuv2rgb_spu is up... 
(on SPE #%llu)\n", spe_id); + uint32_t ea_mfc, mbox; + // send ready message + spu_write_out_mbox(SPU_READY); + + while (1) { + /* Check mailbox */ + mbox = spu_read_in_mbox(); + deprintf("[SPU] Message is %u\n", mbox); + switch (mbox) { + case SPU_EXIT: + deprintf("[SPU] fb_writer goes down...\n"); + return 0; + case SPU_START: + break; + default: + deprintf("[SPU] Cannot handle message\n"); + continue; + } + + /* Tag Manager setup */ + unsigned int tag_id; + tag_id = mfc_multi_tag_reserve(1); + if (tag_id == MFC_TAG_INVALID) { + deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n"); + return 0; + } + + /* DMA transfer for the input parameters */ + ea_mfc = spu_read_in_mbox(); + deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc); + spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD); + DMA_WAIT_TAG(tag_id); + + /* There are alignment issues that involve handling of special cases + * a width of 32 results in a width of 16 in the chrominance + * --> choose the proper handling to optimize the performance + */ + deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height); + if (parms_converter.src_pixel_width & 0x1f) { + deprintf("[SPU] Using yuv_to_rgb_w16\n"); + yuv_to_rgb_w16(); + } else { + deprintf("[SPU] Using yuv_to_rgb_w32\n"); + yuv_to_rgb_w32(); + } + + mfc_multi_tag_release(tag_id, 1); + deprintf("[SPU] yuv2rgb_spu... 
done!\n"); + /* Send FIN message */ + spu_write_out_mbox(SPU_FIN); + } + + return 0; +} + + +/* + * float_to_char() + * + * converts a float to a character using saturated + * arithmetic + * + * @param s float for conversion + * @returns converted character + */ +inline static unsigned char float_to_char(float s) { + vector float vec_s = spu_splats(s); + vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); + vec_s = spu_sel(vec_s, vec_0_1, select_1); + + vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); + vec_s = spu_sel(vec_s, vec_255, select_2); + return (unsigned char) spu_extract(vec_s,0); +} + + +/* + * vfloat_to_vuint() + * + * converts a float vector to an unsinged int vector using saturated + * arithmetic + * + * @param vec_s float vector for conversion + * @returns converted unsigned int vector + */ +inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { + vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); + vec_s = spu_sel(vec_s, vec_0_1, select_1); + + vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); + vec_s = spu_sel(vec_s, vec_255, select_2); + return spu_convtu(vec_s,0); +} + + +void yuv_to_rgb_w16() { + // Pixel dimensions of the picture + uint32_t width, height; + + // Extract parameters + width = parms_converter.src_pixel_width; + height = parms_converter.src_pixel_height; + + // Plane data management + // Y + unsigned char* ram_addr_y = parms_converter.y_plane; + // V + unsigned char* ram_addr_v = parms_converter.v_plane; + // U + unsigned char* ram_addr_u = parms_converter.u_plane; + + // BGRA + unsigned char* ram_addr_bgra = parms_converter.dstBuffer; + + // Strides + unsigned int stride_y = width; + unsigned int stride_vu = width>>1; + + // Buffer management + unsigned int buf_idx = 0; + unsigned int size_4lines_y = stride_y<<2; + unsigned int size_2lines_y = stride_y<<1; + unsigned int size_2lines_vu = stride_vu<<1; + + // 2*width*4byte_per_pixel + unsigned int size_2lines_bgra = width<<3; + + + // 
start double-buffered processing + // 4 lines y + spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); + + // 2 lines v + spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); + + // 2 lines u + spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); + + // Wait for these transfers to be completed + DMA_WAIT_TAG((RETR_BUF + buf_idx)); + + unsigned int i; + for(i=0; i<(height>>2)-1; i++) { + + buf_idx^=1; + + // 4 lines y + spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); + + // 2 lines v + spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); + + // 2 lines u + spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); + + DMA_WAIT_TAG((RETR_BUF + buf_idx)); + + buf_idx^=1; + + + // Convert YUV to BGRA, store it back (first two lines) + yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); + + // Next two lines + yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, + v_plane[buf_idx] + stride_vu, + u_plane[buf_idx] + stride_vu, + bgra + size_2lines_bgra, + width); + + // Wait for previous storing transfer to be completed + DMA_WAIT_TAG(STR_BUF); + + // Store converted lines in two steps->max transfer size 16384 + spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + + // Move 4 lines + ram_addr_y += size_4lines_y; + ram_addr_v += size_2lines_vu; + ram_addr_u += size_2lines_vu; + + buf_idx^=1; + } + + // Convert YUV to BGRA, store it back (first two lines) + 
yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); + + // Next two lines + yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, + v_plane[buf_idx] + stride_vu, + u_plane[buf_idx] + stride_vu, + bgra + size_2lines_bgra, + width); + + // Wait for previous storing transfer to be completed + DMA_WAIT_TAG(STR_BUF); + spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + + // wait for previous storing transfer to be completed + DMA_WAIT_TAG(STR_BUF); + +} + + +void yuv_to_rgb_w32() { + // Pixel dimensions of the picture + uint32_t width, height; + + // Extract parameters + width = parms_converter.src_pixel_width; + height = parms_converter.src_pixel_height; + + // Plane data management + // Y + unsigned char* ram_addr_y = parms_converter.y_plane; + // V + unsigned char* ram_addr_v = parms_converter.v_plane; + // U + unsigned char* ram_addr_u = parms_converter.u_plane; + + // BGRA + unsigned char* ram_addr_bgra = parms_converter.dstBuffer; + + // Strides + unsigned int stride_y = width; + unsigned int stride_vu = width>>1; + + // Buffer management + unsigned int buf_idx = 0; + unsigned int size_4lines_y = stride_y<<2; + unsigned int size_2lines_y = stride_y<<1; + unsigned int size_2lines_vu = stride_vu<<1; + + // 2*width*4byte_per_pixel + unsigned int size_2lines_bgra = width<<3; + + // start double-buffered processing + // 4 lines y + spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); + // 2 lines v + spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); + // 2 lines u + spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); + + // Wait for these transfers to be completed + 
DMA_WAIT_TAG((RETR_BUF + buf_idx)); + + unsigned int i; + for(i=0; i < (height>>2)-1; i++) { + buf_idx^=1; + // 4 lines y + spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); + deprintf("4lines = %d\n", size_4lines_y); + // 2 lines v + spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); + deprintf("2lines = %d\n", size_2lines_vu); + // 2 lines u + spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); + deprintf("2lines = %d\n", size_2lines_vu); + + DMA_WAIT_TAG((RETR_BUF + buf_idx)); + + buf_idx^=1; + + // Convert YUV to BGRA, store it back (first two lines) + yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); + + // Next two lines + yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, + v_plane[buf_idx] + stride_vu, + u_plane[buf_idx] + stride_vu, + bgra + size_2lines_bgra, + width); + + // Wait for previous storing transfer to be completed + DMA_WAIT_TAG(STR_BUF); + + // Store converted lines in two steps->max transfer size 16384 + spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + + // Move 4 lines + ram_addr_y += size_4lines_y; + ram_addr_v += size_2lines_vu; + ram_addr_u += size_2lines_vu; + + buf_idx^=1; + } + + // Convert YUV to BGRA, store it back (first two lines) + yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); + + // Next two lines + yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, + v_plane[buf_idx] + stride_vu, + u_plane[buf_idx] + stride_vu, + bgra + size_2lines_bgra, + width); + + // Wait for previous storing transfer to be completed + 
DMA_WAIT_TAG(STR_BUF); + spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + ram_addr_bgra += size_2lines_bgra; + spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); + + // Wait for previous storing transfer to be completed + DMA_WAIT_TAG(STR_BUF); +} + + +/* Some vectors needed by the yuv 2 rgb conversion algorithm */ +const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f }; +const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 }; +const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 }; +const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B }; +const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F }; + +const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f}; +const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f}; +const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f}; +const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f}; + +const vector unsigned int vec_alpha = { 255 << 24, 255 << 24, 255 << 24, 255 << 24 }; + +const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 }; +const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F }; + + +/* + * yuv_to_rgb_w16() + * + * processes to line of yuv-input, width has to be a multiple of 16 
+ * two lines of yuv are taken as input + * + * @param y_addr address of the y plane in local store + * @param v_addr address of the v plane in local store + * @param u_addr address of the u plane in local store + * @param bgra_addr_ address of the bgra output buffer + * @param width the width in pixel + */ +void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) { + // each pixel is stored as an integer + unsigned int* bgra_addr = (unsigned int*) bgra_addr_; + + unsigned int x; + for(x = 0; x < width; x+=2) { + // Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt + const unsigned char Y_1 = *(y_addr + x); + const unsigned char Y_2 = *(y_addr + x + 1); + const unsigned char Y_3 = *(y_addr + x + width); + const unsigned char Y_4 = *(y_addr + x + width + 1); + const unsigned char U = *(u_addr + (x >> 1)); + const unsigned char V = *(v_addr + (x >> 1)); + + float V_minus_128 = (float)((float)V - 128.0f); + float U_minus_128 = (float)((float)U - 128.0f); + + float R_precalculate = 1.403f * V_minus_128; + float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128); + float B_precalculate = 1.773f * U_minus_128; + + const unsigned char R_1 = float_to_char((Y_1 + R_precalculate)); + const unsigned char R_2 = float_to_char((Y_2 + R_precalculate)); + const unsigned char R_3 = float_to_char((Y_3 + R_precalculate)); + const unsigned char R_4 = float_to_char((Y_4 + R_precalculate)); + const unsigned char G_1 = float_to_char((Y_1 + G_precalculate)); + const unsigned char G_2 = float_to_char((Y_2 + G_precalculate)); + const unsigned char G_3 = float_to_char((Y_3 + G_precalculate)); + const unsigned char G_4 = float_to_char((Y_4 + G_precalculate)); + const unsigned char B_1 = float_to_char((Y_1 + B_precalculate)); + const unsigned char B_2 = float_to_char((Y_2 + B_precalculate)); + const unsigned char B_3 = float_to_char((Y_3 + 
B_precalculate)); + const unsigned char B_4 = float_to_char((Y_4 + B_precalculate)); + + *(bgra_addr + x) = (B_1 << 0)| (G_1 << 8) | (R_1 << 16) | (255 << 24); + *(bgra_addr + x + 1) = (B_2 << 0)| (G_2 << 8) | (R_2 << 16) | (255 << 24); + *(bgra_addr + x + width) = (B_3 << 0)| (G_3 << 8) | (R_3 << 16) | (255 << 24); + *(bgra_addr + x + width + 1) = (B_4 << 0)| (G_4 << 8) | (R_4 << 16) | (255 << 24); + } +} + + +/* + * yuv_to_rgb_w32() + * + * processes to line of yuv-input, width has to be a multiple of 32 + * two lines of yuv are taken as input + * + * @param y_addr address of the y plane in local store + * @param v_addr address of the v plane in local store + * @param u_addr address of the u plane in local store + * @param bgra_addr_ address of the bgra output buffer + * @param width the width in pixel + */ +void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) { + // each pixel is stored as an integer + unsigned int* bgra_addr = (unsigned int*) bgra_addr_; + + unsigned int x; + for(x = 0; x < width; x+=32) { + // Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt + + const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x)); + const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16)); + const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width)); + const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16)); + const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1))); + const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1))); + + const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128); + const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, 
vec_char2int_second), 0),vec_minus_128); + const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128); + const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128); + + const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128); + const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128); + const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128); + const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128); + + vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0); + vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0); + vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0); + vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0); + vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0); + vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0); + vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0); + vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0); + vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0); + vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, 
vchar_Y_3, vec_char2int_second), 0); + vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0); + vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0); + vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0); + vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0); + vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0); + vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0); + + const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1); + const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2); + const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3); + const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4); + + const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper); + const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower); + const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper); + const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower); + const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper); + const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower); + const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper); + const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower); + + + const vector float G1a_precalculate = 
spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff)); + const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff)); + const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff)); + const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff)); + + const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper); + const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower); + const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper); + const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower); + const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper); + const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower); + const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper); + const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower); + + + const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1); + const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2); + const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3); + const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4); + + const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper); + const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower); + const vector float B3_precalculate = 
spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper); + const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower); + const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper); + const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower); + const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper); + const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower); + + + const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate)); + const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate)); + const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate)); + const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate)); + const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate)); + const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate)); + const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate)); + const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate)); + const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate)); + const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate)); + const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate)); + const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate)); + const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate)); + const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate)); + const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate)); + const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate)); + + const vector 
unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate)); + const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate)); + const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate)); + const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate)); + const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate)); + const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate)); + const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate)); + const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate)); + const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate)); + const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate)); + const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate)); + const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate)); + const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate)); + const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate)); + const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate)); + const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate)); + + const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate)); + const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate)); + const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate)); + const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate)); + const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate)); + const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate)); + const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate)); + const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate)); + const 
vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate)); + const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate)); + const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate)); + const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate)); + const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate)); + const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate)); + const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate)); + const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate)); + + *((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1))); + *((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1))); + *((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1))); + *((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1))); + *((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1))); + *((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1))); + *((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1))); + *((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1))); + *((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 
2),spu_slqwbyte(G_10, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1))); + *((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1))); + } +} +