# HG changeset patch # User Sam Lantinga # Date 1295504740 28800 # Node ID 187d7d4463067bdcb8f01e4bd275c0449d91593e # Parent edaf3e364a0593e2bb6288eef58115d21b2d50cb PS3 Linux is no more... diff -r edaf3e364a05 -r 187d7d446306 Makefile.in --- a/Makefile.in Wed Jan 19 22:21:31 2011 -0800 +++ b/Makefile.in Wed Jan 19 22:25:40 2011 -0800 @@ -37,11 +37,6 @@ SDLMAIN_TARGET = libSDLmain.a SDLMAIN_OBJECTS = @SDLMAIN_OBJECTS@ -# PS3 SPU programs -SPU_GCC = @SPU_GCC@ -EMBEDSPU = @EMBEDSPU@ -#include $(srcdir)/src/video/ps3/spulibs/Makefile - DIST = acinclude Android.mk autogen.sh Borland.html Borland.zip BUGS build-scripts configure configure.in COPYING CREDITS include INSTALL Makefile.minimal Makefile.in README* sdl-config.in sdl.m4 sdl.pc.in SDL.spec SDL.spec.in src test TODO VisualC.html VisualC VisualCE Watcom-Win32.zip WhatsNew Xcode Xcode-iPhoneOS HDRS = \ diff -r edaf3e364a05 -r 187d7d446306 README.PS3 --- a/README.PS3 Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ - -SDL on Sony Playstation3 ------------------------- - -Installation: - First, you have to install the Cell SDK - - Download the Cell SDK installer RPM and ISO images to - a temporary directory such as /tmp/cellsdk. - - Mount the image: mount -o loop CellSDK-Devel-Fedora_3.1.0.0.0.iso /tmp/cellsdk - - Install the SDK installer: rpm -ivh cell-install-3.1.0-0.0.noarch.rpm - - Install the SDK: cd /opt/cell && ./cellsdk --iso /tmp/cellsdkiso install - - You'll than need to install the SPU-libs - - Run make ps3-libs && make ps3libs-install - - Finally, install SDL - - Go to SDL-1.2/ and build SDL like any other GNU style package. - e.g. - - Build the configure-script with ./autogen.sh - - Configure SDL for your needs: ./configure --enable-video-ps3 ... - - Build and install it: make && make install - - -Todo: - - Mouse & Keyboard support - - On SPU-side the current scaler and converter restrictions are: - - resolution has to be a multiple of 8 (will work on that) - - scaler/converter only supports the YV12 and IYUV format - - the scaler works only bilinear (lanzos would be nice) - - Optimize the SPU-program handling on the PPE side - - Integrate spumedia in SDL - -Have fun! - Dirk Herrendoerfer - diff -r edaf3e364a05 -r 187d7d446306 configure.in --- a/configure.in Wed Jan 19 22:21:31 2011 -0800 +++ b/configure.in Wed Jan 19 22:25:40 2011 -0800 @@ -1546,46 +1546,6 @@ fi } -dnl See if we're running on PlayStation 3 Cell hardware -CheckPS3() -{ - AC_ARG_ENABLE(video-ps3, - AC_HELP_STRING([--enable-video-ps3], [use PlayStation 3 Cell driver [[default=yes]]]), - , enable_video_ps3=yes) - if test x$enable_video = xyes -a x$enable_video_ps3 = xyes; then - video_ps3=no - AC_CHECK_HEADER([linux/fb.h]) - AC_CHECK_HEADER([asm/ps3fb.h], [have_ps3fb_hdr=yes], [], - [#ifndef _LINUX_TYPES_H - #include - #endif]) - AC_CHECK_HEADER([libspe2.h], have_libspe2_hdr=yes) - AC_CHECK_LIB([spe2], spe_context_create, have_spe2_lib=yes) - - AC_CHECK_PROGS(SPU_GCC, [spu-gcc]) - AC_CHECK_PROGS(EMBEDSPU, [embedspu]) - - have_spu_libs=yes - AC_CHECK_LIB([fb_writer_spu], [main], [], [have_spu_libs=no]) - AC_CHECK_LIB([yuv2rgb_spu], [main], [], [have_spu_libs=no]) - AC_CHECK_LIB([bilin_scaler_spu], [main], [], [have_spu_libs=no]) - if test x$have_ps3fb_hdr = xyes -a x$have_libspe2_hdr = xyes -a x$have_spe2_lib = xyes -a "$SPU_GCC" -a "$EMBEDSPU"; then - AC_DEFINE(SDL_VIDEO_DRIVER_PS3) - video_ps3=yes - have_video=yes - SOURCES="$SOURCES $srcdir/src/video/ps3/*.c" - EXTRA_CFLAGS="$EXTRA_CFLAGS -I/opt/cell/sdk/usr/include" - EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu -lyuv2rgb_spu -lbilin_scaler_spu" - - if test x$have_spu_libs = xno; then - AC_MSG_WARN([ps3libs missing, please run make ps3libs]) - fi - fi - AC_MSG_CHECKING([for PlayStation 3 Cell support]) - AC_MSG_RESULT([$video_ps3]) - fi -} - dnl rcg04172001 Set up the Null video driver. CheckDummyVideo() { @@ -2245,7 +2205,6 @@ CheckX11 CheckDirectFB CheckFusionSound - CheckPS3 CheckOpenGLX11 CheckInputEvents CheckTslib diff -r edaf3e364a05 -r 187d7d446306 include/SDL_config.h.in --- a/include/SDL_config.h.in Wed Jan 19 22:21:31 2011 -0800 +++ b/include/SDL_config.h.in Wed Jan 19 22:25:40 2011 -0800 @@ -264,7 +264,6 @@ #undef SDL_VIDEO_DRIVER_NDS #undef SDL_VIDEO_DRIVER_PHOTON #undef SDL_VIDEO_DRIVER_QNXGF -#undef SDL_VIDEO_DRIVER_PS3 #undef SDL_VIDEO_DRIVER_RISCOS #undef SDL_VIDEO_DRIVER_WIN32 #undef SDL_VIDEO_DRIVER_X11 diff -r edaf3e364a05 -r 187d7d446306 src/video/SDL_sysvideo.h --- a/src/video/SDL_sysvideo.h Wed Jan 19 22:21:31 2011 -0800 +++ b/src/video/SDL_sysvideo.h Wed Jan 19 22:25:40 2011 -0800 @@ -411,9 +411,6 @@ #if SDL_VIDEO_DRIVER_DIRECTFB extern VideoBootStrap DirectFB_bootstrap; #endif -#if SDL_VIDEO_DRIVER_PS3 -extern VideoBootStrap PS3_bootstrap; -#endif #if SDL_VIDEO_DRIVER_WIN32 extern VideoBootStrap WIN32_bootstrap; #endif diff -r edaf3e364a05 -r 187d7d446306 src/video/SDL_video.c --- a/src/video/SDL_video.c Wed Jan 19 22:21:31 2011 -0800 +++ b/src/video/SDL_video.c Wed Jan 19 22:25:40 2011 -0800 @@ -65,9 +65,6 @@ #if SDL_VIDEO_DRIVER_DIRECTFB &DirectFB_bootstrap, #endif -#if SDL_VIDEO_DRIVER_PS3 - &PS3_bootstrap, -#endif #if SDL_VIDEO_DRIVER_WIN32 &WIN32_bootstrap, #endif diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3events.c --- a/src/video/ps3/SDL_ps3events.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "../../events/SDL_sysevents.h" -#include "../../events/SDL_events_c.h" - -#include "SDL_ps3video.h" -#include "SDL_ps3events_c.h" - -void -PS3_PumpEvents(_THIS) -{ - /* do nothing. */ -} - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3events_c.h --- a/src/video/ps3/SDL_ps3events_c.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_ps3video.h" - -extern void PS3_PumpEvents(_THIS); - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3modes.c --- a/src/video/ps3/SDL_ps3modes.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_ps3video.h" - -void -PS3_InitModes(_THIS) -{ - deprintf(1, "+PS3_InitModes()\n"); - SDL_VideoDisplay display; - SDL_VideoData *data = (SDL_VideoData *) _this->driverdata; - SDL_DisplayMode mode; - PS3_DisplayModeData *modedata; - unsigned long vid = 0; - - modedata = (PS3_DisplayModeData *) SDL_malloc(sizeof(*modedata)); - if (!modedata) { - return; - } - - /* Setting up the DisplayMode based on current settings */ - struct ps3fb_ioctl_res res; - if (ioctl(data->fbdev, PS3FB_IOCTL_SCREENINFO, &res)) { - SDL_SetError("Can't get PS3FB_IOCTL_SCREENINFO"); - } - mode.format = SDL_PIXELFORMAT_RGB888; - mode.refresh_rate = 0; - mode.w = res.xres; - mode.h = res.yres; - - /* Setting up driver specific mode data, - * Get the current ps3 specific videmode number */ - if (ioctl(data->fbdev, PS3FB_IOCTL_GETMODE, (unsigned long)&vid)) { - SDL_SetError("Can't get PS3FB_IOCTL_GETMODE"); - } - deprintf(2, "PS3FB_IOCTL_GETMODE = %u\n", vid); - modedata->mode = vid; - mode.driverdata = modedata; - - /* Set display's videomode and add it */ - SDL_zero(display); - display.desktop_mode = mode; - display.current_mode = mode; - - SDL_AddVideoDisplay(&display); - deprintf(1, "-PS3_InitModes()\n"); -} - -/* DisplayModes available on the PS3 */ -static SDL_DisplayMode ps3fb_modedb[] = { - /* VESA */ - {SDL_PIXELFORMAT_RGB888, 1280, 768, 0, NULL}, // WXGA - {SDL_PIXELFORMAT_RGB888, 1280, 1024, 0, NULL}, // SXGA - {SDL_PIXELFORMAT_RGB888, 1920, 1200, 0, NULL}, // WUXGA - /* Native resolutions (progressive, "fullscreen") */ - {SDL_PIXELFORMAT_RGB888, 720, 480, 0, NULL}, // 480p - {SDL_PIXELFORMAT_RGB888, 1280, 720, 0, NULL}, // 720p - {SDL_PIXELFORMAT_RGB888, 1920, 1080, 0, NULL} // 1080p -}; - -/* PS3 videomode number according to ps3fb_modedb */ -static PS3_DisplayModeData ps3fb_data[] = { - {11}, {12}, {13}, {130}, {131}, {133}, -}; - -void -PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display) -{ - deprintf(1, "+PS3_GetDisplayModes()\n"); - SDL_DisplayMode mode; - unsigned int nummodes; - - nummodes = sizeof(ps3fb_modedb) / sizeof(SDL_DisplayMode); - - int n; - for (n=0; ndriverdata; - PS3_DisplayModeData *dispdata = (PS3_DisplayModeData *) mode->driverdata; - - /* Set the new DisplayMode */ - deprintf(2, "Setting PS3FB_MODE to %u\n", dispdata->mode); - if (ioctl(data->fbdev, PS3FB_IOCTL_SETMODE, (unsigned long)&dispdata->mode)) { - deprintf(2, "Could not set PS3FB_MODE\n"); - SDL_SetError("Could not set PS3FB_MODE\n"); - return -1; - } - - deprintf(1, "-PS3_SetDisplayMode()\n"); - return 0; -} - -void -PS3_QuitModes(_THIS) -{ - deprintf(1, "+PS3_QuitModes()\n"); - - /* There was no mem allocated for driverdata */ - int i, j; - for (i = 0; i < SDL_GetNumVideoDisplays(); ++i) { - SDL_VideoDisplay *display = SDL_GetVideoDisplay(i); - for (j = display->num_display_modes; j--;) { - display->display_modes[j].driverdata = NULL; - } - } - - deprintf(1, "-PS3_QuitModes()\n"); -} - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3modes_c.h --- a/src/video/ps3/SDL_ps3modes_c.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#ifndef _SDL_ps3modes_h -#define _SDL_ps3modes_h - -extern void PS3_InitModes(_THIS); -extern void PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display); -extern int PS3_SetDisplayMode(_THIS, SDL_VideoDisplay * display, SDL_DisplayMode * mode); -extern void PS3_QuitModes(_THIS); - -#endif /* SDL_ps3modes_h */ - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3render.c --- a/src/video/ps3/SDL_ps3render.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,747 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_video.h" -#include "../SDL_sysvideo.h" -#include "../SDL_yuv_sw_c.h" -#include "../SDL_renderer_sw.h" - -#include "SDL_ps3video.h" -#include "SDL_ps3spe_c.h" - -#include -#include -#include -#include -#include -#include -#include - - -/* Stores the executable name */ -extern spe_program_handle_t yuv2rgb_spu; -extern spe_program_handle_t bilin_scaler_spu; - -/* SDL surface based renderer implementation */ -static SDL_Renderer *SDL_PS3_CreateRenderer(SDL_Window * window, - Uint32 flags); -static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer); -static int SDL_PS3_ActivateRenderer(SDL_Renderer * renderer); -static int SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y); -static int SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, - int x2, int y2); -static int SDL_PS3_RenderFill(SDL_Renderer * renderer, - const SDL_Rect * rect); -static int SDL_PS3_RenderCopy(SDL_Renderer * renderer, - SDL_Texture * texture, - const SDL_Rect * srcrect, - const SDL_Rect * dstrect); -static void SDL_PS3_RenderPresent(SDL_Renderer * renderer); -static void SDL_PS3_DestroyRenderer(SDL_Renderer * renderer); - -/* Texture */ -static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static int PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, void **pixels, int *pitch); -static int PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch); -static int PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, int markDirty, void **pixels, int *pitch); -static void PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture); - - -SDL_RenderDriver SDL_PS3_RenderDriver = { - SDL_PS3_CreateRenderer, - { - "ps3", - (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTVSYNC | - SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTDISCARD | - SDL_RENDERER_ACCELERATED), - (SDL_TEXTUREMODULATE_NONE), - (SDL_BLENDMODE_NONE), - /* We use bilinear scaling on the SPE for YV12 & IYUV - * (width and height % 8 = 0) */ - (SDL_SCALEMODE_SLOW) - } -}; - -typedef struct -{ - int current_screen; - SDL_Surface *screen; - SDL_VideoDisplay *display; - /* adress of the centered image in the framebuffer (double buffered) */ - uint8_t *center[2]; - - /* width of input (bounded by writeable width) */ - unsigned int bounded_width; - /* height of input (bounded by writeable height) */ - unsigned int bounded_height; - /* offset from the left side (used for centering) */ - unsigned int offset_left; - /* offset from the upper side (used for centering) */ - unsigned int offset_top; - /* width of screen which is writeable */ - unsigned int wr_width; - /* width of screen which is writeable */ - unsigned int wr_height; - /* size of a screen line: width * bpp/8 */ - unsigned int line_length; - - /* Is the kernels fb size bigger than ~12MB - * double buffering will work for 1080p */ - unsigned int double_buffering; - - /* SPE threading stuff */ - spu_data_t *converter_thread_data; - spu_data_t *scaler_thread_data; - - /* YUV converting transfer data */ - volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128))); - /* Scaler transfer data */ - volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128))); -} SDL_PS3_RenderData; - -typedef struct -{ - int pitch; - /* Image data */ - volatile void *pixels; - /* Use software renderer for not supported formats */ - SDL_SW_YUVTexture *yuv; -} PS3_TextureData; - -SDL_Renderer * -SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags) -{ - deprintf(1, "+SDL_PS3_CreateRenderer()\n"); - SDL_VideoDisplay *display = window->display; - SDL_DisplayMode *displayMode = &display->current_mode; - SDL_VideoData *devdata = display->device->driverdata; - SDL_Renderer *renderer; - SDL_PS3_RenderData *data; - struct ps3fb_ioctl_res res; - int i, n; - int bpp; - Uint32 Rmask, Gmask, Bmask, Amask; - - if (!SDL_PixelFormatEnumToMasks - (displayMode->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) { - SDL_SetError("Unknown display format"); - return NULL; - } - - renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer)); - if (!renderer) { - SDL_OutOfMemory(); - return NULL; - } - - data = (SDL_PS3_RenderData *) SDL_malloc(sizeof(*data)); - if (!data) { - SDL_PS3_DestroyRenderer(renderer); - SDL_OutOfMemory(); - return NULL; - } - SDL_zerop(data); - - renderer->CreateTexture = PS3_CreateTexture; - renderer->DestroyTexture = PS3_DestroyTexture; - renderer->QueryTexturePixels = PS3_QueryTexturePixels; - renderer->UpdateTexture = PS3_UpdateTexture; - renderer->LockTexture = PS3_LockTexture; - renderer->UnlockTexture = PS3_UnlockTexture; - renderer->ActivateRenderer = SDL_PS3_ActivateRenderer; - renderer->DisplayModeChanged = SDL_PS3_DisplayModeChanged; - renderer->RenderPoint = SDL_PS3_RenderPoint; - renderer->RenderLine = SDL_PS3_RenderLine; - renderer->RenderFill = SDL_PS3_RenderFill; - renderer->RenderCopy = SDL_PS3_RenderCopy; - renderer->RenderPresent = SDL_PS3_RenderPresent; - renderer->DestroyRenderer = SDL_PS3_DestroyRenderer; - renderer->info.name = SDL_PS3_RenderDriver.info.name; - renderer->info.flags = 0; - renderer->window = window; - renderer->driverdata = data; - - deprintf(1, "window->w = %u\n", window->w); - deprintf(1, "window->h = %u\n", window->h); - - data->double_buffering = 0; - - /* Get ps3 screeninfo */ - if (ioctl(devdata->fbdev, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) { - SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed"); - } - deprintf(2, "res.num_frames = %d\n", res.num_frames); - - /* Only use double buffering if enough fb memory is available */ - if (res.num_frames > 1) { - renderer->info.flags |= SDL_RENDERER_PRESENTFLIP2; - n = 2; - data->double_buffering = 1; - } else { - renderer->info.flags |= SDL_RENDERER_PRESENTCOPY; - n = 1; - } - - data->screen = - SDL_CreateRGBSurface(0, window->w, window->h, bpp, Rmask, Gmask, - Bmask, Amask); - if (!data->screen) { - SDL_PS3_DestroyRenderer(renderer); - return NULL; - } - /* Allocate aligned memory for pixels */ - SDL_free(data->screen->pixels); - data->screen->pixels = (void *)memalign(16, data->screen->h * data->screen->pitch); - if (!data->screen->pixels) { - SDL_FreeSurface(data->screen); - SDL_OutOfMemory(); - return NULL; - } - SDL_memset(data->screen->pixels, 0, data->screen->h * data->screen->pitch); - SDL_SetSurfacePalette(data->screen, display->palette); - - data->current_screen = 0; - - /* Create SPU parms structure */ - data->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t)); - data->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t)); - if (data->converter_parms == NULL || data->scaler_parms == NULL) { - SDL_PS3_DestroyRenderer(renderer); - SDL_OutOfMemory(); - return NULL; - } - - /* Set up the SPE threading data */ - data->converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); - data->scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); - if (data->converter_thread_data == NULL || data->scaler_thread_data == NULL) { - SDL_PS3_DestroyRenderer(renderer); - SDL_OutOfMemory(); - return NULL; - } - - /* Set up the SPE scaler (booted) */ - data->scaler_thread_data->program = bilin_scaler_spu; - data->scaler_thread_data->program_name = "bilin_scaler_spu"; - data->scaler_thread_data->keepalive = 0; - data->scaler_thread_data->booted = 0; - - /* Set up the SPE converter (always running) */ - data->converter_thread_data->program = yuv2rgb_spu; - data->converter_thread_data->program_name = "yuv2rgb_spu"; - data->converter_thread_data->keepalive = 1; - data->converter_thread_data->booted = 0; - - SPE_Start(data->converter_thread_data); - - deprintf(1, "-SDL_PS3_CreateRenderer()\n"); - return renderer; -} - -static int -SDL_PS3_ActivateRenderer(SDL_Renderer * renderer) -{ - deprintf(1, "+PS3_ActivateRenderer()\n"); - SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata; - - deprintf(1, "-PS3_ActivateRenderer()\n"); - return 0; -} - -static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer) { - deprintf(1, "+PS3_DisplayModeChanged()\n"); - SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata; - - deprintf(1, "-PS3_DisplayModeChanged()\n"); - return 0; -} - -static int -PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) { - deprintf(1, "+PS3_CreateTexture()\n"); - PS3_TextureData *data; - data = (PS3_TextureData *) SDL_calloc(1, sizeof(*data)); - if (!data) { - SDL_OutOfMemory(); - return -1; - } - data->pitch = (texture->w * SDL_BYTESPERPIXEL(texture->format)); - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - /* Use SDLs SW_YUVTexture */ - data->yuv = - SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h); - if (!data->yuv) { - SDL_OutOfMemory(); - return -1; - } - /* but align pixels */ - SDL_free(data->yuv->pixels); - data->yuv->pixels = (Uint8 *)memalign(16, texture->w * texture->h * 2); - if (!data->yuv->pixels) { - SDL_OutOfMemory(); - return -1; - } - - /* Redo: Find the pitch and offset values for the overlay */ - SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) data->yuv; - switch (texture->format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - swdata->pitches[0] = texture->w; - swdata->pitches[1] = swdata->pitches[0] / 2; - swdata->pitches[2] = swdata->pitches[0] / 2; - swdata->planes[0] = swdata->pixels; - swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * texture->h; - swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * texture->h / 2; - break; - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - swdata->pitches[0] = texture->w * 2; - swdata->planes[0] = swdata->pixels; - break; - default: - /* We should never get here (caught above) */ - break; - } - } else { - data->pixels = NULL; - data->pixels = SDL_malloc(texture->h * data->pitch); - if (!data->pixels) { - PS3_DestroyTexture(renderer, texture); - SDL_OutOfMemory(); - return -1; - } - } - texture->driverdata = data; - deprintf(1, "-PS3_CreateTexture()\n"); - return 0; -} - -static int -PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch) -{ - deprintf(1, "+PS3_QueryTexturePixels()\n"); - PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch); - } else { - *pixels = (void *)data->pixels; - *pitch = data->pitch; - } - - deprintf(1, "-PS3_QueryTexturePixels()\n"); - return 0; -} - -static int -PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, const void *pixels, int pitch) -{ - deprintf(1, "+PS3_UpdateTexture()\n"); - PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - return SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch); - } else { - Uint8 *src, *dst; - int row; - size_t length; - Uint8 *dstpixels; - - src = (Uint8 *) pixels; - dst = (Uint8 *) dstpixels + rect->y * data->pitch + rect->x - * SDL_BYTESPERPIXEL(texture->format); - length = rect->w * SDL_BYTESPERPIXEL(texture->format); - /* Update the texture */ - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += data->pitch; - } - } - deprintf(1, "-PS3_UpdateTexture()\n"); - return 0; -} - -static int -PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch) -{ - deprintf(1, "+PS3_LockTexture()\n"); - PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - deprintf(1, "-PS3_LockTexture()\n"); - return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels, pitch); - } else { - *pixels = - (void *) ((Uint8 *) data->pixels + rect->y * data->pitch + - rect->x * SDL_BYTESPERPIXEL(texture->format)); - *pitch = data->pitch; - deprintf(1, "-PS3_LockTexture()\n"); - return 0; - } -} - -static void -PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture) -{ - deprintf(1, "+PS3_UnlockTexture()\n"); - PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - SDL_SW_UnlockYUVTexture(data->yuv); - } - deprintf(1, "-PS3_UnlockTexture()\n"); -} - -static void -PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture) -{ - deprintf(1, "+PS3_DestroyTexture()\n"); - PS3_TextureData *data = (PS3_TextureData *) texture->driverdata; - - if (!data) { - return; - } - if (data->yuv) { - SDL_SW_DestroyYUVTexture(data->yuv); - } - if (data->pixels) { - SDL_free((void *)data->pixels); - } - deprintf(1, "-PS3_DestroyTexture()\n"); -} - -static int -SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y) -{ - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - SDL_Surface *target = data->screen; - int status; - - if (renderer->blendMode == SDL_BLENDMODE_NONE || - renderer->blendMode == SDL_BLENDMODE_MASK) { - Uint32 color = - SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b, - renderer->a); - - status = SDL_DrawPoint(target, x, y, color); - } else { - status = - SDL_BlendPoint(target, x, y, renderer->blendMode, renderer->r, - renderer->g, renderer->b, renderer->a); - } - return status; -} - -static int -SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, int x2, int y2) -{ - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - SDL_Surface *target = data->screen; - int status; - - if (renderer->blendMode == SDL_BLENDMODE_NONE || - renderer->blendMode == SDL_BLENDMODE_MASK) { - Uint32 color = - SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b, - renderer->a); - - status = SDL_DrawLine(target, x1, y1, x2, y2, color); - } else { - status = - SDL_BlendLine(target, x1, y1, x2, y2, renderer->blendMode, - renderer->r, renderer->g, renderer->b, renderer->a); - } - return status; -} - -static int -SDL_PS3_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect) -{ - deprintf(1, "SDL_PS3_RenderFill()\n"); - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - SDL_Surface *target = data->screen; - SDL_Rect real_rect = *rect; - int status; - - if (renderer->blendMode == SDL_BLENDMODE_NONE) { - Uint32 color = - SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b, - renderer->a); - - status = SDL_FillRect(target, &real_rect, color); - } else { - status = - SDL_BlendFillRect(target, &real_rect, renderer->blendMode, - renderer->r, renderer->g, renderer->b, - renderer->a); - } - return status; -} - -static int -SDL_PS3_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * srcrect, const SDL_Rect * dstrect) -{ - deprintf(1, "+SDL_PS3_RenderCopy()\n"); - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - SDL_Window *window = SDL_GetWindowFromID(renderer->window); - SDL_VideoDisplay *display = window->display; - PS3_TextureData *txdata = (PS3_TextureData *) texture->driverdata; - SDL_VideoData *devdata = display->device->driverdata; - - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - deprintf(1, "Texture is in a FOURCC format\n"); - if ((texture->format == SDL_PIXELFORMAT_YV12 || texture->format == SDL_PIXELFORMAT_IYUV) - && texture->w % 8 == 0 && texture->h % 8 == 0 - && dstrect->w % 8 == 0 && dstrect->h % 8 == 0) { - deprintf(1, "Use SPE for scaling/converting\n"); - - SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) txdata->yuv; - Uint8 *lum, *Cr, *Cb; - Uint8 *scaler_out = NULL; - Uint8 *dstpixels; - switch (texture->format) { - case SDL_PIXELFORMAT_YV12: - lum = swdata->planes[0]; - Cr = swdata->planes[1]; - Cb = swdata->planes[2]; - break; - case SDL_PIXELFORMAT_IYUV: - lum = swdata->planes[0]; - Cr = swdata->planes[2]; - Cb = swdata->planes[1]; - break; - default: - /* We should never get here (caught above) */ - return -1; - } - - if (srcrect->w != dstrect->w || srcrect->h != dstrect->h) { - deprintf(1, "We need to scale the texture from %u x %u to %u x %u\n", - srcrect->w, srcrect->h, dstrect->w, dstrect->h); - /* Alloc mem for scaled YUV picture */ - scaler_out = (Uint8 *) memalign(16, dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 1)); - if (scaler_out == NULL) { - SDL_OutOfMemory(); - return -1; - } - - /* Set parms for scaling */ - data->scaler_parms->src_pixel_width = srcrect->w; - data->scaler_parms->src_pixel_height = srcrect->h; - data->scaler_parms->dst_pixel_width = dstrect->w; - data->scaler_parms->dst_pixel_height = dstrect->h; - data->scaler_parms->y_plane = lum; - data->scaler_parms->v_plane = Cr; - data->scaler_parms->u_plane = Cb; - data->scaler_parms->dstBuffer = scaler_out; - data->scaler_thread_data->argp = (void *)data->scaler_parms; - - /* Scale the YUV overlay to given size */ - SPE_Start(data->scaler_thread_data); - SPE_Stop(data->scaler_thread_data); - - /* Set parms for converting after scaling */ - data->converter_parms->y_plane = scaler_out; - data->converter_parms->v_plane = scaler_out + dstrect->w * dstrect->h; - data->converter_parms->u_plane = scaler_out + dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 2); - } else { - data->converter_parms->y_plane = lum; - data->converter_parms->v_plane = Cr; - data->converter_parms->u_plane = Cb; - } - - dstpixels = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x - * SDL_BYTESPERPIXEL(texture->format); - data->converter_parms->src_pixel_width = dstrect->w; - data->converter_parms->src_pixel_height = dstrect->h; - data->converter_parms->dstBuffer = dstpixels/*(Uint8 *)data->screen->pixels*/; - data->converter_thread_data->argp = (void *)data->converter_parms; - - /* Convert YUV texture to RGB */ - SPE_SendMsg(data->converter_thread_data, SPU_START); - SPE_SendMsg(data->converter_thread_data, (unsigned int)data->converter_thread_data->argp); - - /* We can probably move that to RenderPresent() */ - SPE_WaitForMsg(data->converter_thread_data, SPU_FIN); - if (scaler_out) { - free(scaler_out); - } - } else { - deprintf(1, "Use software for scaling/converting\n"); - Uint8 *dst; - /* FIXME: Not good */ - dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x - * SDL_BYTESPERPIXEL(texture->format); - return SDL_SW_CopyYUVToRGB(txdata->yuv, srcrect, display->current_mode.format, - dstrect->w, dstrect->h, dst/*data->screen->pixels*/, - data->screen->pitch); - } - } else { - deprintf(1, "SDL_ISPIXELFORMAT_FOURCC = false\n"); - - Uint8 *src, *dst; - int row; - size_t length; - Uint8 *dstpixels; - - src = (Uint8 *) txdata->pixels; - dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x - * SDL_BYTESPERPIXEL(texture->format); - length = dstrect->w * SDL_BYTESPERPIXEL(texture->format); - for (row = 0; row < dstrect->h; ++row) { - SDL_memcpy(dst, src, length); - src += txdata->pitch; - dst += data->screen->pitch; - } - } - - deprintf(1, "-SDL_PS3_RenderCopy()\n"); - return 0; -} - -static void -SDL_PS3_RenderPresent(SDL_Renderer * renderer) -{ - deprintf(1, "+SDL_PS3_RenderPresent()\n"); - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - SDL_Window *window = SDL_GetWindowFromID(renderer->window); - SDL_VideoDisplay *display = window->display; - SDL_VideoData *devdata = display->device->driverdata; - - /* Send the data to the screen */ - /* Get screeninfo */ - struct fb_fix_screeninfo fb_finfo; - if (ioctl(devdata->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) { - SDL_SetError("[PS3] Can't get fixed screeninfo"); - } - struct fb_var_screeninfo fb_vinfo; - if (ioctl(devdata->fbdev, FBIOGET_VSCREENINFO, &fb_vinfo)) { - SDL_SetError("[PS3] Can't get VSCREENINFO"); - } - - /* 16 and 15 bpp is reported as 16 bpp */ - //txdata->bpp = fb_vinfo.bits_per_pixel; - //if (txdata->bpp == 16) - // txdata->bpp = fb_vinfo.red.length + fb_vinfo.green.length + fb_vinfo.blue.length; - - /* Adjust centering */ - data->bounded_width = window->w < fb_vinfo.xres ? window->w : fb_vinfo.xres; - data->bounded_height = window->h < fb_vinfo.yres ? window->h : fb_vinfo.yres; - /* We could use SDL's CENTERED flag for centering */ - data->offset_left = (fb_vinfo.xres - data->bounded_width) >> 1; - data->offset_top = (fb_vinfo.yres - data->bounded_height) >> 1; - data->center[0] = devdata->frame_buffer + data->offset_left * /*txdata->bpp/8*/ 4 + - data->offset_top * fb_finfo.line_length; - data->center[1] = data->center[0] + fb_vinfo.yres * fb_finfo.line_length; - - deprintf(1, "offset_left = %u\n", data->offset_left); - deprintf(1, "offset_top = %u\n", data->offset_top); - - /* Set SPU parms for copying the surface to framebuffer */ - devdata->fb_parms->data = (unsigned char *)data->screen->pixels; - devdata->fb_parms->center = data->center[data->current_screen]; - devdata->fb_parms->out_line_stride = fb_finfo.line_length; - devdata->fb_parms->in_line_stride = window->w * /*txdata->bpp / 8*/4; - devdata->fb_parms->bounded_input_height = data->bounded_height; - devdata->fb_parms->bounded_input_width = data->bounded_width; - //devdata->fb_parms->fb_pixel_size = txdata->bpp / 8; - devdata->fb_parms->fb_pixel_size = 4;//SDL_BYTESPERPIXEL(window->format); - - deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", devdata->fb_thread_data->argp); - - /* Copying.. */ - SPE_SendMsg(devdata->fb_thread_data, SPU_START); - SPE_SendMsg(devdata->fb_thread_data, (unsigned int)devdata->fb_thread_data->argp); - - SPE_WaitForMsg(devdata->fb_thread_data, SPU_FIN); - - /* Wait for vsync */ - if (renderer->info.flags & SDL_RENDERER_PRESENTVSYNC) { - unsigned long crt = 0; - deprintf(1, "[PS3] Wait for vsync\n"); - ioctl(devdata->fbdev, FBIO_WAITFORVSYNC, &crt); - } - - /* Page flip */ - deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", data->current_screen, data->center[data->current_screen]); - ioctl(devdata->fbdev, PS3FB_IOCTL_FSEL, (unsigned long)&data->current_screen); - - /* Update the flipping chain, if any */ - if (data->double_buffering) { - data->current_screen = (data->current_screen + 1) % 2; - } - deprintf(1, "-SDL_PS3_RenderPresent()\n"); -} - -static void -SDL_PS3_DestroyRenderer(SDL_Renderer * renderer) -{ - deprintf(1, "+SDL_PS3_DestroyRenderer()\n"); - SDL_PS3_RenderData *data = - (SDL_PS3_RenderData *) renderer->driverdata; - int i; - - if (data) { - for (i = 0; i < SDL_arraysize(data->screen); ++i) { - if (data->screen) { - SDL_FreeSurface(data->screen); - } - } - - /* Shutdown SPE and release related resources */ - if (data->scaler_thread_data) { - free((void *)data->scaler_thread_data); - } - if (data->scaler_parms) { - free((void *)data->scaler_parms); - } - if (data->converter_thread_data) { - SPE_Shutdown(data->converter_thread_data); - free((void *)data->converter_thread_data); - } - if (data->converter_parms) { - free((void *)data->converter_parms); - } - - SDL_free(data); - } - SDL_free(renderer); - deprintf(1, "-SDL_PS3_DestroyRenderer()\n"); -} - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3render_c.h --- a/src/video/ps3/SDL_ps3render_c.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -/* Default framebuffer device on PS3 */ -/* SDL surface based renderer implementation */ - -extern SDL_RenderDriver SDL_PS3_RenderDriver; - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3spe.c --- a/src/video/ps3/SDL_ps3spe.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,166 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_video.h" -#include "SDL_ps3spe_c.h" - -#include "SDL_ps3video.h" -#include "SDL_ps3render_c.h" - -/* Start the SPE thread */ -int SPE_Start(spu_data_t * spe_data) -{ - deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name); - if (!(spe_data->booted)) - SPE_Boot(spe_data); - - /* To allow re-running of context, spe_ctx_entry has to be set before each call */ - spe_data->entry = SPE_DEFAULT_ENTRY; - spe_data->error_code = 0; - - /* Create SPE thread and run */ - deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name); - if (pthread_create - (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) { - deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Could not create pthread for spe"); - return -1; - } - - if (spe_data->keepalive) - SPE_WaitForMsg(spe_data, SPU_READY); -} - -/* Stop the SPE thread */ -int SPE_Stop(spu_data_t * spe_data) -{ - deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name); - /* Wait for SPE thread to complete */ - deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name); - if (pthread_join(spe_data->thread, NULL)) { - deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed joining the thread"); - return -1; - } - - return 0; -} - -/* Create SPE context and load program */ -int SPE_Boot(spu_data_t * spe_data) -{ - /* Create SPE context */ - deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name); - spe_data->ctx = spe_context_create(0, NULL); - if (spe_data->ctx == NULL) { - deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed creating SPE context"); - return -1; - } - - /* Load SPE object into SPE local store */ - deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name); - if (spe_program_load(spe_data->ctx, &spe_data->program)) { - deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name); - SDL_SetError - ("[PS3->SPU] Failed loading program into SPE context"); - return -1; - } - spe_data->booted = 1; - deprintf(2, "[PS3->SPU] SPE boot successful\n"); - - return 0; -} - -/* (Stop and) shutdown the SPE */ -int SPE_Shutdown(spu_data_t * spe_data) -{ - if (spe_data->keepalive && spe_data->booted) { - SPE_SendMsg(spe_data, SPU_EXIT); - SPE_Stop(spe_data); - } - - /* Destroy SPE context */ - deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name); - if (spe_context_destroy(spe_data->ctx)) { - deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] Failed destroying context"); - return -1; - } - deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name); - return 0; -} - -/* Send message to the SPE via mailboxe */ -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg) -{ - deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name); - /* Send one message, block until message was sent */ - unsigned int spe_in_mbox_msgs[1]; - spe_in_mbox_msgs[0] = msg; - int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING); - - if (1 > in_mbox_write) { - deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name); - SDL_SetError("[PS3->SPU] No message could be written"); - return -1; - } - return 0; -} - - -/* Read 1 message from SPE, block until at least 1 message was received */ -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg) -{ - deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name); - unsigned int out_messages[1]; - while (!spe_out_mbox_status(spe_data->ctx)); - int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1); - deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]); - if (out_messages[0] == msg) - return 0; - else - return -1; -} - -/* Re-runnable invocation of the spe_context_run call */ -void SPE_RunContext(void *thread_argp) -{ - /* argp is the pointer to argument to be passed to the SPE program */ - spu_data_t *args = (spu_data_t *) thread_argp; - deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp); - - /* Run it.. */ - deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name); - if (spe_context_run - (args->ctx, &args->entry, 0, (void *)args->argp, NULL, - NULL) < 0) { - deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name); - SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name); - exit(1); - } - - pthread_exit(NULL); -} - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3spe_c.h --- a/src/video/ps3/SDL_ps3spe_c.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ - -/* This SPE API basically provides 3 ways to run and control a program - * on the SPE: - * - Start and stop the program (keepalive=0). - * SPE_Start() will implicitly boot up the program, create a thread and run - * the context. - * SPE_Stop() will join the (terminated) thread (may block) and return. - * - Boot the program and run it (keepalive=0). - * SPE_Boot() will create a context and load the program and finally start - * the context with SPE_Start(). - * SPE_Stop() will savely end the program. - * - Boot, Run and send messages to the program (keepalive=1). - * Start the program by using one of the methods described above. When - * received the READY-message the program is in its infinite loop waiting - * for new messages. - * Every time you run the program, send SPU_START and the address of the - * according struct using SPE_SendMsg(). - * SPE_WaitForMsg() will than wait for SPU_FIN and is blocking. - * SPE_Shutdown() sends SPU_EXIT and finally stops the program. - * - * Therefor the SPE program - * - either runs once and returns - * - or runs in an infinite loop and is controlled by messages. - */ - -#include "SDL_config.h" - -#include "spulibs/spu_common.h" - -#include - -#ifndef _SDL_ps3spe_h -#define _SDL_ps3spe_h - -/* SPU handling data */ -typedef struct spu_data { - /* Context to be executed */ - spe_context_ptr_t ctx; - spe_program_handle_t program; - /* Thread running the context */ - pthread_t thread; - /* For debugging */ - char * program_name; - /* SPE_Start() or SPE_Boot() called */ - unsigned int booted; - /* Runs the program in an infinite loop? */ - unsigned int keepalive; - unsigned int entry; - /* Exit code of the program */ - int error_code; - /* Arguments passed to the program */ - void * argp; -} spu_data_t; - -/* SPU specific API functions */ -int SPE_Start(spu_data_t * spe_data); -int SPE_Stop(spu_data_t * spe_data); -int SPE_Boot(spu_data_t * spe_data); -int SPE_Shutdown(spu_data_t * spe_data); -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg); -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg); -void SPE_RunContext(void *thread_argp); - -#endif /* _SDL_ps3spe_h */ - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3video.c --- a/src/video/ps3/SDL_ps3video.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,228 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -/* SDL PS3 video driver implementation based on dummy video driver - * - * Initial work by Ryan C. Gordon (icculus@icculus.org). A good portion - * of this was cut-and-pasted from Stephane Peter's work in the AAlib - * SDL video driver. Renamed to "DUMMY" by Sam Lantinga. - */ - -#include "SDL_video.h" -#include "SDL_mouse.h" -#include "../SDL_sysvideo.h" -#include "../SDL_pixels_c.h" -#include "../../events/SDL_events_c.h" - -#include "SDL_ps3video.h" -#include "SDL_ps3spe_c.h" -#include "SDL_ps3events_c.h" -#include "SDL_ps3render_c.h" -#include "SDL_ps3modes_c.h" - -#include -#include -#include -#include - -#define PS3VID_DRIVER_NAME "ps3" - -/* Initialization/Query functions */ -static int PS3_VideoInit(_THIS); -static void PS3_VideoQuit(_THIS); - -/* Stores the SPE executable name of fb_writer_spu */ -extern spe_program_handle_t fb_writer_spu; - -/* PS3 driver bootstrap functions */ - -static int -PS3_Available(void) -{ - deprintf(1, "+PS3_Available()\n"); - const char *envr = SDL_getenv("SDL_VIDEODRIVER"); - if ((envr) && (SDL_strcmp(envr, PS3VID_DRIVER_NAME) == 0)) { - return (1); - } - - deprintf(1, "-PS3_Available()\n"); - return (0); -} - -static void -PS3_DeleteDevice(SDL_VideoDevice * device) -{ - deprintf(1, "+PS3_DeleteDevice()\n"); - SDL_free(device->driverdata); - SDL_free(device); - deprintf(1, "-PS3_DeleteDevice()\n"); -} - -static SDL_VideoDevice * -PS3_CreateDevice(int devindex) -{ - deprintf(1, "+PS3_CreateDevice()\n"); - SDL_VideoDevice *device; - SDL_VideoData *data; - - /* Initialize all variables that we clean on shutdown */ - device = (SDL_VideoDevice *) SDL_calloc(1, sizeof(SDL_VideoDevice)); - if (!device) { - SDL_OutOfMemory(); - if (device) { - SDL_free(device); - } - return (0); - } - data = (struct SDL_VideoData *) SDL_calloc(1, sizeof(SDL_VideoData)); - if (!data) { - SDL_OutOfMemory(); - SDL_free(device); - return (0); - } - device->driverdata = data; - - /* Set the function pointers */ - device->VideoInit = PS3_VideoInit; - device->VideoQuit = PS3_VideoQuit; - device->SetDisplayMode = PS3_SetDisplayMode; - device->GetDisplayModes = PS3_GetDisplayModes; - device->PumpEvents = PS3_PumpEvents; - - device->free = PS3_DeleteDevice; - - deprintf(1, "-PS3_CreateDevice()\n"); - return device; -} - -VideoBootStrap PS3_bootstrap = { - PS3VID_DRIVER_NAME, "SDL PS3 Cell video driver", - PS3_Available, PS3_CreateDevice -}; - - -int -PS3_VideoInit(_THIS) -{ - int i; - - deprintf(1, "PS3_VideoInit()\n"); - - SDL_VideoData *data = (SDL_VideoData *) _this->driverdata; - SDL_DisplayMode mode; - - /* Create SPU fb_parms and thread structure */ - data->fb_parms = (struct fb_writer_parms_t *) - memalign(16, sizeof(struct fb_writer_parms_t)); - data->fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); - if (data->fb_parms == NULL || data->fb_thread_data == NULL) { - SDL_OutOfMemory(); - return -1; - } - data->fb_thread_data->program = fb_writer_spu; - data->fb_thread_data->program_name = "fb_writer_spu"; - data->fb_thread_data->argp = (void *)data->fb_parms; - data->fb_thread_data->keepalive = 1; - data->fb_thread_data->booted = 0; - - SPE_Start(data->fb_thread_data); - - /* Open the device */ - data->fbdev = open(PS3DEV, O_RDWR); - if (data->fbdev < 0) { - SDL_SetError("[PS3] Unable to open device %s", PS3DEV); - return -1; - } - - /* Take control of frame buffer from kernel, for details see - * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html - * kernel will no longer flip the screen itself - */ - ioctl(data->fbdev, PS3FB_IOCTL_ON, 0); - - /* Unblank screen */ - ioctl(data->fbdev, FBIOBLANK, 0); - - struct fb_fix_screeninfo fb_finfo; - if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) { - SDL_SetError("[PS3] Can't get fixed screeninfo"); - return (0); - } - - /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */ - if ((data->frame_buffer = (uint8_t *)mmap(0, fb_finfo.smem_len, - PROT_READ | PROT_WRITE, MAP_SHARED, - data->fbdev, 0)) == (uint8_t *) - 1) { - SDL_SetError("[PS3] Can't mmap for %s", PS3DEV); - return (0); - } else { - /* Enable double buffering */ - } - - /* Blank screen */ - memset(data->frame_buffer, 0x00, fb_finfo.smem_len); - - PS3_InitModes(_this); - for (i = 0; i < _this->num_displays; ++i) { - SDL_AddRenderDriver(&_this->displays[i], &SDL_PS3_RenderDriver); - } - - /* We're done! */ - return 0; -} - -void -PS3_VideoQuit(_THIS) -{ - deprintf(1, "PS3_VideoQuit()\n"); - SDL_VideoData *data = (SDL_VideoData *) _this->driverdata; - - PS3_QuitModes(_this); - - /* Unmap framebuffer */ - if (data->frame_buffer) { - struct fb_fix_screeninfo fb_finfo; - if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo) != -1) { - munmap(data->frame_buffer, fb_finfo.smem_len); - data->frame_buffer = 0; - } - } - - /* Shutdown SPE and related resources */ - if (data->fb_parms) - free((void *)data->fb_parms); - if (data->fb_thread_data) { - SPE_Shutdown(data->fb_thread_data); - free((void *)data->fb_thread_data); - } - - /* Close device */ - if (data->fbdev) { - /* Give control of frame buffer back to kernel */ - ioctl(data->fbdev, PS3FB_IOCTL_OFF, 0); - close(data->fbdev); - data->fbdev = -1; - } -} - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/SDL_ps3video.h --- a/src/video/ps3/SDL_ps3video.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#ifndef _SDL_ps3video_h -#define _SDL_ps3video_h - -#include "../SDL_sysvideo.h" -#include "SDL_ps3spe_c.h" - -#include -#include - -/* Debugging - * 0: No debug messages - * 1: Video debug messages - * 2: SPE debug messages - * 3: Memory adresses - */ -#define DEBUG_LEVEL 0 - -#ifdef DEBUG_LEVEL -#define deprintf( level, fmt, args... ) \ - do \ -{ \ - if ( (unsigned)(level) <= DEBUG_LEVEL ) \ - { \ - fprintf( stdout, fmt, ##args ); \ - fflush( stdout ); \ - } \ -} while ( 0 ) -#else -#define deprintf( level, fmt, args... ) -#endif - -/* Default framebuffer device on PS3 */ -#define PS3DEV "/dev/fb0" - -/* Private display data */ -typedef struct SDL_VideoData -{ - /* Framebuffer device descriptor */ - int fbdev; - /* mmap'd access to fbdev */ - uint8_t * frame_buffer; - /* SPE threading stuff of the framebuffer */ - spu_data_t * fb_thread_data; - /* Framebuffer transfer data */ - volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128))); -} SDL_VideoData; - -typedef struct SDL_DisplayModeData -{ - unsigned long mode; - //struct ps3fb_ioctl_res res; -} PS3_DisplayModeData; - -#endif /* _SDL_ps3video_h */ - -/* vi: set ts=4 sw=4 expandtab: */ diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/spulibs/Makefile --- a/src/video/ps3/spulibs/Makefile Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -# This Makefile is for building the CELL BE SPU libs -# libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so - -# Toolchain -PPU_LD=/usr/bin/ld -SPU_SRCDIR=$(srcdir)/src/video/ps3/spulibs -SPU_LIBDIR=$(srcdir)/src/video/ps3/spulibs/libs -SPU_CFLAGS=-g -W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -Winline -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2 - -DEPS = $(SPU_SRCDIR)/spu_common.h -LIBS= fb_writer yuv2rgb bilin_scaler - -OBJLIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.a) -SHALIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.so) - - -ps3libs: $(foreach lib,$(OBJLIBS),$(SPU_LIBDIR)/$(lib)) $(foreach lib,$(SHALIBS),$(SPU_LIBDIR)/$(lib)) - - -$(SPU_LIBDIR)/lib%_spu.a: $(SPU_LIBDIR)/%-embed.o - $(AR) -qcs $@ $< - -$(SPU_LIBDIR)/lib%_spu.so: $(SPU_LIBDIR)/%-embed.o - $(PPU_LD) -o $@ -shared -soname=$(notdir $@) $< - -$(SPU_LIBDIR)/%-embed.o: $(SPU_LIBDIR)/%.o - $(EMBEDSPU) -m32 $(subst -embed.o,,$(notdir $@))_spu $< $@ - -$(SPU_LIBDIR)/%.o: $(SPU_SRCDIR)/%.c $(DEPS) - $(SPU_GCC) $(SPU_CFLAGS) -o $@ $< -lm - - -ps3libs-install: $(foreach obj,$(OBJLIBS),$(SPU_LIBDIR)/$(obj)) $(foreach obj,$(SHALIBS),$(SPU_LIBDIR)/$(obj)) - for file in $(OBJLIBS); do \ - $(INSTALL) -c -m 0655 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \ - done - for file in $(SHALIBS); do \ - $(INSTALL) -c -m 0755 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \ - done - -ps3libs-uninstall: - for file in $(OBJLIBS) $(SHALIBS); do \ - rm -f $(DESTDIR)$(libdir)/$$file; \ - done - -ps3libs-clean: - rm -f $(SPU_LIBDIR)/* diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/spulibs/bilin_scaler.c --- a/src/video/ps3/spulibs/bilin_scaler.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2050 +0,0 @@ -/* - * SDL - Simple DirectMedia Layer - * CELL BE Support for PS3 Framebuffer - * Copyright (C) 2008, 2009 International Business Machines Corporation - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - * - * Martin Lowinski - * Dirk Herrendoerfer - * SPE code based on research by: - * Rene Becker - * Thimo Emmerich - */ - -#include "spu_common.h" - -#include -#include - -// Debugging -//#define DEBUG - -#ifdef DEBUG -#define deprintf(fmt, args... ) \ - fprintf( stdout, fmt, ##args ); \ - fflush( stdout ); -#else -#define deprintf( fmt, args... ) -#endif - -struct scale_parms_t parms __attribute__((aligned(128))); - -/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored - * there might be the need to retrieve misaligned data, adjust - * incoming v and u plane to be able to handle this (add 128) - */ -unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128))); -unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); -unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); - -/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */ -unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128))); -unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); -unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); - -/* some vectors needed by the float to int conversion */ -static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; -static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; - -void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); -void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); - -void scale_srcw16_dstw16(); -void scale_srcw16_dstw32(); -void scale_srcw32_dstw16(); -void scale_srcw32_dstw32(); - -int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp ) -{ - deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id); - /* DMA transfer for the input parameters */ - spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD); - DMA_WAIT_TAG(TAG_INIT); - - deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height, - parms.dst_pixel_width, parms.dst_pixel_height); - - if(parms.src_pixel_width & 0x1f) { - if(parms.dst_pixel_width & 0x1F) { - deprintf("[SPU] Using scale_srcw16_dstw16\n"); - scale_srcw16_dstw16(); - } else { - deprintf("[SPU] Using scale_srcw16_dstw32\n"); - scale_srcw16_dstw32(); - } - } else { - if(parms.dst_pixel_width & 0x1F) { - deprintf("[SPU] Using scale_srcw32_dstw16\n"); - scale_srcw32_dstw16(); - } else { - deprintf("[SPU] Using scale_srcw32_dstw32\n"); - scale_srcw32_dstw32(); - } - } - deprintf("[SPU] bilin_scaler_spu... done!\n"); - - return 0; -} - - -/* - * vfloat_to_vuint() - * - * converts a float vector to an unsinged int vector using saturated - * arithmetic - * - * @param vec_s float vector for conversion - * @returns converted unsigned int vector - */ -inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { - vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); - vec_s = spu_sel(vec_s, vec_0_1, select_1); - - vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); - vec_s = spu_sel(vec_s, vec_255, select_2); - return spu_convtu(vec_s,0); -} - - -/* - * scale_srcw16_dstw16() - * - * processes an input image of width 16 - * scaling is done to a width 16 - * result stored in RAM - */ -void scale_srcw16_dstw16() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - // for handling misalignment, addresses are precalculated - unsigned char* precalc_src_addr_v = src_addr_v; - unsigned char* precalc_src_addr_u = src_addr_u; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // offset for the v and u plane to handle misalignement - unsigned int curr_lsoff_v = 0, next_lsoff_v; - unsigned int curr_lsoff_u = 0, next_lsoff_u; - - // calculate lower line indices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - /* iteration loop - * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - * the scaled output is 2 lines y, 1 line v, 1 line u - * the yuv2rgb-converted output is stored to RAM - */ - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); - next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; - mfc_get( v_plane[next_src_idx], - ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); - next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; - mfc_get( u_plane[next_src_idx], - ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - - curr_lsoff_v = next_lsoff_v; - curr_lsoff_u = next_lsoff_u; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * scale_srcw16_dstw32() - * - * processes an input image of width 16 - * scaling is done to a width 32 - * yuv2rgb conversion on a width of 32 - * result stored in RAM - */ -void scale_srcw16_dstw32() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - - // for handling misalignment, addresses are precalculated - unsigned char* precalc_src_addr_v = src_addr_v; - unsigned char* precalc_src_addr_u = src_addr_u; - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // offset for the v and u plane to handle misalignement - unsigned int curr_lsoff_v = 0, next_lsoff_v; - unsigned int curr_lsoff_u = 0, next_lsoff_u; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); - next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; - mfc_get( v_plane[next_src_idx], - ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); - next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; - mfc_get( u_plane[next_src_idx], - ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - - curr_lsoff_v = next_lsoff_v; - curr_lsoff_u = next_lsoff_u; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * scale_srcw32_dstw16() - * - * processes an input image of width 32 - * scaling is done to a width 16 - * yuv2rgb conversion on a width of 16 - * result stored in RAM - */ -void scale_srcw32_dstw16() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - mfc_get( v_plane[next_src_idx], - (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - mfc_get( u_plane[next_src_idx], - (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/** - * scale_srcw32_dstw32() - * - * processes an input image of width 32 - * scaling is done to a width 32 - * yuv2rgb conversion on a width of 32 - * result stored in RAM - */ -void scale_srcw32_dstw32() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - mfc_get( v_plane[next_src_idx], - (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - mfc_get( u_plane[next_src_idx], - (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * bilinear_scale_line_w8() - * - * processes a line of yuv-input, width has to be a multiple of 8 - * scaled yuv-output is written to local store buffer - * - * @param src buffer for 2 lines input - * @param dst_ buffer for 1 line output - * @param dst_width the width of the destination line - * @param vf_x_scale a float vector, at each entry is the x_scale-factor - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line - * @param src_linestride the stride of the srcline - */ -void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { - - unsigned char* dst = dst_; - - unsigned int dst_x; - for( dst_x=0; dst_xfirst 4 pixel - // upper range->next 4 pixel - vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 }; - vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 }; - vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range ); - vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range ); - - // calculate weight EAST-WEST - vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 ); - vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 ); - vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale ); - vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale ); - vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 ); - vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 ); - vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 ); - vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 ); - vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range ); - vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range ); - - // calculate address offset - // - // pixel NORTH WEST - vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range; - vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range; - - // pixel NORTH EAST-->(offpixelNW+1) - vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; - vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 ); - - // SOUTH-WEST-->(offpixelNW+src_linestride) - vector unsigned int vui_srclinestride = spu_splats( src_linestride ); - vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range ); - vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range ); - - // SOUTH-EAST-->(offpixelNW+src_linestride+1) - vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range ); - vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range ); - - // calculate each address - vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); - vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range ); - vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range ); - vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range ); - vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range ); - - vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range ); - vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range ); - vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range ); - vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range ); - - // get each pixel - // - // scalar load, afterwards insertion into the right position - // NORTH WEST - vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - vector unsigned char vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )), - vuc_pixel_NW_lower_range, 7 ); - vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )), - vuc_pixel_NW_lower_range, 11 ); - vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )), - vuc_pixel_NW_lower_range, 15 ); - - vector unsigned char vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )), - vuc_pixel_NW_upper_range, 7 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )), - vuc_pixel_NW_upper_range, 11 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )), - vuc_pixel_NW_upper_range, 15 ); - - // NORTH EAST - vector unsigned char vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )), - vuc_pixel_NE_lower_range, 7 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )), - vuc_pixel_NE_lower_range, 11 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )), - vuc_pixel_NE_lower_range, 15 ); - - vector unsigned char vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )), - vuc_pixel_NE_upper_range, 7 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )), - vuc_pixel_NE_upper_range, 11 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )), - vuc_pixel_NE_upper_range, 15 ); - - - // SOUTH WEST - vector unsigned char vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )), - vuc_pixel_SW_lower_range, 7 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )), - vuc_pixel_SW_lower_range, 11 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )), - vuc_pixel_SW_lower_range, 15 ); - - vector unsigned char vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )), - vuc_pixel_SW_upper_range, 7 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )), - vuc_pixel_SW_upper_range, 11 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )), - vuc_pixel_SW_upper_range, 15 ); - - // SOUTH EAST - vector unsigned char vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )), - vuc_pixel_SE_lower_range, 7 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )), - vuc_pixel_SE_lower_range, 11 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )), - vuc_pixel_SE_lower_range, 15 ); - - vector unsigned char vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )), - vuc_pixel_SE_upper_range, 7 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )), - vuc_pixel_SE_upper_range, 11 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )), - vuc_pixel_SE_upper_range, 15 ); - - - // convert to float - vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 ); - vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 ); - - vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 ); - vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 ); - - vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 ); - vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 ); - - vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 ); - vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 ); - - - - // first linear interpolation: EWtop - // EWtop = NW + EWweight*(NE-NW) - // - // lower range - vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range ); - vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range, - vf_EWtop_lower_range_tmp, - vf_pixel_NW_lower_range ); - - // upper range - vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range ); - vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range, - vf_EWtop_upper_range_tmp, - vf_pixel_NW_upper_range ); - - - - // second linear interpolation: EWbottom - // EWbottom = SW + EWweight*(SE-SW) - // - // lower range - vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range ); - vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range, - vf_EWbottom_lower_range_tmp, - vf_pixel_SW_lower_range ); - - // upper range - vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range ); - vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range, - vf_EWbottom_upper_range_tmp, - vf_pixel_SW_upper_range ); - - - - // third linear interpolation: the bilinear interpolated value - // result = EWtop + NSweight*(EWbottom-EWtop); - // - // lower range - vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range ); - vector float vf_result_lower_range = spu_madd( vf_NSweight, - vf_result_lower_range_tmp, - vf_EWtop_lower_range ); - - // upper range - vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range ); - vector float vf_result_upper_range = spu_madd( vf_NSweight, - vf_result_upper_range_tmp, - vf_EWtop_upper_range ); - - - // convert back: using saturated arithmetic - vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range ); - vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range ); - - // merge results->lower,upper - vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 }; - - vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range, - (vector unsigned char) vui_result_upper_range, - vuc_mask_merge_result ); - - // partial storing - vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF }; - - - // get currently stored data - vector unsigned char vuc_orig = *((vector unsigned char*)dst); - - // clear currently stored data - vuc_orig = spu_and( vuc_orig, - spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) ); - - // rotate result according to storing address - vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F ); - - // store result - *((vector unsigned char*)dst) = spu_or( vuc_result, - vuc_orig ); - dst += 8; - } -} - - -/* - * bilinear_scale_line_w16() - * - * processes a line of yuv-input, width has to be a multiple of 16 - * scaled yuv-output is written to local store buffer - * - * @param src buffer for 2 lines input - * @param dst_ buffer for 1 line output - * @param dst_width the width of the destination line - * @param vf_x_scale a float vector, at each entry is the x_scale-factor - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line - * @param src_linestride the stride of the srcline - */ -void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { - - unsigned char* dst = dst_; - - unsigned int dst_x; - for( dst_x=0; dst_xpixel 1 2 3 4 - // second range->pixel 5 6 7 8 - // third range->pixel 9 10 11 12 - // fourth range->pixel 13 14 15 16 - vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 }; - vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 }; - vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 }; - vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 }; - vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range ); - vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range ); - vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range ); - vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range ); - - // calculate weight EAST-WEST - vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 ); - vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 ); - vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 ); - vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 ); - vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale ); - vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale ); - vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale ); - vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale ); - vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 ); - vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 ); - vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 ); - vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 ); - vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 ); - vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 ); - vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 ); - vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 ); - vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range ); - vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range ); - vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range ); - vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range ); - - // calculate address offset - // - // pixel NORTH WEST - vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range; - vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range; - vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range; - vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range; - - // pixel NORTH EAST-->(offpixelNW+1) - vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; - vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 ); - - // SOUTH-WEST-->(offpixelNW+src_linestride) - vector unsigned int vui_srclinestride = spu_splats( src_linestride ); - vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range ); - vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range ); - vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range ); - vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range ); - - // SOUTH-EAST-->(offpixelNW+src_linestride+1) - vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range ); - vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range ); - vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range ); - vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range ); - - // calculate each address - vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); - vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range ); - vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range ); - vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range ); - vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range ); - - vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range ); - vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range ); - vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range ); - vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range ); - - vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range ); - vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range ); - vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range ); - vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range ); - - vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range ); - vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range ); - vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range ); - vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range ); - - - // get each pixel - // - // scalar load, afterwards insertion into the right position - // NORTH WEST - // first range - vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - vector unsigned char vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )), - vuc_pixel_NW_first_range, 7 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )), - vuc_pixel_NW_first_range, 11 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )), - vuc_pixel_NW_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )), - vuc_pixel_NW_second_range, 7 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )), - vuc_pixel_NW_second_range, 11 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )), - vuc_pixel_NW_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )), - vuc_pixel_NW_third_range, 7 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )), - vuc_pixel_NW_third_range, 11 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )), - vuc_pixel_NW_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )), - vuc_pixel_NW_fourth_range, 7 ); - vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )), - vuc_pixel_NW_fourth_range, 11 ); - vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )), - vuc_pixel_NW_fourth_range, 15 ); - - // NORTH EAST - // first range - vector unsigned char vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )), - vuc_pixel_NE_first_range, 7 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )), - vuc_pixel_NE_first_range, 11 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )), - vuc_pixel_NE_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )), - vuc_pixel_NE_second_range, 7 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )), - vuc_pixel_NE_second_range, 11 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )), - vuc_pixel_NE_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )), - vuc_pixel_NE_third_range, 7 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )), - vuc_pixel_NE_third_range, 11 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )), - vuc_pixel_NE_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )), - vuc_pixel_NE_fourth_range, 7 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )), - vuc_pixel_NE_fourth_range, 11 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )), - vuc_pixel_NE_fourth_range, 15 ); - - // SOUTH WEST - // first range - vector unsigned char vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )), - vuc_pixel_SW_first_range, 7 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )), - vuc_pixel_SW_first_range, 11 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )), - vuc_pixel_SW_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )), - vuc_pixel_SW_second_range, 7 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )), - vuc_pixel_SW_second_range, 11 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )), - vuc_pixel_SW_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )), - vuc_pixel_SW_third_range, 7 ); - vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )), - vuc_pixel_SW_third_range, 11 ); - vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )), - vuc_pixel_SW_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )), - vuc_pixel_SW_fourth_range, 7 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )), - vuc_pixel_SW_fourth_range, 11 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )), - vuc_pixel_SW_fourth_range, 15 ); - - // NORTH EAST - // first range - vector unsigned char vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )), - vuc_pixel_SE_first_range, 7 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )), - vuc_pixel_SE_first_range, 11 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )), - vuc_pixel_SE_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )), - vuc_pixel_SE_second_range, 7 ); - vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )), - vuc_pixel_SE_second_range, 11 ); - vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )), - vuc_pixel_SE_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )), - vuc_pixel_SE_third_range, 7 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )), - vuc_pixel_SE_third_range, 11 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )), - vuc_pixel_SE_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )), - vuc_pixel_SE_fourth_range, 7 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )), - vuc_pixel_SE_fourth_range, 11 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )), - vuc_pixel_SE_fourth_range, 15 ); - - - - // convert to float - vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 ); - vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 ); - vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 ); - vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 ); - - vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 ); - vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 ); - vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 ); - vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 ); - - vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 ); - vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 ); - vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 ); - vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 ); - - vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 ); - vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 ); - vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 ); - vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 ); - - // first linear interpolation: EWtop - // EWtop = NW + EWweight*(NE-NW) - // - // first range - vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range ); - vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range, - vf_EWtop_first_range_tmp, - vf_pixel_NW_first_range ); - - // second range - vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range ); - vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range, - vf_EWtop_second_range_tmp, - vf_pixel_NW_second_range ); - - // third range - vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range ); - vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range, - vf_EWtop_third_range_tmp, - vf_pixel_NW_third_range ); - - // fourth range - vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range ); - vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range, - vf_EWtop_fourth_range_tmp, - vf_pixel_NW_fourth_range ); - - - - // second linear interpolation: EWbottom - // EWbottom = SW + EWweight*(SE-SW) - // - // first range - vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range ); - vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range, - vf_EWbottom_first_range_tmp, - vf_pixel_SW_first_range ); - - // second range - vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range ); - vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range, - vf_EWbottom_second_range_tmp, - vf_pixel_SW_second_range ); - // first range - vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range ); - vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range, - vf_EWbottom_third_range_tmp, - vf_pixel_SW_third_range ); - - // first range - vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range ); - vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range, - vf_EWbottom_fourth_range_tmp, - vf_pixel_SW_fourth_range ); - - - - // third linear interpolation: the bilinear interpolated value - // result = EWtop + NSweight*(EWbottom-EWtop); - // - // first range - vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range ); - vector float vf_result_first_range = spu_madd( vf_NSweight, - vf_result_first_range_tmp, - vf_EWtop_first_range ); - - // second range - vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range ); - vector float vf_result_second_range = spu_madd( vf_NSweight, - vf_result_second_range_tmp, - vf_EWtop_second_range ); - - // third range - vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range ); - vector float vf_result_third_range = spu_madd( vf_NSweight, - vf_result_third_range_tmp, - vf_EWtop_third_range ); - - // fourth range - vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range ); - vector float vf_result_fourth_range = spu_madd( vf_NSweight, - vf_result_fourth_range_tmp, - vf_EWtop_fourth_range ); - - - - // convert back: using saturated arithmetic - vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range ); - vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range ); - vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range ); - vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range ); - - // merge results->lower,upper - vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 }; - - vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F }; - - vector unsigned char vuc_result_first_second = - spu_shuffle( (vector unsigned char) vui_result_first_range, - (vector unsigned char) vui_result_second_range, - vuc_mask_merge_result_first_second ); - - vector unsigned char vuc_result_third_fourth = - spu_shuffle( (vector unsigned char) vui_result_third_range, - (vector unsigned char) vui_result_fourth_range, - vuc_mask_merge_result_third_fourth ); - - // store result - *((vector unsigned char*)dst) = spu_or( vuc_result_first_second, - vuc_result_third_fourth ); - dst += 16; - } -} - diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/spulibs/fb_writer.c --- a/src/video/ps3/spulibs/fb_writer.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,193 +0,0 @@ -/* - * SDL - Simple DirectMedia Layer - * CELL BE Support for PS3 Framebuffer - * Copyright (C) 2008, 2009 International Business Machines Corporation - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - * - * Martin Lowinski - * Dirk Herrendoerfer - * SPE code based on research by: - * Rene Becker - * Thimo Emmerich - */ - -#include "spu_common.h" - -#include -#include -#include -#include - -// Debugging -//#define DEBUG - -#ifdef DEBUG -#define deprintf(fmt, args... ) \ - fprintf( stdout, fmt, ##args ); \ - fflush( stdout ); -#else -#define deprintf( fmt, args... ) -#endif - -void cpy_to_fb(unsigned int); - -/* fb_writer_spu parms */ -static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128))); - -/* Code running on SPU */ -int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused))) -{ - deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id); - uint32_t ea_mfc, mbox; - // send ready message - spu_write_out_mbox(SPU_READY); - - while (1) { - /* Check mailbox */ - mbox = spu_read_in_mbox(); - deprintf("[SPU] Message is %u\n", mbox); - switch (mbox) { - case SPU_EXIT: - deprintf("[SPU] fb_writer goes down...\n"); - return 0; - case SPU_START: - break; - default: - deprintf("[SPU] Cannot handle message\n"); - continue; - } - - /* Tag Manager setup */ - unsigned int tags; - tags = mfc_multi_tag_reserve(5); - if (tags == MFC_TAG_INVALID) { - deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n"); - return 0; - } - - /* Framebuffer parms */ - ea_mfc = spu_read_in_mbox(); - deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc); - spu_mfcdma32(&parms, (unsigned int)ea_mfc, - sizeof(struct fb_writer_parms_t), tags, - MFC_GET_CMD); - deprintf("[SPU] argp = %u\n", (unsigned int)argp); - DMA_WAIT_TAG(tags); - - /* Copy parms->data to framebuffer */ - deprintf("[SPU] Copying to framebuffer started\n"); - cpy_to_fb(tags); - deprintf("[SPU] Copying to framebuffer done!\n"); - - mfc_multi_tag_release(tags, 5); - deprintf("[SPU] fb_writer_spu... done!\n"); - /* Send FIN msg */ - spu_write_out_mbox(SPU_FIN); - } - - return 0; -} - -void cpy_to_fb(unsigned int tag_id_base) -{ - unsigned int i; - unsigned char current_buf; - uint8_t *in = parms.data; - - /* Align fb pointer which was centered before */ - uint8_t *fb = - (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0); - - uint32_t bounded_input_height = parms.bounded_input_height; - uint32_t bounded_input_width = parms.bounded_input_width; - uint32_t fb_pixel_size = parms.fb_pixel_size; - - uint32_t out_line_stride = parms.out_line_stride; - uint32_t in_line_stride = parms.in_line_stride; - uint32_t in_line_size = bounded_input_width * fb_pixel_size; - - current_buf = 0; - - /* Local store buffer */ - static volatile uint8_t buf[4][BUFFER_SIZE] - __attribute__ ((aligned(128))); - /* do 4-times multibuffering using DMA list, process in two steps */ - for (i = 0; i < bounded_input_height >> 2; i++) { - /* first buffer */ - DMA_WAIT_TAG(tag_id_base + 1); - // retrieve buffer - spu_mfcdma32(buf[0], (unsigned int)in, in_line_size, - tag_id_base + 1, MFC_GETB_CMD); - DMA_WAIT_TAG(tag_id_base + 1); - // store buffer - spu_mfcdma32(buf[0], (unsigned int)fb, in_line_size, - tag_id_base + 1, MFC_PUTB_CMD); - in += in_line_stride; - fb += out_line_stride; - deprintf("[SPU] 1st buffer copied in=0x%x, fb=0x%x\n", in, - fb); - - /* second buffer */ - DMA_WAIT_TAG(tag_id_base + 2); - // retrieve buffer - spu_mfcdma32(buf[1], (unsigned int)in, in_line_size, - tag_id_base + 2, MFC_GETB_CMD); - DMA_WAIT_TAG(tag_id_base + 2); - // store buffer - spu_mfcdma32(buf[1], (unsigned int)fb, in_line_size, - tag_id_base + 2, MFC_PUTB_CMD); - in += in_line_stride; - fb += out_line_stride; - deprintf("[SPU] 2nd buffer copied in=0x%x, fb=0x%x\n", in, - fb); - - /* third buffer */ - DMA_WAIT_TAG(tag_id_base + 3); - // retrieve buffer - spu_mfcdma32(buf[2], (unsigned int)in, in_line_size, - tag_id_base + 3, MFC_GETB_CMD); - DMA_WAIT_TAG(tag_id_base + 3); - // store buffer - spu_mfcdma32(buf[2], (unsigned int)fb, in_line_size, - tag_id_base + 3, MFC_PUTB_CMD); - in += in_line_stride; - fb += out_line_stride; - deprintf("[SPU] 3rd buffer copied in=0x%x, fb=0x%x\n", in, - fb); - - /* fourth buffer */ - DMA_WAIT_TAG(tag_id_base + 4); - // retrieve buffer - spu_mfcdma32(buf[3], (unsigned int)in, in_line_size, - tag_id_base + 4, MFC_GETB_CMD); - DMA_WAIT_TAG(tag_id_base + 4); - // store buffer - spu_mfcdma32(buf[3], (unsigned int)fb, in_line_size, - tag_id_base + 4, MFC_PUTB_CMD); - in += in_line_stride; - fb += out_line_stride; - deprintf("[SPU] 4th buffer copied in=0x%x, fb=0x%x\n", in, - fb); - deprintf("[SPU] Loop #%i, bounded_input_height=%i\n", i, - bounded_input_height >> 2); - } - DMA_WAIT_TAG(tag_id_base + 2); - DMA_WAIT_TAG(tag_id_base + 3); - DMA_WAIT_TAG(tag_id_base + 4); -} - - diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/spulibs/spu_common.h --- a/src/video/ps3/spulibs/spu_common.h Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -/* - * SDL - Simple DirectMedia Layer - * CELL BE Support for PS3 Framebuffer - * Copyright (C) 2008, 2009 International Business Machines Corporation - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - * - * Martin Lowinski - * Dirk Herrendoerfer - * SPE code based on research by: - * Rene Becker - * Thimo Emmerich - */ - -/* Common definitions/makros for SPUs */ - -#ifndef _SPU_COMMON_H -#define _SPU_COMMON_H - -#include -#include -#include - -/* Tag management */ -#define DMA_WAIT_TAG(_tag) \ - mfc_write_tag_mask(1<<(_tag)); \ - mfc_read_tag_status_all(); - -/* SPU mailbox messages */ -#define SPU_READY 0 -#define SPU_START 1 -#define SPU_FIN 2 -#define SPU_EXIT 3 - -/* Tags */ -#define RETR_BUF 0 -#define STR_BUF 1 -#define TAG_INIT 2 - -/* Buffersizes */ -#define MAX_HDTV_WIDTH 1920 -#define MAX_HDTV_HEIGHT 1080 -/* One stride of HDTV */ -#define BUFFER_SIZE 7680 - -/* fb_writer ppu/spu exchange parms */ -struct fb_writer_parms_t { - uint8_t *data; - uint8_t *center; - uint32_t out_line_stride; - uint32_t in_line_stride; - uint32_t bounded_input_height; - uint32_t bounded_input_width; - uint32_t fb_pixel_size; - - /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */ - char padding[4]; -} __attribute__((aligned(128))); - -/* yuv2rgb ppu/spu exchange parms */ -struct yuv2rgb_parms_t { - uint8_t* y_plane; - uint8_t* v_plane; - uint8_t* u_plane; - - uint8_t* dstBuffer; - - unsigned int src_pixel_width; - unsigned int src_pixel_height; - - /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */ - char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) & 0x7F)]; -} __attribute__((aligned(128))); - -/* bilin_scaler ppu/spu exchange parms */ -struct scale_parms_t { - uint8_t* y_plane; - uint8_t* v_plane; - uint8_t* u_plane; - - uint8_t* dstBuffer; - - unsigned int src_pixel_width; - unsigned int src_pixel_height; - - unsigned int dst_pixel_width; - unsigned int dst_pixel_height; - - /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */ - char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) & 0x7F)]; -} __attribute__((aligned(128))); - -#endif /* _SPU_COMMON_H */ - - diff -r edaf3e364a05 -r 187d7d446306 src/video/ps3/spulibs/yuv2rgb.c --- a/src/video/ps3/spulibs/yuv2rgb.c Wed Jan 19 22:21:31 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,662 +0,0 @@ -/* - * SDL - Simple DirectMedia Layer - * CELL BE Support for PS3 Framebuffer - * Copyright (C) 2008, 2009 International Business Machines Corporation - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - * - * Martin Lowinski - * Dirk Herrendoerfer - * SPE code based on research by: - * Rene Becker - * Thimo Emmerich - */ - -#include "spu_common.h" - -#include -#include - -// Debugging -//#define DEBUG - -// Test environment for /2 resolutions -//#define TESTING - -#ifdef DEBUG -#define deprintf(fmt, args... ) \ - fprintf( stdout, fmt, ##args ); \ - fflush( stdout ); -#else -#define deprintf( fmt, args... ) -#endif - -struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128))); - -/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored - * there might be the need to retrieve misaligned data, adjust - * incoming v and u plane to be able to handle this (add 128) - */ -unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128))); -unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); -unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); - -/* A maximum of 4 lines BGRA are stored, 4 byte per pixel */ -unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128))); - -/* some vectors needed by the float to int conversion */ -static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; -static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; - -void yuv_to_rgb_w16(); -void yuv_to_rgb_w32(); - -void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width); -void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width); - - -int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused))) -{ - deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id); - uint32_t ea_mfc, mbox; - // send ready message - spu_write_out_mbox(SPU_READY); - - while (1) { - /* Check mailbox */ - mbox = spu_read_in_mbox(); - deprintf("[SPU] Message is %u\n", mbox); - switch (mbox) { - case SPU_EXIT: - deprintf("[SPU] yuv2rgb_converter goes down...\n"); - return 0; - case SPU_START: - break; - default: - deprintf("[SPU] Cannot handle message\n"); - continue; - } - - /* Tag Manager setup */ - unsigned int tag_id; - tag_id = mfc_multi_tag_reserve(1); - if (tag_id == MFC_TAG_INVALID) { - deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n"); - return 0; - } - - /* DMA transfer for the input parameters */ - ea_mfc = spu_read_in_mbox(); - deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc); - spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD); - DMA_WAIT_TAG(tag_id); - - /* There are alignment issues that involve handling of special cases - * a width of 32 results in a width of 16 in the chrominance - * --> choose the proper handling to optimize the performance - */ - deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height); - if (!(parms_converter.src_pixel_width & 0x1f)) { - deprintf("[SPU] Using yuv_to_rgb_w16\n"); - yuv_to_rgb_w16(); - } else { - deprintf("[SPU] Using yuv_to_rgb_w32\n"); - yuv_to_rgb_w32(); - } - - mfc_multi_tag_release(tag_id, 1); - deprintf("[SPU] yuv2rgb_spu... done!\n"); - /* Send FIN message */ - spu_write_out_mbox(SPU_FIN); - } - - return 0; -} - - -/* - * float_to_char() - * - * converts a float to a character using saturated - * arithmetic - * - * @param s float for conversion - * @returns converted character - */ -inline static unsigned char float_to_char(float s) { - vector float vec_s = spu_splats(s); - vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); - vec_s = spu_sel(vec_s, vec_0_1, select_1); - - vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); - vec_s = spu_sel(vec_s, vec_255, select_2); - return (unsigned char) spu_extract(vec_s,0); -} - - -/* - * vfloat_to_vuint() - * - * converts a float vector to an unsinged int vector using saturated - * arithmetic - * - * @param vec_s float vector for conversion - * @returns converted unsigned int vector - */ -inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { - vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); - vec_s = spu_sel(vec_s, vec_0_1, select_1); - - vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); - vec_s = spu_sel(vec_s, vec_255, select_2); - return spu_convtu(vec_s,0); -} - - -void yuv_to_rgb_w16() { - // Pixel dimensions of the picture - uint32_t width, height; - - // Extract parameters - width = parms_converter.src_pixel_width; - height = parms_converter.src_pixel_height; - - // Plane data management - // Y - unsigned char* ram_addr_y = parms_converter.y_plane; - // V - unsigned char* ram_addr_v = parms_converter.v_plane; - // U - unsigned char* ram_addr_u = parms_converter.u_plane; - - // BGRA - unsigned char* ram_addr_bgra = parms_converter.dstBuffer; - - // Strides - unsigned int stride_y = width; - unsigned int stride_vu = width>>1; - - // Buffer management - unsigned int buf_idx = 0; - unsigned int size_4lines_y = stride_y<<2; - unsigned int size_2lines_y = stride_y<<1; - unsigned int size_2lines_vu = stride_vu<<1; - - // 2*width*4byte_per_pixel - unsigned int size_2lines_bgra = width<<3; - - - // start double-buffered processing - // 4 lines y - spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); - - // 2 lines v - spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); - - // 2 lines u - spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); - - // Wait for these transfers to be completed - DMA_WAIT_TAG((RETR_BUF + buf_idx)); - - unsigned int i; - for(i=0; i<(height>>2)-1; i++) { - - buf_idx^=1; - - // 4 lines y - spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); - - // 2 lines v - spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); - - // 2 lines u - spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); - - DMA_WAIT_TAG((RETR_BUF + buf_idx)); - - buf_idx^=1; - - - // Convert YUV to BGRA, store it back (first two lines) -#ifndef TESTING - yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); -#else - yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); -#endif - - // Wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); - - // Store converted lines in two steps->max transfer size 16384 - spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - - // Move 4 lines - ram_addr_y += size_4lines_y; - ram_addr_v += size_2lines_vu; - ram_addr_u += size_2lines_vu; - - buf_idx^=1; - } - -#ifndef TESTING - // Convert YUV to BGRA, store it back (first two lines) - yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); -#else - // Convert YUV to BGRA, store it back (first two lines) - yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); -#endif - - // Wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); - spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - - // wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); - -} - - -void yuv_to_rgb_w32() { - // Pixel dimensions of the picture - uint32_t width, height; - - // Extract parameters - width = parms_converter.src_pixel_width; - height = parms_converter.src_pixel_height; - - // Plane data management - // Y - unsigned char* ram_addr_y = parms_converter.y_plane; - // V - unsigned char* ram_addr_v = parms_converter.v_plane; - // U - unsigned char* ram_addr_u = parms_converter.u_plane; - - // BGRA - unsigned char* ram_addr_bgra = parms_converter.dstBuffer; - - // Strides - unsigned int stride_y = width; - unsigned int stride_vu = width>>1; - - // Buffer management - unsigned int buf_idx = 0; - unsigned int size_4lines_y = stride_y<<2; - unsigned int size_2lines_y = stride_y<<1; - unsigned int size_2lines_vu = stride_vu<<1; - - // 2*width*4byte_per_pixel - unsigned int size_2lines_bgra = width<<3; - - // start double-buffered processing - // 4 lines y - spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); - // 2 lines v - spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); - // 2 lines u - spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); - - // Wait for these transfers to be completed - DMA_WAIT_TAG((RETR_BUF + buf_idx)); - - unsigned int i; - for(i=0; i < (height>>2)-1; i++) { - buf_idx^=1; - // 4 lines y - spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); - deprintf("4lines = %d\n", size_4lines_y); - // 2 lines v - spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); - deprintf("2lines = %d\n", size_2lines_vu); - // 2 lines u - spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); - deprintf("2lines = %d\n", size_2lines_vu); - - DMA_WAIT_TAG((RETR_BUF + buf_idx)); - - buf_idx^=1; - - // Convert YUV to BGRA, store it back (first two lines) - yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); - - // Wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); - - // Store converted lines in two steps->max transfer size 16384 - spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - - // Move 4 lines - ram_addr_y += size_4lines_y; - ram_addr_v += size_2lines_vu; - ram_addr_u += size_2lines_vu; - - buf_idx^=1; - } - - // Convert YUV to BGRA, store it back (first two lines) - yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); - - // Next two lines - yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, - v_plane[buf_idx] + stride_vu, - u_plane[buf_idx] + stride_vu, - bgra + size_2lines_bgra, - width); - - // Wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); - spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - ram_addr_bgra += size_2lines_bgra; - spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); - - // Wait for previous storing transfer to be completed - DMA_WAIT_TAG(STR_BUF); -} - - -/* Some vectors needed by the yuv 2 rgb conversion algorithm */ -const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f }; -const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 }; -const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 }; -const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B }; -const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F }; - -const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f}; -const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f}; -const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f}; -const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f}; - -const vector unsigned int vec_alpha = { 255 << 24, 255 << 24, 255 << 24, 255 << 24 }; - -const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 }; -const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F }; - - -#ifdef TESTING -/* - * yuv_to_rgb_w2() - * - * - converts x * 4 pixels from YUV to RGB - * - two lines of YUV are taken as input. - * - width has to be a multiple of 2 (= 4 pixel) - * - * @param y_addr address of the y plane (local store) - * @param v_addr address of the v plane (local store) - * @param u_addr address of the u plane (local store) - * @param bgra_addr_char address of the bgra output buffer (local store) - * @param width the width of a line in pixel - */ -void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_char, unsigned int width) { - // each pixel is stored as an integer - unsigned int* bgra_addr = (unsigned int*) bgra_addr_char; - - unsigned int x; - // Go through each line in steps of 2, because every U and V value is connected to 4 pixels Y (YUV 4:2:0) - for(x = 0; x < width; x+=2) { - // Get the 4 Y, 1 U and 1 V values - const unsigned char Y_1 = *(y_addr + x); - const unsigned char Y_2 = *(y_addr + x + 1); - const unsigned char Y_3 = *(y_addr + x + width); - const unsigned char Y_4 = *(y_addr + x + width + 1); - const unsigned char U = *(u_addr + (x >> 1)); - const unsigned char V = *(v_addr + (x >> 1)); - - // Start converting - float V_minus_128 = (float)((float)V - 128.0f); - float U_minus_128 = (float)((float)U - 128.0f); - - float R_precalculate = 1.403f * V_minus_128; - float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128); - float B_precalculate = 1.773f * U_minus_128; - - // Cast the results - const unsigned char R_1 = float_to_char((Y_1 + R_precalculate)); - const unsigned char R_2 = float_to_char((Y_2 + R_precalculate)); - const unsigned char R_3 = float_to_char((Y_3 + R_precalculate)); - const unsigned char R_4 = float_to_char((Y_4 + R_precalculate)); - const unsigned char G_1 = float_to_char((Y_1 + G_precalculate)); - const unsigned char G_2 = float_to_char((Y_2 + G_precalculate)); - const unsigned char G_3 = float_to_char((Y_3 + G_precalculate)); - const unsigned char G_4 = float_to_char((Y_4 + G_precalculate)); - const unsigned char B_1 = float_to_char((Y_1 + B_precalculate)); - const unsigned char B_2 = float_to_char((Y_2 + B_precalculate)); - const unsigned char B_3 = float_to_char((Y_3 + B_precalculate)); - const unsigned char B_4 = float_to_char((Y_4 + B_precalculate)); - - // Write back - *(bgra_addr + x) = (B_1 << 0)| (G_1 << 8) | (R_1 << 16) | (255 << 24); - *(bgra_addr + x + 1) = (B_2 << 0)| (G_2 << 8) | (R_2 << 16) | (255 << 24); - *(bgra_addr + x + width) = (B_3 << 0)| (G_3 << 8) | (R_3 << 16) | (255 << 24); - *(bgra_addr + x + width + 1) = (B_4 << 0)| (G_4 << 8) | (R_4 << 16) | (255 << 24); - } -} -#endif - - -/* - * yuv_to_rgb_w32() - * - * processes to line of yuv-input, width has to be a multiple of 32 - * two lines of yuv are taken as input - * - * @param y_addr address of the y plane in local store - * @param v_addr address of the v plane in local store - * @param u_addr address of the u plane in local store - * @param bgra_addr_ address of the bgra output buffer - * @param width the width in pixel - */ -void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) { - // each pixel is stored as an integer - unsigned int* bgra_addr = (unsigned int*) bgra_addr_; - - unsigned int x; - for(x = 0; x < width; x+=32) { - // Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt - - const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x)); - const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16)); - const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width)); - const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16)); - const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1))); - const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1))); - - const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128); - const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128); - const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128); - const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128); - - const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128); - const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128); - const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128); - const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128); - - vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0); - vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0); - vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0); - vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0); - vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0); - vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0); - vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0); - vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0); - vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0); - vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0); - vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0); - vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0); - vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0); - vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0); - vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0); - vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0); - - const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1); - const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2); - const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3); - const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4); - - const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper); - const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower); - const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper); - const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower); - const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper); - const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower); - const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper); - const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower); - - - const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff)); - const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff)); - const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff)); - const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff)); - - const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper); - const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower); - const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper); - const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower); - const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper); - const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower); - const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper); - const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower); - - - const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1); - const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2); - const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3); - const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4); - - const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper); - const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower); - const vector float B3_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper); - const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower); - const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper); - const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower); - const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper); - const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower); - - - const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate)); - const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate)); - const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate)); - const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate)); - const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate)); - const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate)); - const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate)); - const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate)); - const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate)); - const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate)); - const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate)); - const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate)); - const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate)); - const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate)); - const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate)); - const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate)); - - const vector unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate)); - const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate)); - const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate)); - const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate)); - const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate)); - const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate)); - const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate)); - const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate)); - const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate)); - const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate)); - const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate)); - const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate)); - const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate)); - const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate)); - const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate)); - const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate)); - - const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate)); - const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate)); - const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate)); - const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate)); - const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate)); - const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate)); - const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate)); - const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate)); - const vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate)); - const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate)); - const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate)); - const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate)); - const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate)); - const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate)); - const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate)); - const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate)); - - *((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1))); - *((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1))); - *((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1))); - *((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1))); - *((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1))); - *((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1))); - *((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1))); - *((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1))); - *((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1))); - *((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1))); - } -} -