Mercurial > sdl-ios-xcode
comparison src/video/ps3/spulibs/bilin_scaler.c @ 3148:104786a909a2 gsoc2009_ps3
Scaling (bilinear) of YUV-Textures working.
author | Martin Lowinski <martin@goldtopf.org> |
---|---|
date | Fri, 19 Jun 2009 05:22:00 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3147:a80760096937 | 3148:104786a909a2 |
---|---|
1 /* | |
2 * SDL - Simple DirectMedia Layer | |
3 * CELL BE Support for PS3 Framebuffer | |
4 * Copyright (C) 2008, 2009 International Business Machines Corporation | |
5 * | |
6 * This library is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU Lesser General Public License as published | |
8 * by the Free Software Foundation; either version 2.1 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, but | |
12 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
19 * USA | |
20 * | |
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | |
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | |
23 * SPE code based on research by: | |
24 * Rene Becker | |
25 * Thimo Emmerich | |
26 */ | |
27 | |
28 #include "spu_common.h" | |
29 | |
30 #include <spu_intrinsics.h> | |
31 #include <spu_mfcio.h> | |
32 | |
// Debugging: uncomment the next line to get trace output on stdout
//#define DEBUG

/*
 * deprintf( fmt, ... )
 *
 * printf-style trace macro. With DEBUG defined it prints to stdout and
 * flushes immediately; without DEBUG it does nothing and its arguments
 * are not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro always
 * expands to exactly one statement. This keeps constructs such as
 *     if (cond) deprintf("..."); else ...
 * well-formed. Every use must be terminated by a semicolon.
 */
#ifdef DEBUG
#define deprintf(fmt, args... ) \
    do { \
        fprintf( stdout, fmt, ##args ); \
        fflush( stdout ); \
    } while (0)
#else
#define deprintf( fmt, args... ) do { } while (0)
#endif
43 | |
44 struct scale_parms_t parms __attribute__((aligned(128))); | |
45 | |
46 /* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored | |
47 * there might be the need to retrieve misaligned data, adjust | |
48 * incoming v and u plane to be able to handle this (add 128) | |
49 */ | |
50 unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128))); | |
51 unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); | |
52 unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); | |
53 | |
54 /* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */ | |
55 unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128))); | |
56 unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); | |
57 unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); | |
58 | |
59 /* some vectors needed by the float to int conversion */ | |
60 static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; | |
61 static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; | |
62 | |
63 void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); | |
64 void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); | |
65 | |
66 void scale_srcw16_dstw16(); | |
67 void scale_srcw16_dstw32(); | |
68 void scale_srcw32_dstw16(); | |
69 void scale_srcw32_dstw32(); | |
70 | |
71 int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp ) | |
72 { | |
73 deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id); | |
74 /* DMA transfer for the input parameters */ | |
75 spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD); | |
76 DMA_WAIT_TAG(TAG_INIT); | |
77 | |
78 deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height, | |
79 parms.dst_pixel_width, parms.dst_pixel_height); | |
80 | |
81 if(parms.src_pixel_width & 0x1f) { | |
82 if(parms.dst_pixel_width & 0x1F) { | |
83 deprintf("[SPU] Using scale_srcw16_dstw16\n"); | |
84 scale_srcw16_dstw16(); | |
85 } else { | |
86 deprintf("[SPU] Using scale_srcw16_dstw32\n"); | |
87 scale_srcw16_dstw32(); | |
88 } | |
89 } else { | |
90 if(parms.dst_pixel_width & 0x1F) { | |
91 deprintf("[SPU] Using scale_srcw32_dstw16\n"); | |
92 scale_srcw32_dstw16(); | |
93 } else { | |
94 deprintf("[SPU] Using scale_srcw32_dstw32\n"); | |
95 scale_srcw32_dstw32(); | |
96 } | |
97 } | |
98 deprintf("[SPU] bilin_scaler_spu... done!\n"); | |
99 | |
100 return 0; | |
101 } | |
102 | |
103 | |
104 /* | |
105 * vfloat_to_vuint() | |
106 * | |
107 * converts a float vector to an unsinged int vector using saturated | |
108 * arithmetic | |
109 * | |
110 * @param vec_s float vector for conversion | |
111 * @returns converted unsigned int vector | |
112 */ | |
113 inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { | |
114 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); | |
115 vec_s = spu_sel(vec_s, vec_0_1, select_1); | |
116 | |
117 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); | |
118 vec_s = spu_sel(vec_s, vec_255, select_2); | |
119 return spu_convtu(vec_s,0); | |
120 } | |
121 | |
122 | |
123 /* | |
124 * scale_srcw16_dstw16() | |
125 * | |
126 * processes an input image of width 16 | |
127 * scaling is done to a width 16 | |
128 * result stored in RAM | |
129 */ | |
130 void scale_srcw16_dstw16() { | |
131 // extract parameters | |
132 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | |
133 | |
134 unsigned int src_width = parms.src_pixel_width; | |
135 unsigned int src_height = parms.src_pixel_height; | |
136 unsigned int dst_width = parms.dst_pixel_width; | |
137 unsigned int dst_height = parms.dst_pixel_height; | |
138 | |
139 // YVU | |
140 unsigned int src_linestride_y = src_width; | |
141 unsigned int src_dbl_linestride_y = src_width<<1; | |
142 unsigned int src_linestride_vu = src_width>>1; | |
143 unsigned int src_dbl_linestride_vu = src_width; | |
144 | |
145 // scaled YVU | |
146 unsigned int scaled_src_linestride_y = dst_width; | |
147 | |
148 // ram addresses | |
149 unsigned char* src_addr_y = parms.y_plane; | |
150 unsigned char* src_addr_v = parms.v_plane; | |
151 unsigned char* src_addr_u = parms.u_plane; | |
152 | |
153 // for handling misalignment, addresses are precalculated | |
154 unsigned char* precalc_src_addr_v = src_addr_v; | |
155 unsigned char* precalc_src_addr_u = src_addr_u; | |
156 | |
157 unsigned int dst_picture_size = dst_width*dst_height; | |
158 | |
159 // Sizes for destination | |
160 unsigned int dst_dbl_linestride_y = dst_width<<1; | |
161 unsigned int dst_dbl_linestride_vu = dst_width>>1; | |
162 | |
163 // Perform address calculation for Y, V and U in main memory with dst_addr as base | |
164 unsigned char* dst_addr_main_memory_y = dst_addr; | |
165 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | |
166 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | |
167 | |
168 // calculate scale factors | |
169 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | |
170 float y_scale = (float)src_height/(float)dst_height; | |
171 | |
172 // double buffered processing | |
173 // buffer switching | |
174 unsigned int curr_src_idx = 0; | |
175 unsigned int curr_dst_idx = 0; | |
176 unsigned int next_src_idx, next_dst_idx; | |
177 | |
178 // 2 lines y as output, upper and lowerline | |
179 unsigned int curr_interpl_y_upper = 0; | |
180 unsigned int next_interpl_y_upper; | |
181 unsigned int curr_interpl_y_lower, next_interpl_y_lower; | |
182 // only 1 line v/u output, both planes have the same dimension | |
183 unsigned int curr_interpl_vu = 0; | |
184 unsigned int next_interpl_vu; | |
185 | |
186 // weights, calculated in every loop iteration | |
187 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
188 vector float vf_next_NSweight_y_upper; | |
189 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | |
190 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
191 vector float vf_next_NSweight_vu; | |
192 | |
193 // line indices for the src picture | |
194 float curr_src_y_upper = 0.0f, next_src_y_upper; | |
195 float curr_src_y_lower, next_src_y_lower; | |
196 float curr_src_vu = 0.0f, next_src_vu; | |
197 | |
198 // line indices for the dst picture | |
199 unsigned int dst_y=0, dst_vu=0; | |
200 | |
201 // offset for the v and u plane to handle misalignement | |
202 unsigned int curr_lsoff_v = 0, next_lsoff_v; | |
203 unsigned int curr_lsoff_u = 0, next_lsoff_u; | |
204 | |
205 // calculate lower line indices | |
206 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | |
207 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | |
208 // lower line weight | |
209 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | |
210 | |
211 | |
212 // start partially double buffered processing | |
213 // get initial data, 2 sets of y, 1 set v, 1 set u | |
214 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | |
215 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
216 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | |
217 src_dbl_linestride_y, | |
218 RETR_BUF, | |
219 0, 0 ); | |
220 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
221 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
222 | |
223 /* iteration loop | |
224 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | |
225 * the scaled output is 2 lines y, 1 line v, 1 line u | |
226 * the yuv2rgb-converted output is stored to RAM | |
227 */ | |
228 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | |
229 dst_y = dst_vu<<1; | |
230 | |
231 // calculate next indices | |
232 next_src_vu = ((float)dst_vu+1)*y_scale; | |
233 next_src_y_upper = ((float)dst_y+2)*y_scale; | |
234 next_src_y_lower = ((float)dst_y+3)*y_scale; | |
235 | |
236 next_interpl_vu = (unsigned int) next_src_vu; | |
237 next_interpl_y_upper = (unsigned int) next_src_y_upper; | |
238 next_interpl_y_lower = (unsigned int) next_src_y_lower; | |
239 | |
240 // calculate weight NORTH-SOUTH | |
241 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | |
242 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | |
243 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | |
244 | |
245 // get next lines | |
246 next_src_idx = curr_src_idx^1; | |
247 next_dst_idx = curr_dst_idx^1; | |
248 | |
249 // 4 lines y | |
250 mfc_get( y_plane[next_src_idx], | |
251 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | |
252 src_dbl_linestride_y, | |
253 RETR_BUF+next_src_idx, | |
254 0, 0 ); | |
255 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | |
256 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | |
257 src_dbl_linestride_y, | |
258 RETR_BUF+next_src_idx, | |
259 0, 0 ); | |
260 | |
261 // 2 lines v | |
262 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); | |
263 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; | |
264 mfc_get( v_plane[next_src_idx], | |
265 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, | |
266 src_dbl_linestride_vu+(next_lsoff_v<<1), | |
267 RETR_BUF+next_src_idx, | |
268 0, 0 ); | |
269 // 2 lines u | |
270 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); | |
271 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; | |
272 mfc_get( u_plane[next_src_idx], | |
273 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, | |
274 src_dbl_linestride_vu+(next_lsoff_v<<1), | |
275 RETR_BUF+next_src_idx, | |
276 0, 0 ); | |
277 | |
278 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
279 | |
280 // scaling | |
281 // work line y_upper | |
282 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
283 scaled_y_plane[curr_src_idx], | |
284 dst_width, | |
285 vf_x_scale, | |
286 vf_curr_NSweight_y_upper, | |
287 src_linestride_y ); | |
288 // work line y_lower | |
289 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
290 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
291 dst_width, | |
292 vf_x_scale, | |
293 vf_curr_NSweight_y_lower, | |
294 src_linestride_y ); | |
295 // work line v | |
296 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | |
297 scaled_v_plane[curr_src_idx], | |
298 dst_width>>1, | |
299 vf_x_scale, | |
300 vf_curr_NSweight_vu, | |
301 src_linestride_vu ); | |
302 // work line u | |
303 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | |
304 scaled_u_plane[curr_src_idx], | |
305 dst_width>>1, | |
306 vf_x_scale, | |
307 vf_curr_NSweight_vu, | |
308 src_linestride_vu ); | |
309 | |
310 | |
311 // Store the result back to main memory into a destination buffer in YUV format | |
312 //--------------------------------------------------------------------------------------------- | |
313 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
314 | |
315 // Perform three DMA transfers to 3 different locations in the main memory! | |
316 // dst_width: Pixel width of destination image | |
317 // dst_addr: Destination address in main memory | |
318 // dst_vu: Counter which is incremented one by one | |
319 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
320 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
321 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
322 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
323 STR_BUF+curr_dst_idx, // Tag | |
324 0, 0 ); | |
325 | |
326 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
327 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
328 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
329 STR_BUF+curr_dst_idx, // Tag | |
330 0, 0 ); | |
331 | |
332 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
333 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
334 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
335 STR_BUF+curr_dst_idx, // Tag | |
336 0, 0 ); | |
337 //--------------------------------------------------------------------------------------------- | |
338 | |
339 | |
340 // update for next cycle | |
341 curr_src_idx = next_src_idx; | |
342 curr_dst_idx = next_dst_idx; | |
343 | |
344 curr_interpl_y_upper = next_interpl_y_upper; | |
345 curr_interpl_y_lower = next_interpl_y_lower; | |
346 curr_interpl_vu = next_interpl_vu; | |
347 | |
348 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | |
349 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | |
350 vf_curr_NSweight_vu = vf_next_NSweight_vu; | |
351 | |
352 curr_src_y_upper = next_src_y_upper; | |
353 curr_src_y_lower = next_src_y_lower; | |
354 curr_src_vu = next_src_vu; | |
355 | |
356 curr_lsoff_v = next_lsoff_v; | |
357 curr_lsoff_u = next_lsoff_u; | |
358 } | |
359 | |
360 | |
361 | |
362 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
363 | |
364 // scaling | |
365 // work line y_upper | |
366 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
367 scaled_y_plane[curr_src_idx], | |
368 dst_width, | |
369 vf_x_scale, | |
370 vf_curr_NSweight_y_upper, | |
371 src_linestride_y ); | |
372 // work line y_lower | |
373 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
374 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
375 dst_width, | |
376 vf_x_scale, | |
377 vf_curr_NSweight_y_lower, | |
378 src_linestride_y ); | |
379 // work line v | |
380 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | |
381 scaled_v_plane[curr_src_idx], | |
382 dst_width>>1, | |
383 vf_x_scale, | |
384 vf_curr_NSweight_vu, | |
385 src_linestride_vu ); | |
386 // work line u | |
387 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | |
388 scaled_u_plane[curr_src_idx], | |
389 dst_width>>1, | |
390 vf_x_scale, | |
391 vf_curr_NSweight_vu, | |
392 src_linestride_vu ); | |
393 | |
394 | |
395 // Store the result back to main memory into a destination buffer in YUV format | |
396 //--------------------------------------------------------------------------------------------- | |
397 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
398 | |
399 // Perform three DMA transfers to 3 different locations in the main memory! | |
400 // dst_width: Pixel width of destination image | |
401 // dst_addr: Destination address in main memory | |
402 // dst_vu: Counter which is incremented one by one | |
403 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
404 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
405 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
406 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
407 STR_BUF+curr_dst_idx, // Tag | |
408 0, 0 ); | |
409 | |
410 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
411 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
412 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
413 STR_BUF+curr_dst_idx, // Tag | |
414 0, 0 ); | |
415 | |
416 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
417 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
418 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
419 STR_BUF+curr_dst_idx, // Tag | |
420 0, 0 ); | |
421 | |
422 // wait for completion | |
423 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
424 //--------------------------------------------------------------------------------------------- | |
425 } | |
426 | |
427 | |
428 /* | |
429 * scale_srcw16_dstw32() | |
430 * | |
431 * processes an input image of width 16 | |
432 * scaling is done to a width 32 | |
433 * yuv2rgb conversion on a width of 32 | |
434 * result stored in RAM | |
435 */ | |
436 void scale_srcw16_dstw32() { | |
437 // extract parameters | |
438 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | |
439 | |
440 unsigned int src_width = parms.src_pixel_width; | |
441 unsigned int src_height = parms.src_pixel_height; | |
442 unsigned int dst_width = parms.dst_pixel_width; | |
443 unsigned int dst_height = parms.dst_pixel_height; | |
444 | |
445 // YVU | |
446 unsigned int src_linestride_y = src_width; | |
447 unsigned int src_dbl_linestride_y = src_width<<1; | |
448 unsigned int src_linestride_vu = src_width>>1; | |
449 unsigned int src_dbl_linestride_vu = src_width; | |
450 // scaled YVU | |
451 unsigned int scaled_src_linestride_y = dst_width; | |
452 | |
453 // ram addresses | |
454 unsigned char* src_addr_y = parms.y_plane; | |
455 unsigned char* src_addr_v = parms.v_plane; | |
456 unsigned char* src_addr_u = parms.u_plane; | |
457 | |
458 unsigned int dst_picture_size = dst_width*dst_height; | |
459 | |
460 // Sizes for destination | |
461 unsigned int dst_dbl_linestride_y = dst_width<<1; | |
462 unsigned int dst_dbl_linestride_vu = dst_width>>1; | |
463 | |
464 // Perform address calculation for Y, V and U in main memory with dst_addr as base | |
465 unsigned char* dst_addr_main_memory_y = dst_addr; | |
466 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | |
467 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | |
468 | |
469 | |
470 // for handling misalignment, addresses are precalculated | |
471 unsigned char* precalc_src_addr_v = src_addr_v; | |
472 unsigned char* precalc_src_addr_u = src_addr_u; | |
473 | |
474 // calculate scale factors | |
475 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | |
476 float y_scale = (float)src_height/(float)dst_height; | |
477 | |
478 // double buffered processing | |
479 // buffer switching | |
480 unsigned int curr_src_idx = 0; | |
481 unsigned int curr_dst_idx = 0; | |
482 unsigned int next_src_idx, next_dst_idx; | |
483 | |
484 // 2 lines y as output, upper and lowerline | |
485 unsigned int curr_interpl_y_upper = 0; | |
486 unsigned int next_interpl_y_upper; | |
487 unsigned int curr_interpl_y_lower, next_interpl_y_lower; | |
488 // only 1 line v/u output, both planes have the same dimension | |
489 unsigned int curr_interpl_vu = 0; | |
490 unsigned int next_interpl_vu; | |
491 | |
492 // weights, calculated in every loop iteration | |
493 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
494 vector float vf_next_NSweight_y_upper; | |
495 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | |
496 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
497 vector float vf_next_NSweight_vu; | |
498 | |
499 // line indices for the src picture | |
500 float curr_src_y_upper = 0.0f, next_src_y_upper; | |
501 float curr_src_y_lower, next_src_y_lower; | |
502 float curr_src_vu = 0.0f, next_src_vu; | |
503 | |
504 // line indices for the dst picture | |
505 unsigned int dst_y=0, dst_vu=0; | |
506 | |
507 // offset for the v and u plane to handle misalignement | |
508 unsigned int curr_lsoff_v = 0, next_lsoff_v; | |
509 unsigned int curr_lsoff_u = 0, next_lsoff_u; | |
510 | |
511 // calculate lower line idices | |
512 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | |
513 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | |
514 // lower line weight | |
515 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | |
516 | |
517 | |
518 // start partially double buffered processing | |
519 // get initial data, 2 sets of y, 1 set v, 1 set u | |
520 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | |
521 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
522 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | |
523 src_dbl_linestride_y, | |
524 RETR_BUF, | |
525 0, 0 ); | |
526 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
527 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
528 | |
529 // iteration loop | |
530 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | |
531 // the scaled output is 2 lines y, 1 line v, 1 line u | |
532 // the yuv2rgb-converted output is stored to RAM | |
533 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | |
534 dst_y = dst_vu<<1; | |
535 | |
536 // calculate next indices | |
537 next_src_vu = ((float)dst_vu+1)*y_scale; | |
538 next_src_y_upper = ((float)dst_y+2)*y_scale; | |
539 next_src_y_lower = ((float)dst_y+3)*y_scale; | |
540 | |
541 next_interpl_vu = (unsigned int) next_src_vu; | |
542 next_interpl_y_upper = (unsigned int) next_src_y_upper; | |
543 next_interpl_y_lower = (unsigned int) next_src_y_lower; | |
544 | |
545 // calculate weight NORTH-SOUTH | |
546 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | |
547 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | |
548 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | |
549 | |
550 // get next lines | |
551 next_src_idx = curr_src_idx^1; | |
552 next_dst_idx = curr_dst_idx^1; | |
553 | |
554 // 4 lines y | |
555 mfc_get( y_plane[next_src_idx], | |
556 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | |
557 src_dbl_linestride_y, | |
558 RETR_BUF+next_src_idx, | |
559 0, 0 ); | |
560 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | |
561 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | |
562 src_dbl_linestride_y, | |
563 RETR_BUF+next_src_idx, | |
564 0, 0 ); | |
565 | |
566 // 2 lines v | |
567 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); | |
568 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; | |
569 mfc_get( v_plane[next_src_idx], | |
570 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, | |
571 src_dbl_linestride_vu+(next_lsoff_v<<1), | |
572 RETR_BUF+next_src_idx, | |
573 0, 0 ); | |
574 // 2 lines u | |
575 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); | |
576 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; | |
577 mfc_get( u_plane[next_src_idx], | |
578 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, | |
579 src_dbl_linestride_vu+(next_lsoff_v<<1), | |
580 RETR_BUF+next_src_idx, | |
581 0, 0 ); | |
582 | |
583 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
584 | |
585 // scaling | |
586 // work line y_upper | |
587 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
588 scaled_y_plane[curr_src_idx], | |
589 dst_width, | |
590 vf_x_scale, | |
591 vf_curr_NSweight_y_upper, | |
592 src_linestride_y ); | |
593 // work line y_lower | |
594 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
595 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
596 dst_width, | |
597 vf_x_scale, | |
598 vf_curr_NSweight_y_lower, | |
599 src_linestride_y ); | |
600 // work line v | |
601 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | |
602 scaled_v_plane[curr_src_idx], | |
603 dst_width>>1, | |
604 vf_x_scale, | |
605 vf_curr_NSweight_vu, | |
606 src_linestride_vu ); | |
607 // work line u | |
608 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | |
609 scaled_u_plane[curr_src_idx], | |
610 dst_width>>1, | |
611 vf_x_scale, | |
612 vf_curr_NSweight_vu, | |
613 src_linestride_vu ); | |
614 | |
615 //--------------------------------------------------------------------------------------------- | |
616 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
617 | |
618 // Perform three DMA transfers to 3 different locations in the main memory! | |
619 // dst_width: Pixel width of destination image | |
620 // dst_addr: Destination address in main memory | |
621 // dst_vu: Counter which is incremented one by one | |
622 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
623 | |
624 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
625 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
626 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
627 STR_BUF+curr_dst_idx, // Tag | |
628 0, 0 ); | |
629 | |
630 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
631 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
632 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
633 STR_BUF+curr_dst_idx, // Tag | |
634 0, 0 ); | |
635 | |
636 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
637 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
638 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
639 STR_BUF+curr_dst_idx, // Tag | |
640 0, 0 ); | |
641 //--------------------------------------------------------------------------------------------- | |
642 | |
643 | |
644 // update for next cycle | |
645 curr_src_idx = next_src_idx; | |
646 curr_dst_idx = next_dst_idx; | |
647 | |
648 curr_interpl_y_upper = next_interpl_y_upper; | |
649 curr_interpl_y_lower = next_interpl_y_lower; | |
650 curr_interpl_vu = next_interpl_vu; | |
651 | |
652 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | |
653 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | |
654 vf_curr_NSweight_vu = vf_next_NSweight_vu; | |
655 | |
656 curr_src_y_upper = next_src_y_upper; | |
657 curr_src_y_lower = next_src_y_lower; | |
658 curr_src_vu = next_src_vu; | |
659 | |
660 curr_lsoff_v = next_lsoff_v; | |
661 curr_lsoff_u = next_lsoff_u; | |
662 } | |
663 | |
664 | |
665 | |
666 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
667 | |
668 // scaling | |
669 // work line y_upper | |
670 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
671 scaled_y_plane[curr_src_idx], | |
672 dst_width, | |
673 vf_x_scale, | |
674 vf_curr_NSweight_y_upper, | |
675 src_linestride_y ); | |
676 // work line y_lower | |
677 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
678 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
679 dst_width, | |
680 vf_x_scale, | |
681 vf_curr_NSweight_y_lower, | |
682 src_linestride_y ); | |
683 // work line v | |
684 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | |
685 scaled_v_plane[curr_src_idx], | |
686 dst_width>>1, | |
687 vf_x_scale, | |
688 vf_curr_NSweight_vu, | |
689 src_linestride_vu ); | |
690 // work line u | |
691 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | |
692 scaled_u_plane[curr_src_idx], | |
693 dst_width>>1, | |
694 vf_x_scale, | |
695 vf_curr_NSweight_vu, | |
696 src_linestride_vu ); | |
697 | |
698 //--------------------------------------------------------------------------------------------- | |
699 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
700 | |
701 // Perform three DMA transfers to 3 different locations in the main memory! | |
702 // dst_width: Pixel width of destination image | |
703 // dst_addr: Destination address in main memory | |
704 // dst_vu: Counter which is incremented one by one | |
705 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
706 | |
707 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
708 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
709 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
710 STR_BUF+curr_dst_idx, // Tag | |
711 0, 0 ); | |
712 | |
713 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
714 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
715 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
716 STR_BUF+curr_dst_idx, // Tag | |
717 0, 0 ); | |
718 | |
719 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
720 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
721 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
722 STR_BUF+curr_dst_idx, // Tag | |
723 0, 0 ); | |
724 | |
725 // wait for completion | |
726 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
727 //--------------------------------------------------------------------------------------------- | |
728 } | |
729 | |
730 | |
731 /* | |
732 * scale_srcw32_dstw16() | |
733 * | |
734 * processes an input image of width 32 | |
735 * scaling is done to a width 16 | |
736 * yuv2rgb conversion on a width of 16 | |
737 * result stored in RAM | |
738 */ | |
739 void scale_srcw32_dstw16() { | |
740 // extract parameters | |
741 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | |
742 | |
743 unsigned int src_width = parms.src_pixel_width; | |
744 unsigned int src_height = parms.src_pixel_height; | |
745 unsigned int dst_width = parms.dst_pixel_width; | |
746 unsigned int dst_height = parms.dst_pixel_height; | |
747 | |
748 // YVU | |
749 unsigned int src_linestride_y = src_width; | |
750 unsigned int src_dbl_linestride_y = src_width<<1; | |
751 unsigned int src_linestride_vu = src_width>>1; | |
752 unsigned int src_dbl_linestride_vu = src_width; | |
753 // scaled YVU | |
754 unsigned int scaled_src_linestride_y = dst_width; | |
755 | |
756 // ram addresses | |
757 unsigned char* src_addr_y = parms.y_plane; | |
758 unsigned char* src_addr_v = parms.v_plane; | |
759 unsigned char* src_addr_u = parms.u_plane; | |
760 | |
761 unsigned int dst_picture_size = dst_width*dst_height; | |
762 | |
763 // Sizes for destination | |
764 unsigned int dst_dbl_linestride_y = dst_width<<1; | |
765 unsigned int dst_dbl_linestride_vu = dst_width>>1; | |
766 | |
767 // Perform address calculation for Y, V and U in main memory with dst_addr as base | |
768 unsigned char* dst_addr_main_memory_y = dst_addr; | |
769 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | |
770 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | |
771 | |
772 // calculate scale factors | |
773 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | |
774 float y_scale = (float)src_height/(float)dst_height; | |
775 | |
776 // double buffered processing | |
777 // buffer switching | |
778 unsigned int curr_src_idx = 0; | |
779 unsigned int curr_dst_idx = 0; | |
780 unsigned int next_src_idx, next_dst_idx; | |
781 | |
782 // 2 lines y as output, upper and lowerline | |
783 unsigned int curr_interpl_y_upper = 0; | |
784 unsigned int next_interpl_y_upper; | |
785 unsigned int curr_interpl_y_lower, next_interpl_y_lower; | |
786 // only 1 line v/u output, both planes have the same dimension | |
787 unsigned int curr_interpl_vu = 0; | |
788 unsigned int next_interpl_vu; | |
789 | |
790 // weights, calculated in every loop iteration | |
791 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
792 vector float vf_next_NSweight_y_upper; | |
793 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | |
794 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
795 vector float vf_next_NSweight_vu; | |
796 | |
797 // line indices for the src picture | |
798 float curr_src_y_upper = 0.0f, next_src_y_upper; | |
799 float curr_src_y_lower, next_src_y_lower; | |
800 float curr_src_vu = 0.0f, next_src_vu; | |
801 | |
802 // line indices for the dst picture | |
803 unsigned int dst_y=0, dst_vu=0; | |
804 | |
805 // calculate lower line idices | |
806 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | |
807 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | |
808 // lower line weight | |
809 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | |
810 | |
811 | |
812 // start partially double buffered processing | |
813 // get initial data, 2 sets of y, 1 set v, 1 set u | |
814 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | |
815 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
816 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | |
817 src_dbl_linestride_y, | |
818 RETR_BUF, | |
819 0, 0 ); | |
820 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
821 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
822 | |
823 // iteration loop | |
824 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | |
825 // the scaled output is 2 lines y, 1 line v, 1 line u | |
826 // the yuv2rgb-converted output is stored to RAM | |
827 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | |
828 dst_y = dst_vu<<1; | |
829 | |
830 // calculate next indices | |
831 next_src_vu = ((float)dst_vu+1)*y_scale; | |
832 next_src_y_upper = ((float)dst_y+2)*y_scale; | |
833 next_src_y_lower = ((float)dst_y+3)*y_scale; | |
834 | |
835 next_interpl_vu = (unsigned int) next_src_vu; | |
836 next_interpl_y_upper = (unsigned int) next_src_y_upper; | |
837 next_interpl_y_lower = (unsigned int) next_src_y_lower; | |
838 | |
839 // calculate weight NORTH-SOUTH | |
840 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | |
841 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | |
842 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | |
843 | |
844 // get next lines | |
845 next_src_idx = curr_src_idx^1; | |
846 next_dst_idx = curr_dst_idx^1; | |
847 | |
848 // 4 lines y | |
849 mfc_get( y_plane[next_src_idx], | |
850 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | |
851 src_dbl_linestride_y, | |
852 RETR_BUF+next_src_idx, | |
853 0, 0 ); | |
854 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | |
855 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | |
856 src_dbl_linestride_y, | |
857 RETR_BUF+next_src_idx, | |
858 0, 0 ); | |
859 | |
860 // 2 lines v | |
861 mfc_get( v_plane[next_src_idx], | |
862 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), | |
863 src_dbl_linestride_vu, | |
864 RETR_BUF+next_src_idx, | |
865 0, 0 ); | |
866 // 2 lines u | |
867 mfc_get( u_plane[next_src_idx], | |
868 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), | |
869 src_dbl_linestride_vu, | |
870 RETR_BUF+next_src_idx, | |
871 0, 0 ); | |
872 | |
873 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
874 | |
875 // scaling | |
876 // work line y_upper | |
877 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
878 scaled_y_plane[curr_src_idx], | |
879 dst_width, | |
880 vf_x_scale, | |
881 vf_curr_NSweight_y_upper, | |
882 src_linestride_y ); | |
883 // work line y_lower | |
884 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
885 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
886 dst_width, | |
887 vf_x_scale, | |
888 vf_curr_NSweight_y_lower, | |
889 src_linestride_y ); | |
890 // work line v | |
891 bilinear_scale_line_w16( v_plane[curr_src_idx], | |
892 scaled_v_plane[curr_src_idx], | |
893 dst_width>>1, | |
894 vf_x_scale, | |
895 vf_curr_NSweight_vu, | |
896 src_linestride_vu ); | |
897 // work line u | |
898 bilinear_scale_line_w16( u_plane[curr_src_idx], | |
899 scaled_u_plane[curr_src_idx], | |
900 dst_width>>1, | |
901 vf_x_scale, | |
902 vf_curr_NSweight_vu, | |
903 src_linestride_vu ); | |
904 | |
905 //--------------------------------------------------------------------------------------------- | |
906 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
907 | |
908 // Perform three DMA transfers to 3 different locations in the main memory! | |
909 // dst_width: Pixel width of destination image | |
910 // dst_addr: Destination address in main memory | |
911 // dst_vu: Counter which is incremented one by one | |
912 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
913 | |
914 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
915 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
916 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
917 STR_BUF+curr_dst_idx, // Tag | |
918 0, 0 ); | |
919 | |
920 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
921 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
922 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
923 STR_BUF+curr_dst_idx, // Tag | |
924 0, 0 ); | |
925 | |
926 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
927 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
928 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
929 STR_BUF+curr_dst_idx, // Tag | |
930 0, 0 ); | |
931 //--------------------------------------------------------------------------------------------- | |
932 | |
933 | |
934 // update for next cycle | |
935 curr_src_idx = next_src_idx; | |
936 curr_dst_idx = next_dst_idx; | |
937 | |
938 curr_interpl_y_upper = next_interpl_y_upper; | |
939 curr_interpl_y_lower = next_interpl_y_lower; | |
940 curr_interpl_vu = next_interpl_vu; | |
941 | |
942 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | |
943 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | |
944 vf_curr_NSweight_vu = vf_next_NSweight_vu; | |
945 | |
946 curr_src_y_upper = next_src_y_upper; | |
947 curr_src_y_lower = next_src_y_lower; | |
948 curr_src_vu = next_src_vu; | |
949 } | |
950 | |
951 | |
952 | |
953 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
954 | |
955 // scaling | |
956 // work line y_upper | |
957 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
958 scaled_y_plane[curr_src_idx], | |
959 dst_width, | |
960 vf_x_scale, | |
961 vf_curr_NSweight_y_upper, | |
962 src_linestride_y ); | |
963 // work line y_lower | |
964 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
965 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
966 dst_width, | |
967 vf_x_scale, | |
968 vf_curr_NSweight_y_lower, | |
969 src_linestride_y ); | |
970 // work line v | |
971 bilinear_scale_line_w16( v_plane[curr_src_idx], | |
972 scaled_v_plane[curr_src_idx], | |
973 dst_width>>1, | |
974 vf_x_scale, | |
975 vf_curr_NSweight_vu, | |
976 src_linestride_vu ); | |
977 // work line u | |
978 bilinear_scale_line_w16( u_plane[curr_src_idx], | |
979 scaled_u_plane[curr_src_idx], | |
980 dst_width>>1, | |
981 vf_x_scale, | |
982 vf_curr_NSweight_vu, | |
983 src_linestride_vu ); | |
984 | |
985 | |
986 //--------------------------------------------------------------------------------------------- | |
987 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
988 | |
989 // Perform three DMA transfers to 3 different locations in the main memory! | |
990 // dst_width: Pixel width of destination image | |
991 // dst_addr: Destination address in main memory | |
992 // dst_vu: Counter which is incremented one by one | |
993 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
994 | |
995 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
996 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
997 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
998 STR_BUF+curr_dst_idx, // Tag | |
999 0, 0 ); | |
1000 | |
1001 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
1002 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1003 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
1004 STR_BUF+curr_dst_idx, // Tag | |
1005 0, 0 ); | |
1006 | |
1007 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
1008 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1009 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
1010 STR_BUF+curr_dst_idx, // Tag | |
1011 0, 0 ); | |
1012 | |
1013 // wait for completion | |
1014 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
1015 //--------------------------------------------------------------------------------------------- | |
1016 } | |
1017 | |
1018 | |
1019 /** | |
1020 * scale_srcw32_dstw32() | |
1021 * | |
1022 * processes an input image of width 32 | |
1023 * scaling is done to a width 32 | |
1024 * yuv2rgb conversion on a width of 32 | |
1025 * result stored in RAM | |
1026 */ | |
1027 void scale_srcw32_dstw32() { | |
1028 // extract parameters | |
1029 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | |
1030 | |
1031 unsigned int src_width = parms.src_pixel_width; | |
1032 unsigned int src_height = parms.src_pixel_height; | |
1033 unsigned int dst_width = parms.dst_pixel_width; | |
1034 unsigned int dst_height = parms.dst_pixel_height; | |
1035 | |
1036 // YVU | |
1037 unsigned int src_linestride_y = src_width; | |
1038 unsigned int src_dbl_linestride_y = src_width<<1; | |
1039 unsigned int src_linestride_vu = src_width>>1; | |
1040 unsigned int src_dbl_linestride_vu = src_width; | |
1041 | |
1042 // scaled YVU | |
1043 unsigned int scaled_src_linestride_y = dst_width; | |
1044 | |
1045 // ram addresses | |
1046 unsigned char* src_addr_y = parms.y_plane; | |
1047 unsigned char* src_addr_v = parms.v_plane; | |
1048 unsigned char* src_addr_u = parms.u_plane; | |
1049 | |
1050 unsigned int dst_picture_size = dst_width*dst_height; | |
1051 | |
1052 // Sizes for destination | |
1053 unsigned int dst_dbl_linestride_y = dst_width<<1; | |
1054 unsigned int dst_dbl_linestride_vu = dst_width>>1; | |
1055 | |
1056 // Perform address calculation for Y, V and U in main memory with dst_addr as base | |
1057 unsigned char* dst_addr_main_memory_y = dst_addr; | |
1058 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | |
1059 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | |
1060 | |
1061 // calculate scale factors | |
1062 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | |
1063 float y_scale = (float)src_height/(float)dst_height; | |
1064 | |
1065 // double buffered processing | |
1066 // buffer switching | |
1067 unsigned int curr_src_idx = 0; | |
1068 unsigned int curr_dst_idx = 0; | |
1069 unsigned int next_src_idx, next_dst_idx; | |
1070 | |
1071 // 2 lines y as output, upper and lowerline | |
1072 unsigned int curr_interpl_y_upper = 0; | |
1073 unsigned int next_interpl_y_upper; | |
1074 unsigned int curr_interpl_y_lower, next_interpl_y_lower; | |
1075 // only 1 line v/u output, both planes have the same dimension | |
1076 unsigned int curr_interpl_vu = 0; | |
1077 unsigned int next_interpl_vu; | |
1078 | |
1079 // weights, calculated in every loop iteration | |
1080 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
1081 vector float vf_next_NSweight_y_upper; | |
1082 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | |
1083 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | |
1084 vector float vf_next_NSweight_vu; | |
1085 | |
1086 // line indices for the src picture | |
1087 float curr_src_y_upper = 0.0f, next_src_y_upper; | |
1088 float curr_src_y_lower, next_src_y_lower; | |
1089 float curr_src_vu = 0.0f, next_src_vu; | |
1090 | |
1091 // line indices for the dst picture | |
1092 unsigned int dst_y=0, dst_vu=0; | |
1093 | |
1094 // calculate lower line idices | |
1095 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | |
1096 curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | |
1097 // lower line weight | |
1098 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | |
1099 | |
1100 | |
1101 // start partially double buffered processing | |
1102 // get initial data, 2 sets of y, 1 set v, 1 set u | |
1103 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | |
1104 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
1105 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | |
1106 src_dbl_linestride_y, | |
1107 RETR_BUF, | |
1108 0, 0 ); | |
1109 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
1110 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | |
1111 | |
1112 // iteration loop | |
1113 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | |
1114 // the scaled output is 2 lines y, 1 line v, 1 line u | |
1115 // the yuv2rgb-converted output is stored to RAM | |
1116 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | |
1117 dst_y = dst_vu<<1; | |
1118 | |
1119 // calculate next indices | |
1120 next_src_vu = ((float)dst_vu+1)*y_scale; | |
1121 next_src_y_upper = ((float)dst_y+2)*y_scale; | |
1122 next_src_y_lower = ((float)dst_y+3)*y_scale; | |
1123 | |
1124 next_interpl_vu = (unsigned int) next_src_vu; | |
1125 next_interpl_y_upper = (unsigned int) next_src_y_upper; | |
1126 next_interpl_y_lower = (unsigned int) next_src_y_lower; | |
1127 | |
1128 // calculate weight NORTH-SOUTH | |
1129 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | |
1130 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | |
1131 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | |
1132 | |
1133 // get next lines | |
1134 next_src_idx = curr_src_idx^1; | |
1135 next_dst_idx = curr_dst_idx^1; | |
1136 | |
1137 // 4 lines y | |
1138 mfc_get( y_plane[next_src_idx], | |
1139 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | |
1140 src_dbl_linestride_y, | |
1141 RETR_BUF+next_src_idx, | |
1142 0, 0 ); | |
1143 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | |
1144 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | |
1145 src_dbl_linestride_y, | |
1146 RETR_BUF+next_src_idx, | |
1147 0, 0 ); | |
1148 | |
1149 // 2 lines v | |
1150 mfc_get( v_plane[next_src_idx], | |
1151 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), | |
1152 src_dbl_linestride_vu, | |
1153 RETR_BUF+next_src_idx, | |
1154 0, 0 ); | |
1155 // 2 lines u | |
1156 mfc_get( u_plane[next_src_idx], | |
1157 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), | |
1158 src_dbl_linestride_vu, | |
1159 RETR_BUF+next_src_idx, | |
1160 0, 0 ); | |
1161 | |
1162 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
1163 | |
1164 // scaling | |
1165 // work line y_upper | |
1166 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
1167 scaled_y_plane[curr_src_idx], | |
1168 dst_width, | |
1169 vf_x_scale, | |
1170 vf_curr_NSweight_y_upper, | |
1171 src_linestride_y ); | |
1172 // work line y_lower | |
1173 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
1174 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
1175 dst_width, | |
1176 vf_x_scale, | |
1177 vf_curr_NSweight_y_lower, | |
1178 src_linestride_y ); | |
1179 // work line v | |
1180 bilinear_scale_line_w16( v_plane[curr_src_idx], | |
1181 scaled_v_plane[curr_src_idx], | |
1182 dst_width>>1, | |
1183 vf_x_scale, | |
1184 vf_curr_NSweight_vu, | |
1185 src_linestride_vu ); | |
1186 // work line u | |
1187 bilinear_scale_line_w16( u_plane[curr_src_idx], | |
1188 scaled_u_plane[curr_src_idx], | |
1189 dst_width>>1, | |
1190 vf_x_scale, | |
1191 vf_curr_NSweight_vu, | |
1192 src_linestride_vu ); | |
1193 | |
1194 | |
1195 | |
1196 // Store the result back to main memory into a destination buffer in YUV format | |
1197 //--------------------------------------------------------------------------------------------- | |
1198 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
1199 | |
1200 // Perform three DMA transfers to 3 different locations in the main memory! | |
1201 // dst_width: Pixel width of destination image | |
1202 // dst_addr: Destination address in main memory | |
1203 // dst_vu: Counter which is incremented one by one | |
1204 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
1205 | |
1206 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
1207 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
1208 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
1209 STR_BUF+curr_dst_idx, // Tag | |
1210 0, 0 ); | |
1211 | |
1212 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
1213 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1214 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
1215 STR_BUF+curr_dst_idx, // Tag | |
1216 0, 0 ); | |
1217 | |
1218 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
1219 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1220 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
1221 STR_BUF+curr_dst_idx, // Tag | |
1222 0, 0 ); | |
1223 //--------------------------------------------------------------------------------------------- | |
1224 | |
1225 | |
1226 // update for next cycle | |
1227 curr_src_idx = next_src_idx; | |
1228 curr_dst_idx = next_dst_idx; | |
1229 | |
1230 curr_interpl_y_upper = next_interpl_y_upper; | |
1231 curr_interpl_y_lower = next_interpl_y_lower; | |
1232 curr_interpl_vu = next_interpl_vu; | |
1233 | |
1234 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | |
1235 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | |
1236 vf_curr_NSweight_vu = vf_next_NSweight_vu; | |
1237 | |
1238 curr_src_y_upper = next_src_y_upper; | |
1239 curr_src_y_lower = next_src_y_lower; | |
1240 curr_src_vu = next_src_vu; | |
1241 } | |
1242 | |
1243 | |
1244 | |
1245 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | |
1246 | |
1247 // scaling | |
1248 // work line y_upper | |
1249 bilinear_scale_line_w16( y_plane[curr_src_idx], | |
1250 scaled_y_plane[curr_src_idx], | |
1251 dst_width, | |
1252 vf_x_scale, | |
1253 vf_curr_NSweight_y_upper, | |
1254 src_linestride_y ); | |
1255 // work line y_lower | |
1256 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | |
1257 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | |
1258 dst_width, | |
1259 vf_x_scale, | |
1260 vf_curr_NSweight_y_lower, | |
1261 src_linestride_y ); | |
1262 // work line v | |
1263 bilinear_scale_line_w16( v_plane[curr_src_idx], | |
1264 scaled_v_plane[curr_src_idx], | |
1265 dst_width>>1, | |
1266 vf_x_scale, | |
1267 vf_curr_NSweight_vu, | |
1268 src_linestride_vu ); | |
1269 // work line u | |
1270 bilinear_scale_line_w16( u_plane[curr_src_idx], | |
1271 scaled_u_plane[curr_src_idx], | |
1272 dst_width>>1, | |
1273 vf_x_scale, | |
1274 vf_curr_NSweight_vu, | |
1275 src_linestride_vu ); | |
1276 | |
1277 | |
1278 // Store the result back to main memory into a destination buffer in YUV format | |
1279 //--------------------------------------------------------------------------------------------- | |
1280 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
1281 | |
1282 // Perform three DMA transfers to 3 different locations in the main memory! | |
1283 // dst_width: Pixel width of destination image | |
1284 // dst_addr: Destination address in main memory | |
1285 // dst_vu: Counter which is incremented one by one | |
1286 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | |
1287 | |
1288 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | |
1289 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | |
1290 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | |
1291 STR_BUF+curr_dst_idx, // Tag | |
1292 0, 0 ); | |
1293 | |
1294 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | |
1295 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1296 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | |
1297 STR_BUF+curr_dst_idx, // Tag | |
1298 0, 0 ); | |
1299 | |
1300 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | |
1301 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | |
1302 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | |
1303 STR_BUF+curr_dst_idx, // Tag | |
1304 0, 0 ); | |
1305 | |
1306 // wait for completion | |
1307 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | |
1308 //--------------------------------------------------------------------------------------------- | |
1309 } | |
1310 | |
1311 | |
1312 /* | |
1313 * bilinear_scale_line_w8() | |
1314 * | |
1315 * processes a line of yuv-input, width has to be a multiple of 8 | |
1316 * scaled yuv-output is written to local store buffer | |
1317 * | |
1318 * @param src buffer for 2 lines input | |
1319 * @param dst_ buffer for 1 line output | |
1320 * @param dst_width the width of the destination line | |
1321 * @param vf_x_scale a float vector, at each entry is the x_scale-factor | |
1322 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line | |
1323 * @param src_linestride the stride of the srcline | |
1324 */ | |
1325 void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { | |
1326 | |
1327 unsigned char* dst = dst_; | |
1328 | |
1329 unsigned int dst_x; | |
1330 for( dst_x=0; dst_x<dst_width; dst_x+=8) { | |
1331 // address calculation for loading the 4 surrounding pixel of each calculated | |
1332 // destination pixel | |
1333 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); | |
1334 // lower range->first 4 pixel | |
1335 // upper range->next 4 pixel | |
1336 vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 }; | |
1337 vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 }; | |
1338 vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range ); | |
1339 vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range ); | |
1340 | |
1341 // calculate weight EAST-WEST | |
1342 vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 ); | |
1343 vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 ); | |
1344 vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale ); | |
1345 vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale ); | |
1346 vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 ); | |
1347 vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 ); | |
1348 vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 ); | |
1349 vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 ); | |
1350 vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range ); | |
1351 vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range ); | |
1352 | |
1353 // calculate address offset | |
1354 // | |
1355 // pixel NORTH WEST | |
1356 vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range; | |
1357 vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range; | |
1358 | |
1359 // pixel NORTH EAST-->(offpixelNW+1) | |
1360 vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; | |
1361 vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 ); | |
1362 vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 ); | |
1363 | |
1364 // SOUTH-WEST-->(offpixelNW+src_linestride) | |
1365 vector unsigned int vui_srclinestride = spu_splats( src_linestride ); | |
1366 vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range ); | |
1367 vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range ); | |
1368 | |
1369 // SOUTH-EAST-->(offpixelNW+src_linestride+1) | |
1370 vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range ); | |
1371 vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range ); | |
1372 | |
1373 // calculate each address | |
1374 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); | |
1375 vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range ); | |
1376 vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range ); | |
1377 vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range ); | |
1378 vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range ); | |
1379 | |
1380 vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range ); | |
1381 vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range ); | |
1382 vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range ); | |
1383 vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range ); | |
1384 | |
1385 // get each pixel | |
1386 // | |
1387 // scalar load, afterwards insertion into the right position | |
1388 // NORTH WEST | |
1389 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
1390 vector unsigned char vuc_pixel_NW_lower_range = spu_insert( | |
1391 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 ); | |
1392 vuc_pixel_NW_lower_range = spu_insert( | |
1393 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )), | |
1394 vuc_pixel_NW_lower_range, 7 ); | |
1395 vuc_pixel_NW_lower_range = spu_insert( | |
1396 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )), | |
1397 vuc_pixel_NW_lower_range, 11 ); | |
1398 vuc_pixel_NW_lower_range = spu_insert( | |
1399 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )), | |
1400 vuc_pixel_NW_lower_range, 15 ); | |
1401 | |
1402 vector unsigned char vuc_pixel_NW_upper_range = spu_insert( | |
1403 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 ); | |
1404 vuc_pixel_NW_upper_range = spu_insert( | |
1405 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )), | |
1406 vuc_pixel_NW_upper_range, 7 ); | |
1407 vuc_pixel_NW_upper_range = spu_insert( | |
1408 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )), | |
1409 vuc_pixel_NW_upper_range, 11 ); | |
1410 vuc_pixel_NW_upper_range = spu_insert( | |
1411 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )), | |
1412 vuc_pixel_NW_upper_range, 15 ); | |
1413 | |
1414 // NORTH EAST | |
1415 vector unsigned char vuc_pixel_NE_lower_range = spu_insert( | |
1416 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 ); | |
1417 vuc_pixel_NE_lower_range = spu_insert( | |
1418 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )), | |
1419 vuc_pixel_NE_lower_range, 7 ); | |
1420 vuc_pixel_NE_lower_range = spu_insert( | |
1421 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )), | |
1422 vuc_pixel_NE_lower_range, 11 ); | |
1423 vuc_pixel_NE_lower_range = spu_insert( | |
1424 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )), | |
1425 vuc_pixel_NE_lower_range, 15 ); | |
1426 | |
1427 vector unsigned char vuc_pixel_NE_upper_range = spu_insert( | |
1428 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 ); | |
1429 vuc_pixel_NE_upper_range = spu_insert( | |
1430 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )), | |
1431 vuc_pixel_NE_upper_range, 7 ); | |
1432 vuc_pixel_NE_upper_range = spu_insert( | |
1433 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )), | |
1434 vuc_pixel_NE_upper_range, 11 ); | |
1435 vuc_pixel_NE_upper_range = spu_insert( | |
1436 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )), | |
1437 vuc_pixel_NE_upper_range, 15 ); | |
1438 | |
1439 | |
1440 // SOUTH WEST | |
1441 vector unsigned char vuc_pixel_SW_lower_range = spu_insert( | |
1442 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 ); | |
1443 vuc_pixel_SW_lower_range = spu_insert( | |
1444 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )), | |
1445 vuc_pixel_SW_lower_range, 7 ); | |
1446 vuc_pixel_SW_lower_range = spu_insert( | |
1447 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )), | |
1448 vuc_pixel_SW_lower_range, 11 ); | |
1449 vuc_pixel_SW_lower_range = spu_insert( | |
1450 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )), | |
1451 vuc_pixel_SW_lower_range, 15 ); | |
1452 | |
1453 vector unsigned char vuc_pixel_SW_upper_range = spu_insert( | |
1454 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 ); | |
1455 vuc_pixel_SW_upper_range = spu_insert( | |
1456 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )), | |
1457 vuc_pixel_SW_upper_range, 7 ); | |
1458 vuc_pixel_SW_upper_range = spu_insert( | |
1459 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )), | |
1460 vuc_pixel_SW_upper_range, 11 ); | |
1461 vuc_pixel_SW_upper_range = spu_insert( | |
1462 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )), | |
1463 vuc_pixel_SW_upper_range, 15 ); | |
1464 | |
1465 // SOUTH EAST | |
1466 vector unsigned char vuc_pixel_SE_lower_range = spu_insert( | |
1467 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 ); | |
1468 vuc_pixel_SE_lower_range = spu_insert( | |
1469 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )), | |
1470 vuc_pixel_SE_lower_range, 7 ); | |
1471 vuc_pixel_SE_lower_range = spu_insert( | |
1472 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )), | |
1473 vuc_pixel_SE_lower_range, 11 ); | |
1474 vuc_pixel_SE_lower_range = spu_insert( | |
1475 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )), | |
1476 vuc_pixel_SE_lower_range, 15 ); | |
1477 | |
1478 vector unsigned char vuc_pixel_SE_upper_range = spu_insert( | |
1479 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 ); | |
1480 vuc_pixel_SE_upper_range = spu_insert( | |
1481 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )), | |
1482 vuc_pixel_SE_upper_range, 7 ); | |
1483 vuc_pixel_SE_upper_range = spu_insert( | |
1484 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )), | |
1485 vuc_pixel_SE_upper_range, 11 ); | |
1486 vuc_pixel_SE_upper_range = spu_insert( | |
1487 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )), | |
1488 vuc_pixel_SE_upper_range, 15 ); | |
1489 | |
1490 | |
1491 // convert to float | |
1492 vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 ); | |
1493 vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 ); | |
1494 | |
1495 vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 ); | |
1496 vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 ); | |
1497 | |
1498 vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 ); | |
1499 vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 ); | |
1500 | |
1501 vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 ); | |
1502 vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 ); | |
1503 | |
1504 | |
1505 | |
1506 // first linear interpolation: EWtop | |
1507 // EWtop = NW + EWweight*(NE-NW) | |
1508 // | |
1509 // lower range | |
1510 vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range ); | |
1511 vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range, | |
1512 vf_EWtop_lower_range_tmp, | |
1513 vf_pixel_NW_lower_range ); | |
1514 | |
1515 // upper range | |
1516 vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range ); | |
1517 vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range, | |
1518 vf_EWtop_upper_range_tmp, | |
1519 vf_pixel_NW_upper_range ); | |
1520 | |
1521 | |
1522 | |
1523 // second linear interpolation: EWbottom | |
1524 // EWbottom = SW + EWweight*(SE-SW) | |
1525 // | |
1526 // lower range | |
1527 vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range ); | |
1528 vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range, | |
1529 vf_EWbottom_lower_range_tmp, | |
1530 vf_pixel_SW_lower_range ); | |
1531 | |
1532 // upper range | |
1533 vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range ); | |
1534 vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range, | |
1535 vf_EWbottom_upper_range_tmp, | |
1536 vf_pixel_SW_upper_range ); | |
1537 | |
1538 | |
1539 | |
1540 // third linear interpolation: the bilinear interpolated value | |
1541 // result = EWtop + NSweight*(EWbottom-EWtop); | |
1542 // | |
1543 // lower range | |
1544 vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range ); | |
1545 vector float vf_result_lower_range = spu_madd( vf_NSweight, | |
1546 vf_result_lower_range_tmp, | |
1547 vf_EWtop_lower_range ); | |
1548 | |
1549 // upper range | |
1550 vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range ); | |
1551 vector float vf_result_upper_range = spu_madd( vf_NSweight, | |
1552 vf_result_upper_range_tmp, | |
1553 vf_EWtop_upper_range ); | |
1554 | |
1555 | |
1556 // convert back: using saturated arithmetic | |
1557 vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range ); | |
1558 vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range ); | |
1559 | |
1560 // merge results->lower,upper | |
1561 vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F, | |
1562 0x13, 0x17, 0x1B, 0x1F, | |
1563 0x00, 0x00, 0x00, 0x00, | |
1564 0x00, 0x00, 0x00, 0x00 }; | |
1565 | |
1566 vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range, | |
1567 (vector unsigned char) vui_result_upper_range, | |
1568 vuc_mask_merge_result ); | |
1569 | |
1570 // partial storing | |
1571 vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00, | |
1572 0x00, 0x00, 0x00, 0x00, | |
1573 0xFF, 0xFF, 0xFF, 0xFF, | |
1574 0xFF, 0xFF, 0xFF, 0xFF }; | |
1575 | |
1576 | |
1577 // get currently stored data | |
1578 vector unsigned char vuc_orig = *((vector unsigned char*)dst); | |
1579 | |
1580 // clear currently stored data | |
1581 vuc_orig = spu_and( vuc_orig, | |
1582 spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) ); | |
1583 | |
1584 // rotate result according to storing address | |
1585 vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F ); | |
1586 | |
1587 // store result | |
1588 *((vector unsigned char*)dst) = spu_or( vuc_result, | |
1589 vuc_orig ); | |
1590 dst += 8; | |
1591 } | |
1592 } | |
1593 | |
1594 | |
1595 /* | |
1596 * bilinear_scale_line_w16() | |
1597 * | |
1598 * processes a line of yuv-input, width has to be a multiple of 16 | |
1599 * scaled yuv-output is written to local store buffer | |
1600 * | |
1601 * @param src buffer for 2 lines input | |
1602 * @param dst_ buffer for 1 line output | |
1603 * @param dst_width the width of the destination line | |
1604 * @param vf_x_scale a float vector holding the x_scale-factor in each entry | |
1605 * @param vf_NSweight a float vector holding the NORTH/SOUTH weight for the current line in each entry | |
1606 * @param src_linestride the stride of the srcline | |
1607 */ | |
1608 void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { | |
1609 | |
1610 unsigned char* dst = dst_; | |
1611 | |
1612 unsigned int dst_x; | |
1613 for( dst_x=0; dst_x<dst_width; dst_x+=16) { | |
1614 // address calculation for loading the 4 surrounding pixel of each calculated | |
1615 // destination pixel | |
1616 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); | |
1617 // parallelised processing | |
1618 // first range->pixel 1 2 3 4 | |
1619 // second range->pixel 5 6 7 8 | |
1620 // third range->pixel 9 10 11 12 | |
1621 // fourth range->pixel 13 14 15 16 | |
1622 vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 }; | |
1623 vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 }; | |
1624 vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 }; | |
1625 vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 }; | |
1626 vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range ); | |
1627 vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range ); | |
1628 vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range ); | |
1629 vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range ); | |
1630 | |
1631 // calculate weight EAST-WEST | |
1632 vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 ); | |
1633 vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 ); | |
1634 vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 ); | |
1635 vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 ); | |
1636 vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale ); | |
1637 vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale ); | |
1638 vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale ); | |
1639 vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale ); | |
1640 vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 ); | |
1641 vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 ); | |
1642 vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 ); | |
1643 vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 ); | |
1644 vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 ); | |
1645 vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 ); | |
1646 vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 ); | |
1647 vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 ); | |
1648 vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range ); | |
1649 vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range ); | |
1650 vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range ); | |
1651 vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range ); | |
1652 | |
1653 // calculate address offset | |
1654 // | |
1655 // pixel NORTH WEST | |
1656 vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range; | |
1657 vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range; | |
1658 vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range; | |
1659 vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range; | |
1660 | |
1661 // pixel NORTH EAST-->(offpixelNW+1) | |
1662 vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; | |
1663 vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 ); | |
1664 vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 ); | |
1665 vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 ); | |
1666 vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 ); | |
1667 | |
1668 // SOUTH-WEST-->(offpixelNW+src_linestride) | |
1669 vector unsigned int vui_srclinestride = spu_splats( src_linestride ); | |
1670 vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range ); | |
1671 vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range ); | |
1672 vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range ); | |
1673 vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range ); | |
1674 | |
1675 // SOUTH-EAST-->(offpixelNW+src_linestride+1) | |
1676 vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range ); | |
1677 vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range ); | |
1678 vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range ); | |
1679 vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range ); | |
1680 | |
1681 // calculate each address | |
1682 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); | |
1683 vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range ); | |
1684 vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range ); | |
1685 vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range ); | |
1686 vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range ); | |
1687 | |
1688 vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range ); | |
1689 vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range ); | |
1690 vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range ); | |
1691 vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range ); | |
1692 | |
1693 vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range ); | |
1694 vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range ); | |
1695 vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range ); | |
1696 vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range ); | |
1697 | |
1698 vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range ); | |
1699 vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range ); | |
1700 vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range ); | |
1701 vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range ); | |
1702 | |
1703 | |
1704 // get each pixel | |
1705 // | |
1706 // scalar load, afterwards insertion into the right position | |
1707 // NORTH WEST | |
1708 // first range | |
1709 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
1710 vector unsigned char vuc_pixel_NW_first_range = spu_insert( | |
1711 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 ); | |
1712 vuc_pixel_NW_first_range = spu_insert( | |
1713 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )), | |
1714 vuc_pixel_NW_first_range, 7 ); | |
1715 vuc_pixel_NW_first_range = spu_insert( | |
1716 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )), | |
1717 vuc_pixel_NW_first_range, 11 ); | |
1718 vuc_pixel_NW_first_range = spu_insert( | |
1719 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )), | |
1720 vuc_pixel_NW_first_range, 15 ); | |
1721 // second range | |
1722 vector unsigned char vuc_pixel_NW_second_range = spu_insert( | |
1723 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 ); | |
1724 vuc_pixel_NW_second_range = spu_insert( | |
1725 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )), | |
1726 vuc_pixel_NW_second_range, 7 ); | |
1727 vuc_pixel_NW_second_range = spu_insert( | |
1728 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )), | |
1729 vuc_pixel_NW_second_range, 11 ); | |
1730 vuc_pixel_NW_second_range = spu_insert( | |
1731 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )), | |
1732 vuc_pixel_NW_second_range, 15 ); | |
1733 // third range | |
1734 vector unsigned char vuc_pixel_NW_third_range = spu_insert( | |
1735 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 ); | |
1736 vuc_pixel_NW_third_range = spu_insert( | |
1737 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )), | |
1738 vuc_pixel_NW_third_range, 7 ); | |
1739 vuc_pixel_NW_third_range = spu_insert( | |
1740 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )), | |
1741 vuc_pixel_NW_third_range, 11 ); | |
1742 vuc_pixel_NW_third_range = spu_insert( | |
1743 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )), | |
1744 vuc_pixel_NW_third_range, 15 ); | |
1745 // fourth range | |
1746 vector unsigned char vuc_pixel_NW_fourth_range = spu_insert( | |
1747 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 ); | |
1748 vuc_pixel_NW_fourth_range = spu_insert( | |
1749 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )), | |
1750 vuc_pixel_NW_fourth_range, 7 ); | |
1751 vuc_pixel_NW_fourth_range = spu_insert( | |
1752 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )), | |
1753 vuc_pixel_NW_fourth_range, 11 ); | |
1754 vuc_pixel_NW_fourth_range = spu_insert( | |
1755 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )), | |
1756 vuc_pixel_NW_fourth_range, 15 ); | |
1757 | |
1758 // NORTH EAST | |
1759 // first range | |
1760 vector unsigned char vuc_pixel_NE_first_range = spu_insert( | |
1761 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 ); | |
1762 vuc_pixel_NE_first_range = spu_insert( | |
1763 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )), | |
1764 vuc_pixel_NE_first_range, 7 ); | |
1765 vuc_pixel_NE_first_range = spu_insert( | |
1766 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )), | |
1767 vuc_pixel_NE_first_range, 11 ); | |
1768 vuc_pixel_NE_first_range = spu_insert( | |
1769 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )), | |
1770 vuc_pixel_NE_first_range, 15 ); | |
1771 // second range | |
1772 vector unsigned char vuc_pixel_NE_second_range = spu_insert( | |
1773 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 ); | |
1774 vuc_pixel_NE_second_range = spu_insert( | |
1775 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )), | |
1776 vuc_pixel_NE_second_range, 7 ); | |
1777 vuc_pixel_NE_second_range = spu_insert( | |
1778 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )), | |
1779 vuc_pixel_NE_second_range, 11 ); | |
1780 vuc_pixel_NE_second_range = spu_insert( | |
1781 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )), | |
1782 vuc_pixel_NE_second_range, 15 ); | |
1783 // third range | |
1784 vector unsigned char vuc_pixel_NE_third_range = spu_insert( | |
1785 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 ); | |
1786 vuc_pixel_NE_third_range = spu_insert( | |
1787 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )), | |
1788 vuc_pixel_NE_third_range, 7 ); | |
1789 vuc_pixel_NE_third_range = spu_insert( | |
1790 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )), | |
1791 vuc_pixel_NE_third_range, 11 ); | |
1792 vuc_pixel_NE_third_range = spu_insert( | |
1793 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )), | |
1794 vuc_pixel_NE_third_range, 15 ); | |
1795 // fourth range | |
1796 vector unsigned char vuc_pixel_NE_fourth_range = spu_insert( | |
1797 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 ); | |
1798 vuc_pixel_NE_fourth_range = spu_insert( | |
1799 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )), | |
1800 vuc_pixel_NE_fourth_range, 7 ); | |
1801 vuc_pixel_NE_fourth_range = spu_insert( | |
1802 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )), | |
1803 vuc_pixel_NE_fourth_range, 11 ); | |
1804 vuc_pixel_NE_fourth_range = spu_insert( | |
1805 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )), | |
1806 vuc_pixel_NE_fourth_range, 15 ); | |
1807 | |
1808 // SOUTH WEST | |
1809 // first range | |
1810 vector unsigned char vuc_pixel_SW_first_range = spu_insert( | |
1811 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 ); | |
1812 vuc_pixel_SW_first_range = spu_insert( | |
1813 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )), | |
1814 vuc_pixel_SW_first_range, 7 ); | |
1815 vuc_pixel_SW_first_range = spu_insert( | |
1816 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )), | |
1817 vuc_pixel_SW_first_range, 11 ); | |
1818 vuc_pixel_SW_first_range = spu_insert( | |
1819 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )), | |
1820 vuc_pixel_SW_first_range, 15 ); | |
1821 // second range | |
1822 vector unsigned char vuc_pixel_SW_second_range = spu_insert( | |
1823 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 ); | |
1824 vuc_pixel_SW_second_range = spu_insert( | |
1825 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )), | |
1826 vuc_pixel_SW_second_range, 7 ); | |
1827 vuc_pixel_SW_second_range = spu_insert( | |
1828 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )), | |
1829 vuc_pixel_SW_second_range, 11 ); | |
1830 vuc_pixel_SW_second_range = spu_insert( | |
1831 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )), | |
1832 vuc_pixel_SW_second_range, 15 ); | |
1833 // third range | |
1834 vector unsigned char vuc_pixel_SW_third_range = spu_insert( | |
1835 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 ); | |
1836 vuc_pixel_SW_third_range = spu_insert( | |
1837 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )), | |
1838 vuc_pixel_SW_third_range, 7 ); | |
1839 vuc_pixel_SW_third_range = spu_insert( | |
1840 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )), | |
1841 vuc_pixel_SW_third_range, 11 ); | |
1842 vuc_pixel_SW_third_range = spu_insert( | |
1843 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )), | |
1844 vuc_pixel_SW_third_range, 15 ); | |
1845 // fourth range | |
1846 vector unsigned char vuc_pixel_SW_fourth_range = spu_insert( | |
1847 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 ); | |
1848 vuc_pixel_SW_fourth_range = spu_insert( | |
1849 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )), | |
1850 vuc_pixel_SW_fourth_range, 7 ); | |
1851 vuc_pixel_SW_fourth_range = spu_insert( | |
1852 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )), | |
1853 vuc_pixel_SW_fourth_range, 11 ); | |
1854 vuc_pixel_SW_fourth_range = spu_insert( | |
1855 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )), | |
1856 vuc_pixel_SW_fourth_range, 15 ); | |
1857 | |
1858 // SOUTH EAST | |
1859 // first range | |
1860 vector unsigned char vuc_pixel_SE_first_range = spu_insert( | |
1861 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 ); | |
1862 vuc_pixel_SE_first_range = spu_insert( | |
1863 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )), | |
1864 vuc_pixel_SE_first_range, 7 ); | |
1865 vuc_pixel_SE_first_range = spu_insert( | |
1866 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )), | |
1867 vuc_pixel_SE_first_range, 11 ); | |
1868 vuc_pixel_SE_first_range = spu_insert( | |
1869 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )), | |
1870 vuc_pixel_SE_first_range, 15 ); | |
1871 // second range | |
1872 vector unsigned char vuc_pixel_SE_second_range = spu_insert( | |
1873 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 ); | |
1874 vuc_pixel_SE_second_range = spu_insert( | |
1875 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )), | |
1876 vuc_pixel_SE_second_range, 7 ); | |
1877 vuc_pixel_SE_second_range = spu_insert( | |
1878 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )), | |
1879 vuc_pixel_SE_second_range, 11 ); | |
1880 vuc_pixel_SE_second_range = spu_insert( | |
1881 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )), | |
1882 vuc_pixel_SE_second_range, 15 ); | |
1883 // third range | |
1884 vector unsigned char vuc_pixel_SE_third_range = spu_insert( | |
1885 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 ); | |
1886 vuc_pixel_SE_third_range = spu_insert( | |
1887 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )), | |
1888 vuc_pixel_SE_third_range, 7 ); | |
1889 vuc_pixel_SE_third_range = spu_insert( | |
1890 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )), | |
1891 vuc_pixel_SE_third_range, 11 ); | |
1892 vuc_pixel_SE_third_range = spu_insert( | |
1893 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )), | |
1894 vuc_pixel_SE_third_range, 15 ); | |
1895 // fourth range | |
1896 vector unsigned char vuc_pixel_SE_fourth_range = spu_insert( | |
1897 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 ); | |
1898 vuc_pixel_SE_fourth_range = spu_insert( | |
1899 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )), | |
1900 vuc_pixel_SE_fourth_range, 7 ); | |
1901 vuc_pixel_SE_fourth_range = spu_insert( | |
1902 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )), | |
1903 vuc_pixel_SE_fourth_range, 11 ); | |
1904 vuc_pixel_SE_fourth_range = spu_insert( | |
1905 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )), | |
1906 vuc_pixel_SE_fourth_range, 15 ); | |
1907 | |
1908 | |
1909 | |
1910 // convert to float | |
1911 vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 ); | |
1912 vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 ); | |
1913 vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 ); | |
1914 vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 ); | |
1915 | |
1916 vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 ); | |
1917 vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 ); | |
1918 vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 ); | |
1919 vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 ); | |
1920 | |
1921 vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 ); | |
1922 vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 ); | |
1923 vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 ); | |
1924 vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 ); | |
1925 | |
1926 vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 ); | |
1927 vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 ); | |
1928 vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 ); | |
1929 vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 ); | |
1930 | |
1931 // first linear interpolation: EWtop | |
1932 // EWtop = NW + EWweight*(NE-NW) | |
1933 // | |
1934 // first range | |
1935 vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range ); | |
1936 vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range, | |
1937 vf_EWtop_first_range_tmp, | |
1938 vf_pixel_NW_first_range ); | |
1939 | |
1940 // second range | |
1941 vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range ); | |
1942 vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range, | |
1943 vf_EWtop_second_range_tmp, | |
1944 vf_pixel_NW_second_range ); | |
1945 | |
1946 // third range | |
1947 vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range ); | |
1948 vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range, | |
1949 vf_EWtop_third_range_tmp, | |
1950 vf_pixel_NW_third_range ); | |
1951 | |
1952 // fourth range | |
1953 vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range ); | |
1954 vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range, | |
1955 vf_EWtop_fourth_range_tmp, | |
1956 vf_pixel_NW_fourth_range ); | |
1957 | |
1958 | |
1959 | |
1960 // second linear interpolation: EWbottom | |
1961 // EWbottom = SW + EWweight*(SE-SW) | |
1962 // | |
1963 // first range | |
1964 vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range ); | |
1965 vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range, | |
1966 vf_EWbottom_first_range_tmp, | |
1967 vf_pixel_SW_first_range ); | |
1968 | |
1969 // second range | |
1970 vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range ); | |
1971 vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range, | |
1972 vf_EWbottom_second_range_tmp, | |
1973 vf_pixel_SW_second_range ); | |
1974 // third range | |
1975 vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range ); | |
1976 vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range, | |
1977 vf_EWbottom_third_range_tmp, | |
1978 vf_pixel_SW_third_range ); | |
1979 | |
1980 // fourth range | |
1981 vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range ); | |
1982 vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range, | |
1983 vf_EWbottom_fourth_range_tmp, | |
1984 vf_pixel_SW_fourth_range ); | |
1985 | |
1986 | |
1987 | |
1988 // third linear interpolation: the bilinear interpolated value | |
1989 // result = EWtop + NSweight*(EWbottom-EWtop); | |
1990 // | |
1991 // first range | |
1992 vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range ); | |
1993 vector float vf_result_first_range = spu_madd( vf_NSweight, | |
1994 vf_result_first_range_tmp, | |
1995 vf_EWtop_first_range ); | |
1996 | |
1997 // second range | |
1998 vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range ); | |
1999 vector float vf_result_second_range = spu_madd( vf_NSweight, | |
2000 vf_result_second_range_tmp, | |
2001 vf_EWtop_second_range ); | |
2002 | |
2003 // third range | |
2004 vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range ); | |
2005 vector float vf_result_third_range = spu_madd( vf_NSweight, | |
2006 vf_result_third_range_tmp, | |
2007 vf_EWtop_third_range ); | |
2008 | |
2009 // fourth range | |
2010 vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range ); | |
2011 vector float vf_result_fourth_range = spu_madd( vf_NSweight, | |
2012 vf_result_fourth_range_tmp, | |
2013 vf_EWtop_fourth_range ); | |
2014 | |
2015 | |
2016 | |
2017 // convert back: using saturated arithmetic | |
2018 vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range ); | |
2019 vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range ); | |
2020 vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range ); | |
2021 vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range ); | |
2022 | |
2023 // merge results->lower,upper | |
2024 vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F, | |
2025 0x13, 0x17, 0x1B, 0x1F, | |
2026 0x00, 0x00, 0x00, 0x00, | |
2027 0x00, 0x00, 0x00, 0x00 }; | |
2028 | |
2029 vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00, | |
2030 0x00, 0x00, 0x00, 0x00, | |
2031 0x03, 0x07, 0x0B, 0x0F, | |
2032 0x13, 0x17, 0x1B, 0x1F }; | |
2033 | |
2034 vector unsigned char vuc_result_first_second = | |
2035 spu_shuffle( (vector unsigned char) vui_result_first_range, | |
2036 (vector unsigned char) vui_result_second_range, | |
2037 vuc_mask_merge_result_first_second ); | |
2038 | |
2039 vector unsigned char vuc_result_third_fourth = | |
2040 spu_shuffle( (vector unsigned char) vui_result_third_range, | |
2041 (vector unsigned char) vui_result_fourth_range, | |
2042 vuc_mask_merge_result_third_fourth ); | |
2043 | |
2044 // store result | |
2045 *((vector unsigned char*)dst) = spu_or( vuc_result_first_second, | |
2046 vuc_result_third_fourth ); | |
2047 dst += 16; | |
2048 } | |
2049 } | |
2050 |