4165
|
1 /*
|
|
2 * SDL - Simple DirectMedia Layer
|
|
3 * CELL BE Support for PS3 Framebuffer
|
|
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
|
|
5 *
|
|
6 * This library is free software; you can redistribute it and/or modify it
|
|
7 * under the terms of the GNU Lesser General Public License as published
|
|
8 * by the Free Software Foundation; either version 2.1 of the License, or
|
|
9 * (at your option) any later version.
|
|
10 *
|
|
11 * This library is distributed in the hope that it will be useful, but
|
|
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14 * Lesser General Public License for more details.
|
|
15 *
|
|
16 * You should have received a copy of the GNU Lesser General Public
|
|
17 * License along with this library; if not, write to the Free Software
|
|
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
|
19 * USA
|
|
20 *
|
|
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
|
|
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
|
|
23 * SPE code based on research by:
|
|
24 * Rene Becker
|
|
25 * Thimo Emmerich
|
|
26 */
|
|
27
|
|
28 #include "spu_common.h"
|
|
29
|
|
30 #include <spu_intrinsics.h>
|
|
31 #include <spu_mfcio.h>
|
|
32
|
|
// Debugging: uncomment the following line to get deprintf() traces on stdout
//#define DEBUG

#ifdef DEBUG
#include <stdio.h>
/*
 * deprintf( fmt, ... )
 *
 * printf-style debug trace: writes the formatted message to stdout and
 * flushes it right away. Wrapped in do/while(0) so the macro expands to
 * exactly one statement and stays safe inside an unbraced if/else.
 */
#define deprintf( fmt, args... ) \
	do { \
		fprintf( stdout, fmt, ##args ); \
		fflush( stdout ); \
	} while (0)
#else
/* tracing disabled: expands to an empty statement, arguments are not evaluated */
#define deprintf( fmt, args... ) do { } while (0)
#endif
|
|
43
|
|
44 struct scale_parms_t parms __attribute__((aligned(128)));
|
|
45
|
|
46 /* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
|
|
47 * there might be the need to retrieve misaligned data, adjust
|
|
48 * incoming v and u plane to be able to handle this (add 128)
|
|
49 */
|
|
50 unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
|
|
51 unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
|
|
52 unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
|
|
53
|
|
54 /* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */
|
|
55 unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
|
|
56 unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
|
|
57 unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
|
|
58
|
|
59 /* some vectors needed by the float to int conversion */
|
|
60 static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
|
|
61 static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
|
|
62
|
|
63 void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
|
|
64 void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
|
|
65
|
|
66 void scale_srcw16_dstw16();
|
|
67 void scale_srcw16_dstw32();
|
|
68 void scale_srcw32_dstw16();
|
|
69 void scale_srcw32_dstw32();
|
|
70
|
|
71 int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
|
|
72 {
|
|
73 deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);
|
|
74 /* DMA transfer for the input parameters */
|
|
75 spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
|
|
76 DMA_WAIT_TAG(TAG_INIT);
|
|
77
|
|
78 deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
|
|
79 parms.dst_pixel_width, parms.dst_pixel_height);
|
|
80
|
|
81 if(parms.src_pixel_width & 0x1f) {
|
|
82 if(parms.dst_pixel_width & 0x1F) {
|
|
83 deprintf("[SPU] Using scale_srcw16_dstw16\n");
|
|
84 scale_srcw16_dstw16();
|
|
85 } else {
|
|
86 deprintf("[SPU] Using scale_srcw16_dstw32\n");
|
|
87 scale_srcw16_dstw32();
|
|
88 }
|
|
89 } else {
|
|
90 if(parms.dst_pixel_width & 0x1F) {
|
|
91 deprintf("[SPU] Using scale_srcw32_dstw16\n");
|
|
92 scale_srcw32_dstw16();
|
|
93 } else {
|
|
94 deprintf("[SPU] Using scale_srcw32_dstw32\n");
|
|
95 scale_srcw32_dstw32();
|
|
96 }
|
|
97 }
|
|
98 deprintf("[SPU] bilin_scaler_spu... done!\n");
|
|
99
|
|
100 return 0;
|
|
101 }
|
|
102
|
|
103
|
|
104 /*
|
|
105 * vfloat_to_vuint()
|
|
106 *
|
|
107 * converts a float vector to an unsinged int vector using saturated
|
|
108 * arithmetic
|
|
109 *
|
|
110 * @param vec_s float vector for conversion
|
|
111 * @returns converted unsigned int vector
|
|
112 */
|
|
113 inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
|
|
114 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
|
|
115 vec_s = spu_sel(vec_s, vec_0_1, select_1);
|
|
116
|
|
117 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
|
|
118 vec_s = spu_sel(vec_s, vec_255, select_2);
|
|
119 return spu_convtu(vec_s,0);
|
|
120 }
|
|
121
|
|
122
|
|
123 /*
|
|
124 * scale_srcw16_dstw16()
|
|
125 *
|
|
126 * processes an input image of width 16
|
|
127 * scaling is done to a width 16
|
|
128 * result stored in RAM
|
|
129 */
|
|
130 void scale_srcw16_dstw16() {
|
|
131 // extract parameters
|
|
132 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
|
|
133
|
|
134 unsigned int src_width = parms.src_pixel_width;
|
|
135 unsigned int src_height = parms.src_pixel_height;
|
|
136 unsigned int dst_width = parms.dst_pixel_width;
|
|
137 unsigned int dst_height = parms.dst_pixel_height;
|
|
138
|
|
139 // YVU
|
|
140 unsigned int src_linestride_y = src_width;
|
|
141 unsigned int src_dbl_linestride_y = src_width<<1;
|
|
142 unsigned int src_linestride_vu = src_width>>1;
|
|
143 unsigned int src_dbl_linestride_vu = src_width;
|
|
144
|
|
145 // scaled YVU
|
|
146 unsigned int scaled_src_linestride_y = dst_width;
|
|
147
|
|
148 // ram addresses
|
|
149 unsigned char* src_addr_y = parms.y_plane;
|
|
150 unsigned char* src_addr_v = parms.v_plane;
|
|
151 unsigned char* src_addr_u = parms.u_plane;
|
|
152
|
|
153 // for handling misalignment, addresses are precalculated
|
|
154 unsigned char* precalc_src_addr_v = src_addr_v;
|
|
155 unsigned char* precalc_src_addr_u = src_addr_u;
|
|
156
|
|
157 unsigned int dst_picture_size = dst_width*dst_height;
|
|
158
|
|
159 // Sizes for destination
|
|
160 unsigned int dst_dbl_linestride_y = dst_width<<1;
|
|
161 unsigned int dst_dbl_linestride_vu = dst_width>>1;
|
|
162
|
|
163 // Perform address calculation for Y, V and U in main memory with dst_addr as base
|
|
164 unsigned char* dst_addr_main_memory_y = dst_addr;
|
|
165 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
|
|
166 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
|
|
167
|
|
168 // calculate scale factors
|
|
169 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
|
|
170 float y_scale = (float)src_height/(float)dst_height;
|
|
171
|
|
172 // double buffered processing
|
|
173 // buffer switching
|
|
174 unsigned int curr_src_idx = 0;
|
|
175 unsigned int curr_dst_idx = 0;
|
|
176 unsigned int next_src_idx, next_dst_idx;
|
|
177
|
|
178 // 2 lines y as output, upper and lowerline
|
|
179 unsigned int curr_interpl_y_upper = 0;
|
|
180 unsigned int next_interpl_y_upper;
|
|
181 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
|
|
182 // only 1 line v/u output, both planes have the same dimension
|
|
183 unsigned int curr_interpl_vu = 0;
|
|
184 unsigned int next_interpl_vu;
|
|
185
|
|
186 // weights, calculated in every loop iteration
|
|
187 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
188 vector float vf_next_NSweight_y_upper;
|
|
189 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
|
|
190 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
191 vector float vf_next_NSweight_vu;
|
|
192
|
|
193 // line indices for the src picture
|
|
194 float curr_src_y_upper = 0.0f, next_src_y_upper;
|
|
195 float curr_src_y_lower, next_src_y_lower;
|
|
196 float curr_src_vu = 0.0f, next_src_vu;
|
|
197
|
|
198 // line indices for the dst picture
|
|
199 unsigned int dst_y=0, dst_vu=0;
|
|
200
|
|
201 // offset for the v and u plane to handle misalignement
|
|
202 unsigned int curr_lsoff_v = 0, next_lsoff_v;
|
|
203 unsigned int curr_lsoff_u = 0, next_lsoff_u;
|
|
204
|
|
205 // calculate lower line indices
|
|
206 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
|
|
207 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
|
|
208 // lower line weight
|
|
209 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
|
|
210
|
|
211
|
|
212 // start partially double buffered processing
|
|
213 // get initial data, 2 sets of y, 1 set v, 1 set u
|
|
214 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
|
|
215 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
216 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
|
|
217 src_dbl_linestride_y,
|
|
218 RETR_BUF,
|
|
219 0, 0 );
|
|
220 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
221 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
222
|
|
223 /* iteration loop
|
|
224 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
|
|
225 * the scaled output is 2 lines y, 1 line v, 1 line u
|
|
226 * the yuv2rgb-converted output is stored to RAM
|
|
227 */
|
|
228 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
|
|
229 dst_y = dst_vu<<1;
|
|
230
|
|
231 // calculate next indices
|
|
232 next_src_vu = ((float)dst_vu+1)*y_scale;
|
|
233 next_src_y_upper = ((float)dst_y+2)*y_scale;
|
|
234 next_src_y_lower = ((float)dst_y+3)*y_scale;
|
|
235
|
|
236 next_interpl_vu = (unsigned int) next_src_vu;
|
|
237 next_interpl_y_upper = (unsigned int) next_src_y_upper;
|
|
238 next_interpl_y_lower = (unsigned int) next_src_y_lower;
|
|
239
|
|
240 // calculate weight NORTH-SOUTH
|
|
241 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
|
|
242 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
|
|
243 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
|
|
244
|
|
245 // get next lines
|
|
246 next_src_idx = curr_src_idx^1;
|
|
247 next_dst_idx = curr_dst_idx^1;
|
|
248
|
|
249 // 4 lines y
|
|
250 mfc_get( y_plane[next_src_idx],
|
|
251 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
|
|
252 src_dbl_linestride_y,
|
|
253 RETR_BUF+next_src_idx,
|
|
254 0, 0 );
|
|
255 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
|
|
256 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
|
|
257 src_dbl_linestride_y,
|
|
258 RETR_BUF+next_src_idx,
|
|
259 0, 0 );
|
|
260
|
|
261 // 2 lines v
|
|
262 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
|
|
263 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
|
|
264 mfc_get( v_plane[next_src_idx],
|
|
265 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
|
|
266 src_dbl_linestride_vu+(next_lsoff_v<<1),
|
|
267 RETR_BUF+next_src_idx,
|
|
268 0, 0 );
|
|
269 // 2 lines u
|
|
270 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
|
|
271 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
|
|
272 mfc_get( u_plane[next_src_idx],
|
|
273 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
|
|
274 src_dbl_linestride_vu+(next_lsoff_v<<1),
|
|
275 RETR_BUF+next_src_idx,
|
|
276 0, 0 );
|
|
277
|
|
278 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
279
|
|
280 // scaling
|
|
281 // work line y_upper
|
|
282 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
283 scaled_y_plane[curr_src_idx],
|
|
284 dst_width,
|
|
285 vf_x_scale,
|
|
286 vf_curr_NSweight_y_upper,
|
|
287 src_linestride_y );
|
|
288 // work line y_lower
|
|
289 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
290 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
291 dst_width,
|
|
292 vf_x_scale,
|
|
293 vf_curr_NSweight_y_lower,
|
|
294 src_linestride_y );
|
|
295 // work line v
|
|
296 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
|
|
297 scaled_v_plane[curr_src_idx],
|
|
298 dst_width>>1,
|
|
299 vf_x_scale,
|
|
300 vf_curr_NSweight_vu,
|
|
301 src_linestride_vu );
|
|
302 // work line u
|
|
303 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
|
|
304 scaled_u_plane[curr_src_idx],
|
|
305 dst_width>>1,
|
|
306 vf_x_scale,
|
|
307 vf_curr_NSweight_vu,
|
|
308 src_linestride_vu );
|
|
309
|
|
310
|
|
311 // Store the result back to main memory into a destination buffer in YUV format
|
|
312 //---------------------------------------------------------------------------------------------
|
|
313 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
314
|
|
315 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
316 // dst_width: Pixel width of destination image
|
|
317 // dst_addr: Destination address in main memory
|
|
318 // dst_vu: Counter which is incremented one by one
|
|
319 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
320 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
321 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
322 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
323 STR_BUF+curr_dst_idx, // Tag
|
|
324 0, 0 );
|
|
325
|
|
326 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
327 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
328 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
329 STR_BUF+curr_dst_idx, // Tag
|
|
330 0, 0 );
|
|
331
|
|
332 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
333 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
334 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
335 STR_BUF+curr_dst_idx, // Tag
|
|
336 0, 0 );
|
|
337 //---------------------------------------------------------------------------------------------
|
|
338
|
|
339
|
|
340 // update for next cycle
|
|
341 curr_src_idx = next_src_idx;
|
|
342 curr_dst_idx = next_dst_idx;
|
|
343
|
|
344 curr_interpl_y_upper = next_interpl_y_upper;
|
|
345 curr_interpl_y_lower = next_interpl_y_lower;
|
|
346 curr_interpl_vu = next_interpl_vu;
|
|
347
|
|
348 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
|
|
349 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
|
|
350 vf_curr_NSweight_vu = vf_next_NSweight_vu;
|
|
351
|
|
352 curr_src_y_upper = next_src_y_upper;
|
|
353 curr_src_y_lower = next_src_y_lower;
|
|
354 curr_src_vu = next_src_vu;
|
|
355
|
|
356 curr_lsoff_v = next_lsoff_v;
|
|
357 curr_lsoff_u = next_lsoff_u;
|
|
358 }
|
|
359
|
|
360
|
|
361
|
|
362 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
363
|
|
364 // scaling
|
|
365 // work line y_upper
|
|
366 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
367 scaled_y_plane[curr_src_idx],
|
|
368 dst_width,
|
|
369 vf_x_scale,
|
|
370 vf_curr_NSweight_y_upper,
|
|
371 src_linestride_y );
|
|
372 // work line y_lower
|
|
373 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
374 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
375 dst_width,
|
|
376 vf_x_scale,
|
|
377 vf_curr_NSweight_y_lower,
|
|
378 src_linestride_y );
|
|
379 // work line v
|
|
380 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
|
|
381 scaled_v_plane[curr_src_idx],
|
|
382 dst_width>>1,
|
|
383 vf_x_scale,
|
|
384 vf_curr_NSweight_vu,
|
|
385 src_linestride_vu );
|
|
386 // work line u
|
|
387 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
|
|
388 scaled_u_plane[curr_src_idx],
|
|
389 dst_width>>1,
|
|
390 vf_x_scale,
|
|
391 vf_curr_NSweight_vu,
|
|
392 src_linestride_vu );
|
|
393
|
|
394
|
|
395 // Store the result back to main memory into a destination buffer in YUV format
|
|
396 //---------------------------------------------------------------------------------------------
|
|
397 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
398
|
|
399 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
400 // dst_width: Pixel width of destination image
|
|
401 // dst_addr: Destination address in main memory
|
|
402 // dst_vu: Counter which is incremented one by one
|
|
403 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
404 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
405 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
406 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
407 STR_BUF+curr_dst_idx, // Tag
|
|
408 0, 0 );
|
|
409
|
|
410 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
411 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
412 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
413 STR_BUF+curr_dst_idx, // Tag
|
|
414 0, 0 );
|
|
415
|
|
416 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
417 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
418 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
419 STR_BUF+curr_dst_idx, // Tag
|
|
420 0, 0 );
|
|
421
|
|
422 // wait for completion
|
|
423 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
424 //---------------------------------------------------------------------------------------------
|
|
425 }
|
|
426
|
|
427
|
|
428 /*
|
|
429 * scale_srcw16_dstw32()
|
|
430 *
|
|
431 * processes an input image of width 16
|
|
432 * scaling is done to a width 32
|
|
433 * yuv2rgb conversion on a width of 32
|
|
434 * result stored in RAM
|
|
435 */
|
|
436 void scale_srcw16_dstw32() {
|
|
437 // extract parameters
|
|
438 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
|
|
439
|
|
440 unsigned int src_width = parms.src_pixel_width;
|
|
441 unsigned int src_height = parms.src_pixel_height;
|
|
442 unsigned int dst_width = parms.dst_pixel_width;
|
|
443 unsigned int dst_height = parms.dst_pixel_height;
|
|
444
|
|
445 // YVU
|
|
446 unsigned int src_linestride_y = src_width;
|
|
447 unsigned int src_dbl_linestride_y = src_width<<1;
|
|
448 unsigned int src_linestride_vu = src_width>>1;
|
|
449 unsigned int src_dbl_linestride_vu = src_width;
|
|
450 // scaled YVU
|
|
451 unsigned int scaled_src_linestride_y = dst_width;
|
|
452
|
|
453 // ram addresses
|
|
454 unsigned char* src_addr_y = parms.y_plane;
|
|
455 unsigned char* src_addr_v = parms.v_plane;
|
|
456 unsigned char* src_addr_u = parms.u_plane;
|
|
457
|
|
458 unsigned int dst_picture_size = dst_width*dst_height;
|
|
459
|
|
460 // Sizes for destination
|
|
461 unsigned int dst_dbl_linestride_y = dst_width<<1;
|
|
462 unsigned int dst_dbl_linestride_vu = dst_width>>1;
|
|
463
|
|
464 // Perform address calculation for Y, V and U in main memory with dst_addr as base
|
|
465 unsigned char* dst_addr_main_memory_y = dst_addr;
|
|
466 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
|
|
467 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
|
|
468
|
|
469
|
|
470 // for handling misalignment, addresses are precalculated
|
|
471 unsigned char* precalc_src_addr_v = src_addr_v;
|
|
472 unsigned char* precalc_src_addr_u = src_addr_u;
|
|
473
|
|
474 // calculate scale factors
|
|
475 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
|
|
476 float y_scale = (float)src_height/(float)dst_height;
|
|
477
|
|
478 // double buffered processing
|
|
479 // buffer switching
|
|
480 unsigned int curr_src_idx = 0;
|
|
481 unsigned int curr_dst_idx = 0;
|
|
482 unsigned int next_src_idx, next_dst_idx;
|
|
483
|
|
484 // 2 lines y as output, upper and lowerline
|
|
485 unsigned int curr_interpl_y_upper = 0;
|
|
486 unsigned int next_interpl_y_upper;
|
|
487 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
|
|
488 // only 1 line v/u output, both planes have the same dimension
|
|
489 unsigned int curr_interpl_vu = 0;
|
|
490 unsigned int next_interpl_vu;
|
|
491
|
|
492 // weights, calculated in every loop iteration
|
|
493 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
494 vector float vf_next_NSweight_y_upper;
|
|
495 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
|
|
496 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
497 vector float vf_next_NSweight_vu;
|
|
498
|
|
499 // line indices for the src picture
|
|
500 float curr_src_y_upper = 0.0f, next_src_y_upper;
|
|
501 float curr_src_y_lower, next_src_y_lower;
|
|
502 float curr_src_vu = 0.0f, next_src_vu;
|
|
503
|
|
504 // line indices for the dst picture
|
|
505 unsigned int dst_y=0, dst_vu=0;
|
|
506
|
|
507 // offset for the v and u plane to handle misalignement
|
|
508 unsigned int curr_lsoff_v = 0, next_lsoff_v;
|
|
509 unsigned int curr_lsoff_u = 0, next_lsoff_u;
|
|
510
|
|
511 // calculate lower line idices
|
|
512 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
|
|
513 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
|
|
514 // lower line weight
|
|
515 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
|
|
516
|
|
517
|
|
518 // start partially double buffered processing
|
|
519 // get initial data, 2 sets of y, 1 set v, 1 set u
|
|
520 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
|
|
521 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
522 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
|
|
523 src_dbl_linestride_y,
|
|
524 RETR_BUF,
|
|
525 0, 0 );
|
|
526 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
527 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
528
|
|
529 // iteration loop
|
|
530 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
|
|
531 // the scaled output is 2 lines y, 1 line v, 1 line u
|
|
532 // the yuv2rgb-converted output is stored to RAM
|
|
533 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
|
|
534 dst_y = dst_vu<<1;
|
|
535
|
|
536 // calculate next indices
|
|
537 next_src_vu = ((float)dst_vu+1)*y_scale;
|
|
538 next_src_y_upper = ((float)dst_y+2)*y_scale;
|
|
539 next_src_y_lower = ((float)dst_y+3)*y_scale;
|
|
540
|
|
541 next_interpl_vu = (unsigned int) next_src_vu;
|
|
542 next_interpl_y_upper = (unsigned int) next_src_y_upper;
|
|
543 next_interpl_y_lower = (unsigned int) next_src_y_lower;
|
|
544
|
|
545 // calculate weight NORTH-SOUTH
|
|
546 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
|
|
547 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
|
|
548 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
|
|
549
|
|
550 // get next lines
|
|
551 next_src_idx = curr_src_idx^1;
|
|
552 next_dst_idx = curr_dst_idx^1;
|
|
553
|
|
554 // 4 lines y
|
|
555 mfc_get( y_plane[next_src_idx],
|
|
556 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
|
|
557 src_dbl_linestride_y,
|
|
558 RETR_BUF+next_src_idx,
|
|
559 0, 0 );
|
|
560 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
|
|
561 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
|
|
562 src_dbl_linestride_y,
|
|
563 RETR_BUF+next_src_idx,
|
|
564 0, 0 );
|
|
565
|
|
566 // 2 lines v
|
|
567 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
|
|
568 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
|
|
569 mfc_get( v_plane[next_src_idx],
|
|
570 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
|
|
571 src_dbl_linestride_vu+(next_lsoff_v<<1),
|
|
572 RETR_BUF+next_src_idx,
|
|
573 0, 0 );
|
|
574 // 2 lines u
|
|
575 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
|
|
576 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
|
|
577 mfc_get( u_plane[next_src_idx],
|
|
578 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
|
|
579 src_dbl_linestride_vu+(next_lsoff_v<<1),
|
|
580 RETR_BUF+next_src_idx,
|
|
581 0, 0 );
|
|
582
|
|
583 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
584
|
|
585 // scaling
|
|
586 // work line y_upper
|
|
587 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
588 scaled_y_plane[curr_src_idx],
|
|
589 dst_width,
|
|
590 vf_x_scale,
|
|
591 vf_curr_NSweight_y_upper,
|
|
592 src_linestride_y );
|
|
593 // work line y_lower
|
|
594 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
595 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
596 dst_width,
|
|
597 vf_x_scale,
|
|
598 vf_curr_NSweight_y_lower,
|
|
599 src_linestride_y );
|
|
600 // work line v
|
|
601 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
|
|
602 scaled_v_plane[curr_src_idx],
|
|
603 dst_width>>1,
|
|
604 vf_x_scale,
|
|
605 vf_curr_NSweight_vu,
|
|
606 src_linestride_vu );
|
|
607 // work line u
|
|
608 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
|
|
609 scaled_u_plane[curr_src_idx],
|
|
610 dst_width>>1,
|
|
611 vf_x_scale,
|
|
612 vf_curr_NSweight_vu,
|
|
613 src_linestride_vu );
|
|
614
|
|
615 //---------------------------------------------------------------------------------------------
|
|
616 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
617
|
|
618 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
619 // dst_width: Pixel width of destination image
|
|
620 // dst_addr: Destination address in main memory
|
|
621 // dst_vu: Counter which is incremented one by one
|
|
622 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
623
|
|
624 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
625 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
626 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
627 STR_BUF+curr_dst_idx, // Tag
|
|
628 0, 0 );
|
|
629
|
|
630 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
631 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
632 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
633 STR_BUF+curr_dst_idx, // Tag
|
|
634 0, 0 );
|
|
635
|
|
636 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
637 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
638 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
639 STR_BUF+curr_dst_idx, // Tag
|
|
640 0, 0 );
|
|
641 //---------------------------------------------------------------------------------------------
|
|
642
|
|
643
|
|
644 // update for next cycle
|
|
645 curr_src_idx = next_src_idx;
|
|
646 curr_dst_idx = next_dst_idx;
|
|
647
|
|
648 curr_interpl_y_upper = next_interpl_y_upper;
|
|
649 curr_interpl_y_lower = next_interpl_y_lower;
|
|
650 curr_interpl_vu = next_interpl_vu;
|
|
651
|
|
652 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
|
|
653 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
|
|
654 vf_curr_NSweight_vu = vf_next_NSweight_vu;
|
|
655
|
|
656 curr_src_y_upper = next_src_y_upper;
|
|
657 curr_src_y_lower = next_src_y_lower;
|
|
658 curr_src_vu = next_src_vu;
|
|
659
|
|
660 curr_lsoff_v = next_lsoff_v;
|
|
661 curr_lsoff_u = next_lsoff_u;
|
|
662 }
|
|
663
|
|
664
|
|
665
|
|
666 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
667
|
|
668 // scaling
|
|
669 // work line y_upper
|
|
670 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
671 scaled_y_plane[curr_src_idx],
|
|
672 dst_width,
|
|
673 vf_x_scale,
|
|
674 vf_curr_NSweight_y_upper,
|
|
675 src_linestride_y );
|
|
676 // work line y_lower
|
|
677 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
678 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
679 dst_width,
|
|
680 vf_x_scale,
|
|
681 vf_curr_NSweight_y_lower,
|
|
682 src_linestride_y );
|
|
683 // work line v
|
|
684 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
|
|
685 scaled_v_plane[curr_src_idx],
|
|
686 dst_width>>1,
|
|
687 vf_x_scale,
|
|
688 vf_curr_NSweight_vu,
|
|
689 src_linestride_vu );
|
|
690 // work line u
|
|
691 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
|
|
692 scaled_u_plane[curr_src_idx],
|
|
693 dst_width>>1,
|
|
694 vf_x_scale,
|
|
695 vf_curr_NSweight_vu,
|
|
696 src_linestride_vu );
|
|
697
|
|
698 //---------------------------------------------------------------------------------------------
|
|
699 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
700
|
|
701 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
702 // dst_width: Pixel width of destination image
|
|
703 // dst_addr: Destination address in main memory
|
|
704 // dst_vu: Counter which is incremented one by one
|
|
705 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
706
|
|
707 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
708 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
709 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
710 STR_BUF+curr_dst_idx, // Tag
|
|
711 0, 0 );
|
|
712
|
|
713 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
714 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
715 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
716 STR_BUF+curr_dst_idx, // Tag
|
|
717 0, 0 );
|
|
718
|
|
719 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
720 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
721 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
722 STR_BUF+curr_dst_idx, // Tag
|
|
723 0, 0 );
|
|
724
|
|
725 // wait for completion
|
|
726 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
727 //---------------------------------------------------------------------------------------------
|
|
728 }
|
|
729
|
|
730
|
|
731 /*
|
|
732 * scale_srcw32_dstw16()
|
|
733 *
|
|
734 * processes an input image of width 32
|
|
735 * scaling is done to a width 16
|
|
736 * yuv2rgb conversion on a width of 16
|
|
737 * result stored in RAM
|
|
738 */
|
|
739 void scale_srcw32_dstw16() {
|
|
740 // extract parameters
|
|
741 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
|
|
742
|
|
743 unsigned int src_width = parms.src_pixel_width;
|
|
744 unsigned int src_height = parms.src_pixel_height;
|
|
745 unsigned int dst_width = parms.dst_pixel_width;
|
|
746 unsigned int dst_height = parms.dst_pixel_height;
|
|
747
|
|
748 // YVU
|
|
749 unsigned int src_linestride_y = src_width;
|
|
750 unsigned int src_dbl_linestride_y = src_width<<1;
|
|
751 unsigned int src_linestride_vu = src_width>>1;
|
|
752 unsigned int src_dbl_linestride_vu = src_width;
|
|
753 // scaled YVU
|
|
754 unsigned int scaled_src_linestride_y = dst_width;
|
|
755
|
|
756 // ram addresses
|
|
757 unsigned char* src_addr_y = parms.y_plane;
|
|
758 unsigned char* src_addr_v = parms.v_plane;
|
|
759 unsigned char* src_addr_u = parms.u_plane;
|
|
760
|
|
761 unsigned int dst_picture_size = dst_width*dst_height;
|
|
762
|
|
763 // Sizes for destination
|
|
764 unsigned int dst_dbl_linestride_y = dst_width<<1;
|
|
765 unsigned int dst_dbl_linestride_vu = dst_width>>1;
|
|
766
|
|
767 // Perform address calculation for Y, V and U in main memory with dst_addr as base
|
|
768 unsigned char* dst_addr_main_memory_y = dst_addr;
|
|
769 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
|
|
770 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
|
|
771
|
|
772 // calculate scale factors
|
|
773 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
|
|
774 float y_scale = (float)src_height/(float)dst_height;
|
|
775
|
|
776 // double buffered processing
|
|
777 // buffer switching
|
|
778 unsigned int curr_src_idx = 0;
|
|
779 unsigned int curr_dst_idx = 0;
|
|
780 unsigned int next_src_idx, next_dst_idx;
|
|
781
|
|
782 // 2 lines y as output, upper and lowerline
|
|
783 unsigned int curr_interpl_y_upper = 0;
|
|
784 unsigned int next_interpl_y_upper;
|
|
785 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
|
|
786 // only 1 line v/u output, both planes have the same dimension
|
|
787 unsigned int curr_interpl_vu = 0;
|
|
788 unsigned int next_interpl_vu;
|
|
789
|
|
790 // weights, calculated in every loop iteration
|
|
791 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
792 vector float vf_next_NSweight_y_upper;
|
|
793 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
|
|
794 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
795 vector float vf_next_NSweight_vu;
|
|
796
|
|
797 // line indices for the src picture
|
|
798 float curr_src_y_upper = 0.0f, next_src_y_upper;
|
|
799 float curr_src_y_lower, next_src_y_lower;
|
|
800 float curr_src_vu = 0.0f, next_src_vu;
|
|
801
|
|
802 // line indices for the dst picture
|
|
803 unsigned int dst_y=0, dst_vu=0;
|
|
804
|
|
805 // calculate lower line idices
|
|
806 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
|
|
807 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
|
|
808 // lower line weight
|
|
809 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
|
|
810
|
|
811
|
|
812 // start partially double buffered processing
|
|
813 // get initial data, 2 sets of y, 1 set v, 1 set u
|
|
814 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
|
|
815 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
816 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
|
|
817 src_dbl_linestride_y,
|
|
818 RETR_BUF,
|
|
819 0, 0 );
|
|
820 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
821 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
822
|
|
823 // iteration loop
|
|
824 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
|
|
825 // the scaled output is 2 lines y, 1 line v, 1 line u
|
|
826 // the yuv2rgb-converted output is stored to RAM
|
|
827 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
|
|
828 dst_y = dst_vu<<1;
|
|
829
|
|
830 // calculate next indices
|
|
831 next_src_vu = ((float)dst_vu+1)*y_scale;
|
|
832 next_src_y_upper = ((float)dst_y+2)*y_scale;
|
|
833 next_src_y_lower = ((float)dst_y+3)*y_scale;
|
|
834
|
|
835 next_interpl_vu = (unsigned int) next_src_vu;
|
|
836 next_interpl_y_upper = (unsigned int) next_src_y_upper;
|
|
837 next_interpl_y_lower = (unsigned int) next_src_y_lower;
|
|
838
|
|
839 // calculate weight NORTH-SOUTH
|
|
840 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
|
|
841 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
|
|
842 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
|
|
843
|
|
844 // get next lines
|
|
845 next_src_idx = curr_src_idx^1;
|
|
846 next_dst_idx = curr_dst_idx^1;
|
|
847
|
|
848 // 4 lines y
|
|
849 mfc_get( y_plane[next_src_idx],
|
|
850 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
|
|
851 src_dbl_linestride_y,
|
|
852 RETR_BUF+next_src_idx,
|
|
853 0, 0 );
|
|
854 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
|
|
855 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
|
|
856 src_dbl_linestride_y,
|
|
857 RETR_BUF+next_src_idx,
|
|
858 0, 0 );
|
|
859
|
|
860 // 2 lines v
|
|
861 mfc_get( v_plane[next_src_idx],
|
|
862 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
|
|
863 src_dbl_linestride_vu,
|
|
864 RETR_BUF+next_src_idx,
|
|
865 0, 0 );
|
|
866 // 2 lines u
|
|
867 mfc_get( u_plane[next_src_idx],
|
|
868 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
|
|
869 src_dbl_linestride_vu,
|
|
870 RETR_BUF+next_src_idx,
|
|
871 0, 0 );
|
|
872
|
|
873 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
874
|
|
875 // scaling
|
|
876 // work line y_upper
|
|
877 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
878 scaled_y_plane[curr_src_idx],
|
|
879 dst_width,
|
|
880 vf_x_scale,
|
|
881 vf_curr_NSweight_y_upper,
|
|
882 src_linestride_y );
|
|
883 // work line y_lower
|
|
884 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
885 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
886 dst_width,
|
|
887 vf_x_scale,
|
|
888 vf_curr_NSweight_y_lower,
|
|
889 src_linestride_y );
|
|
890 // work line v
|
|
891 bilinear_scale_line_w16( v_plane[curr_src_idx],
|
|
892 scaled_v_plane[curr_src_idx],
|
|
893 dst_width>>1,
|
|
894 vf_x_scale,
|
|
895 vf_curr_NSweight_vu,
|
|
896 src_linestride_vu );
|
|
897 // work line u
|
|
898 bilinear_scale_line_w16( u_plane[curr_src_idx],
|
|
899 scaled_u_plane[curr_src_idx],
|
|
900 dst_width>>1,
|
|
901 vf_x_scale,
|
|
902 vf_curr_NSweight_vu,
|
|
903 src_linestride_vu );
|
|
904
|
|
905 //---------------------------------------------------------------------------------------------
|
|
906 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
907
|
|
908 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
909 // dst_width: Pixel width of destination image
|
|
910 // dst_addr: Destination address in main memory
|
|
911 // dst_vu: Counter which is incremented one by one
|
|
912 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
913
|
|
914 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
915 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
916 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
917 STR_BUF+curr_dst_idx, // Tag
|
|
918 0, 0 );
|
|
919
|
|
920 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
921 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
922 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
923 STR_BUF+curr_dst_idx, // Tag
|
|
924 0, 0 );
|
|
925
|
|
926 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
927 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
928 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
929 STR_BUF+curr_dst_idx, // Tag
|
|
930 0, 0 );
|
|
931 //---------------------------------------------------------------------------------------------
|
|
932
|
|
933
|
|
934 // update for next cycle
|
|
935 curr_src_idx = next_src_idx;
|
|
936 curr_dst_idx = next_dst_idx;
|
|
937
|
|
938 curr_interpl_y_upper = next_interpl_y_upper;
|
|
939 curr_interpl_y_lower = next_interpl_y_lower;
|
|
940 curr_interpl_vu = next_interpl_vu;
|
|
941
|
|
942 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
|
|
943 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
|
|
944 vf_curr_NSweight_vu = vf_next_NSweight_vu;
|
|
945
|
|
946 curr_src_y_upper = next_src_y_upper;
|
|
947 curr_src_y_lower = next_src_y_lower;
|
|
948 curr_src_vu = next_src_vu;
|
|
949 }
|
|
950
|
|
951
|
|
952
|
|
953 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
954
|
|
955 // scaling
|
|
956 // work line y_upper
|
|
957 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
958 scaled_y_plane[curr_src_idx],
|
|
959 dst_width,
|
|
960 vf_x_scale,
|
|
961 vf_curr_NSweight_y_upper,
|
|
962 src_linestride_y );
|
|
963 // work line y_lower
|
|
964 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
965 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
966 dst_width,
|
|
967 vf_x_scale,
|
|
968 vf_curr_NSweight_y_lower,
|
|
969 src_linestride_y );
|
|
970 // work line v
|
|
971 bilinear_scale_line_w16( v_plane[curr_src_idx],
|
|
972 scaled_v_plane[curr_src_idx],
|
|
973 dst_width>>1,
|
|
974 vf_x_scale,
|
|
975 vf_curr_NSweight_vu,
|
|
976 src_linestride_vu );
|
|
977 // work line u
|
|
978 bilinear_scale_line_w16( u_plane[curr_src_idx],
|
|
979 scaled_u_plane[curr_src_idx],
|
|
980 dst_width>>1,
|
|
981 vf_x_scale,
|
|
982 vf_curr_NSweight_vu,
|
|
983 src_linestride_vu );
|
|
984
|
|
985
|
|
986 //---------------------------------------------------------------------------------------------
|
|
987 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
988
|
|
989 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
990 // dst_width: Pixel width of destination image
|
|
991 // dst_addr: Destination address in main memory
|
|
992 // dst_vu: Counter which is incremented one by one
|
|
993 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
994
|
|
995 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
996 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
997 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
998 STR_BUF+curr_dst_idx, // Tag
|
|
999 0, 0 );
|
|
1000
|
|
1001 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
1002 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1003 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
1004 STR_BUF+curr_dst_idx, // Tag
|
|
1005 0, 0 );
|
|
1006
|
|
1007 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
1008 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1009 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
1010 STR_BUF+curr_dst_idx, // Tag
|
|
1011 0, 0 );
|
|
1012
|
|
1013 // wait for completion
|
|
1014 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
1015 //---------------------------------------------------------------------------------------------
|
|
1016 }
|
|
1017
|
|
1018
|
|
1019 /**
|
|
1020 * scale_srcw32_dstw32()
|
|
1021 *
|
|
1022 * processes an input image of width 32
|
|
1023 * scaling is done to a width 32
|
|
1024 * yuv2rgb conversion on a width of 32
|
|
1025 * result stored in RAM
|
|
1026 */
|
|
1027 void scale_srcw32_dstw32() {
|
|
1028 // extract parameters
|
|
1029 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
|
|
1030
|
|
1031 unsigned int src_width = parms.src_pixel_width;
|
|
1032 unsigned int src_height = parms.src_pixel_height;
|
|
1033 unsigned int dst_width = parms.dst_pixel_width;
|
|
1034 unsigned int dst_height = parms.dst_pixel_height;
|
|
1035
|
|
1036 // YVU
|
|
1037 unsigned int src_linestride_y = src_width;
|
|
1038 unsigned int src_dbl_linestride_y = src_width<<1;
|
|
1039 unsigned int src_linestride_vu = src_width>>1;
|
|
1040 unsigned int src_dbl_linestride_vu = src_width;
|
|
1041
|
|
1042 // scaled YVU
|
|
1043 unsigned int scaled_src_linestride_y = dst_width;
|
|
1044
|
|
1045 // ram addresses
|
|
1046 unsigned char* src_addr_y = parms.y_plane;
|
|
1047 unsigned char* src_addr_v = parms.v_plane;
|
|
1048 unsigned char* src_addr_u = parms.u_plane;
|
|
1049
|
|
1050 unsigned int dst_picture_size = dst_width*dst_height;
|
|
1051
|
|
1052 // Sizes for destination
|
|
1053 unsigned int dst_dbl_linestride_y = dst_width<<1;
|
|
1054 unsigned int dst_dbl_linestride_vu = dst_width>>1;
|
|
1055
|
|
1056 // Perform address calculation for Y, V and U in main memory with dst_addr as base
|
|
1057 unsigned char* dst_addr_main_memory_y = dst_addr;
|
|
1058 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
|
|
1059 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
|
|
1060
|
|
1061 // calculate scale factors
|
|
1062 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
|
|
1063 float y_scale = (float)src_height/(float)dst_height;
|
|
1064
|
|
1065 // double buffered processing
|
|
1066 // buffer switching
|
|
1067 unsigned int curr_src_idx = 0;
|
|
1068 unsigned int curr_dst_idx = 0;
|
|
1069 unsigned int next_src_idx, next_dst_idx;
|
|
1070
|
|
1071 // 2 lines y as output, upper and lowerline
|
|
1072 unsigned int curr_interpl_y_upper = 0;
|
|
1073 unsigned int next_interpl_y_upper;
|
|
1074 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
|
|
1075 // only 1 line v/u output, both planes have the same dimension
|
|
1076 unsigned int curr_interpl_vu = 0;
|
|
1077 unsigned int next_interpl_vu;
|
|
1078
|
|
1079 // weights, calculated in every loop iteration
|
|
1080 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
1081 vector float vf_next_NSweight_y_upper;
|
|
1082 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
|
|
1083 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
1084 vector float vf_next_NSweight_vu;
|
|
1085
|
|
1086 // line indices for the src picture
|
|
1087 float curr_src_y_upper = 0.0f, next_src_y_upper;
|
|
1088 float curr_src_y_lower, next_src_y_lower;
|
|
1089 float curr_src_vu = 0.0f, next_src_vu;
|
|
1090
|
|
1091 // line indices for the dst picture
|
|
1092 unsigned int dst_y=0, dst_vu=0;
|
|
1093
|
|
1094 // calculate lower line idices
|
|
1095 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
|
|
1096 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
|
|
1097 // lower line weight
|
|
1098 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
|
|
1099
|
|
1100
|
|
1101 // start partially double buffered processing
|
|
1102 // get initial data, 2 sets of y, 1 set v, 1 set u
|
|
1103 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
|
|
1104 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
1105 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
|
|
1106 src_dbl_linestride_y,
|
|
1107 RETR_BUF,
|
|
1108 0, 0 );
|
|
1109 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
1110 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
|
|
1111
|
|
1112 // iteration loop
|
|
1113 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
|
|
1114 // the scaled output is 2 lines y, 1 line v, 1 line u
|
|
1115 // the yuv2rgb-converted output is stored to RAM
|
|
1116 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
|
|
1117 dst_y = dst_vu<<1;
|
|
1118
|
|
1119 // calculate next indices
|
|
1120 next_src_vu = ((float)dst_vu+1)*y_scale;
|
|
1121 next_src_y_upper = ((float)dst_y+2)*y_scale;
|
|
1122 next_src_y_lower = ((float)dst_y+3)*y_scale;
|
|
1123
|
|
1124 next_interpl_vu = (unsigned int) next_src_vu;
|
|
1125 next_interpl_y_upper = (unsigned int) next_src_y_upper;
|
|
1126 next_interpl_y_lower = (unsigned int) next_src_y_lower;
|
|
1127
|
|
1128 // calculate weight NORTH-SOUTH
|
|
1129 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
|
|
1130 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
|
|
1131 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
|
|
1132
|
|
1133 // get next lines
|
|
1134 next_src_idx = curr_src_idx^1;
|
|
1135 next_dst_idx = curr_dst_idx^1;
|
|
1136
|
|
1137 // 4 lines y
|
|
1138 mfc_get( y_plane[next_src_idx],
|
|
1139 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
|
|
1140 src_dbl_linestride_y,
|
|
1141 RETR_BUF+next_src_idx,
|
|
1142 0, 0 );
|
|
1143 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
|
|
1144 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
|
|
1145 src_dbl_linestride_y,
|
|
1146 RETR_BUF+next_src_idx,
|
|
1147 0, 0 );
|
|
1148
|
|
1149 // 2 lines v
|
|
1150 mfc_get( v_plane[next_src_idx],
|
|
1151 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
|
|
1152 src_dbl_linestride_vu,
|
|
1153 RETR_BUF+next_src_idx,
|
|
1154 0, 0 );
|
|
1155 // 2 lines u
|
|
1156 mfc_get( u_plane[next_src_idx],
|
|
1157 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
|
|
1158 src_dbl_linestride_vu,
|
|
1159 RETR_BUF+next_src_idx,
|
|
1160 0, 0 );
|
|
1161
|
|
1162 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
1163
|
|
1164 // scaling
|
|
1165 // work line y_upper
|
|
1166 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
1167 scaled_y_plane[curr_src_idx],
|
|
1168 dst_width,
|
|
1169 vf_x_scale,
|
|
1170 vf_curr_NSweight_y_upper,
|
|
1171 src_linestride_y );
|
|
1172 // work line y_lower
|
|
1173 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
1174 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
1175 dst_width,
|
|
1176 vf_x_scale,
|
|
1177 vf_curr_NSweight_y_lower,
|
|
1178 src_linestride_y );
|
|
1179 // work line v
|
|
1180 bilinear_scale_line_w16( v_plane[curr_src_idx],
|
|
1181 scaled_v_plane[curr_src_idx],
|
|
1182 dst_width>>1,
|
|
1183 vf_x_scale,
|
|
1184 vf_curr_NSweight_vu,
|
|
1185 src_linestride_vu );
|
|
1186 // work line u
|
|
1187 bilinear_scale_line_w16( u_plane[curr_src_idx],
|
|
1188 scaled_u_plane[curr_src_idx],
|
|
1189 dst_width>>1,
|
|
1190 vf_x_scale,
|
|
1191 vf_curr_NSweight_vu,
|
|
1192 src_linestride_vu );
|
|
1193
|
|
1194
|
|
1195
|
|
1196 // Store the result back to main memory into a destination buffer in YUV format
|
|
1197 //---------------------------------------------------------------------------------------------
|
|
1198 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
1199
|
|
1200 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
1201 // dst_width: Pixel width of destination image
|
|
1202 // dst_addr: Destination address in main memory
|
|
1203 // dst_vu: Counter which is incremented one by one
|
|
1204 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
1205
|
|
1206 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
1207 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
1208 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
1209 STR_BUF+curr_dst_idx, // Tag
|
|
1210 0, 0 );
|
|
1211
|
|
1212 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
1213 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1214 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
1215 STR_BUF+curr_dst_idx, // Tag
|
|
1216 0, 0 );
|
|
1217
|
|
1218 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
1219 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1220 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
1221 STR_BUF+curr_dst_idx, // Tag
|
|
1222 0, 0 );
|
|
1223 //---------------------------------------------------------------------------------------------
|
|
1224
|
|
1225
|
|
1226 // update for next cycle
|
|
1227 curr_src_idx = next_src_idx;
|
|
1228 curr_dst_idx = next_dst_idx;
|
|
1229
|
|
1230 curr_interpl_y_upper = next_interpl_y_upper;
|
|
1231 curr_interpl_y_lower = next_interpl_y_lower;
|
|
1232 curr_interpl_vu = next_interpl_vu;
|
|
1233
|
|
1234 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
|
|
1235 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
|
|
1236 vf_curr_NSweight_vu = vf_next_NSweight_vu;
|
|
1237
|
|
1238 curr_src_y_upper = next_src_y_upper;
|
|
1239 curr_src_y_lower = next_src_y_lower;
|
|
1240 curr_src_vu = next_src_vu;
|
|
1241 }
|
|
1242
|
|
1243
|
|
1244
|
|
1245 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
|
|
1246
|
|
1247 // scaling
|
|
1248 // work line y_upper
|
|
1249 bilinear_scale_line_w16( y_plane[curr_src_idx],
|
|
1250 scaled_y_plane[curr_src_idx],
|
|
1251 dst_width,
|
|
1252 vf_x_scale,
|
|
1253 vf_curr_NSweight_y_upper,
|
|
1254 src_linestride_y );
|
|
1255 // work line y_lower
|
|
1256 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
|
|
1257 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
|
|
1258 dst_width,
|
|
1259 vf_x_scale,
|
|
1260 vf_curr_NSweight_y_lower,
|
|
1261 src_linestride_y );
|
|
1262 // work line v
|
|
1263 bilinear_scale_line_w16( v_plane[curr_src_idx],
|
|
1264 scaled_v_plane[curr_src_idx],
|
|
1265 dst_width>>1,
|
|
1266 vf_x_scale,
|
|
1267 vf_curr_NSweight_vu,
|
|
1268 src_linestride_vu );
|
|
1269 // work line u
|
|
1270 bilinear_scale_line_w16( u_plane[curr_src_idx],
|
|
1271 scaled_u_plane[curr_src_idx],
|
|
1272 dst_width>>1,
|
|
1273 vf_x_scale,
|
|
1274 vf_curr_NSweight_vu,
|
|
1275 src_linestride_vu );
|
|
1276
|
|
1277
|
|
1278 // Store the result back to main memory into a destination buffer in YUV format
|
|
1279 //---------------------------------------------------------------------------------------------
|
|
1280 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
1281
|
|
1282 // Perform three DMA transfers to 3 different locations in the main memory!
|
|
1283 // dst_width: Pixel width of destination image
|
|
1284 // dst_addr: Destination address in main memory
|
|
1285 // dst_vu: Counter which is incremented one by one
|
|
1286 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
|
|
1287
|
|
1288 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
|
|
1289 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
|
|
1290 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
|
|
1291 STR_BUF+curr_dst_idx, // Tag
|
|
1292 0, 0 );
|
|
1293
|
|
1294 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
|
|
1295 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1296 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
|
|
1297 STR_BUF+curr_dst_idx, // Tag
|
|
1298 0, 0 );
|
|
1299
|
|
1300 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
|
|
1301 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
|
|
1302 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
|
|
1303 STR_BUF+curr_dst_idx, // Tag
|
|
1304 0, 0 );
|
|
1305
|
|
1306 // wait for completion
|
|
1307 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
|
|
1308 //---------------------------------------------------------------------------------------------
|
|
1309 }
|
|
1310
|
|
1311
|
|
1312 /*
|
|
1313 * bilinear_scale_line_w8()
|
|
1314 *
|
|
1315 * processes a line of yuv-input, width has to be a multiple of 8
|
|
1316 * scaled yuv-output is written to local store buffer
|
|
1317 *
|
|
1318 * @param src buffer for 2 lines input
|
|
1319 * @param dst_ buffer for 1 line output
|
|
1320 * @param dst_width the width of the destination line
|
|
1321 * @param vf_x_scale a float vector, at each entry is the x_scale-factor
|
|
1322 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
|
|
1323 * @param src_linestride the stride of the srcline
|
|
1324 */
|
|
1325 void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
|
|
1326
|
|
1327 unsigned char* dst = dst_;
|
|
1328
|
|
1329 unsigned int dst_x;
|
|
1330 for( dst_x=0; dst_x<dst_width; dst_x+=8) {
|
|
1331 // address calculation for loading the 4 surrounding pixel of each calculated
|
|
1332 // destination pixel
|
|
1333 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
|
|
1334 // lower range->first 4 pixel
|
|
1335 // upper range->next 4 pixel
|
|
1336 vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
|
|
1337 vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
|
|
1338 vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
|
|
1339 vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
|
|
1340
|
|
1341 // calculate weight EAST-WEST
|
|
1342 vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
|
|
1343 vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
|
|
1344 vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
|
|
1345 vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
|
|
1346 vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );
|
|
1347 vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
|
|
1348 vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
|
|
1349 vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
|
|
1350 vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
|
|
1351 vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
|
|
1352
|
|
1353 // calculate address offset
|
|
1354 //
|
|
1355 // pixel NORTH WEST
|
|
1356 vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
|
|
1357 vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
|
|
1358
|
|
1359 // pixel NORTH EAST-->(offpixelNW+1)
|
|
1360 vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
|
|
1361 vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
|
|
1362 vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
|
|
1363
|
|
1364 // SOUTH-WEST-->(offpixelNW+src_linestride)
|
|
1365 vector unsigned int vui_srclinestride = spu_splats( src_linestride );
|
|
1366 vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
|
|
1367 vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
|
|
1368
|
|
1369 // SOUTH-EAST-->(offpixelNW+src_linestride+1)
|
|
1370 vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
|
|
1371 vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
|
|
1372
|
|
1373 // calculate each address
|
|
1374 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
|
|
1375 vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
|
|
1376 vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
|
|
1377 vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
|
|
1378 vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
|
|
1379
|
|
1380 vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
|
|
1381 vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
|
|
1382 vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
|
|
1383 vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
|
|
1384
|
|
1385 // get each pixel
|
|
1386 //
|
|
1387 // scalar load, afterwards insertion into the right position
|
|
1388 // NORTH WEST
|
|
1389 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
|
1390 vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
|
|
1391 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
|
|
1392 vuc_pixel_NW_lower_range = spu_insert(
|
|
1393 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
|
|
1394 vuc_pixel_NW_lower_range, 7 );
|
|
1395 vuc_pixel_NW_lower_range = spu_insert(
|
|
1396 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
|
|
1397 vuc_pixel_NW_lower_range, 11 );
|
|
1398 vuc_pixel_NW_lower_range = spu_insert(
|
|
1399 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
|
|
1400 vuc_pixel_NW_lower_range, 15 );
|
|
1401
|
|
1402 vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
|
|
1403 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
|
|
1404 vuc_pixel_NW_upper_range = spu_insert(
|
|
1405 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
|
|
1406 vuc_pixel_NW_upper_range, 7 );
|
|
1407 vuc_pixel_NW_upper_range = spu_insert(
|
|
1408 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
|
|
1409 vuc_pixel_NW_upper_range, 11 );
|
|
1410 vuc_pixel_NW_upper_range = spu_insert(
|
|
1411 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
|
|
1412 vuc_pixel_NW_upper_range, 15 );
|
|
1413
|
|
1414 // NORTH EAST
|
|
1415 vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
|
|
1416 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
|
|
1417 vuc_pixel_NE_lower_range = spu_insert(
|
|
1418 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
|
|
1419 vuc_pixel_NE_lower_range, 7 );
|
|
1420 vuc_pixel_NE_lower_range = spu_insert(
|
|
1421 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
|
|
1422 vuc_pixel_NE_lower_range, 11 );
|
|
1423 vuc_pixel_NE_lower_range = spu_insert(
|
|
1424 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
|
|
1425 vuc_pixel_NE_lower_range, 15 );
|
|
1426
|
|
1427 vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
|
|
1428 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
|
|
1429 vuc_pixel_NE_upper_range = spu_insert(
|
|
1430 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
|
|
1431 vuc_pixel_NE_upper_range, 7 );
|
|
1432 vuc_pixel_NE_upper_range = spu_insert(
|
|
1433 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
|
|
1434 vuc_pixel_NE_upper_range, 11 );
|
|
1435 vuc_pixel_NE_upper_range = spu_insert(
|
|
1436 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
|
|
1437 vuc_pixel_NE_upper_range, 15 );
|
|
1438
|
|
1439
|
|
1440 // SOUTH WEST
|
|
1441 vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
|
|
1442 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
|
|
1443 vuc_pixel_SW_lower_range = spu_insert(
|
|
1444 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
|
|
1445 vuc_pixel_SW_lower_range, 7 );
|
|
1446 vuc_pixel_SW_lower_range = spu_insert(
|
|
1447 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
|
|
1448 vuc_pixel_SW_lower_range, 11 );
|
|
1449 vuc_pixel_SW_lower_range = spu_insert(
|
|
1450 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
|
|
1451 vuc_pixel_SW_lower_range, 15 );
|
|
1452
|
|
1453 vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
|
|
1454 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
|
|
1455 vuc_pixel_SW_upper_range = spu_insert(
|
|
1456 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
|
|
1457 vuc_pixel_SW_upper_range, 7 );
|
|
1458 vuc_pixel_SW_upper_range = spu_insert(
|
|
1459 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
|
|
1460 vuc_pixel_SW_upper_range, 11 );
|
|
1461 vuc_pixel_SW_upper_range = spu_insert(
|
|
1462 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
|
|
1463 vuc_pixel_SW_upper_range, 15 );
|
|
1464
|
|
1465 // SOUTH EAST
|
|
1466 vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
|
|
1467 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
|
|
1468 vuc_pixel_SE_lower_range = spu_insert(
|
|
1469 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
|
|
1470 vuc_pixel_SE_lower_range, 7 );
|
|
1471 vuc_pixel_SE_lower_range = spu_insert(
|
|
1472 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
|
|
1473 vuc_pixel_SE_lower_range, 11 );
|
|
1474 vuc_pixel_SE_lower_range = spu_insert(
|
|
1475 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
|
|
1476 vuc_pixel_SE_lower_range, 15 );
|
|
1477
|
|
1478 vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
|
|
1479 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
|
|
1480 vuc_pixel_SE_upper_range = spu_insert(
|
|
1481 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
|
|
1482 vuc_pixel_SE_upper_range, 7 );
|
|
1483 vuc_pixel_SE_upper_range = spu_insert(
|
|
1484 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
|
|
1485 vuc_pixel_SE_upper_range, 11 );
|
|
1486 vuc_pixel_SE_upper_range = spu_insert(
|
|
1487 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
|
|
1488 vuc_pixel_SE_upper_range, 15 );
|
|
1489
|
|
1490
|
|
1491 // convert to float
|
|
1492 vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
|
|
1493 vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
|
|
1494
|
|
1495 vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
|
|
1496 vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
|
|
1497
|
|
1498 vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
|
|
1499 vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
|
|
1500
|
|
1501 vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
|
|
1502 vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
|
|
1503
|
|
1504
|
|
1505
|
|
1506 // first linear interpolation: EWtop
|
|
1507 // EWtop = NW + EWweight*(NE-NW)
|
|
1508 //
|
|
1509 // lower range
|
|
1510 vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
|
|
1511 vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
|
|
1512 vf_EWtop_lower_range_tmp,
|
|
1513 vf_pixel_NW_lower_range );
|
|
1514
|
|
1515 // upper range
|
|
1516 vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
|
|
1517 vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
|
|
1518 vf_EWtop_upper_range_tmp,
|
|
1519 vf_pixel_NW_upper_range );
|
|
1520
|
|
1521
|
|
1522
|
|
1523 // second linear interpolation: EWbottom
|
|
1524 // EWbottom = SW + EWweight*(SE-SW)
|
|
1525 //
|
|
1526 // lower range
|
|
1527 vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
|
|
1528 vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
|
|
1529 vf_EWbottom_lower_range_tmp,
|
|
1530 vf_pixel_SW_lower_range );
|
|
1531
|
|
1532 // upper range
|
|
1533 vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
|
|
1534 vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
|
|
1535 vf_EWbottom_upper_range_tmp,
|
|
1536 vf_pixel_SW_upper_range );
|
|
1537
|
|
1538
|
|
1539
|
|
1540 // third linear interpolation: the bilinear interpolated value
|
|
1541 // result = EWtop + NSweight*(EWbottom-EWtop);
|
|
1542 //
|
|
1543 // lower range
|
|
1544 vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
|
|
1545 vector float vf_result_lower_range = spu_madd( vf_NSweight,
|
|
1546 vf_result_lower_range_tmp,
|
|
1547 vf_EWtop_lower_range );
|
|
1548
|
|
1549 // upper range
|
|
1550 vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
|
|
1551 vector float vf_result_upper_range = spu_madd( vf_NSweight,
|
|
1552 vf_result_upper_range_tmp,
|
|
1553 vf_EWtop_upper_range );
|
|
1554
|
|
1555
|
|
1556 // convert back: using saturated arithmetic
|
|
1557 vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
|
|
1558 vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
|
|
1559
|
|
1560 // merge results->lower,upper
|
|
1561 vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,
|
|
1562 0x13, 0x17, 0x1B, 0x1F,
|
|
1563 0x00, 0x00, 0x00, 0x00,
|
|
1564 0x00, 0x00, 0x00, 0x00 };
|
|
1565
|
|
1566 vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
|
|
1567 (vector unsigned char) vui_result_upper_range,
|
|
1568 vuc_mask_merge_result );
|
|
1569
|
|
1570 // partial storing
|
|
1571 vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,
|
|
1572 0x00, 0x00, 0x00, 0x00,
|
|
1573 0xFF, 0xFF, 0xFF, 0xFF,
|
|
1574 0xFF, 0xFF, 0xFF, 0xFF };
|
|
1575
|
|
1576
|
|
1577 // get currently stored data
|
|
1578 vector unsigned char vuc_orig = *((vector unsigned char*)dst);
|
|
1579
|
|
1580 // clear currently stored data
|
|
1581 vuc_orig = spu_and( vuc_orig,
|
|
1582 spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
|
|
1583
|
|
1584 // rotate result according to storing address
|
|
1585 vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
|
|
1586
|
|
1587 // store result
|
|
1588 *((vector unsigned char*)dst) = spu_or( vuc_result,
|
|
1589 vuc_orig );
|
|
1590 dst += 8;
|
|
1591 }
|
|
1592 }
|
|
1593
|
|
1594
|
|
1595 /*
|
|
1596 * bilinear_scale_line_w16()
|
|
1597 *
|
|
1598 * processes a line of yuv-input, width has to be a multiple of 16
|
|
1599 * scaled yuv-output is written to local store buffer
|
|
1600 *
|
|
1601 * @param src buffer for 2 lines input
|
|
1602 * @param dst_ buffer for 1 line output
|
|
1603 * @param dst_width the width of the destination line
|
|
1604 * @param vf_x_scale a float vector, at each entry is the x_scale-factor
|
|
1605 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
|
|
1606 * @param src_linestride the stride of the srcline
|
|
1607 */
|
|
1608 void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
|
|
1609
|
|
1610 unsigned char* dst = dst_;
|
|
1611
|
|
1612 unsigned int dst_x;
|
|
1613 for( dst_x=0; dst_x<dst_width; dst_x+=16) {
|
|
1614 // address calculation for loading the 4 surrounding pixel of each calculated
|
|
1615 // destination pixel
|
|
1616 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
|
|
1617 // parallelised processing
|
|
1618 // first range->pixel 1 2 3 4
|
|
1619 // second range->pixel 5 6 7 8
|
|
1620 // third range->pixel 9 10 11 12
|
|
1621 // fourth range->pixel 13 14 15 16
|
|
1622 vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
|
|
1623 vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
|
|
1624 vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
|
|
1625 vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
|
|
1626 vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
|
|
1627 vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
|
|
1628 vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
|
|
1629 vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
|
|
1630
|
|
1631 // calculate weight EAST-WEST
|
|
1632 vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
|
|
1633 vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
|
|
1634 vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
|
|
1635 vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
|
|
1636 vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
|
|
1637 vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
|
|
1638 vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
|
|
1639 vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
|
|
1640 vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
|
|
1641 vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
|
|
1642 vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
|
|
1643 vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
|
|
1644 vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
|
|
1645 vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
|
|
1646 vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
|
|
1647 vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
|
|
1648 vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
|
|
1649 vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
|
|
1650 vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
|
|
1651 vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
|
|
1652
|
|
1653 // calculate address offset
|
|
1654 //
|
|
1655 // pixel NORTH WEST
|
|
1656 vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
|
|
1657 vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
|
|
1658 vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
|
|
1659 vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
|
|
1660
|
|
1661 // pixel NORTH EAST-->(offpixelNW+1)
|
|
1662 vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
|
|
1663 vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
|
|
1664 vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
|
|
1665 vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
|
|
1666 vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
|
|
1667
|
|
1668 // SOUTH-WEST-->(offpixelNW+src_linestride)
|
|
1669 vector unsigned int vui_srclinestride = spu_splats( src_linestride );
|
|
1670 vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
|
|
1671 vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
|
|
1672 vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
|
|
1673 vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
|
|
1674
|
|
1675 // SOUTH-EAST-->(offpixelNW+src_linestride+1)
|
|
1676 vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
|
|
1677 vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
|
|
1678 vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
|
|
1679 vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
|
|
1680
|
|
1681 // calculate each address
|
|
1682 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
|
|
1683 vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
|
|
1684 vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
|
|
1685 vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
|
|
1686 vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
|
|
1687
|
|
1688 vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
|
|
1689 vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
|
|
1690 vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
|
|
1691 vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
|
|
1692
|
|
1693 vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
|
|
1694 vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
|
|
1695 vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
|
|
1696 vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
|
|
1697
|
|
1698 vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
|
|
1699 vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
|
|
1700 vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
|
|
1701 vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
|
|
1702
|
|
1703
|
|
1704 // get each pixel
|
|
1705 //
|
|
1706 // scalar load, afterwards insertion into the right position
|
|
1707 // NORTH WEST
|
|
1708 // first range
|
|
1709 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
|
1710 vector unsigned char vuc_pixel_NW_first_range = spu_insert(
|
|
1711 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
|
|
1712 vuc_pixel_NW_first_range = spu_insert(
|
|
1713 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
|
|
1714 vuc_pixel_NW_first_range, 7 );
|
|
1715 vuc_pixel_NW_first_range = spu_insert(
|
|
1716 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
|
|
1717 vuc_pixel_NW_first_range, 11 );
|
|
1718 vuc_pixel_NW_first_range = spu_insert(
|
|
1719 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
|
|
1720 vuc_pixel_NW_first_range, 15 );
|
|
1721 // second range
|
|
1722 vector unsigned char vuc_pixel_NW_second_range = spu_insert(
|
|
1723 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
|
|
1724 vuc_pixel_NW_second_range = spu_insert(
|
|
1725 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
|
|
1726 vuc_pixel_NW_second_range, 7 );
|
|
1727 vuc_pixel_NW_second_range = spu_insert(
|
|
1728 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
|
|
1729 vuc_pixel_NW_second_range, 11 );
|
|
1730 vuc_pixel_NW_second_range = spu_insert(
|
|
1731 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
|
|
1732 vuc_pixel_NW_second_range, 15 );
|
|
1733 // third range
|
|
1734 vector unsigned char vuc_pixel_NW_third_range = spu_insert(
|
|
1735 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
|
|
1736 vuc_pixel_NW_third_range = spu_insert(
|
|
1737 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
|
|
1738 vuc_pixel_NW_third_range, 7 );
|
|
1739 vuc_pixel_NW_third_range = spu_insert(
|
|
1740 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
|
|
1741 vuc_pixel_NW_third_range, 11 );
|
|
1742 vuc_pixel_NW_third_range = spu_insert(
|
|
1743 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
|
|
1744 vuc_pixel_NW_third_range, 15 );
|
|
1745 // fourth range
|
|
1746 vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
|
|
1747 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
|
|
1748 vuc_pixel_NW_fourth_range = spu_insert(
|
|
1749 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
|
|
1750 vuc_pixel_NW_fourth_range, 7 );
|
|
1751 vuc_pixel_NW_fourth_range = spu_insert(
|
|
1752 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
|
|
1753 vuc_pixel_NW_fourth_range, 11 );
|
|
1754 vuc_pixel_NW_fourth_range = spu_insert(
|
|
1755 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
|
|
1756 vuc_pixel_NW_fourth_range, 15 );
|
|
1757
|
|
1758 // NORTH EAST
|
|
1759 // first range
|
|
1760 vector unsigned char vuc_pixel_NE_first_range = spu_insert(
|
|
1761 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
|
|
1762 vuc_pixel_NE_first_range = spu_insert(
|
|
1763 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
|
|
1764 vuc_pixel_NE_first_range, 7 );
|
|
1765 vuc_pixel_NE_first_range = spu_insert(
|
|
1766 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
|
|
1767 vuc_pixel_NE_first_range, 11 );
|
|
1768 vuc_pixel_NE_first_range = spu_insert(
|
|
1769 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
|
|
1770 vuc_pixel_NE_first_range, 15 );
|
|
1771 // second range
|
|
1772 vector unsigned char vuc_pixel_NE_second_range = spu_insert(
|
|
1773 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
|
|
1774 vuc_pixel_NE_second_range = spu_insert(
|
|
1775 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
|
|
1776 vuc_pixel_NE_second_range, 7 );
|
|
1777 vuc_pixel_NE_second_range = spu_insert(
|
|
1778 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
|
|
1779 vuc_pixel_NE_second_range, 11 );
|
|
1780 vuc_pixel_NE_second_range = spu_insert(
|
|
1781 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
|
|
1782 vuc_pixel_NE_second_range, 15 );
|
|
1783 // third range
|
|
1784 vector unsigned char vuc_pixel_NE_third_range = spu_insert(
|
|
1785 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
|
|
1786 vuc_pixel_NE_third_range = spu_insert(
|
|
1787 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
|
|
1788 vuc_pixel_NE_third_range, 7 );
|
|
1789 vuc_pixel_NE_third_range = spu_insert(
|
|
1790 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
|
|
1791 vuc_pixel_NE_third_range, 11 );
|
|
1792 vuc_pixel_NE_third_range = spu_insert(
|
|
1793 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
|
|
1794 vuc_pixel_NE_third_range, 15 );
|
|
1795 // fourth range
|
|
1796 vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
|
|
1797 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
|
|
1798 vuc_pixel_NE_fourth_range = spu_insert(
|
|
1799 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
|
|
1800 vuc_pixel_NE_fourth_range, 7 );
|
|
1801 vuc_pixel_NE_fourth_range = spu_insert(
|
|
1802 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
|
|
1803 vuc_pixel_NE_fourth_range, 11 );
|
|
1804 vuc_pixel_NE_fourth_range = spu_insert(
|
|
1805 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
|
|
1806 vuc_pixel_NE_fourth_range, 15 );
|
|
1807
|
|
1808 // SOUTH WEST
|
|
1809 // first range
|
|
1810 vector unsigned char vuc_pixel_SW_first_range = spu_insert(
|
|
1811 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
|
|
1812 vuc_pixel_SW_first_range = spu_insert(
|
|
1813 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
|
|
1814 vuc_pixel_SW_first_range, 7 );
|
|
1815 vuc_pixel_SW_first_range = spu_insert(
|
|
1816 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
|
|
1817 vuc_pixel_SW_first_range, 11 );
|
|
1818 vuc_pixel_SW_first_range = spu_insert(
|
|
1819 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
|
|
1820 vuc_pixel_SW_first_range, 15 );
|
|
1821 // second range
|
|
1822 vector unsigned char vuc_pixel_SW_second_range = spu_insert(
|
|
1823 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
|
|
1824 vuc_pixel_SW_second_range = spu_insert(
|
|
1825 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
|
|
1826 vuc_pixel_SW_second_range, 7 );
|
|
1827 vuc_pixel_SW_second_range = spu_insert(
|
|
1828 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
|
|
1829 vuc_pixel_SW_second_range, 11 );
|
|
1830 vuc_pixel_SW_second_range = spu_insert(
|
|
1831 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
|
|
1832 vuc_pixel_SW_second_range, 15 );
|
|
1833 // third range
|
|
1834 vector unsigned char vuc_pixel_SW_third_range = spu_insert(
|
|
1835 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
|
|
1836 vuc_pixel_SW_third_range = spu_insert(
|
|
1837 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
|
|
1838 vuc_pixel_SW_third_range, 7 );
|
|
1839 vuc_pixel_SW_third_range = spu_insert(
|
|
1840 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
|
|
1841 vuc_pixel_SW_third_range, 11 );
|
|
1842 vuc_pixel_SW_third_range = spu_insert(
|
|
1843 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
|
|
1844 vuc_pixel_SW_third_range, 15 );
|
|
1845 // fourth range
|
|
1846 vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
|
|
1847 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
|
|
1848 vuc_pixel_SW_fourth_range = spu_insert(
|
|
1849 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
|
|
1850 vuc_pixel_SW_fourth_range, 7 );
|
|
1851 vuc_pixel_SW_fourth_range = spu_insert(
|
|
1852 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
|
|
1853 vuc_pixel_SW_fourth_range, 11 );
|
|
1854 vuc_pixel_SW_fourth_range = spu_insert(
|
|
1855 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
|
|
1856 vuc_pixel_SW_fourth_range, 15 );
|
|
1857
|
|
1858 // NORTH EAST
|
|
1859 // first range
|
|
1860 vector unsigned char vuc_pixel_SE_first_range = spu_insert(
|
|
1861 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
|
|
1862 vuc_pixel_SE_first_range = spu_insert(
|
|
1863 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
|
|
1864 vuc_pixel_SE_first_range, 7 );
|
|
1865 vuc_pixel_SE_first_range = spu_insert(
|
|
1866 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
|
|
1867 vuc_pixel_SE_first_range, 11 );
|
|
1868 vuc_pixel_SE_first_range = spu_insert(
|
|
1869 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
|
|
1870 vuc_pixel_SE_first_range, 15 );
|
|
1871 // second range
|
|
1872 vector unsigned char vuc_pixel_SE_second_range = spu_insert(
|
|
1873 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
|
|
1874 vuc_pixel_SE_second_range = spu_insert(
|
|
1875 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
|
|
1876 vuc_pixel_SE_second_range, 7 );
|
|
1877 vuc_pixel_SE_second_range = spu_insert(
|
|
1878 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
|
|
1879 vuc_pixel_SE_second_range, 11 );
|
|
1880 vuc_pixel_SE_second_range = spu_insert(
|
|
1881 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
|
|
1882 vuc_pixel_SE_second_range, 15 );
|
|
1883 // third range
|
|
1884 vector unsigned char vuc_pixel_SE_third_range = spu_insert(
|
|
1885 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
|
|
1886 vuc_pixel_SE_third_range = spu_insert(
|
|
1887 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
|
|
1888 vuc_pixel_SE_third_range, 7 );
|
|
1889 vuc_pixel_SE_third_range = spu_insert(
|
|
1890 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
|
|
1891 vuc_pixel_SE_third_range, 11 );
|
|
1892 vuc_pixel_SE_third_range = spu_insert(
|
|
1893 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
|
|
1894 vuc_pixel_SE_third_range, 15 );
|
|
1895 // fourth range
|
|
1896 vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
|
|
1897 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
|
|
1898 vuc_pixel_SE_fourth_range = spu_insert(
|
|
1899 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
|
|
1900 vuc_pixel_SE_fourth_range, 7 );
|
|
1901 vuc_pixel_SE_fourth_range = spu_insert(
|
|
1902 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
|
|
1903 vuc_pixel_SE_fourth_range, 11 );
|
|
1904 vuc_pixel_SE_fourth_range = spu_insert(
|
|
1905 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
|
|
1906 vuc_pixel_SE_fourth_range, 15 );
|
|
1907
|
|
1908
|
|
1909
|
|
1910 // convert to float
|
|
1911 vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
|
|
1912 vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
|
|
1913 vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
|
|
1914 vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
|
|
1915
|
|
1916 vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
|
|
1917 vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
|
|
1918 vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
|
|
1919 vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
|
|
1920
|
|
1921 vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
|
|
1922 vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
|
|
1923 vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
|
|
1924 vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
|
|
1925
|
|
1926 vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
|
|
1927 vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
|
|
1928 vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
|
|
1929 vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
|
|
1930
|
|
1931 // first linear interpolation: EWtop
|
|
1932 // EWtop = NW + EWweight*(NE-NW)
|
|
1933 //
|
|
1934 // first range
|
|
1935 vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
|
|
1936 vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
|
|
1937 vf_EWtop_first_range_tmp,
|
|
1938 vf_pixel_NW_first_range );
|
|
1939
|
|
1940 // second range
|
|
1941 vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
|
|
1942 vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
|
|
1943 vf_EWtop_second_range_tmp,
|
|
1944 vf_pixel_NW_second_range );
|
|
1945
|
|
1946 // third range
|
|
1947 vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
|
|
1948 vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
|
|
1949 vf_EWtop_third_range_tmp,
|
|
1950 vf_pixel_NW_third_range );
|
|
1951
|
|
1952 // fourth range
|
|
1953 vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
|
|
1954 vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
|
|
1955 vf_EWtop_fourth_range_tmp,
|
|
1956 vf_pixel_NW_fourth_range );
|
|
1957
|
|
1958
|
|
1959
|
|
1960 // second linear interpolation: EWbottom
|
|
1961 // EWbottom = SW + EWweight*(SE-SW)
|
|
1962 //
|
|
1963 // first range
|
|
1964 vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
|
|
1965 vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
|
|
1966 vf_EWbottom_first_range_tmp,
|
|
1967 vf_pixel_SW_first_range );
|
|
1968
|
|
1969 // second range
|
|
1970 vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
|
|
1971 vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
|
|
1972 vf_EWbottom_second_range_tmp,
|
|
1973 vf_pixel_SW_second_range );
|
|
1974 // first range
|
|
1975 vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
|
|
1976 vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
|
|
1977 vf_EWbottom_third_range_tmp,
|
|
1978 vf_pixel_SW_third_range );
|
|
1979
|
|
1980 // first range
|
|
1981 vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
|
|
1982 vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
|
|
1983 vf_EWbottom_fourth_range_tmp,
|
|
1984 vf_pixel_SW_fourth_range );
|
|
1985
|
|
1986
|
|
1987
|
|
1988 // third linear interpolation: the bilinear interpolated value
|
|
1989 // result = EWtop + NSweight*(EWbottom-EWtop);
|
|
1990 //
|
|
1991 // first range
|
|
1992 vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
|
|
1993 vector float vf_result_first_range = spu_madd( vf_NSweight,
|
|
1994 vf_result_first_range_tmp,
|
|
1995 vf_EWtop_first_range );
|
|
1996
|
|
1997 // second range
|
|
1998 vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
|
|
1999 vector float vf_result_second_range = spu_madd( vf_NSweight,
|
|
2000 vf_result_second_range_tmp,
|
|
2001 vf_EWtop_second_range );
|
|
2002
|
|
2003 // third range
|
|
2004 vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
|
|
2005 vector float vf_result_third_range = spu_madd( vf_NSweight,
|
|
2006 vf_result_third_range_tmp,
|
|
2007 vf_EWtop_third_range );
|
|
2008
|
|
2009 // fourth range
|
|
2010 vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
|
|
2011 vector float vf_result_fourth_range = spu_madd( vf_NSweight,
|
|
2012 vf_result_fourth_range_tmp,
|
|
2013 vf_EWtop_fourth_range );
|
|
2014
|
|
2015
|
|
2016
|
|
2017 // convert back: using saturated arithmetic
|
|
2018 vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
|
|
2019 vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
|
|
2020 vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
|
|
2021 vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
|
|
2022
|
|
2023 // merge results->lower,upper
|
|
2024 vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
|
|
2025 0x13, 0x17, 0x1B, 0x1F,
|
|
2026 0x00, 0x00, 0x00, 0x00,
|
|
2027 0x00, 0x00, 0x00, 0x00 };
|
|
2028
|
|
2029 vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
|
|
2030 0x00, 0x00, 0x00, 0x00,
|
|
2031 0x03, 0x07, 0x0B, 0x0F,
|
|
2032 0x13, 0x17, 0x1B, 0x1F };
|
|
2033
|
|
2034 vector unsigned char vuc_result_first_second =
|
|
2035 spu_shuffle( (vector unsigned char) vui_result_first_range,
|
|
2036 (vector unsigned char) vui_result_second_range,
|
|
2037 vuc_mask_merge_result_first_second );
|
|
2038
|
|
2039 vector unsigned char vuc_result_third_fourth =
|
|
2040 spu_shuffle( (vector unsigned char) vui_result_third_range,
|
|
2041 (vector unsigned char) vui_result_fourth_range,
|
|
2042 vuc_mask_merge_result_third_fourth );
|
|
2043
|
|
2044 // store result
|
|
2045 *((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
|
|
2046 vuc_result_third_fourth );
|
|
2047 dst += 16;
|
|
2048 }
|
|
2049 }
|
|
2050
|