Girisha SG
08/19/2010 8:41 AM
post63517
|
Reference code:
void Reference_Code (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
{
int i;
uint8_t r, g, b;
int y;
for (i=0; i<n; i++)
{
r = *src++; // load red
g = *src++; // load green
b = *src++; // load blue
// build weighted average:
y = (r*77) + (g*151)+ (b*28);
// undo the scale by 256 and write to memory:
*dest++ = (y>>8);
}
}
NEON gasm assembly code:
void NEON_Assembly (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
{
//register uint8_t *destination asm ("r0");
//register uint8_t *source asm ("r1");
//int temp;
__asm __volatile (
"mov r4, r2\n\t" /*%2*/
"lsr r4, r4, #3\n\t" /*%[n]*/
"vmov.u8 d3, #77\n\t"
"vmov.u8 d4, #151\n\t"
"vmov.u8 d5, #28\n\t"
"mov r3, #0\n\t"
"b .comp\n\t"
".lp:\n\t"
"# load 8 pixels:\n\t"
"vld3.8 {d0-d2}, [r1]!\n\t"
// do the weight average:
"vmull.u8 q3, d0, d3\n\t"
"vmlal.u8 q3, d1, d4\n\t"
"vmlal.u8 q3, d2, d5\n\t"
// shift and store:
"vshrn.i16 d8, q3, #8\n\t"
"vst1.8 {d8}, [r0]!\n\t"
"add r3, r3, #1\n\t"
".comp:\n\t"
"cmp r4, r3\n\t"
"blt .lp\n\t"
/* Output */:[dest] "=r"/*"=&r"*/ (dest), [n]/* Symbolic name */ "+r"/* register constraint */ (n)/* C
variable name*/
/* Input */ :[src] "r" (src)/*(&src)*/
/* Clobber */ :"r4", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d8", "q3", /* "memory"*/
);
}
This is getting compiled and running but the results are not maching :(
I have verified this code against the RVDS generated code and found it correct can anyone help me in understanding why
it is not working ???
It is really IMPORTANT for me, thanks in advance !
|
|
|
Elena Laskavaia
08/19/2010 9:28 AM
post63534
|
IDE is not the right forum for that. Try maybe core tools or bsp?
On 19/08/10 08:41 AM, Girisha SG wrote:
> Reference code:
>
> void Reference_Code (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
> {
> int i;
> uint8_t r, g, b;
> int y;
>
> for (i=0; i<n; i++)
> {
> r = *src++; // load red
> g = *src++; // load green
> b = *src++; // load blue
>
> // build weighted average:
> y = (r*77) + (g*151)+ (b*28);
>
> // undo the scale by 256 and write to memory:
> *dest++ = (y>>8);
> }
> }
>
>
> NEON gasm assembly code:
> void NEON_Assembly (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
> {
> //register uint8_t *destination asm ("r0");
> //register uint8_t *source asm ("r1");
> //int temp;
>
> __asm __volatile (
> "mov r4, r2\n\t" /*%2*/
> "lsr r4, r4, #3\n\t" /*%[n]*/
>
> "vmov.u8 d3, #77\n\t"
> "vmov.u8 d4, #151\n\t"
> "vmov.u8 d5, #28\n\t"
> "mov r3, #0\n\t"
> "b .comp\n\t"
>
> ".lp:\n\t"
> "# load 8 pixels:\n\t"
> "vld3.8 {d0-d2}, [r1]!\n\t"
>
> // do the weight average:
> "vmull.u8 q3, d0, d3\n\t"
> "vmlal.u8 q3, d1, d4\n\t"
> "vmlal.u8 q3, d2, d5\n\t"
>
> // shift and store:
> "vshrn.i16 d8, q3, #8\n\t"
> "vst1.8 {d8}, [r0]!\n\t"
>
> "add r3, r3, #1\n\t"
>
> ".comp:\n\t"
> "cmp r4, r3\n\t"
> "blt .lp\n\t"
>
>
> /* Output */:[dest] "=r"/*"=&r"*/ (dest), [n]/* Symbolic name */ "+r"/* register constraint */ (n)/* C variable name*/
> /* Input */ :[src] "r" (src)/*(&src)*/
> /* Clobber */ :"r4", "r3", "d0"
, "d1", "d2", "d3", "d4", "d5", "d8", "q3", /* "memory"*/
> );
>
> }
>
> This is getting compiled and running but the results are not maching :(
>
> I have verified this code against the RVDS generated code and found it correct can anyone help me in understanding why
it is not working ???
>
> It is really IMPORTANT for me, thanks in advance !
>
>
>
> _______________________________________________
>
> General
> http://community.qnx.com/sf/go/post63517
>
|
|
|
|