Project Home
Project Home
Trackers
Trackers
Documents
Documents
Wiki
Wiki
Discussion Forums
Discussions
Project Information
Project Info
Forum Topic - NEON assembly issue: (2 Items)
   
NEON assembly issue  
Reference code:

void Reference_Code (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
{
  int i;
  uint8_t r, g, b;
  int y;

  for (i=0; i<n; i++)
  {
    r = *src++; // load red
    g = *src++; // load green
    b = *src++; // load blue

    // build weighted average:
    y = (r*77) + (g*151)+ (b*28);

    // undo the scale by 256 and write to memory:
    *dest++ = (y>>8);
  }
}


NEON gasm assembly code:
void NEON_Assembly (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
{
	//register uint8_t *destination asm ("r0");
	//register uint8_t *source asm ("r1");
	//int temp;

	__asm __volatile (
		    "mov         r4, r2\n\t" /*%2*/
			"lsr         r4, r4, #3\n\t" /*%[n]*/

			"vmov.u8 d3, #77\n\t"
			"vmov.u8 d4, #151\n\t"
			"vmov.u8 d5, #28\n\t"
			"mov     r3, #0\n\t"
			"b 		 .comp\n\t"

			".lp:\n\t"
			  "# load 8 pixels:\n\t"
			  "vld3.8      {d0-d2}, [r1]!\n\t"

              // do the weight average:
			  "vmull.u8    q3, d0, d3\n\t"
			  "vmlal.u8    q3, d1, d4\n\t"
			  "vmlal.u8    q3, d2, d5\n\t"

			  // shift and store:
			  "vshrn.i16   d8, q3, #8\n\t"
			  "vst1.8      {d8}, [r0]!\n\t"

			  "add r3, r3, #1\n\t"

			".comp:\n\t"
			  "cmp        r4, r3\n\t"
			  "blt        .lp\n\t"


                /* Output */:[dest] "=r"/*"=&r"*/ (dest), [n]/* Symbolic name */ "+r"/* register constraint */ (n)/* C 
variable name*/
				/* Input */ :[src] "r" (src)/*(&src)*/
				/* Clobber */ :"r4", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d8", "q3", /* "memory"*/
		        );

}

This is getting compiled and running but the results are not maching :(

I have verified this code against the RVDS generated code and found it correct can anyone help me in understanding why 
it is not working ???

It is really IMPORTANT for me, thanks in advance !
Re: NEON assembly issue  
IDE is not the right forum for that. Try maybe core tools or bsp?

On 19/08/10 08:41 AM, Girisha SG wrote:
> Reference code:
> 
> void Reference_Code (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
> {
>   int i;
>   uint8_t r, g, b;
>   int y;
> 
>   for (i=0; i<n; i++)
>   {
>     r = *src++; // load red
>     g = *src++; // load green
>     b = *src++; // load blue
> 
>     // build weighted average:
>     y = (r*77) + (g*151)+ (b*28);
> 
>     // undo the scale by 256 and write to memory:
>     *dest++ = (y>>8);
>   }
> }
> 
> 
> NEON gasm assembly code:
> void NEON_Assembly (uint8_t * __restrict dest, uint8_t * __restrict src, int n)
> {
> 	//register uint8_t *destination asm ("r0");
> 	//register uint8_t *source asm ("r1");
> 	//int temp;
> 
> 	__asm __volatile (
> 		    "mov         r4, r2\n\t" /*%2*/
> 			"lsr         r4, r4, #3\n\t" /*%[n]*/
> 
> 			"vmov.u8 d3, #77\n\t"
> 			"vmov.u8 d4, #151\n\t"
> 			"vmov.u8 d5, #28\n\t"
> 			"mov     r3, #0\n\t"
> 			"b 		 .comp\n\t"
> 
> 			".lp:\n\t"
> 			  "# load 8 pixels:\n\t"
> 			  "vld3.8      {d0-d2}, [r1]!\n\t"
> 
>               // do the weight average:
> 			  "vmull.u8    q3, d0, d3\n\t"
> 			  "vmlal.u8    q3, d1, d4\n\t"
> 			  "vmlal.u8    q3, d2, d5\n\t"
> 
> 			  // shift and store:
> 			  "vshrn.i16   d8, q3, #8\n\t"
> 			  "vst1.8      {d8}, [r0]!\n\t"
> 
> 			  "add r3, r3, #1\n\t"
> 
> 			".comp:\n\t"
> 			  "cmp        r4, r3\n\t"
> 			  "blt        .lp\n\t"
> 
> 
>                 /* Output */:[dest] "=r"/*"=&r"*/ (dest), [n]/* Symbolic name */ "+r"/* register constraint */ (n)/* C variable name*/
> 				/* Input */ :[src] "r" (src)/*(&src)*/
> 				/* Clobber */ :"r4", "r3", "d0"
, "d1", "d2", "d3", "d4", "d5", "d8", "q3", /* "memory"*/
> 		        );
> 
> }
> 
> This is getting compiled and running but the results are not maching :(
> 
> I have verified this code against the RVDS generated code and found it correct can anyone help me in understanding why
 it is not working ???
> 
> It is really IMPORTANT for me, thanks in advance !
> 
> 
> 
> _______________________________________________
> 
> General
> http://community.qnx.com/sf/go/post63517
>