c - learning to use intrinsics -- segm fault using _mm256_sub_ps -



c - learning to use intrinsics -- segm fault using _mm256_sub_ps -

I am trying to learn how to use intrinsics. I have this C code:

#include <float.h>  /* FLT_MAX */

/*
 * Label every pixel of a width x height image with the value attached to
 * its nearest seed point (squared Euclidean distance).
 *
 *   nbpoints  number of seed points in x / y / v
 *   height    number of image rows
 *   width     number of image columns
 *   x, y      seed coordinates, nbpoints entries each
 *   v         value ("threshold") attached to each seed, nbpoints entries
 *   ouvor     output image, row-major, width * height entries
 *
 * When two seeds are equally close the one with the lower index wins,
 * because the comparison below is strict. With nbpoints <= 0 nothing is
 * written to ouvor.
 */
void vor( const int nbpoints, const int height, const int width,
          float * x, float * y, int * v, int * const ouvor )
{
    if ( nbpoints <= 0 ) {
        return;  /* no seed to assign: leave the output untouched */
    }

    for ( int row = 0; row < height; row++ ) {
        for ( int col = 0; col < width; col++ ) {
            /* The running minimum must restart for every pixel; keeping
             * it across pixels makes later pixels compare against a
             * stale distance. */
            float best = FLT_MAX;
            int threshold = v[ 0 ];

            for ( int i = 0; i < nbpoints; i++ ) {
                const float xd = x[ i ] - (float) col;
                const float yd = y[ i ] - (float) row;
                const float distance = xd * xd + yd * yd;

                /* if the point is closer, take its threshold */
                if ( distance < best ) {
                    best = distance;
                    threshold = v[ i ];
                }
            } /* i */

            /* one store per pixel, after the search has finished */
            ouvor[ col + row * width ] = threshold;
        } /* x */
    } /* y */
}

Now I want to use OpenMP and intrinsics. I am doing:

/*
 * First SIMD attempt at vor(), kept exactly as posted. It does NOT compile;
 * the compiler errors quoted after it in this post refer to this text.
 *
 * NOTE(review), to be confirmed against the original post:
 *  - The text appears to have lost tokens in extraction: the `for` keywords,
 *    the loop index `i`, and the name of the pointer declared as
 *    `__m128 * = (__m128*) y` (used later as `they`). Identifier case also
 *    looks flattened, so the pixel coordinates `x`/`y` now collide with the
 *    parameters `x`/`y`.
 *  - `_m128_sub_ps`, `_m128_add_ps` and `_m128_gmin_ps` are not declared by
 *    any header, hence "declared implicitly". The SSE intrinsics are spelled
 *    `_mm_sub_ps`, `_mm_add_ps` and `_mm_min_ps`.
 *  - The integer pixel coordinates are passed where a `__m128` is expected,
 *    and the final store assigns a `__m128i` to a single `int`.
 *  - `_mm_load_ps( &initdistance )` is given the address of one float, while
 *    the intrinsic reads four floats from 16-byte aligned memory.
 *  - Everything after `// pixel coordinates` sits on the same physical line,
 *    so as written the rest of the function is part of that line comment.
 */
void vor( const int nbofpoints, const int height, const int width, float * restrict x, float * restrict y, int * restrict v, int * restrict ouvor ) { __m128 xd , yd; __m128i threshold; int x , y; // pixel coordinates float initdistance = flt_max; float * tempdistance = (float*) _mm_malloc( nbofpoints * sizeof(*tempdistance) ,64 ); __m128 * simdtempdistance = (__m128*) tempdistance; __m128 * thex = (__m128*) x; __m128 * = (__m128*) y; __m128i * thev = (__m128i*) v; __m128i * thevor = (__m128i*) ouvor; #pragma omp parallel default( none ) shared( x , y , v , ouvor ,height , width ,nbofpoints ) private ( x,y,xd,yd,tempdistance ,threshold ) collapse(2) ( y = 0; y < height; y++ ) { ( x = 0; x < width; x++ ) { __m128 distance = _mm_load_ps( &initdistance ); ( int = 0; < nbofpoints; i++ ) { xd = _m128_sub_ps( thex[ ] , x ); yd = _m128_sub_ps( they[ ] , y ); simdtempdistance[ ] = _m128_add_ps( xd * xd , yd * yd ); __m128 themin = _m128_gmin_ps( simdtempdistance , &distance ); distance = themin; threshold = thev[ ]; } /* */ //write result *( ouvor + x + y * width ) = threshold; } /* x */ } /* y */ _mm_free( tempdistance ); }

I am receiving errors like:

function "_m128_sub_ps" declared implicitly xd = _m128_sub_ps( thex[ ] , x ); error: a value of type "int" cannot be assigned to an entity of type "__m128" xd = _m128_sub_ps( thex[ ] , x ); a value of type "__m128i" cannot be assigned to an entity of type "int" *( ouvor + x + y * width ) = threshold

(and the same errors for yd, themin and simdtempdistance)

How can I overcome these problems?

Also, I removed the if statement and used _m128_gmin_ps to find the minimum value. Is my implementation correct?

-------------- update ---------------

After Sourav Ghosh's comment, I searched the headers. I couldn't find the 128-bit versions anywhere, so I used the 256-bit ones via #include <immintrin.h>.

After correcting a couple of lines to:

__m256 distance = _mm256_load_ps( &intidistance ); __m256 themin = _mm256_min_ps( simdtempdistance[ ] , &distance );

and changing the function calls to _mm256 instead of _m256, I am getting these errors:

error: argument of type "int" is incompatible with parameter of type "__m256" xd = _mm256_sub_ps( thex[ ] , x ); yd = _mm256_sub_ps( they[ ] , y );

x and y are integers, and they are used in the loop. I don't know how to overcome this.

-----update ----------------------

I figured it out! I needed a cast, so I used:

__m256i xxidx = _mm256_set1_epi32( x ); __m256 xidx = _mm256_castsi256_ps( xxidx );

Now my code is:

/*
 * AVX attempt at vor(), kept exactly as posted. This is the version the
 * author reports as crashing with a segmentation fault at the
 * `xd = ...( thex[ ] , xidx )` / `yd = ...( they[ ] , yidx )` statements.
 *
 * NOTE(review): likely causes, to be confirmed against the real program:
 *  - `thex`, `they` and `thev` view the caller's arrays as `__m256` /
 *    `__m256i`. Dereferencing such a pointer is a 32-byte aligned access,
 *    and nothing shown here guarantees that x, y and v are 32-byte aligned.
 *  - The inner loop bound is `nbofpoints`, a count of scalar elements. If
 *    the (missing) index steps over `__m256` elements, each step covers
 *    8 floats, so the loop would run past the end of x, y and v.
 *  - `tempdistance` and `simdtempdistance` are listed in `private(...)`.
 *    OpenMP private copies start out uninitialised, so the store to
 *    `simdtempdistance[ ]` inside the region goes through an indeterminate
 *    pointer. The buffer is also sized for `nbofpoints` floats, not
 *    `nbofpoints` vectors.
 *  - `_mm256_castsi256_ps` only reinterprets the bits of the integer
 *    vector; converting pixel coordinates to float is done by
 *    `_mm256_cvtepi32_ps`.
 *  - `_m256_sub_ps`, `_m256_add_ps` and `_m256_gmin_ps` are not AVX
 *    intrinsic names (those are `_mm256_sub_ps`, `_mm256_add_ps` and
 *    `_mm256_min_ps`).
 *  - `threshold = thev[ ]` is assigned on every iteration regardless of the
 *    minimum, and the final store assigns a `__m256i` to a single `int`.
 *  - The text appears to have lost the `for` keywords, the loop index and
 *    the declared name of `they` during extraction. Everything after
 *    `// pixel coordinates` sits on the same physical line.
 */
void vor( const int nbofpoints, const int height, const int width, float * restrict x, float * restrict y, int * restrict v, int * restrict ouvor ) { __m256 xd , yd; __m256i threshold; int x , y; // pixel coordinates float * tempdistance = (float*) _mm_malloc( nbofpoints * sizeof(*tempdistance) ,64 ); __m256 * simdtempdistance = (__m256*) tempdistance; __m256 * thex = (__m256*) x; __m256 * = (__m256*) y; __m256i * thev = (__m256i*) v; __m256i * thevor = (__m256i*) ouvor; #pragma omp parallel default( none ) shared( x , y , v , ouvor ,height , width ,nbofpoints ,ouvor ,thex,they,thev ) private ( x,y,xd,yd,tempdistance ,threshold,simdtempdistance ) collapse(2) ( y = 0; y < height; y++ ) { ( x = 0; x < width; x++ ) { float initdistance = flt_max; __m256 distance = _mm256_set1_ps( initdistance ); ( int = 0; < nbofpoints; i++ ) { __m256i xxidx = _mm256_set1_epi32( x ); __m256 xidx = _mm256_castsi256_ps( xxidx ); __m256i yyidx = _mm256_set1_epi32( y ); __m256 yidx = _mm256_castsi256_ps( yyidx ); xd = _m256_sub_ps( thex[ ] , xidx ); yd = _m256_sub_ps( they[ ] , yidx ); simdtempdistance[ ] = _m256_add_ps( xd * xd , yd * yd ); __m256 themin = _m256_gmin_ps( simdtempdistance , distance ); distance = themin; threshold = thev[ ]; } /* */ //write result *( ouvor + x + y * width ) = threshold; } /* x */ } /* y */ _mm_free( tempdistance ); }

I compile like:

icc -std=c99 -g -openmp -qopt-report=2 -o mycode mycode.c

and it is OK.

But running the code gives a segmentation fault

at these lines:

xd = _m256_sub_ps( thex[ ] , xidx ); yd = _m256_sub_ps( they[ ] , yidx );

I think you're missing some of the required header files containing the forward declaration of the _m128_sub_ps() function. We can assume the _m128_sub_ps() function has a return type of __m128; without the forward declaration in place, the compiler assumes the default return type of int for the _m128_sub_ps() function. That is why the compiler is emitting

function "_m128_sub_ps" declared implicitly

Then the int return value is being assigned to a variable of type __m128, creating the issue.

edit:

As per your changed code,

int x , y; // pixel coordinates

should be

__m256 x , y; // pixel coordinates

as the signature of _mm256_sub_ps() requires both arguments to be of type __m256.

c vectorization intel intrinsics

Comments

Popular posts from this blog

java - How to set log4j.defaultInitOverride property to false in jboss server 6 -

c - GStreamer 1.0 1.4.5 RTSP Example Server sends 503 Service unavailable -

Using ajax with sonata admin list view pagination -