Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

armv7 #268

Open
Karutsenko opened this issue Dec 27, 2021 · 1 comment
Open

armv7 #268

Karutsenko opened this issue Dec 27, 2021 · 1 comment

Comments

@Karutsenko
Copy link

I am trying to run the following code on an STM32MP1 (dual-core, ARMv7).
It fails with an exception:
Child terminated with signal = 0xb (SIGSEGV)

//=======================================

#include <math.h>
#include <stdio.h>

#include <fftw3.h>
#include <omp.h>

#define FFT_SIZE 1024
#define REAL 0
#define IMAG 1

#define I256 256
#define I128 128
#define F128 128.0f

static fftwf_plan plan;
static fftwf_complex *src, *dst;

/*
 * Reproducer: single-precision, 1-D forward complex DFT of FFT_SIZE points.
 *
 * Fills the input with the sum of two complex exponentials, transforms it
 * with FFTW and writes the magnitude of every output bin to mgn.
 *
 * mgn: caller-provided buffer; FFT_SIZE floats are written to it.
 *
 * Works on the file-scope plan/src/dst objects, which are released and reset
 * to NULL before returning. If set-up fails, a message is printed to stderr
 * and mgn is left untouched.
 */
void fft_f_test(float* mgn)
{
    int ok = 0;

    plan = NULL;
    src = NULL;
    dst = NULL;

    if (mgn == NULL) {
        fprintf(stderr, "fft test failed: output buffer is NULL\n");
        return;
    }

    /*
     * The FFTW manual requires fftwf_init_threads() to be called before any
     * other FFTW routine (including fftwf_malloc); it returns zero on error.
     */
    if (fftwf_init_threads() == 0) {
        fprintf(stderr, "fft test failed: fftwf_init_threads() error\n");
        return;
    }

    int nbthreads = 1; //omp_get_max_threads();
    fftwf_plan_with_nthreads(nbthreads);

    src = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * FFT_SIZE);
    dst = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * FFT_SIZE);
    if (src == NULL || dst == NULL) {
        fprintf(stderr, "fft test failed: out of memory\n");
        goto cleanup;
    }

    /* Test signal: two complex exponentials with amplitudes 1.0 and 0.5. */
    for (int i = 0; i < FFT_SIZE; i++) {
        float theta  = (float)i / (float)FFT_SIZE * M_PI;
        src[i][REAL] = 1.0 * cos(10.0 * theta) + 0.5 * cos(25.0 * theta);
        src[i][IMAG] = 1.0 * sin(10.0 * theta) + 0.5 * sin(25.0 * theta);
    }

    plan = fftwf_plan_dft_1d(FFT_SIZE, src, dst, FFTW_FORWARD, FFTW_ESTIMATE);
    if (plan == NULL) {
        fprintf(stderr, "fft test failed: could not create plan\n");
        goto cleanup;
    }

    fftwf_execute(plan); /* <== the reported SIGSEGV is raised inside this call */

    for (int i = 0; i < FFT_SIZE; i++) {
        mgn[i] = sqrt(dst[i][REAL] * dst[i][REAL] + dst[i][IMAG] * dst[i][IMAG]);
    }
    ok = 1;

cleanup:
    /* Destroy the plan before fftwf_cleanup_threads(): cleanup invalidates plans. */
    if (plan != NULL) {
        fftwf_destroy_plan(plan);
        plan = NULL;
    }
    if (src != NULL) {
        fftwf_free(src);
        src = NULL;
    }
    if (dst != NULL) {
        fftwf_free(dst);
        dst = NULL;
    }
    fftwf_cleanup_threads();

    if (ok) {
        printf("fft test success!\n");
    }
}

//=======================================================================================

Disassembly:

      fftwf_codelet_n2fv_16_neon:

b6f32c74: ldr r2, [pc, #8] ; (0xb6f32c80 <fftwf_codelet_n2fv_16_neon+12>)
b6f32c76: ldr r1, [pc, #12] ; (0xb6f32c84 <fftwf_codelet_n2fv_16_neon+16>)
b6f32c78: add r2, pc
b6f32c7a: add r1, pc
b6f32c7c: b.w 0xb6e72ca4 fftwf_kdft_register@plt
b6f32c80: ldmia r2, {r2, r7}
b6f32c82: movs r0, r1
b6f32c84: ; instruction: 0xfb43ffff
b6f32c88: stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
b6f32c8c: mov r11, r2
b6f32c8e: vpush {d8-d15}
b6f32c92: ldr.w r2, [pc, #1600] ; 0xb6f332d4
b6f32c96: sub.w sp, sp, #524 ; 0x20c
b6f32c9a: ldr r1, [sp, #632] ; 0x278
b6f32c9c: add r2, pc
b6f32c9e: str r0, [sp, #0]
b6f32ca0: cmp r1, #0
b6f32ca2: ldr r4, [sp, #624] ; 0x270
b6f32ca4: ldr r0, [sp, #640] ; 0x280
b6f32ca6: ble.w 0xb6f33c0e
b6f32caa: ldr.w r3, [pc, #1580] ; 0xb6f332d8
b6f32cae: mov r10, r4
b6f32cb0: str r1, [sp, #20]
b6f32cb2: ldr r3, [r2, r3]
b6f32cb4: ldr r3, [r3, #0]
b6f32cb6: str r3, [sp, #516] ; 0x204
b6f32cb8: lsls r3, r0, #2
b6f32cba: str r3, [sp, #512] ; 0x200
b6f32cbc: lsls r3, r0, #3
b6f32cbe: str r3, [sp, #508] ; 0x1fc
b6f32cc0: ldr r3, [sp, #636] ; 0x27c
b6f32cc2: lsls r3, r3, #3
b6f32cc4: str r3, [sp, #504] ; 0x1f8
b6f32cc6: ldr r2, [sp, #0]
b6f32cc8: mov.w r9, #76 ; 0x4c
b6f32ccc: mov.w r0, r10, lsl #6
b6f32cd0: add.w r5, r11, #224 ; 0xe0
b6f32cd4: mvn.w r3, #31
b6f32cd8: mul.w r12, r3, r10
b6f32cdc: adds r3, r2, r0
b6f32cde: movs r2, #24
b6f32ce0: mvn.w r6, #79 ; 0x4f
b6f32ce4: mov r1, r3
b6f32ce6: add.w r8, r1, r12
b6f32cea: movs r3, #56 ; 0x38
b6f32cec: mul.w r6, r6, r10
b6f32cf0: vld1.32 {d14-d15}, [r8], r0
b6f32cf4: str r1, [sp, #200] ; 0xc8
b6f32cf6: mvn.w r1, #63 ; 0x3f
b6f32cfa: add.w r7, r8, r6
b6f32cfe: mul.w r1, r1, r10
b6f32d02: vld1.32 {d12-d13}, [r7], r0
b6f32d06: vld1.32 {d18-d19}, [r8]
b6f32d0a: vld1.32 {d16-d17}, [r7], r7
b6f32d0e: mvn.w lr, #119 ; 0x77
b6f32d12: vstr d14, [sp, #472] ; 0x1d8
b6f32d16: vstr d15, [sp, #480] ; 0x1e0
b6f32d1a: add.w r8, r7, r1
b6f32d1e: mla r9, r9, r10, r8
b6f32d22: mla lr, lr, r10, r9
b6f32d26: mla r3, r3, r10, lr
b6f32d2a: vld1.32 {d28-d29}, [r9] <- THE PROBLEM IS HERE: Break at address "0xb6f32d2a" with no debug information available, or outside of program code.
RETURN Child terminated with signal = 0xb (SIGSEGV)

@Karutsenko
Copy link
Author

I did some additional work and found one peculiarity: with an FFT size (n) of 256, FFTW works! For clarity, I printed the plan and recorded the result:
/*
 * Reproducer, second variant: 512-point single-precision forward complex DFT
 * using as many FFTW threads as omp_get_max_threads() reports.
 *
 * Prints the chosen plan, fills the input with the sum of two complex
 * exponentials, executes the transform and writes the magnitude of every
 * output bin to mgn.
 *
 * mgn: caller-provided buffer; n (512) floats are written to it.
 *
 * If set-up fails, a message is printed to stderr and mgn is left untouched.
 */
void fft_f_test2(float* mgn)
{
    int n = 512;

    fftwf_plan p = NULL;
    fftwf_complex *s = NULL, *d = NULL;
    int ok = 0;

    if (mgn == NULL) {
        fprintf(stderr, "fft test failed: output buffer is NULL\n");
        return;
    }

    int nbthreads = omp_get_max_threads();
    /* fftwf_init_threads() returns zero on error. */
    if (fftwf_init_threads() == 0) {
        fprintf(stderr, "fft test failed: fftwf_init_threads() error\n");
        return;
    }
    fftwf_plan_with_nthreads(nbthreads);

    s = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * n);
    d = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * n);
    if (s == NULL || d == NULL) {
        fprintf(stderr, "fft test failed: out of memory\n");
        goto cleanup;
    }

    //fftwf_set_timelimit(FFTW_NO_TIMELIMIT);
    //fftwf_make_planner_thread_safe();
    p = fftwf_plan_dft_1d(n, s, d, FFTW_FORWARD, FFTW_ESTIMATE);
    if (p == NULL) {
        fprintf(stderr, "fft test failed: could not create plan\n");
        goto cleanup;
    }
    printf(" "); fftwf_print_plan(p); printf("\n");

    /*
     * Plans printed for different sizes, as recorded by the reporter:
     *
     * n=64 => this works
     *   (dft-thr-ct-dit-x2/8
     *     (dftw-direct-8/12 "t3fv_8_neon")
     *     (dftw-direct-8/12 "t3fv_8_neon")
     *     (dft-thr-vrank>=1-x2/1
     *       (dft-direct-8-x4 "n2fv_8_neon")
     *       (dft-direct-8-x4 "n2fv_8_neon")))
     *
     * n=128 => this works
     *   (dft-thr-ct-dit-x2/8
     *     (dftw-direct-8/12 "t3fv_8_neon")
     *     (dftw-direct-8/12 "t3fv_8_neon")
     *     (dft-thr-vrank>=1-x2/1
     *       (dft-direct-16-x4 "n2fv_16_neon")
     *       (dft-direct-16-x4 "n2fv_16_neon")))
     *
     * n=256 => this works
     *   (dft-thr-ct-dit-x2/16
     *     (dftw-direct-16/16 "t3fv_16_neon")
     *     (dftw-direct-16/16 "t3fv_16_neon")
     *     (dft-thr-vrank>=1-x2/1
     *       (dft-direct-16-x8 "n2fv_16_neon")
     *       (dft-direct-16-x8 "n2fv_16_neon")))
     *
     * n=512 => this does NOT work
     *   (dft-thr-ct-dit-x2/16
     *     (dftw-direct-16/16 "t3fv_16_neon")
     *     (dftw-direct-16/16 "t3fv_16_neon")
     *     (dft-thr-vrank>=1-x2/1
     *       (dft-direct-32-x8 "n2fv_32_neon")
     *       (dft-direct-32-x8 "n2fv_32_neon")))
     *   ===========================================
     *   Child terminated with signal = 0xb (SIGSEGV)
     */

    /* Test signal: two complex exponentials with amplitudes 1.0 and 0.5. */
    for (int i = 0; i < n; i++) {
        float theta = (float)i / (float)n * M_PI;
        s[i][REAL] = 1.0 * cos(10.0 * theta) + 0.5 * cos(25.0 * theta);
        s[i][IMAG] = 1.0 * sin(10.0 * theta) + 0.5 * sin(25.0 * theta);
    }

    //fftwf_execute(p);
    fftwf_execute_dft(p, s, d);

    for (int i = 0; i < n; i++) {
        mgn[i] = sqrt(d[i][REAL] * d[i][REAL] + d[i][IMAG] * d[i][IMAG]);
    }
    ok = 1;

cleanup:
    /*
     * The plan must be destroyed BEFORE fftwf_cleanup_threads(): per the FFTW
     * manual, existing plans become undefined after cleanup and must not be
     * executed or destroyed.
     */
    if (p != NULL) {
        fftwf_destroy_plan(p);
    }
    if (s != NULL) {
        fftwf_free(s);
    }
    if (d != NULL) {
        fftwf_free(d);
    }
    fftwf_cleanup_threads();

    if (ok) {
        printf("fft test success!\n");
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant