diff --git a/3rdparty/oourafft/fftsg2d.cc b/3rdparty/oourafft/fftsg2d.cc index 5d674ed8..99986196 100644 --- a/3rdparty/oourafft/fftsg2d.cc +++ b/3rdparty/oourafft/fftsg2d.cc @@ -366,17 +366,15 @@ macro definitions . */ - - #include #include -#define fft2d_alloc_error_check(p) \ - { \ - if((p) == NULL) { \ - fprintf(stderr, "fft2d memory allocation error\n"); \ - exit(1); \ - } \ - } +#define fft2d_alloc_error_check(p) \ + { \ + if ((p) == NULL) { \ + fprintf(stderr, "fft2d memory allocation error\n"); \ + exit(1); \ + } \ + } #ifdef USE_FFT2D_PTHREADS #define USE_FFT2D_THREADS @@ -388,20 +386,20 @@ macro definitions #endif #include #define fft2d_thread_t pthread_t -#define fft2d_thread_create(thp, func, argp) \ - { \ - if(pthread_create(thp, NULL, func, (void *)(argp)) != 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ - } -#define fft2d_thread_wait(th) \ - { \ - if(pthread_join(th, NULL) != 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ - } +#define fft2d_thread_create(thp, func, argp) \ + { \ + if (pthread_create(thp, NULL, func, (void *)(argp)) != 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ + } +#define fft2d_thread_wait(th) \ + { \ + if (pthread_join(th, NULL) != 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ + } #endif /* USE_FFT2D_PTHREADS */ #ifdef USE_FFT2D_WINTHREADS @@ -415,1005 +413,856 @@ macro definitions #define NOMINMAX #include #define fft2d_thread_t HANDLE -#define fft2d_thread_create(thp, func, argp) \ - { \ - DWORD thid; \ - *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)(func), \ - (LPVOID)(argp), 0, &thid); \ - if(*(thp) == 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ - } -#define fft2d_thread_wait(th) \ - { \ - WaitForSingleObject(th, INFINITE); \ - CloseHandle(th); \ - } +#define fft2d_thread_create(thp, func, argp) \ + { \ + DWORD thid; \ + *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)(func), (LPVOID)(argp), 0, &thid); \ + if (*(thp) == 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ + } +#define fft2d_thread_wait(th) \ + { \ + WaitForSingleObject(th, INFINITE); \ + CloseHandle(th); \ + } #endif /* USE_FFT2D_WINTHREADS */ #include "nextpnr_namespaces.h" NEXTPNR_NAMESPACE_BEGIN +void ddct(int n, int isgn, float *a, int *ip, float *w); +void ddst(int n, int isgn, float *a, int *ip, float *w); +void cdft(int n, int isgn, float *a, int *ip, float *w); +void *cdft2d_th(void *p); +void *ddxt2d0_th(void *p); +void *ddxt2d_th(void *p); +void rdft(int n, int isgn, float *a, int *ip, float *w); +void makewt(int nw, int *ip, float *w); +void cdft2d_sub(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w); +void makect(int nc, int *ip, float *c); +void rdft2d_sub(int n1, int isgn, float **a); +void ddxt2d_sub(int n1, int n2, int ics, int isgn, float **a, float *t, int *ip, float *w); -void cdft2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) +#ifdef USE_FFT2D_THREADS +void xdft2d0_subth(int n1, int n2, int icr, int isgn, float **a, int *ip, float *w); +void cdft2d_subth(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w); +void ddxt2d0_subth(int n1, int n2, int ics, int isgn, float **a, int *ip, float *w); +void ddxt2d_subth(int n1, int n2, int ics, int isgn, float **a, float *t, int *ip, float *w); +#endif /* USE_FFT2D_THREADS */ + +void cdft2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void makewt(int nw, int* ip, float* w); - void cdft(int n, int isgn, float* a, int* ip, float* w); - void cdft2d_sub( - int n1, int n2, int isgn, float** a, float* t, int* ip, float* w); -#ifdef USE_FFT2D_THREADS - void xdft2d0_subth( - int n1, int n2, int icr, int isgn, float** a, int* ip, float* w); - void cdft2d_subth( - int n1, int n2, int isgn, float** a, float* t, int* ip, float* w); -#endif /* USE_FFT2D_THREADS */ - int n, itnull, nthread, nt, i; + int n, itnull, nthread, nt, i; - n = n1 << 1; - if (n < n2) { - n = n2; - } - if (n > (ip[0] << 2)) { - makewt(n >> 2, ip, w); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 8 * nthread * n1; - if (n2 == 4 * nthread) { - nt >>= 1; - } else if (n2 < 4 * nthread) { - nt >>= 2; + n = n1 << 1; + if (n < n2) { + n = n2; } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { - xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); - cdft2d_subth(n1, n2, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - cdft(n2, isgn, a[i], ip, w); + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 8 * nthread * n1; + if (n2 == 4 * nthread) { + nt >>= 1; + } else if (n2 < 4 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + cdft(n2, isgn, a[i], ip, w); + } + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); } - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } } -void rdft2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) +void rdft2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void makewt(int nw, int* ip, float* w); - void makect(int nc, int* ip, float* c); - void rdft(int n, int isgn, float* a, int* ip, float* w); - void cdft2d_sub( - int n1, int n2, int isgn, float** a, float* t, int* ip, float* w); - void rdft2d_sub(int n1, int isgn, float** a); -#ifdef USE_FFT2D_THREADS - void xdft2d0_subth( - int n1, int n2, int icr, int isgn, float** a, int* ip, float* w); - void cdft2d_subth( - int n1, int n2, int isgn, float** a, float* t, int* ip, float* w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; + int n, nw, nc, itnull, nthread, nt, i; - n = n1 << 1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n2 > (nc << 2)) { - nc = n2 >> 2; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 8 * nthread * n1; - if (n2 == 4 * nthread) { - nt >>= 1; - } else if (n2 < 4 * nthread) { - nt >>= 2; + n = n1 << 1; + if (n < n2) { + n = n2; } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n2 > (nc << 2)) { + nc = n2 >> 2; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; #ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 8 * nthread * n1; + if (n2 == 4 * nthread) { + nt >>= 1; + } else if (n2 < 4 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + if (isgn < 0) { + rdft2d_sub(n1, isgn, a); + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + } + xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); + if (isgn >= 0) { + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + rdft2d_sub(n1, isgn, a); + } + } else +#endif /* USE_FFT2D_THREADS */ + { + if (isgn < 0) { + rdft2d_sub(n1, isgn, a); + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + } + for (i = 0; i < n1; i++) { + rdft(n2, isgn, a[i], ip, w); + } + if (isgn >= 0) { + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + rdft2d_sub(n1, isgn, a); + } + } + if (itnull != 0) { + free(t); + } +} + +void rdft2dsort(int n1, int n2, int isgn, float **a) +{ + int n1h, i; + float x, y; + + n1h = n1 >> 1; if (isgn < 0) { - rdft2d_sub(n1, isgn, a); - cdft2d_subth(n1, n2, isgn, a, t, ip, w); + for (i = n1h + 1; i < n1; i++) { + a[i][0] = a[i][n2 + 1]; + a[i][1] = a[i][n2]; + } + a[0][1] = a[0][n2]; + a[n1h][1] = a[n1h][n2]; + } else { + for (i = n1h + 1; i < n1; i++) { + y = a[i][0]; + x = a[i][1]; + a[i][n2] = x; + a[i][n2 + 1] = y; + a[n1 - i][n2] = x; + a[n1 - i][n2 + 1] = -y; + a[i][0] = a[n1 - i][0]; + a[i][1] = -a[n1 - i][1]; + } + a[0][n2] = a[0][1]; + a[0][n2 + 1] = 0; + a[0][1] = 0; + a[n1h][n2] = a[n1h][1]; + a[n1h][n2 + 1] = 0; + a[n1h][1] = 0; } - xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); - if (isgn >= 0) { - cdft2d_subth(n1, n2, isgn, a, t, ip, w); - rdft2d_sub(n1, isgn, a); - } - } else -#endif /* USE_FFT2D_THREADS */ - { - if (isgn < 0) { - rdft2d_sub(n1, isgn, a); - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - } - for (i = 0; i < n1; i++) { - rdft(n2, isgn, a[i], ip, w); - } - if (isgn >= 0) { - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - rdft2d_sub(n1, isgn, a); - } - } - if (itnull != 0) { - free(t); - } } -void rdft2dsort(int n1, int n2, int isgn, float** a) +void ddcst2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - int n1h, i; - float x, y; + int n, nw, nc, itnull, nthread, nt, i; - n1h = n1 >> 1; - if (isgn < 0) { - for (i = n1h + 1; i < n1; i++) { - a[i][0] = a[i][n2 + 1]; - a[i][1] = a[i][n2]; + n = n1; + if (n < n2) { + n = n2; } - a[0][1] = a[0][n2]; - a[n1h][1] = a[n1h][n2]; - } else { - for (i = n1h + 1; i < n1; i++) { - y = a[i][0]; - x = a[i][1]; - a[i][n2] = x; - a[i][n2 + 1] = y; - a[n1 - i][n2] = x; - a[n1 - i][n2 + 1] = -y; - a[i][0] = a[n1 - i][0]; - a[i][1] = -a[n1 - i][1]; + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddst(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); } - a[0][n2] = a[0][1]; - a[0][n2 + 1] = 0; - a[0][1] = 0; - a[n1h][n2] = a[n1h][1]; - a[n1h][n2 + 1] = 0; - a[n1h][1] = 0; - } } -void ddcst2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) +void ddsct2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void makewt(int nw, int* ip, float* w); - void makect(int nc, int* ip, float* c); - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddst(int n, int isgn, float* a, int* ip, float* w); - void ddxt2d_sub(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth( - int n1, int n2, int ics, int isgn, float** a, int* ip, float* w); - void ddxt2d_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; + int n, nw, nc, itnull, nthread, nt, i; - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; + n = n1; + if (n < n2) { + n = n2; } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddst(n2, isgn, a[i], ip, w); + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddct(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); } - ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } } -void ddsct2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) +void ddct2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void makewt(int nw, int* ip, float* w); - void makect(int nc, int* ip, float* c); - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddst(int n, int isgn, float* a, int* ip, float* w); - void ddxt2d_sub(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth( - int n1, int n2, int ics, int isgn, float** a, int* ip, float* w); - void ddxt2d_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; + int n, nw, nc, itnull, nthread, nt, i; - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; + n = n1; + if (n < n2) { + n = n2; } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddct(n2, isgn, a[i], ip, w); + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddct(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); } - ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } } -void ddct2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) +void ddst2d(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void makewt(int nw, int* ip, float* w); - void makect(int nc, int* ip, float* c); - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddxt2d_sub(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth( - int n1, int n2, int ics, int isgn, float** a, int* ip, float* w); - void ddxt2d_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; + int n, nw, nc, itnull, nthread, nt, i; - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; + n = n1; + if (n < n2) { + n = n2; } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddct(n2, isgn, a[i], ip, w); + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); } - ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - -void ddst2d(int n1, int n2, int isgn, float** a, float* t, int* ip, float* w) -{ - void makewt(int nw, int* ip, float* w); - void makect(int nc, int* ip, float* c); - void ddst(int n, int isgn, float* a, int* ip, float* w); - void ddxt2d_sub(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth( - int n1, int n2, int ics, int isgn, float** a, int* ip, float* w); - void ddxt2d_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; - - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); } - t = (float*) malloc(sizeof(float) * nt); - fft2d_alloc_error_check(t); - } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; #ifdef USE_FFT2D_THREADS - if ((float) n1 * n2 >= (float) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); - } else + nthread = FFT2D_MAX_THREADS; #endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddst(n2, isgn, a[i], ip, w); + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (float *)malloc(sizeof(float) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((float)n1 * n2 >= (float)FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddst(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); } - ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } } /* -------- child routines -------- */ -void cdft2d_sub(int n1, - int n2, - int isgn, - float** a, - float* t, - int* ip, - float* w) +void cdft2d_sub(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void cdft(int n, int isgn, float* a, int* ip, float* w); - int i, j; + int i, j; - if (n2 > 4) { - for (j = 0; j < n2; j += 8) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j]; - t[2 * i + 1] = a[i][j + 1]; - t[2 * n1 + 2 * i] = a[i][j + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j + 3]; - t[4 * n1 + 2 * i] = a[i][j + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j + 5]; - t[6 * n1 + 2 * i] = a[i][j + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j] = t[2 * i]; - a[i][j + 1] = t[2 * i + 1]; - a[i][j + 2] = t[2 * n1 + 2 * i]; - a[i][j + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j + 4] = t[4 * n1 + 2 * i]; - a[i][j + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j + 6] = t[6 * n1 + 2 * i]; - a[i][j + 7] = t[6 * n1 + 2 * i + 1]; - } + if (n2 > 4) { + for (j = 0; j < n2; j += 8) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j]; + t[2 * i + 1] = a[i][j + 1]; + t[2 * n1 + 2 * i] = a[i][j + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j + 3]; + t[4 * n1 + 2 * i] = a[i][j + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j + 5]; + t[6 * n1 + 2 * i] = a[i][j + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j] = t[2 * i]; + a[i][j + 1] = t[2 * i + 1]; + a[i][j + 2] = t[2 * n1 + 2 * i]; + a[i][j + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j + 4] = t[4 * n1 + 2 * i]; + a[i][j + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j + 6] = t[6 * n1 + 2 * i]; + a[i][j + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } else if (n2 == 4) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][0]; + t[2 * i + 1] = a[i][1]; + t[2 * n1 + 2 * i] = a[i][2]; + t[2 * n1 + 2 * i + 1] = a[i][3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][0] = t[2 * i]; + a[i][1] = t[2 * i + 1]; + a[i][2] = t[2 * n1 + 2 * i]; + a[i][3] = t[2 * n1 + 2 * i + 1]; + } + } else if (n2 == 2) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][0]; + t[2 * i + 1] = a[i][1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][0] = t[2 * i]; + a[i][1] = t[2 * i + 1]; + } } - } else if (n2 == 4) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][0]; - t[2 * i + 1] = a[i][1]; - t[2 * n1 + 2 * i] = a[i][2]; - t[2 * n1 + 2 * i + 1] = a[i][3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][0] = t[2 * i]; - a[i][1] = t[2 * i + 1]; - a[i][2] = t[2 * n1 + 2 * i]; - a[i][3] = t[2 * n1 + 2 * i + 1]; - } - } else if (n2 == 2) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][0]; - t[2 * i + 1] = a[i][1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][0] = t[2 * i]; - a[i][1] = t[2 * i + 1]; - } - } } -void rdft2d_sub(int n1, int isgn, float** a) +void rdft2d_sub(int n1, int isgn, float **a) { - int n1h, i, j; - float xi; + int n1h, i, j; + float xi; - n1h = n1 >> 1; - if (isgn < 0) { - for (i = 1; i < n1h; i++) { - j = n1 - i; - xi = a[i][0] - a[j][0]; - a[i][0] += a[j][0]; - a[j][0] = xi; - xi = a[j][1] - a[i][1]; - a[i][1] += a[j][1]; - a[j][1] = xi; - } - } else { - for (i = 1; i < n1h; i++) { - j = n1 - i; - a[j][0] = 0.5 * (a[i][0] - a[j][0]); - a[i][0] -= a[j][0]; - a[j][1] = 0.5 * (a[i][1] + a[j][1]); - a[i][1] -= a[j][1]; - } - } -} - -void ddxt2d_sub(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w) -{ - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddst(int n, int isgn, float* a, int* ip, float* w); - int i, j; - - if (n2 > 2) { - for (j = 0; j < n2; j += 4) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j]; - t[n1 + i] = a[i][j + 1]; - t[2 * n1 + i] = a[i][j + 2]; - t[3 * n1 + i] = a[i][j + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j] = t[i]; - a[i][j + 1] = t[n1 + i]; - a[i][j + 2] = t[2 * n1 + i]; - a[i][j + 3] = t[3 * n1 + i]; - } - } - } else if (n2 == 2) { - for (i = 0; i < n1; i++) { - t[i] = a[i][0]; - t[n1 + i] = a[i][1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); + n1h = n1 >> 1; + if (isgn < 0) { + for (i = 1; i < n1h; i++) { + j = n1 - i; + xi = a[i][0] - a[j][0]; + a[i][0] += a[j][0]; + a[j][0] = xi; + xi = a[j][1] - a[i][1]; + a[i][1] += a[j][1]; + a[j][1] = xi; + } } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); + for (i = 1; i < n1h; i++) { + j = n1 - i; + a[j][0] = 0.5 * (a[i][0] - a[j][0]); + a[i][0] -= a[j][0]; + a[j][1] = 0.5 * (a[i][1] + a[j][1]); + a[i][1] -= a[j][1]; + } } - for (i = 0; i < n1; i++) { - a[i][0] = t[i]; - a[i][1] = t[n1 + i]; +} + +void ddxt2d_sub(int n1, int n2, int ics, int isgn, float **a, float *t, int *ip, float *w) +{ + int i, j; + + if (n2 > 2) { + for (j = 0; j < n2; j += 4) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j]; + t[n1 + i] = a[i][j + 1]; + t[2 * n1 + i] = a[i][j + 2]; + t[3 * n1 + i] = a[i][j + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j] = t[i]; + a[i][j + 1] = t[n1 + i]; + a[i][j + 2] = t[2 * n1 + i]; + a[i][j + 3] = t[3 * n1 + i]; + } + } + } else if (n2 == 2) { + for (i = 0; i < n1; i++) { + t[i] = a[i][0]; + t[n1 + i] = a[i][1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][0] = t[i]; + a[i][1] = t[n1 + i]; + } } - } } #ifdef USE_FFT2D_THREADS struct fft2d_arg_st { - int nthread; - int n0; - int n1; - int n2; - int ic; - int isgn; - float** a; - float* t; - int* ip; - float* w; + int nthread; + int n0; + int n1; + int n2; + int ic; + int isgn; + float **a; + float *t; + int *ip; + float *w; }; typedef struct fft2d_arg_st fft2d_arg_t; -void xdft2d0_subth(int n1, - int n2, - int icr, - int isgn, - float** a, - int* ip, - float* w) +void xdft2d0_subth(int n1, int n2, int icr, int isgn, float **a, int *ip, float *w) { - void* xdft2d0_th(void* p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, i; + void *xdft2d0_th(void *p); + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, i; - nthread = FFT2D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = icr; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } + nthread = FFT2D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = icr; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } } -void cdft2d_subth(int n1, - int n2, - int isgn, - float** a, - float* t, - int* ip, - float* w) +void cdft2d_subth(int n1, int n2, int isgn, float **a, float *t, int *ip, float *w) { - void* cdft2d_th(void* p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, nt, i; + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, nt, i; - nthread = FFT2D_MAX_THREADS; - nt = 8 * n1; - if (n2 == 4 * FFT2D_MAX_THREADS) { - nt >>= 1; - } else if (n2 < 4 * FFT2D_MAX_THREADS) { - nthread = n2 >> 1; - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } + nthread = FFT2D_MAX_THREADS; + nt = 8 * n1; + if (n2 == 4 * FFT2D_MAX_THREADS) { + nt >>= 1; + } else if (n2 < 4 * FFT2D_MAX_THREADS) { + nthread = n2 >> 1; + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } } -void ddxt2d0_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - int* ip, - float* w) +void ddxt2d0_subth(int n1, int n2, int ics, int isgn, float **a, int *ip, float *w) { - void* ddxt2d0_th(void* p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, i; + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, i; - nthread = FFT2D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } + nthread = FFT2D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } } -void ddxt2d_subth(int n1, - int n2, - int ics, - int isgn, - float** a, - float* t, - int* ip, - float* w) +void ddxt2d_subth(int n1, int n2, int ics, int isgn, float **a, float *t, int *ip, float *w) { - void* ddxt2d_th(void* p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, nt, i; + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, nt, i; - nthread = FFT2D_MAX_THREADS; - nt = 4 * n1; - if (n2 == 2 * FFT2D_MAX_THREADS) { - nt >>= 1; - } else if (n2 < 2 * FFT2D_MAX_THREADS) { - nthread = n2; - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } + nthread = FFT2D_MAX_THREADS; + nt = 4 * n1; + if (n2 == 2 * FFT2D_MAX_THREADS) { + nt >>= 1; + } else if (n2 < 2 * FFT2D_MAX_THREADS) { + nthread = n2; + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } } -void* xdft2d0_th(void* p) +void *xdft2d0_th(void *p) { - void cdft(int n, int isgn, float* a, int* ip, float* w); - void rdft(int n, int isgn, float* a, int* ip, float* w); - int nthread, n0, n1, n2, icr, isgn, *ip, i; - float **a, *w; + int nthread, n0, n1, n2, icr, isgn, *ip, i; + float **a, *w; - nthread = ((fft2d_arg_t*) p)->nthread; - n0 = ((fft2d_arg_t*) p)->n0; - n1 = ((fft2d_arg_t*) p)->n1; - n2 = ((fft2d_arg_t*) p)->n2; - icr = ((fft2d_arg_t*) p)->ic; - isgn = ((fft2d_arg_t*) p)->isgn; - a = ((fft2d_arg_t*) p)->a; - ip = ((fft2d_arg_t*) p)->ip; - w = ((fft2d_arg_t*) p)->w; - if (icr == 0) { - for (i = n0; i < n1; i += nthread) { - cdft(n2, isgn, a[i], ip, w); - } - } else { - for (i = n0; i < n1; i += nthread) { - rdft(n2, isgn, a[i], ip, w); - } - } - return (void*) 0; -} - -void* cdft2d_th(void* p) -{ - void cdft(int n, int isgn, float* a, int* ip, float* w); - int nthread, n0, n1, n2, isgn, *ip, i, j; - float **a, *t, *w; - - nthread = ((fft2d_arg_t*) p)->nthread; - n0 = ((fft2d_arg_t*) p)->n0; - n1 = ((fft2d_arg_t*) p)->n1; - n2 = ((fft2d_arg_t*) p)->n2; - isgn = ((fft2d_arg_t*) p)->isgn; - a = ((fft2d_arg_t*) p)->a; - t = ((fft2d_arg_t*) p)->t; - ip = ((fft2d_arg_t*) p)->ip; - w = ((fft2d_arg_t*) p)->w; - if (n2 > 4 * nthread) { - for (j = 8 * n0; j < n2; j += 8 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j]; - t[2 * i + 1] = a[i][j + 1]; - t[2 * n1 + 2 * i] = a[i][j + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j + 3]; - t[4 * n1 + 2 * i] = a[i][j + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j + 5]; - t[6 * n1 + 2 * i] = a[i][j + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j] = t[2 * i]; - a[i][j + 1] = t[2 * i + 1]; - a[i][j + 2] = t[2 * n1 + 2 * i]; - a[i][j + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j + 4] = t[4 * n1 + 2 * i]; - a[i][j + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j + 6] = t[6 * n1 + 2 * i]; - a[i][j + 7] = t[6 * n1 + 2 * i + 1]; - } - } - } else if (n2 == 4 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][4 * n0]; - t[2 * i + 1] = a[i][4 * n0 + 1]; - t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; - t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][4 * n0] = t[2 * i]; - a[i][4 * n0 + 1] = t[2 * i + 1]; - a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; - a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; - } - } else if (n2 == 2 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][2 * n0]; - t[2 * i + 1] = a[i][2 * n0 + 1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][2 * n0] = t[2 * i]; - a[i][2 * n0 + 1] = t[2 * i + 1]; - } - } - return (void*) 0; -} - -void* ddxt2d0_th(void* p) -{ - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddst(int n, int isgn, float* a, int* ip, float* w); - int nthread, n0, n1, n2, ics, isgn, *ip, i; - float **a, *w; - - nthread = ((fft2d_arg_t*) p)->nthread; - n0 = ((fft2d_arg_t*) p)->n0; - n1 = ((fft2d_arg_t*) p)->n1; - n2 = ((fft2d_arg_t*) p)->n2; - ics = ((fft2d_arg_t*) p)->ic; - isgn = ((fft2d_arg_t*) p)->isgn; - a = ((fft2d_arg_t*) p)->a; - ip = ((fft2d_arg_t*) p)->ip; - w = ((fft2d_arg_t*) p)->w; - if (ics == 0) { - for (i = n0; i < n1; i += nthread) { - ddct(n2, isgn, a[i], ip, w); - } - } else { - for (i = n0; i < n1; i += nthread) { - ddst(n2, isgn, a[i], ip, w); - } - } - return (void*) 0; -} - -void* ddxt2d_th(void* p) -{ - void ddct(int n, int isgn, float* a, int* ip, float* w); - void ddst(int n, int isgn, float* a, int* ip, float* w); - int nthread, n0, n1, n2, ics, isgn, *ip, i, j; - float **a, *t, *w; - - nthread = ((fft2d_arg_t*) p)->nthread; - n0 = ((fft2d_arg_t*) p)->n0; - n1 = ((fft2d_arg_t*) p)->n1; - n2 = ((fft2d_arg_t*) p)->n2; - ics = ((fft2d_arg_t*) p)->ic; - isgn = ((fft2d_arg_t*) p)->isgn; - a = ((fft2d_arg_t*) p)->a; - t = ((fft2d_arg_t*) p)->t; - ip = ((fft2d_arg_t*) p)->ip; - w = ((fft2d_arg_t*) p)->w; - if (n2 > 2 * nthread) { - for (j = 4 * n0; j < n2; j += 4 * nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j]; - t[n1 + i] = a[i][j + 1]; - t[2 * n1 + i] = a[i][j + 2]; - t[3 * n1 + i] = a[i][j + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j] = t[i]; - a[i][j + 1] = t[n1 + i]; - a[i][j + 2] = t[2 * n1 + i]; - a[i][j + 3] = t[3 * n1 + i]; - } - } - } else if (n2 == 2 * nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][2 * n0]; - t[n1 + i] = a[i][2 * n0 + 1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); + nthread = ((fft2d_arg_t *)p)->nthread; + n0 = ((fft2d_arg_t *)p)->n0; + n1 = ((fft2d_arg_t *)p)->n1; + n2 = ((fft2d_arg_t *)p)->n2; + icr = ((fft2d_arg_t *)p)->ic; + isgn = ((fft2d_arg_t *)p)->isgn; + a = ((fft2d_arg_t *)p)->a; + ip = ((fft2d_arg_t *)p)->ip; + w = ((fft2d_arg_t *)p)->w; + if (icr == 0) { + for (i = n0; i < n1; i += nthread) { + cdft(n2, isgn, a[i], ip, w); + } } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); + for (i = n0; i < n1; i += nthread) { + rdft(n2, isgn, a[i], ip, w); + } } - for (i = 0; i < n1; i++) { - a[i][2 * n0] = t[i]; - a[i][2 * n0 + 1] = t[n1 + i]; - } - } else if (n2 == nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][n0]; + return (void *)0; +} + +void *cdft2d_th(void *p) +{ + int nthread, n0, n1, n2, isgn, *ip, i, j; + float **a, *t, *w; + + nthread = ((fft2d_arg_t *)p)->nthread; + n0 = ((fft2d_arg_t *)p)->n0; + n1 = ((fft2d_arg_t *)p)->n1; + n2 = ((fft2d_arg_t *)p)->n2; + isgn = ((fft2d_arg_t *)p)->isgn; + a = ((fft2d_arg_t *)p)->a; + t = ((fft2d_arg_t *)p)->t; + ip = ((fft2d_arg_t *)p)->ip; + w = ((fft2d_arg_t *)p)->w; + if (n2 > 4 * nthread) { + for (j = 8 * n0; j < n2; j += 8 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j]; + t[2 * i + 1] = a[i][j + 1]; + t[2 * n1 + 2 * i] = a[i][j + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j + 3]; + t[4 * n1 + 2 * i] = a[i][j + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j + 5]; + t[6 * n1 + 2 * i] = a[i][j + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j] = t[2 * i]; + a[i][j + 1] = t[2 * i + 1]; + a[i][j + 2] = t[2 * n1 + 2 * i]; + a[i][j + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j + 4] = t[4 * n1 + 2 * i]; + a[i][j + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j + 6] = t[6 * n1 + 2 * i]; + a[i][j + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } else if (n2 == 4 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][4 * n0]; + t[2 * i + 1] = a[i][4 * n0 + 1]; + t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; + t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][4 * n0] = t[2 * i]; + a[i][4 * n0 + 1] = t[2 * i + 1]; + a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; + a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; + } + } else if (n2 == 2 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][2 * n0]; + t[2 * i + 1] = a[i][2 * n0 + 1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][2 * n0] = t[2 * i]; + a[i][2 * n0 + 1] = t[2 * i + 1]; + } } + return (void *)0; +} + +void *ddxt2d0_th(void *p) +{ + int nthread, n0, n1, n2, ics, isgn, *ip, i; + float **a, *w; + + nthread = ((fft2d_arg_t *)p)->nthread; + n0 = ((fft2d_arg_t *)p)->n0; + n1 = ((fft2d_arg_t *)p)->n1; + n2 = ((fft2d_arg_t *)p)->n2; + ics = ((fft2d_arg_t *)p)->ic; + isgn = ((fft2d_arg_t *)p)->isgn; + a = ((fft2d_arg_t *)p)->a; + ip = ((fft2d_arg_t *)p)->ip; + w = ((fft2d_arg_t *)p)->w; if (ics == 0) { - ddct(n1, isgn, t, ip, w); + for (i = n0; i < n1; i += nthread) { + ddct(n2, isgn, a[i], ip, w); + } } else { - ddst(n1, isgn, t, ip, w); + for (i = n0; i < n1; i += nthread) { + ddst(n2, isgn, a[i], ip, w); + } } - for (i = 0; i < n1; i++) { - a[i][n0] = t[i]; + return (void *)0; +} + +void *ddxt2d_th(void *p) +{ + int nthread, n0, n1, n2, ics, isgn, *ip, i, j; + float **a, *t, *w; + + nthread = ((fft2d_arg_t *)p)->nthread; + n0 = ((fft2d_arg_t *)p)->n0; + n1 = ((fft2d_arg_t *)p)->n1; + n2 = ((fft2d_arg_t *)p)->n2; + ics = ((fft2d_arg_t *)p)->ic; + isgn = ((fft2d_arg_t *)p)->isgn; + a = ((fft2d_arg_t *)p)->a; + t = ((fft2d_arg_t *)p)->t; + ip = ((fft2d_arg_t *)p)->ip; + w = ((fft2d_arg_t *)p)->w; + if (n2 > 2 * nthread) { + for (j = 4 * n0; j < n2; j += 4 * nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j]; + t[n1 + i] = a[i][j + 1]; + t[2 * n1 + i] = a[i][j + 2]; + t[3 * n1 + i] = a[i][j + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j] = t[i]; + a[i][j + 1] = t[n1 + i]; + a[i][j + 2] = t[2 * n1 + i]; + a[i][j + 3] = t[3 * n1 + i]; + } + } + } else if (n2 == 2 * nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][2 * n0]; + t[n1 + i] = a[i][2 * n0 + 1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][2 * n0] = t[i]; + a[i][2 * n0 + 1] = t[n1 + i]; + } + } else if (n2 == nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][n0]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + } else { + ddst(n1, isgn, t, ip, w); + } + for (i = 0; i < n1; i++) { + a[i][n0] = t[i]; + } } - } - return (void*) 0; + return (void *)0; } #endif /* USE_FFT2D_THREADS */