forward_transform_power_of_2_2d (Fast-Fourier Transform)


Import

#include <archr/fft/fft.h>
#include <archr/fft/fft.hpp>
use archr_fft
import archr.fft
atomsLoads('archr')

Synopsis

 void archr_fft_forward_transform_power_of_2_2d_f32(float *out_real, float *out_imag, const float *real, const float *imag, const archr_fft_planner_2d_f32_t *plan);
(1)
 void archr_fft_forward_transform_power_of_2_2d_f64(double *out_real, double *out_imag, const double *real, const double *imag, const archr_fft_planner_2d_f64_t *plan);
(2)
 void archr_fft_forward_transform_power_of_2_2d_zf32(void *output, const void *input, const archr_fft_planner_2d_zf32_t *plan);
(3)
 void archr_fft_forward_transform_power_of_2_2d_zf64(void *output, const void *input, const archr_fft_planner_2d_zf64_t *plan);
(4)
 void archr_fft_forward_transform_power_of_2_2d_zf32f32(void *output, const float *input, const archr_fft_planner_r2c_2d_f32_t *plan);
(5)
 void archr_fft_forward_transform_power_of_2_2d_zf64f64(void *output, const double *input, const archr_fft_planner_r2c_2d_f64_t *plan);
(6)
namespace archr {
 void forward_transform_power_of_2_2d(const float *real, const float *imag, float *out_real, float *out_imag, const planner_2d<float>& plan);
(1)
 void forward_transform_power_of_2_2d(const double *real, const double *imag, double *out_real, double *out_imag, const planner_2d<double>& plan);
(2)
 void forward_transform_power_of_2_2d(const std::complex<float>*input, std::complex<float>*output, const planner_2d<std::complex<float>>& plan);
(3)
 void forward_transform_power_of_2_2d(const std::complex<double>*input, std::complex<double>*output, const planner_2d<std::complex<double>>& plan);
(4)
 void forward_transform_power_of_2_2d(const float *input, std::complex<float>*output, const r2c_planner_2d<float>& plan);
(5)
 void forward_transform_power_of_2_2d(const double *input, std::complex<double>*output, const r2c_planner_2d<double>& plan);
(6)
}
subroutine archr_fft_forward_transform_power_of_2_2d_f32(real(4), dimension(*, *), parameter :: real, real(4), dimension(*, *), parameter :: imag, real(4), dimension(*, *) :: out_real, real(4), dimension(*, *) :: out_imag, type(archr_fft_planner_r2c_2d_f32), parameter :: plan)
(1)
subroutine archr_fft_forward_transform_power_of_2_2d_f64(real(8), dimension(*, *), parameter :: real, real(8), dimension(*, *), parameter :: imag, real(8), dimension(*, *) :: out_real, real(8), dimension(*, *) :: out_imag, type(archr_fft_planner_r2c_2d_f64), parameter :: plan)
(2)
subroutine archr_fft_forward_transform_power_of_2_2d_zf32(complex(4), dimension(*, *), parameter :: input, complex(4), dimension(*, *) :: output, type(archr_fft_planner_r2c_2d_zf32), parameter :: plan)
(3)
subroutine archr_fft_forward_transform_power_of_2_2d_zf64(complex(8), dimension(*, *), parameter :: input, complex(8), dimension(*, *) :: output, type(archr_fft_planner_r2c_2d_zf64), parameter :: plan)
(4)
subroutine archr_fft_forward_transform_power_of_2_2d_f32zf32(real(4), dimension(*, *), parameter :: input, complex(4), dimension(*, *) :: output, real(4), parameter :: plan)
(5)
subroutine archr_fft_forward_transform_power_of_2_2d_f64zf64(real(8), dimension(*, *), parameter :: input, complex(8), dimension(*, *) :: output, real(8), parameter :: plan)
(6)
def fft2p2(input):
    return output
(1)
def fft2p2(input, plan):
    return output
(2)
def fft2p2(real, imag):
    return (out_real, out_imag)
(3)
def fft2p2(real, imag, plan):
    return (out_real, out_imag)
(4)
function output = archr_fft_fft2p2(input)
(1)
function output = archr_fft_fft2p2(input, plan)
(2)
function out_real, out_imag = archr_fft_fft2p2(real, imag)
(3)
function out_real, out_imag = archr_fft_fft2p2(real, imag, plan)
(4)

Description

This function calculates a 2D forward Fourrier transform of the input data. The results from the transform are stored in-order in the output array with the zero frequency component or the DC component stored in output[0]. The best possible performance from Arch-R FFT is realized when the input and output arrays are correctly aligned.

Example

#include <archr/fft.h>

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <complex.h>

typedef float T;

int main(){
  {
    //input and output are split complex numbers. (storage format rrrrrrr...., iiiiiiii....)
    size_t height = 2048, width = 512;
    size_t N = height * width;
    archr_fft_planner_2d_f32_t a_planner;
    archr_fft_init_planner_f32_2d(&a_planner, height, width, archr_fft_planner_direction_Forward);

    T *real = malloc(sizeof(T) * N);
    T *imag = malloc(sizeof(T) * N);
    T *out_real = malloc(sizeof(T) * N);
    T *out_imag = malloc(sizeof(T) * N);
    for(size_t i = 0; i < N; ++i){
      real[i] = (T)rand() / INT_MAX;
      imag[i] = (T)rand() / INT_MAX;
    }

    archr_fft_forward_transform_power_of_2_2d_f32(out_real, out_imag, real, imag, &a_planner);
    T sum_real = 0, sum_imag = 0;
    for(size_t i = 0; i < N; ++i){
      sum_real += out_real[i];
      sum_imag += out_imag[i];
    }
    printf(" Sum %f +i %f\n", sum_real, sum_imag);
    free(real);
    free(imag);
    free(out_real);
    free(out_imag);
    archr_fft_destroy_planner_2d_f32(&a_planner);
  }
  {
    //input and output are interleaved complex numbers. (storage format riririri...)
    size_t height = 2048, width = 512;
    size_t N = height * width;
    archr_fft_planner_2d_zf32_t a_planner;
    archr_fft_init_planner_zf32_2d(&a_planner, height, width, archr_fft_planner_direction_Forward);

    T *input = malloc(2 * sizeof(T) * N);//times 2 as these are complex numbers
    T *output = malloc(2 * sizeof(T) * N);
    for(size_t i = 0; i < N * 2; ++i){
      input[i] = (T)rand() / INT_MAX;
    }

    archr_fft_forward_transform_power_of_2_2d_zf32(output, input, &a_planner);
    T sum_real = 0, sum_imag = 0;
    for(size_t i = 0; i < N; ++i){
      sum_real += output[2 * i];
      sum_imag += output[2 * i + 1];
    }
    printf(" Sum %f +i %f\n", sum_real, sum_imag);
    free(input);
    free(output);
    archr_fft_destroy_planner_2d_zf32(&a_planner);
  }
  {
    //In a complex to real transform, the input is in interleaved complex format and the output is real.
    size_t height = 2048, width = 512;
    size_t N = height * width, output_N = height * (width / 2 + 1);
    archr_fft_planner_r2c_2d_f32_t a_planner;
    archr_fft_init_planner_r2c_2d_f32(&a_planner, height, width, archr_fft_planner_direction_Forward);

    T *input = malloc(sizeof(T) * N);
    //the input to a c2r transform is symmetric so only half is required.
    T *output = malloc(2 * sizeof(T) * output_N);
    for(size_t i = 0; i < N; ++i){
      input[i] = (T)rand() / INT_MAX;
    }

    archr_fft_forward_transform_power_of_2_2d_zf32f32(output, input, &a_planner);
    T sum_real = 0, sum_imag = 0;
    for(size_t i = 0; i < output_N; ++i){
      sum_real += output[2 * i];
      sum_imag += output[2 * i + 1];
    }
    printf(" Sum %f +i %f\n", sum_real, sum_imag);
    free(input);
    free(output);
    archr_fft_destroy_planner_r2c_2d_f32(&a_planner);
  }
}

Possible Output

sum 3.52373e+06 3.36391e+06
sum (3.14642e+06,2.63654e+06)
sum (2.92368e+06,-79444.6)
#include <archr/fft.hpp>

#include <iostream>
#include <vector>
#include <numeric>
#include <algorithm>

using T = float;

int main(){
  {
    //input and output are split complex numbers. (storage format rrrrrrr...., iiiiiiii....)
    std::size_t height = 2048, width = 2048;
    std::size_t N = height * width;
    archr::fft::planner_2d<T> a_planner(height, width, archr::fft::Forward);

    std::vector<T> real(N), imag(N), out_real(N), out_imag(N);
    std::generate(real.begin(), real.end(),
                 [](){return (T)std::rand() / (T)std::numeric_limits<int>::max();}
                 );
    std::generate(imag.begin(), imag.end(),
                 [](){return (T)std::rand() / (T)std::numeric_limits<int>::max();}
                 );

    archr::fft::forward_transform_power_of_2_2d( real.data(), imag.data()
                                               , out_real.data(), out_imag.data()
                                               , a_planner
                                               );
    T sum_real = std::accumulate(out_real.begin(), out_real.end(), T(0));
    T sum_imag = std::accumulate(out_imag.begin(), out_imag.end(), T(0));
    std::cout<<" sum " << sum_real << " " << sum_imag  << std::endl;
  }
  {
    //input and output are interleaved complex numbers. (storage format riririri...)
    std::size_t height = 2048, width = 2048;
    std::size_t N = height * width;
    archr::fft::planner_2d<std::complex<T>> a_planner(height, width, archr::fft::Forward);

    std::vector<std::complex<T>> input(N), output(N);
    auto my_fill = [](){
      T r_tmp = (T)std::rand() / (T)std::numeric_limits<int>::max();
      T i_tmp = (T)std::rand() / (T)std::numeric_limits<int>::max();
      return std::complex<T>{r_tmp, i_tmp};
    };
    std::generate(input.begin(), input.end(), my_fill);

    archr::fft::forward_transform_power_of_2_2d(input.data(), output.data(), a_planner);
    std::complex<T> sum = std::accumulate(output.begin(), output.end(), std::complex<T>(0));
    std::cout<<" sum " << sum << std::endl;
  }
  {
    //In a real to complex transform, the input is real and the output is in interleaved complex format.
    std::size_t height = 2048, width = 2048;
    std::size_t N = height * width;
    archr::fft::r2c_planner_2d<T> a_planner(height, width, archr::fft::Forward);

    std::vector<T> input(N);
    std::vector<std::complex<T>> output(height * (width / 2 + 1));
    //r2c transform only calculates half of the output as it is symmetric
    std::generate(input.begin(), input.end(), [](){return (T)std::rand() / (T)std::numeric_limits<int>::max();});

    archr::fft::forward_transform_power_of_2_2d(input.data(), output.data(), a_planner);
    std::complex<T> sum = std::accumulate(output.begin(), output.end(), std::complex<T>{0});
    std::cout<<" sum " << sum << std::endl;
  }
}

Possible Output

sum 3.52373e+06 3.36391e+06
sum (3.14642e+06,2.63654e+06)
sum (2.92368e+06,-79444.6)
program test
  use archr_fft
  integer(8), parameter    :: height = 2048, width = 2048, N = height * width
  !
  real(4)   , dimension(N) :: f32_r, f32_i, f32_r_out, f32_i_out
  COMPLEX(4), dimension(N) :: f32_z, f32_z_out
  COMPLEX(4), dimension(height * (width / 2 + 1)) :: f32_r2c_out
  !
  real(4)                  :: r0, i0
  real(4)                  :: min0, max0, sum_real, sum_imag
  COMPLEX(4)               :: sum_z, sum_r2c
  !
  !integer(8)               :: plan
  type(archr_fft_planner_2d_f32) :: plan_f32
  type(archr_fft_planner_2d_zf32) :: plan_zf32
  type(archr_fft_planner_r2c_2d_f32) :: plan_r2c
  !
  ! Init:
  min0 = 0
  max0 = 1
  sum_real = 0
  sum_imag = 0
  sum_z = (0,0)
  sum_r2c = (0,0)
  do i=1,N
    r0 = random_in(min0, max0)
    i0 = random_in(min0, max0)
    !
    f32_r(i) = r0
    f32_i(i) = i0
    f32_z(i) = COMPLEX(r0, i0)
  end do
  ! split complex format rrrrr...., iiiiiii
  call archr_fft_init_planner_2d_f32(plan_f32, height, width, 1);
  call archr_fft_forward_transform_power_of_2_2d_f32(f32_r_out, f32_i_out, f32_r, f32_i, plan_f32)
  do i=1,N
    sum_real = sum_real + f32_r_out(i)
    sum_imag = sum_imag + f32_i_out(i)
  end do
  print *, "sum ", sum_real, " ", sum_imag
  call archr_fft_destroy_planner_2d_f32(plan_f32);
  ! interleaved complex format r, i, r, i
  call archr_fft_init_planner_2d_zf32(plan_zf32, height, width, 1);
  call archr_fft_forward_transform_power_of_2_2d_zf32(f32_z_out, f32_z, plan_zf32)
  do i=1,N
    sum_z = sum_z + f32_z_out(i)
  end do
  print *, "sum ", sum_z
  call archr_fft_destroy_planner_2d_zf32(plan_zf32);
  ! real to complex transform
  call archr_fft_init_planner_2d_r2c_f32(plan_r2c, height, width, 1);
  call archr_fft_forward_transform_power_of_2_2d_zf32f32(f32_r2c_out, f32_r, plan_r2c)
  do i=1,N / 2 + 1
    sum_r2c = sum_r2c + f32_r2c_out(i)
  end do
  print *, "sum ", sum_r2c
  call archr_fft_destroy_planner_2d_r2c_f32(plan_r2c);
contains
  ! Generate a random number within a range
  function random_in(mn, mx) result(r)
    real(4) :: r
    real(4), intent(in) :: mn, mx
    r = mn + (rand() * (mx - mn))
  end function random_in
end program

Possible Output

sum 3.52373e+06 3.36391e+06
sum (3.14642e+06,2.63654e+06)
sum (2.92368e+06,-79444.6)
atomsLoad('archr');

// Increase the stacksize
v = getversion('scilab');
if v(1) < 6 stacksize('max'); end

// The number of internal iterations
iter = 10;

//
sizes = [];
times = [];
speed = [];

// Main loop
for n = 10:12
    sz = 2^n;
    I  = rand(sz, sz);
    I  = (I > ((max(I) - min(I)) / 2)) * 255;
    I  = I + sqrt(-1) * 0; // Complex matrix

    fftw_flags('FFTW_MEASURE');
    fft2(I);
    timer();
    for j = 1:iter
        R1 = fft2(I);
    end
    T1 = timer();
    
    p = archr_fft_planner2_zf64(sz, sz);
    timer();
    for j = 1:iter
        R2 = archr_fft_fft2p2(I, sz, sz, p);
    end
    T2 = timer();

    sizes = [sizes n];
    times = [times; T1 T2];
    speed = [speed; 1 (T1 / T2)];

    printf("\n");
    printf("============================\n");
    printf("Size is: %d (2^%d)\n", sz, n);
    printf("\n");
    printf("Raw timing:\n");
    printf("  SciLab: %f\n", T1);
    printf("  Arch-R: %f\n", T2);
    printf("----------------------------\n");
    printf("Speedup:\n");
    printf("  x%f\n", T1 / T2);
    printf("============================\n");
end

// Plot timings
scf(0);
bar(sizes, times);
xlabel("Size (power-of-2)");
ylabel("Time (s)");
legend(["SciLab"; "Archr-R"], 'in_upper_left');
title("Raw timings between Archr-R and SciLab");

// Plot speedup
scf(1);
bar(sizes, speed);
xlabel("Size (power-of-2)");
ylabel("Factor of speed (n times faster than)");
legend(["SciLab"; "Archr-R"], 'in_upper_left');
title("Speedup factor betweeb Archr-R and SciLab");

Possible Output

Start Toolbox archr
	Load macros
	Load gateways


============================
Size is: 1024 (2^10)

Raw timing:
  SciLab: 0.464000
  Arch-R: 0.220000
----------------------------
Speedup:
  x2.109091
============================

============================
Size is: 2048 (2^11)

Raw timing:
  SciLab: 8.724000
  Arch-R: 1.248000
----------------------------
Speedup:
  x6.990385
============================

============================
Size is: 4096 (2^12)

Raw timing:
  SciLab: 24.004000
  Arch-R: 5.344000
----------------------------
Speedup:
  x4.491766
============================

See Also