Prime Factor Algorithm: Three Decomposition

Assume N is decomposed in order N1,N2,N3N_1,N_2,N_3 which are coprime (GCD=1).

n Index Generation

n=(N2N3n1+A1n2~)modNn2~=(N3n2+A2n3)modN2N3n = (N_2N_3n_1 + A_1\tilde{n_2}) mod N \\ \tilde{n_2} = (N_3 n_2 + A_2 n_3) mod N_2 N_3

A1=p1N1=Q1N2N3+1,Q1=q1A2=p2N2=Q2N3+1,Q2=q2N1A_1 = p_1 N_1 = Q_1 N_2 N_3 + 1, Q_1 = q_1 \\ A_2 = p_2 N_2 = Q_2 N_3 + 1, Q_2 = q_2 N_1 \\

QxQ_x is calculated via the Julia extended Euclidean algorithm gcd(a,b)=ax+by\text{gcd(a,b)}=ax+by.

function extended_euclid(a, b)
    @assert a>=0 && b>=0 "a and b must be non-negative"
    if a == 0
        return (b, 0, 1)
    else
        (g, y, x) = extended_euclid(mod(b, a), a)
    end
    (g, x - (b ÷ a) * y, y)
end

Substitution of (2) into (1) yields

n=(N2N3n1+p1N1N3n2+p1N1p2N2n3) mod Nn = (N_2 N_3 n_1 + p_1 N_1 N_3 n_2 + p_1 N_1 p_2 N_2 n_3) \space \text{mod} \space N

The reversed order N3,N2,N1N_3,N_2,N_1 is omitted here. This input equation can be rewritten [3] as

n=(N2N3n1+n2~) mod Nn2~=(N3n2+n3) mod N2N3n = (N_2N_3 n_1' + \tilde{n_2}) \space \text{mod N} \\ \tilde{n_2} = (N_3 n_2' + n_3) \space \text{mod} \space N_2 N_3

n1=<n1+Q1n2~>N1n2=<n2+Q2n3>N2n_1 = \left< n_1' + Q_1' \tilde{n_2} \right>_{N_1} \\ n_2 = \left< n_2' + Q_2' n_3 \right>_{N_2}

n=<N2N3n1+N3n2+n3> mod N[3]n = \left< N_2 N_3 n_1' + N_3 n_2' + n_3 \right> \space \text{mod} \space N [3]

Qx=(NxQx) mod NxQ_x' = (N_x - Q_x) \space \text{mod} \space N_x

The n2~\tilde{n_2} counter increments when wrap condition (W.C.) is met: n2=N21n_2'=N_2-1 and n3=N31n_3'=N_3-1.

pfa 3 n-index

nforward=((N2N3)n1+N1×<N11>N2N3×n~2,forward) mod Nn~2,forward=((N3n2+N2×<N21>)N3×n3) mod N2N3n_{forward} = ((N_2 N_3) n_1 + N_1 \times \left< N_1^{-1} \right>_{N_2N_3} \times \tilde{n}_{2,forward}) \space \text{mod} \space N \\ \tilde{n}_{2,forward} = ((N_3 n_2 + N_2 \times \left< N_2^{-1} \right>)_{N_3} \times n_3) \space \text{mod} \space N_2N_3

Q1(N2N3)+1=<N11>N2N3×N1Q2N3+1=<N21>N3×N2nforward=((N2N3)n1+(Q1N2N3+1)×(N3n2+(Q2N3+1)×n3)))Q_1 (N_2 N_3) + 1 = \left< N_1^{-1}\right>_{N_2 N_3} \times N_1 \\ Q_2 N_3 + 1 = \left< N_2^{-1} \right>_{N_3} \times N_2 \\[0.5cm] n_{forward} = ((N_2 N_3) n_1 + (Q_1 N_2 N_3 + 1) \times (N_3 n_2 + (Q_2 N_3 + 1 )\times n_3)))

n-Indexer Implementation

R1:={0if W.C.(R1+Q1) mod N1otherwiseR2:={0if n3=N31(R2+Q2) mod N2otherwisen1=(n1+R1) mod N1n2=(n2+R2) mod N2R_1 := \begin{cases} 0 & \text{if W.C.} \\ (R_1 + Q_1') \space \text{mod} \space N_1 & \text{otherwise} \end{cases} \\[0.5cm] R_2 := \begin{cases} 0 & \text{if} \space n_3' = N_3 - 1 \\ (R_2 + Q_2') \space \text{mod} \space N_2 & \text{otherwise} \end{cases} \\[0.5cm] n_1 = (n_1' + R_1) \space \text{mod} \space N_1 \\ n_2 = (n_2' + R_2) \space \text{mod} \space N_2

function nmap(Y, X, N1, N2, N3, Q1P, Q2P, Q3P, Q4P)
    mask_mux_mod(a, B) = a - (B & -(a ≥ B))

    rhs_n = 1
    for n1p = 0:N1-1
        R1 = 0
        for n2p = 0:N2-1
            R2 = 0
            for n3p = 0:N3-1
                n1 = mask_mux_mod(n1p + R1, N1)
                n2 = mask_mux_mod(n2p + R2, N2)
                lhs_n = n1 + N1 * n2 + N1 * N2 * n3p + 1
                Y[lhs_n] = X[rhs_n]
                R1 = mask_mux_mod(R1 + Q1P, N1)
                R2 = mask_mux_mod(R2 + Q2P, N2)
                rhs_n += 1
            end
        end
    end
end

k Index Generation

k=(B1k1+N1k2~) mod Nk2~=(B2k2+N2k3) mod N2N3k=(B_1k_1+N_1\tilde{k_2}) \space \text{mod} \space N \\ \tilde{k_2}=(B_2k_2+N_2k_3) \space \text{mod} \space N_2N_3

B2=p3N3=Q3N2+1,Q3=q3N1B1=p4(N2N3)=Q4N1+1=<(N2N3)1>N1×N2N3 [2]B_2 = p_3 N_3 = Q_3 N_2 + 1, Q_3 = q_3 N_1 \\ B_1 = p_4(N_2 N_3) = Q_4 N_1 + 1 = \left< (N_2N_3)^{-1} \right>_{N_1} \times N_2 N_3 \space \text{[2]}

k=(p2p3N2N3k1+N1k2~) mod Nk2~=(p3N3k2+N2k3) mod N2N3k=(p2p3N2N3k1+p3N1N3k2+N1N2k3) mod Nk=(p_2 p_3 N_2 N_3 k_1 + N_1\tilde{k_2}) \space \text{mod} \space N \\ \tilde{k_2}=(p3 N_3 k_2 + N_2 k_3) \space \text{mod} \space N_2N_3 \\[0.5cm] k=(p_2 p_3 N_2 N_3 k_1 + p_3 N_1 N_3 k_2 + N_1 N_2 k_3) \space \text{mod} \space N

kforward=((N2N3)×<(N2N3)1>N1×k1+N1k~2,forward) mod N(thesis 2.68)k_{forward} = ((N_2N_3) \times \left< (N_2 N_3)^{-1} \right>_{N_1} \times k_1 + N_1 \tilde{k}_{2, forward}) \space \text{mod} \space N \tag{thesis 2.68}

k~2,forward=(N3×<N31>N2×k2+N2k3) mod N2N3(thesis 2.69)\tilde{k}_{2,forward} = (N_3 \times \left< N_3^{-1} \right>_{N_2} \times k_2 + N_2 k_3 ) \space \text{mod} \space N_2N_3 \tag{thesis 2.69}

Substitute the following:

Q3N2+1=<N31>N2×N3(thesis 2.64)Q_3 N_2 + 1 = \left< N_3^{-1} \right>_{N_2} \times N_3 \tag{thesis 2.64}

Q4N1+1=<(N2N3)1>N1×N2N3(thesis 2.65)Q_4 N_1 + 1 = \left< (N_2 N_3)^{-1} \right>_{N_1} \times N_2 N_3 \tag{thesis 2.65}

To yield:

kforward=((Q4N1+1)×k1+N1k~2,forward) mod Nk_{forward} = ((Q_4 N_1 + 1) \times k_1 + N_1 \tilde{k}_{2,forward}) \space \text{mod} \space N

k~2,forward=<(Q3N2+1)×k2+N2k3>N2N3\tilde{k}_{2,forward} = \left< (Q_3 N_2 + 1) \times k_2 + N_2 k_3 \right>_{N_2N_3}

Solve for k1,k2,k3k_1',k_2',k_3':

k=N2N3k1+N3k2+k3=((Q4N1+1)×k1+N1×<(Q3N2+1)×k2+N2×k3>N2N3) mod Nk=N_2 N_3 k_1' + N_3 k_2' + k_3' = ((Q_4N_1+1) \times k_1 + N_1 \times \left< (Q_3N_2+1) \times k_2 + N_2 \times k_3 \right>_{N_2 N_3}) \space \text{mod} \space N

Candidate solution (what was implemented):

k1=k1k_1 = k_1'

P1=<N2Q4>N2=mod(Q4,N2)P_1 = \left< N_2 - Q_4 \right>_{N_2} = mod(-Q_4, N_2)

P2=<N3(Q3÷N1)>N3=mod(Q3÷N1,N3)P_2 = \left< N_3 - (Q_3 \div N_1) \right>_{N_3} = mod(-Q_3 \div N_1,N_3)

k2=<k2+<P1×k1>N2>N2k_2 = \left< k_2' + \left< P_1 \times k_1' \right>_{N_2} \right>_{N_2}

k3=<k3+<P2×(k1+N1k2)>N3>N3k_3 = \left< k_3' + \left< P_2 \times ( k_1' + N_1 * k_2') \right>_{N_3} \right>_{N_3}

Modulo Identities for Proof

(ab) mod N=(a mod N×b mod N) mod N(I1)(ab) \space \text{mod} \space N = (a \space \text{mod} \space N \times b \space \text{mod} \space N) \space \text{mod} \space N \tag{I1}

(a mod N) mod N=a mod N(I2)(a \space \text{mod} \space N) \space \text{mod} \space N = a \space \text{mod} \space N \tag{I2}

(a+b) mod N=[(a mod N)+(b mod N)] mod N(I3)(a + b) \space \text{mod} \space N = [(a \space \text{mod} \space N) + (b \space \text{mod} \space N)] \space \text{mod} \space N \tag{I3}

(a mod N+b) mod N=(a+b) mod N(I4)(a \space \text{mod} \space N + b) \space \text{mod} \space N = (a + b) \space \text{mod} \space N \tag{I4}

(Ab) mod (AN)=A(b mod N)(I5)(Ab) \space \text{mod} \space (AN) = A(b \space \text{mod} \space N) \tag{I5}

Proof of Candidate Solution for k Index

k=((Q4N1+1)×k1+N1×<(Q3N2+1)×k2+N2×k3>N2N3) mod Nk=((Q_4N_1+1) \times k_1 + N_1 \times \left< (Q_3N_2+1) \times k_2 + N_2 \times k_3 \right>_{N_2 N_3}) \space \text{mod} \space N

k index implementation

R1:={0if W.C.(R1+P1) mod N2otherwiseR2:={0ifn3=N31(R2+P2) mod N3n2=(n2+R1) mod N2n3=(n3+R2) mod N3R_1 := \begin{cases} 0 & \text{if W.C.} \\ (R_1 + P_1) \space \text{mod} \space N_2 & \text{otherwise} \end{cases} \\[0.5cm] R_2 := \begin{cases} 0 & if n_3' = N_3 -1 \\ (R_2 + P_2) \space \text{mod} \space N_3 \end{cases} \\[0.5cm] n_2 = (n_2' + R_1) \space \text{mod} \space N_2 \\ n_3 = (n_3' + R_2) \space \text{mod} \space N_3


function Qs(N1, N2, N3)
    (g1, p1, q1) = extended_euclid(N1, N2 * N3)
    (g2, p2, q2) = extended_euclid(N2, N1*N3)
    (g3, p3, q3) = extended_euclid(N3, N1*N2)
    (g4, p4, q4) = extended_euclid(N2*N3, N1)

    @assert g1 == 1 && g2 == 1 && g3 == 1 && g4 == 1 "N1, N2, N3 must be coprime"
    (p1, p2, p3, p4, -q1, -q2*N1, -q3*N1, -q4)
end

# TBD: Prove that P1 and P2 below are correct.
P1 = mod(-Q4, N2)
P2 = mod(-Q3 ÷ N1, N3)

function kmap!(Y, X, N1, N2, N3, P1, P2)
    mask_mux_mod(a, B) = a - (B & -(a ≥ B))

    lhs_k = 1
    for k3p = 0:N3-1
        R2 = 0
        for k2p = 0:N2-1
            R1 = 0
            for k1p = 0:N1-1
                k2 = mask_mux_mod(k2p + R1, N2)
                k3 = mask_mux_mod(k3p + R2, N3)
                rhs_k = k1p + N1 * k2 + N1 * N2 * k3 + 1
                Y[lhs_k] = X[rhs_k]
                R1 = mask_mux_mod(R1 + P1, N2)
                R2 = mask_mux_mod(R2 + P2, N3)
                lhs_k += 1
            end
        end
    end
end

References

[1] A. Wang, J. Bachrach and B. Nikolié, "A generator of memory-based, runtime-reconfigurable 2N3M5K FFT engines," 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Shanghai, China, 2016, pp. 1016-1020, doi: 10.1109/ICASSP.2016.7471829

[2] Wang, Angie. Ph.D. Dissertation, UC Berkeley. "Agile Design of Generator-Based Signal Processing Hardware," 2018.

[3] C. -F. Hsiao, Y. Chen and C. -Y. Lee, "A Generalized Mixed-Radix Algorithm for Memory-Based FFT Processors," in IEEE Transactions on Circuits and Systems II: Express Briefs, vol. 57, no. 1, pp. 26-30, Jan. 2010, doi: 10.1109/TCSII.2009.2037262