Prime Factor Algorithm: Three Decomposition
Assume N is decomposed in order N1,N2,N3 which are coprime (GCD=1).
n Index Generation
n=(N2N3n1+A1n2~)modNn2~=(N3n2+A2n3)modN2N3
A1=p1N1=Q1N2N3+1,Q1=q1A2=p2N2=Q2N3+1,Q2=q2N1
Qx is calculated via the Julia extended Euclidean algorithm gcd(a,b)=ax+by.
function extended_euclid(a, b)
@assert a>=0 && b>=0 "a and b must be non-negative"
if a == 0
return (b, 0, 1)
else
(g, y, x) = extended_euclid(mod(b, a), a)
end
(g, x - (b ÷ a) * y, y)
end
Substitution of (2) into (1) yields
n=(N2N3n1+p1N1N3n2+p1N1p2N2n3) mod N
The reversed order N3,N2,N1 is omitted here. This input equation can be rewritten [3] as
n=(N2N3n1′+n2~) mod Nn2~=(N3n2′+n3) mod N2N3
n1=⟨n1′+Q1′n2~⟩N1n2=⟨n2′+Q2′n3⟩N2
n=⟨N2N3n1′+N3n2′+n3⟩ mod N[3]
Qx′=(Nx−Qx) mod Nx
The n2~ counter increments when wrap condition (W.C.) is met: n2′=N2−1 and n3′=N3−1.
nforward=((N2N3)n1+N1×⟨N1−1⟩N2N3×n~2,forward) mod Nn~2,forward=((N3n2+N2×⟨N2−1⟩)N3×n3) mod N2N3
Q1(N2N3)+1=⟨N1−1⟩N2N3×N1Q2N3+1=⟨N2−1⟩N3×N2nforward=((N2N3)n1+(Q1N2N3+1)×(N3n2+(Q2N3+1)×n3)))
n-Indexer Implementation
R1:={0(R1+Q1′) mod N1if W.C.otherwiseR2:={0(R2+Q2′) mod N2if n3′=N3−1otherwisen1=(n1′+R1) mod N1n2=(n2′+R2) mod N2
function nmap(Y, X, N1, N2, N3, Q1P, Q2P, Q3P, Q4P)
mask_mux_mod(a, B) = a - (B & -(a ≥ B))
rhs_n = 1
for n1p = 0:N1-1
R1 = 0
for n2p = 0:N2-1
R2 = 0
for n3p = 0:N3-1
n1 = mask_mux_mod(n1p + R1, N1)
n2 = mask_mux_mod(n2p + R2, N2)
lhs_n = n1 + N1 * n2 + N1 * N2 * n3p + 1
Y[lhs_n] = X[rhs_n]
R1 = mask_mux_mod(R1 + Q1P, N1)
R2 = mask_mux_mod(R2 + Q2P, N2)
rhs_n += 1
end
end
end
end
k Index Generation
k=(B1k1+N1k2~) mod Nk2~=(B2k2+N2k3) mod N2N3
B2=p3N3=Q3N2+1,Q3=q3N1B1=p4(N2N3)=Q4N1+1=⟨(N2N3)−1⟩N1×N2N3 [2]
k=(p2p3N2N3k1+N1k2~) mod Nk2~=(p3N3k2+N2k3) mod N2N3k=(p2p3N2N3k1+p3N1N3k2+N1N2k3) mod N
kforward=((N2N3)×⟨(N2N3)−1⟩N1×k1+N1k~2,forward) mod N(thesis 2.68)
k~2,forward=(N3×⟨N3−1⟩N2×k2+N2k3) mod N2N3(thesis 2.69)
Substitute the following:
Q3N2+1=⟨N3−1⟩N2×N3(thesis 2.64)
Q4N1+1=⟨(N2N3)−1⟩N1×N2N3(thesis 2.65)
To yield:
kforward=((Q4N1+1)×k1+N1k~2,forward) mod N
k~2,forward=⟨(Q3N2+1)×k2+N2k3⟩N2N3
Solve for k1′,k2′,k3′:
k=N2N3k1′+N3k2′+k3′=((Q4N1+1)×k1+N1×⟨(Q3N2+1)×k2+N2×k3⟩N2N3) mod N
Candidate solution (what was implemented):
k1=k1′
P1=⟨N2−Q4⟩N2=mod(−Q4,N2)
P2=⟨N3−(Q3÷N1)⟩N3=mod(−Q3÷N1,N3)
k2=⟨k2′+⟨P1×k1′⟩N2⟩N2
k3=⟨k3′+⟨P2×(k1′+N1∗k2′)⟩N3⟩N3
Modulo Identities for Proof
(ab) mod N=(a mod N×b mod N) mod N(I1)
(a mod N) mod N=a mod N(I2)
(a+b) mod N=[(a mod N)+(b mod N)] mod N(I3)
(a mod N+b) mod N=(a+b) mod N(I4)
(Ab) mod (AN)=A(b mod N)(I5)
Proof of Candidate Solution for k Index
k=((Q4N1+1)×k1+N1×⟨(Q3N2+1)×k2+N2×k3⟩N2N3) mod N
k index implementation
R1:={0(R1+P1) mod N2if W.C.otherwiseR2:={0(R2+P2) mod N3ifn3′=N3−1n2=(n2′+R1) mod N2n3=(n3′+R2) mod N3
function Qs(N1, N2, N3)
(g1, p1, q1) = extended_euclid(N1, N2 * N3)
(g2, p2, q2) = extended_euclid(N2, N1*N3)
(g3, p3, q3) = extended_euclid(N3, N1*N2)
(g4, p4, q4) = extended_euclid(N2*N3, N1)
@assert g1 == 1 && g2 == 1 && g3 == 1 && g4 == 1 "N1, N2, N3 must be coprime"
(p1, p2, p3, p4, -q1, -q2*N1, -q3*N1, -q4)
end
P1 = mod(-Q4, N2)
P2 = mod(-Q3 ÷ N1, N3)
function kmap!(Y, X, N1, N2, N3, P1, P2)
mask_mux_mod(a, B) = a - (B & -(a ≥ B))
lhs_k = 1
for k3p = 0:N3-1
R2 = 0
for k2p = 0:N2-1
R1 = 0
for k1p = 0:N1-1
k2 = mask_mux_mod(k2p + R1, N2)
k3 = mask_mux_mod(k3p + R2, N3)
rhs_k = k1p + N1 * k2 + N1 * N2 * k3 + 1
Y[lhs_k] = X[rhs_k]
R1 = mask_mux_mod(R1 + P1, N2)
R2 = mask_mux_mod(R2 + P2, N3)
lhs_k += 1
end
end
end
end
References
[1] A. Wang, J. Bachrach and B. Nikolié, "A generator of memory-based, runtime-reconfigurable 2N3M5K FFT engines," 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Shanghai, China, 2016, pp. 1016-1020, doi: 10.1109/ICASSP.2016.7471829
[2] Wang, Angie. Ph.D. Dissertation, UC Berkeley. "Agile Design of Generator-Based Signal Processing Hardware," 2018.
[3] C. -F. Hsiao, Y. Chen and C. -Y. Lee, "A Generalized Mixed-Radix Algorithm for Memory-Based FFT Processors," in IEEE Transactions on Circuits and Systems II: Express Briefs, vol. 57, no. 1, pp. 26-30, Jan. 2010, doi: 10.1109/TCSII.2009.2037262