# autocorrv -- vectorised autocorrelation kernel.
#
# Arguments (as documented by the original author):
#   $4 = address of the input vector X
#   $5 = address of the output vector R
#   $6 = K
#   $7 = M
#   $8 = N
#
# Going by the per-instruction comments below, each loop iteration takes the
# K+1 element window X[i .. i+K], scales it by its last element X[i+K], and
# accumulates it into $vr7, for i = M-K .. N-K-1.  The accumulator is then
# stored backwards starting at &R[K], which would make
#   R[k] = sum over n = M .. N-1 of X[n] * X[n-k],   k = 0 .. K
# NOTE(review): this formula assumes vext/vsts behave as the comments imply --
# confirm against the vector ISA manual.
#
# Scalar registers written: $5, $6, $9, $10, $12, $13, $14, $15
# (plus the assembler temporary used by the `blt` pseudo-instruction).
# Vector/FP registers written: $vr4, $vr5, $vr6, $vr7, $f4.
#
# The file uses `.set noreorder`, so every branch/jump below is followed by an
# explicit delay-slot instruction.

        .set    noreorder
        .set    nobopt
        .ent    autocorrv
        .globl  autocorrv
autocorrv:
        .frame  $sp, 0, $31

        # ---- setup: load X and precompute loop bounds -------------------
        li      $12, 2
        vmtcr   $vpw, $12               # vpw = 32 bits
        # NOTE(review): N+1 is computed here but $12 is overwritten (li -1)
        # before it is ever read; vlr0 below is set from $8 (N), not $12.
        # Confirm whether N+1 was the intended load length.
        addiu   $12, $8, 1
        vmtcr   $vlr0, $8               # useful length of X = N
        # NOTE(review): purpose of vfset is not documented in this file;
        # presumably initialises a vector flag/mask -- confirm.
        vfset   $vr0
        vld.w   $vr4, $4                # $vr4 = X[0...N]
        subu    $13, $7, $6             # $13 = M-K (will count to N-K-1)
        subu    $14, $8, $6             # $14 = N-K
        addu    $15, $7, $0             # $15 = M (will count to N-1)
        addiu   $9, $15, 1              # $9 = M+1 (will count to N) = vlr for vextract
        addiu   $10, $6, 1              # $10 = K+1 = vlr for most computations
        vmtcr   $vlr0, $10
        # NOTE(review): zeroing by x - x yields NaN if $vr7 happens to hold
        # NaN/Inf on entry -- confirm this is acceptable.
        vsub.s.vv $vr7, $vr7, $vr7      # $vr7 = 0 (will hold result)

        # ---- main loop: one window of K+1 elements per iteration --------
        # Bottom-tested: the body runs at least once even if M >= N.
loop:
        vmtcr   $vlr0, $9
        vext.vv $vr5, $13, $vr4         # $vr5 = X[$13....$13+K]
        vmtcr   $vlr0, $10              # vlr = K+1
        vext.s.sv $f4, $6, $vr5         # $f4 = X[$13+K]
        vmul.s.sv $vr6, $f4, $vr5       # $vr6 = X[$13+K] * X[$13....$13+K]
        vadd.s.vv $vr7, $vr6, $vr7      # result += temp
        addiu   $13, 1                  # advance window start
        addiu   $15, 1                  # NOTE(review): $15 is never read after setup
        addiu   $9, 1                   # advance vlr used by vext.vv
        blt     $13, $14, loop          # continue while window start < N-K
        nop                             # branch delay slot

        # ---- epilogue: write the K+1 results out in reverse -------------
        li      $12, -1                 # store stride
        sll     $6, 2                   # $6 = 4*K
        addu    $5, $5, $6              # $5 = &(R[K])
        # NOTE(review): confirm whether the vsts.w stride is expressed in
        # elements or in bytes; -1 is only correct for an element stride.
        vsts.w  $vr7, $5, $12           # store results in memory in reverse order
        jr      $31
        nop                             # jump delay slot
        .end    autocorrv