# ---------------------------------------------------------------------------
# pwfzv
#
# Processes five input samples, one per loop iteration.  For each sample k:
#
#   output[k] = sum_i( zirwfir[i] * pwf_z_coeff[i+1] )
#             - sum_i( zirwiir[i] * pwf_p_coeff[i+1] )
#             + input[k]                                   (i = 0 .. 9)
#
# and then both 10-element memories are shifted by one position, with
# input[k] entering zirwfir[0] and output[k] entering zirwiir[0].
# The updated memories and the five outputs are written back at the end.
#
# Arguments:
#   $4 = input array       (5 elements loaded)
#   $5 = output array      (5 elements stored)
#   $6 = zirwfir           (10 elements loaded, 10 stored back)
#   $7 = pwf_z_coeff       (10 elements loaded starting at address $7 + 4)
#   $8 = zirwiir           (10 elements loaded, 10 stored back)
#   $9 = pwf_p_coeff       (10 elements loaded starting at address $9 + 4)
#
# Registers written: $7, $9 (both advanced by 4), $12-$15, $f4-$f7,
# $vr4-$vr13, and the vector control state touched by vmtcr/vfset.
# NOTE(review): `blt` is normally an assembler macro that also uses $at --
# confirm for this toolchain.
# NOTE(review): the exact semantics of vext/vins/vfset are taken from the
# original author's comments; verify against the vector ISA manual.
# ---------------------------------------------------------------------------
        .set    noreorder
        .set    nobopt
        .ent    pwfzv
        .globl  pwfzv
pwfzv:
        .frame  $sp, 0, $31

        # ---- Setup: vector configuration and operand loads -----------------
        li      $12, 10
        li      $13, 2
        vmtcr   $vpw, $13               # vpw = 32 bits (per original author)
        vmtcr   $vlr0, $12              # vector length = 10
        vfset   $vf0                    # presumably enables all elements -- TODO confirm
        vld.w   $vr5, $6                # $vr5 = zirwfir[0...9]
        vld.w   $vr7, $8                # $vr7 = zirwiir[0...9]
        addiu   $7, 4                   # skip coefficient 0 (modifies $7)
        vld.w   $vr6, $7                # $vr6 = pwf_z_coeff[1...10]
        addiu   $9, 4                   # skip coefficient 0 (modifies $9)
        vld.w   $vr8, $9                # $vr8 = pwf_p_coeff[1...10]
        li      $12, 5
        vmtcr   $vlr0, $12              # vector length = 5 for the input load
        vld.w   $vr4, $4                # $vr4 = input[0...4]
        li      $12, 10
        vmtcr   $vlr0, $12              # back to vector length = 10
        li      $12, 0                  # $12 = k (sample index)
        li      $13, 5                  # loop bound: iterate while k < 5 (k = 0..4)
        li.s    $f4, 0.0                # zero addend; vadd.s.sv with it acts as a vector copy
        li      $14, 1                  # shift distance used by the vins.vv memory updates

        # ---- Per-sample loop (entered with vector length = 10) ---------------
loop:
        vext.s.sv $f5, $12, $vr4        # $f5 = input[k]
        vmul.s.vv $vr9, $vr5, $vr6      # $vr9 = zirwfir[0...9] * pwf_z[1...10]
        vins.vv $vr11, $14, $vr5        # $vr11[1...9] = zirwfir[0...8]
        vins.s.sv $vr11, $0, $f5        # $vr11[0] = input[k]
        vadd.s.sv $vr5, $f4, $vr11      # zirwfir updated ($vr5 = 0.0 + $vr11)
        vmul.s.vv $vr10, $vr7, $vr8     # $vr10 = zirwiir[0...9] * pwf_p[1...10]
        vsub.s.vv $vr9, $vr9, $vr10     # $vr9 = FIR products - IIR products

        # Sum $vr9[0..9] by repeated halving:
        #   step 1: five partial sums  $vr10[i] = $vr9[i+5] + $vr9[i], i = 0..4
        li      $15, 5
        vext.vv $vr10, $15, $vr9        # $vr10[0...] = $vr9[5...]
        vmtcr   $vlr0, $15              # vector length = 5
        vext.vv $vr11, $0, $vr9         # $vr11[0...4] = $vr9[0...4]
        vadd.s.vv $vr10, $vr10, $vr11
        #   step 2: element 4 is peeled off into $f7; elements 0..3 are folded
        #           into two partial sums $vr11[j] = $vr10[j+2] + $vr10[j]
        li      $15, 4
        vext.s.sv $f7, $15, $vr10       # $f7 = $vr10[4]
        vmtcr   $vlr0, $15              # vector length = 4
        li      $15, 2
        vext.vv $vr11, $15, $vr10       # $vr11[0...] = $vr10[2...]
        vmtcr   $vlr0, $15              # vector length = 2
        vext.vv $vr12, $0, $vr10        # $vr12[0...1] = $vr10[0...1]
        vadd.s.vv $vr11, $vr11, $vr12
        #   step 3: finish the reduction in scalar floating point
        li      $15, 1
        vext.s.sv $f6, $15, $vr11       # $f6 = $vr11[1]
        add.s   $f7, $f7, $f6
        vext.s.sv $f6, $0, $vr11        # $f6 = $vr11[0]
        add.s   $f7, $f7, $f6           # $f7 = sum of $vr9[0..9]
        add.s   $f7, $f7, $f5           # add input[k]: $f7 is the new output[k]
        vins.s.sv $vr13, $12, $f7       # $vr13[k] = output[k]

        # Restore full vector length and shift output[k] into the IIR memory.
        li      $15, 10
        vmtcr   $vlr0, $15              # vector length = 10
        vins.vv $vr12, $14, $vr7        # $vr12[1...9] = zirwiir[0...8]
        vins.s.sv $vr12, $0, $f7        # $vr12[0] = output[k]
        vadd.s.sv $vr7, $f4, $vr12      # zirwiir updated ($vr7 = 0.0 + $vr12)

        addi    $12, 1                  # k = k + 1
        blt     $12, $13, loop          # repeat while k < 5
        nop                             # branch delay slot (noreorder is in effect)

        # ---- Write back filter memories and the five outputs ---------------
        vst.w   $vr5, $6                # store zirwfir (vector length is 10 here)
        vst.w   $vr7, $8                # store zirwiir
        li      $12, 5
        vmtcr   $vlr0, $12              # vector length = 5 for the output store
        vst.w   $vr13, $5               # store output[0...4]
        jr      $31                     # return to caller
        nop                             # jump delay slot
        .end    pwfzv