# ---------------------------------------------------------------------------
# cbv - vectorised codebook search.
#
# For each of 128 five-element codewords this routine computes the
# correlation with a five-element input signal, derives a per-codeword
# gain index (idxg), evaluates a distortion value d, and returns an index
# built from the position of the smallest d and that codeword's idxg.
#
# Arguments:
#   $4 = input vector (signal), five single-precision floats
#   $5 = cb_shape      (codewords, read with a stride of 5 elements)
#   $6 = shape_energy  (read with a unit-stride vector load)
#   $7 = cb_gain2      (word table indexed by idxg)
#   $8 = cb_gain_sq    (word table indexed by idxg)
# Returns:
#   $2 = (index of minimum d << 3) + idxg[index of minimum d]
#
# Scratch: $12-$15, $f4-$f9, $vr4-$vr17, $vf0.  $vlr0 is rewritten several
# times and holds 4 on return.
#
# The file is assembled with .set noreorder, so the instruction after every
# branch/jump is its delay slot (filled with nop here).
# ---------------------------------------------------------------------------
	.set	noreorder
	.set	nobopt
	.ent	cbv
	.globl	cbv
cbv:
	.frame	$sp, 0, $31

	# ---- Vector unit setup --------------------------------------------
	# Use strided load of codewords
	li	$12, 128
	li	$13, 2
	vmtcr	$vpw, $13		# vpw = 32 bits
	vmtcr	$vlr0, $12		# mvl = 128
	vmtcr	$vlr1, $12
	vfset	$vf0			# set every flag in $vf0

	# ---- Load the five signal samples ---------------------------------
	l.s	$f4, 0($4)		# Load signal into 5 floating pt registers
	l.s	$f5, 4($4)
	l.s	$f6, 8($4)
	l.s	$f7, 12($4)
	l.s	$f8, 16($4)

	# ---- Load the codebook, one vector register per codeword element ---
	li	$12, 5			# $12 = stride = 5 elements
	addu	$13, $5, 0		# $13 = running pointer into cb_shape
	vlds.w	$vr4, $13, $12		# element 0 of every codeword
	addiu	$13, 4
	vlds.w	$vr5, $13, $12		# element 1
	addiu	$13, 4
	vlds.w	$vr6, $13, $12		# element 2
	addiu	$13, 4
	vlds.w	$vr7, $13, $12		# element 3
	addiu	$13, 4
	vlds.w	$vr8, $13, $12		# element 4

	# ---- cor = dot(signal, codeword) for all codewords ------------------
	vmul.s.sv	$vr9, $f4, $vr4		# Compute vector product of signal
	vmul.s.sv	$vr10, $f5, $vr5	# and codeword in parallel and store
	vadd.s.vv	$vr11, $vr9, $vr10	# result in $vr11 = cor
	vmul.s.sv	$vr9, $f6, $vr6
	vadd.s.vv	$vr11, $vr11, $vr9
	vmul.s.sv	$vr9, $f7, $vr7
	vadd.s.vv	$vr11, $vr11, $vr9
	vmul.s.sv	$vr9, $f8, $vr8
	vadd.s.vv	$vr11, $vr11, $vr9

	# ---- Gain index selection ------------------------------------------
	# NOTE(review): the original author left an open question here about
	# whether "vsub.vv $vr14, $vr14, $vr14" is the right way to clear the
	# register - confirm against the ISA manual.
	vsub.vv	$vr14, $vr14, $vr14	# $vr14 = idxg = 0
	li.s	$f9, 0.0
	li	$14, 4
	vadd.s.sv	$vr17, $f9, $vr11	# $vr17 = 0.0 + cor
	vabs.s	$vr17, $vr17		# $vr17 = pcor = |cor|
	vcmp.s.lt.vs	$vf0, $vr11, $f9	# $vf0 = (cor < 0)
	vadd.sv	$vr14, $14, $vr14	# if cor<0 idxg += 4
	vfset	$vf0			# back to all flags set

	vld.w	$vr12, $6		# $vr12 contains shape_energy
	li	$15, 1			# $15 = 1 (idxg increment)

	li.s	$f9, 0.708984375
	vmul.s.sv	$vr13, $f9, $vr12	# $vr13 = b0 = cb_gain_mid_0*sh_en
	vcmp.s.gt.vv	$vf0, $vr17, $vr13	# idxg will be stored in $vr14
	vadd.sv	$vr14, $15, $vr14	# if (pcor>b0) idxg++
	vfset	$vf0

	li.s	$f9, 1.240722656
	vmul.s.sv	$vr13, $f9, $vr12	# $vr13 = b1 = cb_gain_mid_1*sh_en
	vcmp.s.gt.vv	$vf0, $vr17, $vr13
	vadd.sv	$vr14, $15, $vr14	# if (pcor>b1) idxg++;
	vfset	$vf0

	li.s	$f9, 2.171264649
	vmul.s.sv	$vr13, $f9, $vr12	# $vr13 = b2 = cb_gain_mid_2*sh_en
	vcmp.s.gt.vv	$vf0, $vr17, $vr13
	vadd.sv	$vr14, $15, $vr14	# if (pcor>b2) idxg++;
	vfset	$vf0

	# ---- d = gainsq[idxg]*sh_en - gain2[idxg]*cor -----------------------
	li	$14, 2
	vsll.vs	$vr16, $vr14, $14	# $vr16 = idxg*4 = byte offset
	vldx.w	$vr13, $8, $vr16	# $vr13 = gainsq[idxg]
	vmul.s.vv	$vr15, $vr13, $vr12	# $vr15 = gainsq[idxg] * sh_en
	vldx.w	$vr13, $7, $vr16	# $vr13 = gain2[idxg]
	vmul.s.vv	$vr16, $vr13, $vr11	# $vr16 = gain2[idxg] * cor
	vsub.s.vv	$vr15, $vr15, $vr16	# $vr15 = d

	# ---- Parallel minimum finder ----------------------------------------
	# Parallel minimum finder - eventually go to serial case
	# must keep track of min index and minimum values
	# will not work without a floating point version of vext.vv
	# (or will not be more efficient than scalar version)
	#
	# Each stage halves the vector: the upper half is extracted, compared
	# with the lower half, and wherever lower >= upper the upper value is
	# copied down.  A second vector carries, for every surviving slot, the
	# offset to add to the slot number to recover the original position.
	# On equal values the upper-half element is the one kept (compare is
	# "ge").

	# Stage 1: 128 -> 64.  Values in $vr4, offsets in $vr10.
	li	$13, 64
	li.s	$f4, 0.0
	vsub.vv	$vr10, $vr10, $vr10	# offsets start at 0
	vext.vv	$vr5, $13, $vr15	# $vr5 = d[64...127]
	vmtcr	$vlr0, $13		# vector length = 64
	vext.vv	$vr4, $0, $vr15		# $vr4 = d[0...63]
	vcmp.s.ge.vv	$vf0, $vr4, $vr5
	vadd.s.sv	$vr4, $f4, $vr5		# vr4[i] = min(d[i], d[64+i])
	vadd.sv	$vr10, $13, $vr10	# $vr10 contains 0's and 64's
	vfset	$vf0

	# Stage 2: 64 -> 32.  Values in $vr5, offsets in $vr11.
	li	$13, 32
	vext.vv	$vr6, $13, $vr4		# upper-half values
	vext.vv	$vr12, $13, $vr10	# upper-half offsets
	vmtcr	$vlr0, $13		# vector length = 32
	vext.vv	$vr5, $0, $vr4		# lower-half values
	vext.vv	$vr11, $0, $vr10	# put added indices in $vr11 [0...31]
	vcmp.s.ge.vv	$vf0, $vr5, $vr6
	vadd.s.sv	$vr5, $f4, $vr6		# take upper value where it wins
	vadd.sv	$vr11, $0, $vr12	# ...and its offset
	vadd.sv	$vr11, $13, $vr11	# ...plus 32 for the upper half
	vfset	$vf0

	# Stage 3: 32 -> 16.  Values in $vr6, offsets in $vr12.
	li	$13, 16
	vext.vv	$vr7, $13, $vr5
	vext.vv	$vr13, $13, $vr11
	vmtcr	$vlr0, $13		# vector length = 16
	vext.vv	$vr6, $0, $vr5
	vext.vv	$vr12, $0, $vr11
	vcmp.s.ge.vv	$vf0, $vr6, $vr7
	vadd.s.sv	$vr6, $f4, $vr7
	vadd.sv	$vr12, $0, $vr13
	vadd.sv	$vr12, $13, $vr12
	vfset	$vf0

	# Stage 4: 16 -> 8.  Values in $vr7, offsets in $vr13.
	li	$13, 8
	vext.vv	$vr8, $13, $vr6
	vext.vv	$vr15, $13, $vr12
	vmtcr	$vlr0, $13		# vector length = 8
	vext.vv	$vr7, $0, $vr6
	vext.vv	$vr13, $0, $vr12
	vcmp.s.ge.vv	$vf0, $vr7, $vr8
	vadd.s.sv	$vr7, $f4, $vr8
	vadd.sv	$vr13, $0, $vr15
	vadd.sv	$vr13, $13, $vr13
	vfset	$vf0

	# Stage 5: 8 -> 4.  Values in $vr8, offsets in $vr15.
	li	$13, 4
	vext.vv	$vr9, $13, $vr7
	vext.vv	$vr16, $13, $vr13
	vmtcr	$vlr0, $13		# vector length = 4
	vext.vv	$vr8, $0, $vr7
	vext.vv	$vr15, $0, $vr13
	vcmp.s.ge.vv	$vf0, $vr8, $vr9
	vadd.s.sv	$vr8, $f4, $vr9
	vadd.sv	$vr15, $0, $vr16
	vadd.sv	$vr15, $13, $vr15
	vfset	$vf0

	# ---- Serial minimum finder -------------------------------------------
	# Scans the 4 surviving candidates in $vr8 ($13 still holds 4 and is
	# the loop bound).  The first smallest value wins: a later candidate
	# replaces the current minimum only when it is strictly smaller.
	li	$12, 0			# $12 = vector element index
	li	$14, 0			# $14 = index of min element
	vext.s.sv	$f5, $12, $vr8		# $f5 contains current minimum
	addiu	$12, 1
findmin:
	vext.s.sv	$f4, $12, $vr8		# $f4 = current d value
	c.le.s	$f5, $f4		# current minimum <= candidate?
	bc1t	notmin			# yes: keep the current minimum
	nop				# (branch delay slot)
	addu	$14, $12, $0		# update minimum index
	mov.s	$f5, $f4		# update minimum d value
notmin:
	addiu	$12, 1
	blt	$12, $13, findmin
	nop				# (branch delay slot)

	# ---- Build the return value ------------------------------------------
	vext.sv	$13, $14, $vr15		# $13 = accumulated offset of the winner
	addu	$14, $14, $13		# $14 = winner's original position
	vext.sv	$15, $14, $vr14		# $15 = idxg of the winner
	sll	$14, 3			# position * 8
	addu	$2, $14, $15		# $2 = position*8 + idxg
	jr	$31
	nop				# (jump delay slot)
	.end	cbv