#-----------------------------------------------------------------------
# shapeconvv
#
# For every codeword c in the shape codebook, form the first five
# samples of the convolution of c with the 5-tap input h,
#
#       y[n] = sum_{k=0..n} h[k] * c[n-k]        n = 0 .. 4
#
# and write the energy  y[0]^2 + ... + y[4]^2  of each codeword to the
# output array.  All 128 codewords are processed at once, one codeword
# per vector element.
#
# Arguments (as named by the original author):
#       $4 = input vector (signal), five single-precision values h[0..4]
#       $5 = sh_en,    destination array for the per-codeword energies
#       $6 = cb_shape, codebook; rows are five words long
#
# Scratch registers: $12, $13, $f4-$f8, $vr4-$vr12, $vf0, plus the
# vector control registers $vpw and $vlr0.
#
# Instruction order is significant: .set noreorder is in effect, so the
# assembler does no scheduling on our behalf.
#-----------------------------------------------------------------------
        .set    noreorder
        .set    nobopt
        .ent    shapeconvv
        .globl  shapeconvv
shapeconvv:
        .frame  $sp, 0, $31

        #---- Vector unit configuration --------------------------------
        li      $12, 128                # one vector element per codeword
        li      $13, 2
        vmtcr   $vpw, $13               # vpw code 2 = 32-bit elements
        vmtcr   $vlr0, $12              # vector length = 128
        vfset   $vf0                    # NOTE(review): presumably enables every
                                        # element in flag register $vf0 -
                                        # confirm against the ISA manual

        #---- Scalar taps h[0..4] --------------------------------------
        l.s     $f4, 0($4)              # $f4 = h[0]
        l.s     $f5, 4($4)              # $f5 = h[1]
        l.s     $f6, 8($4)              # $f6 = h[2]
        l.s     $f7, 12($4)             # $f7 = h[3]
        l.s     $f8, 16($4)             # $f8 = h[4]

        #---- Gather the codebook one column at a time -----------------
        # $12 is the stride operand for the strided loads.  The original
        # comment gives it as "5 elements" (one codebook row); the unit
        # the hardware expects is not visible here - TODO confirm.
        li      $12, 5
        move    $13, $6                 # $13 walks across the columns
        vlds.w  $vr4, $13, $12          # $vr4 = cb_shape[*][0]
        addiu   $13, $13, 4             # step to the next 32-bit column
        vlds.w  $vr5, $13, $12          # $vr5 = cb_shape[*][1]
        addiu   $13, $13, 4
        vlds.w  $vr6, $13, $12          # $vr6 = cb_shape[*][2]
        addiu   $13, $13, 4
        vlds.w  $vr7, $13, $12          # $vr7 = cb_shape[*][3]
        addiu   $13, $13, 4
        vlds.w  $vr8, $13, $12          # $vr8 = cb_shape[*][4]

        #---- Convolution and energy accumulation ----------------------
        # Register roles below:
        #   $vr9  running energy (sum of squared outputs)
        #   $vr10 square of the current output sample
        #   $vr11 current output sample y[n]
        #   $vr12 single product term h[k]*c[n-k]

        # y[0] = h0*c0
        vmul.s.sv $vr10, $f4, $vr4
        vmul.s.vv $vr9, $vr10, $vr10    # energy  = y[0]^2

        # y[1] = h0*c1 + h1*c0
        vmul.s.sv $vr11, $f4, $vr5
        vmul.s.sv $vr12, $f5, $vr4
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.vv $vr10, $vr11, $vr11
        vadd.s.vv $vr9, $vr9, $vr10     # energy += y[1]^2

        # y[2] = h0*c2 + h1*c1 + h2*c0
        vmul.s.sv $vr11, $f4, $vr6
        vmul.s.sv $vr12, $f5, $vr5
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f6, $vr4
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.vv $vr10, $vr11, $vr11
        vadd.s.vv $vr9, $vr9, $vr10     # energy += y[2]^2

        # y[3] = h0*c3 + h1*c2 + h2*c1 + h3*c0
        vmul.s.sv $vr11, $f4, $vr7
        vmul.s.sv $vr12, $f5, $vr6
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f6, $vr5
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f7, $vr4
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.vv $vr10, $vr11, $vr11
        vadd.s.vv $vr9, $vr9, $vr10     # energy += y[3]^2

        # y[4] = h0*c4 + h1*c3 + h2*c2 + h3*c1 + h4*c0
        vmul.s.sv $vr11, $f4, $vr8
        vmul.s.sv $vr12, $f5, $vr7
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f6, $vr6
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f7, $vr5
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.sv $vr12, $f8, $vr4
        vadd.s.vv $vr11, $vr11, $vr12
        vmul.s.vv $vr10, $vr11, $vr11
        vadd.s.vv $vr9, $vr9, $vr10     # energy += y[4]^2

        #---- Write results and return ---------------------------------
        vst.w   $vr9, $5                # sh_en[i] = energy of codeword i
        jr      $31
        nop                             # branch delay slot

        .end    shapeconvv