You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
206 lines
13 KiB
206 lines
13 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* DSPLIB DSP Signal Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.18 Tue Oct 14 19:58:57 2003 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
; ---------------------------------------------------------------------------
; Compatibility shim: lets source written with the 4.30+ pseudo-mnemonics
; (CALL, RET, CALLRET, CALLNOP, RETNOP, CRNOP, .asmfunc, .endasmfunc)
; assemble on older tools by defining them as substitution symbols.
; NOTE(review): the bare `|` lines in this listing look like extraction
; residue rather than assembler input -- confirm against the original file.
; ---------------------------------------------------------------------------
; Step 1: $asmver = .ASSEMBLER_VERSION when the assembler defines it,
; otherwise 0, so that the comparison below treats such tools as "old".
.if $isdefed(".ASSEMBLER_VERSION")
|
|
.asg .ASSEMBLER_VERSION, $asmver
|
|
.else
|
|
.asg 0, $asmver
|
|
.endif
|
|
|
|
; Step 2: only tools older than 4.30 (encoded here as 430) get the aliases.
.if ($asmver < 430)
|
|
|
|
; Call/return pseudo-mnemonics all become a plain branch.
.asg B, CALL ; Function Call
|
|
.asg B, RET ; Return from a Function
|
|
.asg B, CALLRET ; Function call with Call / Ret chaining.
|
|
|
|
; The *NOP variants map to BNOP and are only defined when .TMS320C6400 is set.
.if .TMS320C6400
|
|
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
|
|
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
|
|
.asg BNOP, CRNOP ; C64x Fn call with Call/Ret chaining via BNOP.
|
|
.endif
|
|
|
|
; Function-boundary directives are assigned an empty substitution, so they
; expand to nothing on old tools.
.asg , .asmfunc ; .func equivalent for hand-assembly code
|
|
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
|
|
|
|
.endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* *
|
|
* TEXAS INSTRUMENTS, INC. *
|
|
* *
|
|
* NAME *
|
|
* DSP_dotprod *
|
|
* *
|
|
* REVISION DATE *
|
|
* 10-Oct-2003 *
|
|
* *
|
|
* USAGE *
|
|
* This routine is C callable, and has the following C prototype: *
|
|
* *
|
|
* int DSP_dotprod *
|
|
* ( *
|
|
* const short *m, /* Pointer to first vector */ *
|
|
* const short *n, /* Pointer to second vector */ *
|
|
* int count /* Length of vectors. */ *
|
|
* ); *
|
|
* *
|
|
* This routine returns the dot product as its return value. *
|
|
* *
|
|
* DESCRIPTION *
|
|
* The "DSP_dotprod" function implements a dot product of two input *
|
|
* vectors, returning the scalar result. Each element of the *
|
|
* first array is multiplied with the corresponding element of the *
|
|
* second array, and the products are summed. The sum is returned. *
|
|
* *
|
|
* int DSP_dotprod *
|
|
* ( *
|
|
* const short *m, /* Pointer to first vector */ *
|
|
* const short *n, /* Pointer to second vector */ *
|
|
* int count /* Length of vectors. */ *
|
|
* ) *
|
|
* { *
|
|
* int i, sum = 0; *
|
|
* *
|
|
* for (i = 0; i < count; i++) *
|
|
* sum += m[i] * n[i]; *
|
|
* *
|
|
* return sum; *
|
|
* } *
|
|
* *
|
|
* The above C code is a general implementation without *
|
|
* restrictions. The assembly code has some restrictions, as *
|
|
* noted below. *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The code is unrolled 4 times to enable full memory and multiplier *
|
|
* bandwidth to be utilized. *
|
|
* *
|
|
* Interrupts are masked by branch delay slots only. *
|
|
* *
|
|
* Prolog collapsing has been performed to reduce codesize. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* The input length is a multiple of 4 and greater than 0. *
|
|
* *
|
|
* The input data and coefficients are stored on double word *
|
|
* aligned boundaries. *
|
|
* *
|
|
* This code is not interruptible. Interrupts are masked by *
|
|
* branch delay slots during the entire duration of this *
|
|
* function. *
|
|
* *
|
|
* MEMORY NOTE *
|
|
* To avoid bank conflicts, the input arrays 'm' and 'n' must *
|
|
* be offset by 4 half-words (8 bytes). *
|
|
* *
|
|
* The code is ENDIAN NEUTRAL. *
|
|
* *
|
|
* CODESIZE *
|
|
* 160 bytes *
|
|
* *
|
|
* CYCLES *
|
|
* cycles = count/4 + 16 *
|
|
* For count = 720, cycles = 196. *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
; ---------------------------------------------------------------------------
; _DSP_dotprod -- software-pipelined dot product of two 16-bit vectors.
;
;   In:   A4 = m, B4 = n, A6 = count   (see the .asg list below)
;   Out:  A4 = A-side partial sum + B-side partial sum
;   Ret:  branches to the address held in B3
;
; Layout: five prolog execute packets each issue a branch to `loop`
; together with a pair of LDDW loads; a one-packet kernel then keeps
; re-issuing itself through BDEC; the epilog returns via RETNOP and
; performs the final cross-path ADD.
;
; NOTE(review): the bare `|` lines throughout this listing look like
; extraction residue rather than assembler input (the parallel-bar token
; is `||`) -- confirm against the original dsp_dotprod.asm.
; ---------------------------------------------------------------------------
.sect ".text:_dotprod"
|
|
.global _DSP_dotprod
|
|
_DSP_dotprod:
|
|
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
|
|
.asg A4, A_m ; pointer to vector m
|
|
.asg B4, B_n ; pointer to vector n
|
|
.asg A6, A_count ; number of elements in each vector
|
|
.asg A0, A_i ; loop count (count >> 2), also the predicate register
|
|
.asg A16, A_sum ; partial sum a (accumulates A_prod)
|
|
.asg A17, A_prod ; DOTP2 of A_reg1,B_reg1: a[i+2]*b[i+2]+a[i+3]*b[i+3]
|
|
.asg B16, B_sum ; partial sum b (accumulates B_prod)
|
|
.asg B17, B_prod ; DOTP2 of A_reg0,B_reg0: a[i]*b[i]+a[i+1]*b[i+1]
|
|
.asg A9, A_reg1 ; elements a[i+3] a[i+2]
|
|
.asg A8, A_reg0 ; elements a[i+1] a[i]
|
|
.asg B7, B_reg1 ; elements b[i+3] b[i+2]
|
|
.asg B6, B_reg0 ; elements b[i+1] b[i]
|
|
.asg A4, A_sumt ; total sum a + b returned to caller (reuses A4 = A_m)
|
|
* ========================== PIPE LOOP PROLOG ============================= *
|
|
; Prolog packet 1: first branch to the kernel and the first, unconditional,
; pair of double-word loads (four 16-bit elements from each vector).
B .S2 loop ; prime loop
|
|
|| LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
|| LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|
|
; Prolog packet 2: second branch and load pair. Also derives the trip
; counter A_i = A_count >> 2 (one load pair per four elements) and clears
; both product/accumulator register pairs.
B .S2 loop ; prime loop
|
|
|| LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
|| LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|| SHRU .S1 A_count, 2, A_i ; calc loop count
|
|
|| ZERO .L1 A_prod:A_sum
|
|
|| ZERO .L2 B_prod:B_sum
|
|
|
|
; Prolog packet 3: from here on the loads are predicated on A_i, so they
; stop being issued once the counter reaches zero. The ZEROs are repeated;
; the original notes attribute them to "branch-target-not-span"
; (presumably padding that controls where `loop` falls relative to a
; fetch-packet boundary -- TODO confirm).
B .S1 loop ; prime loop
|
|
||[A_i] LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
||[A_i] LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|| ZERO .L1 A_prod:A_sum ; added for branch-
|
|
|| ZERO .L2 B_prod:B_sum ; target-not-span
|
|
|
|
; Prolog packets 4 and 5: the plain branch is replaced by a predicated BDEC,
; so these two packets also start counting A_i down.
[A_i] BDEC .S1 loop, A_i ; prime loop
|
|
||[A_i] LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
||[A_i] LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|| ZERO .L1 A_prod:A_sum ; added for branch-
|
|
|| ZERO .L2 B_prod:B_sum ; target-not-span
|
|
|
|
[A_i] BDEC .S1 loop, A_i ; prime loop
|
|
||[A_i] LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
||[A_i] LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|| ZERO .L1 A_prod:A_sum ; added for branch-
|
|
|| ZERO .L2 B_prod:B_sum ; target-not-span
|
|
* ========================== PIPE LOOP KERNEL ============================= *
|
|
; Kernel: a single execute packet, reached by the branches issued in the
; prolog and then by its own BDEC. Each pass
;   - folds the current A_prod/B_prod into A_sum/B_sum,
;   - starts two new DOTP2s on the current contents of the load registers
;     (each M unit takes its A-file operand/B-file operand via the X path),
;   - issues another load pair and another BDEC while A_i is non-zero.
; The prolog ZEROs mean the early ADDs see zero products until the first
; DOTP2 results are written (DOTP2 and LDDW are multi-cycle on C64x).
loop:
|
|
ADD .L2 B_sum, B_prod, B_sum ; sum += productb
|
|
|| ADD .L1 A_sum, A_prod, A_sum ; sum += producta
|
|
||[A_i] LDDW .D2T2 *B_n++, B_reg1:B_reg0 ; load b[i+3]...b[i]
|
|
||[A_i] LDDW .D1T1 *A_m++, A_reg1:A_reg0 ; load a[i+3]...a[i]
|
|
|| DOTP2 .M2X A_reg0, B_reg0, B_prod ; a[0]*b[0]+a[1]*b[1]
|
|
|| DOTP2 .M1X A_reg1, B_reg1, A_prod ; a[2]*b[2]+a[3]*b[3]
|
|
||[A_i] BDEC .S1 loop, A_i ; iterate loop
|
|
* ========================== PIPE LOOP EPILOG ============================= *
|
|
|
|
; Epilog: RETNOP branches to B3 with 4 NOP cycles folded into the
; instruction (it is aliased to BNOP by the shim on pre-4.30 tools).
; The ADD below is the next execute packet and, per the "Branch Occurs"
; marker, completes before control returns to the caller.
RETNOP.S2 B3, 4 ; Return to caller
|
|
ADD .L1X A_sum, B_sum, A_sumt ; final sum = A-side + B-side (B_sum via X path)
|
|
; ===== Branch Occurs
|
|
|
|
* ========================================================================= *
|
|
* End of file: dsp_dotprod.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|