You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
230 lines
13 KiB
230 lines
13 KiB
3 years ago
|
;* ======================================================================== *;
|
||
|
;* TEXAS INSTRUMENTS, INC. *;
|
||
|
;* *;
|
||
|
;* IMGLIB DSP Image/Video Processing Library *;
|
||
|
;* *;
|
||
|
;* Release: Revision 1.04b *;
|
||
|
;* CVS Revision: 1.3 Sun Sep 29 03:32:28 2002 (UTC) *;
|
||
|
;* Snapshot date: 23-Oct-2003 *;
|
||
|
;* *;
|
||
|
;* This library contains proprietary intellectual property of Texas *;
|
||
|
;* Instruments, Inc. The library and its source code are protected by *;
|
||
|
;* various copyrights, and portions may also be protected by patents or *;
|
||
|
;* other legal protections. *;
|
||
|
;* *;
|
||
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
||
|
;* family DSPs. This license was provided to you prior to installing *;
|
||
|
;* the software. You may review this license by consulting the file *;
|
||
|
;* TI_license.PDF which accompanies the files in this library. *;
|
||
|
;* ------------------------------------------------------------------------ *;
|
||
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
||
|
;* All Rights Reserved. *;
|
||
|
;* ======================================================================== *;
|
||
|
|
||
|
|
||
|
;* ======================================================================== *;
|
||
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
||
|
;* tools prior to 4.30. *;
|
||
|
;* ======================================================================== *;
|
||
|
|
||
|
;* Record the assembler version in the substitution symbol $asmver.
;* When .ASSEMBLER_VERSION is not defined, $asmver is set to 0.
.if $isdefed(".ASSEMBLER_VERSION")
|
||
|
.asg .ASSEMBLER_VERSION, $asmver
|
||
|
.else
|
||
|
.asg 0, $asmver
|
||
|
.endif
|
||
|
|
||
|
;* Everything up to the closing .endif is assembled only when $asmver is
;* below 430 -- which includes the "symbol not defined" case set to 0 above.
.if ($asmver < 430)
|
||
|
|
||
|
;* Alias the call/return mnemonics to the plain branch instruction B.
.asg B, CALL ; Function Call
|
||
|
.asg B, RET ; Return from a Function
|
||
|
.asg B, CALLRET ; Function call with Call / Ret chaining.
|
||
|
|
||
|
;* The BNOP-based aliases are defined only when .TMS320C6400 is true.
.if .TMS320C6400
|
||
|
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
|
||
|
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
|
||
|
.asg BNOP, CRNOP ; C64x Fn call w/, Call/Ret chaining via BNOP.
|
||
|
.endif
|
||
|
|
||
|
;* The first .asg operand is empty, so .asmfunc / .endasmfunc are given
;* empty substitution text here.
.asg , .asmfunc ; .func equivalent for hand-assembly code
|
||
|
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
|
||
|
|
||
|
;* Closes the ($asmver < 430) conditional.
.endif
|
||
|
|
||
|
;* ======================================================================== *;
|
||
|
;* End of assembler compatibility shim. *;
|
||
|
;* ======================================================================== *;
|
||
|
|
||
|
|
||
|
* ========================================================================= *
|
||
|
* NAME *
|
||
|
* IMG_sad_16x16 -- Sum of Absolute Differences on single 16x16 block *
|
||
|
* *
|
||
|
* USAGE *
|
||
|
* unsigned IMG_sad_16x16 *
|
||
|
* ( *
|
||
|
* const unsigned char *restrict srcImg, /* 16x16 source block */ *
|
||
|
* const unsigned char *restrict refImg, /* Reference image */ *
|
||
|
* int pitch /* Width of ref image */ *
|
||
|
* ); *
|
||
|
* *
|
||
|
* The code accepts a pointer to the 16x16 source block (srcImg), *
|
||
|
* and a pointer to the upper-left corner of a target position in *
|
||
|
* a reference image (refImg). The width of the reference image *
|
||
|
* is given by the pitch argument. *
|
||
|
* *
|
||
|
* The function returns the sum of the absolute differences *
|
||
|
* between the source block and the 16x16 region pointed to in the *
|
||
|
* reference image. *
|
||
|
* *
|
||
|
* DESCRIPTION *
|
||
|
* The algorithm takes the difference between the pixel values in *
|
||
|
* the source image block and the corresponding pixels in the *
|
||
|
* reference image. It then takes the absolute values of these *
|
||
|
* differences, and accumulates them over the entire 16x16 region. *
|
||
|
* It returns the final accumulation. *
|
||
|
* *
|
||
|
* C CODE *
|
||
|
* The following is a C code description of the algorithm that lacks *
|
||
|
* restrictions. The assembly code may have additional restrictions *
|
||
|
* as noted below. *
|
||
|
* *
|
||
|
* unsigned IMG_sad_16x16 *
|
||
|
* ( *
|
||
|
* const unsigned char *restrict srcImg, *
|
||
|
* const unsigned char *restrict refImg, *
|
||
|
* int pitch *
|
||
|
* ) *
|
||
|
* { *
|
||
|
* int i, j; *
|
||
|
* unsigned sad = 0; *
|
||
|
* *
|
||
|
* for (i = 0; i < 16; i++) *
|
||
|
* for (j = 0; j < 16; j++) *
|
||
|
* sad += abs(srcImg[j+i*16] - refImg[j+i*pitch]); *
|
||
|
* *
|
||
|
* return sad; *
|
||
|
* } *
|
||
|
* *
|
||
|
* ASSUMPTIONS *
|
||
|
* Some versions of this kernel may assume that srcImg is double- *
|
||
|
* word aligned. This version reads srcImg with LDDW (aligned loads). *
|
||
|
* *
|
||
|
* MEMORY NOTE *
|
||
|
* No bank conflicts occur. *
|
||
|
* Endian Neutral. *
|
||
|
* *
|
||
|
* NOTES *
|
||
|
* This kernel blocks interrupts for 61 cycles. *
|
||
|
* *
|
||
|
* CYCLES *
|
||
|
* 67 cycles *
|
||
|
* *
|
||
|
* CODESIZE *
|
||
|
* 168 bytes *
|
||
|
* *
|
||
|
* ------------------------------------------------------------------------- *
|
||
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
||
|
* All Rights Reserved. *
|
||
|
* ========================================================================= *
|
||
|
|
||
|
|
||
|
;* ------------------------------------------------------------------------
;* _IMG_sad_16x16
;* Sum of absolute differences between a 16x16 source block and a 16x16
;* region of a reference image (see the C model in the file header).
;* Inputs (per the .asg names below): A4 = srcImg, B4 = refImg, A6 = pitch.
;* Output: A4 = accumulated SAD. Returns by branching to B3 (B_retaddr).
;* The body is a hand-scheduled software pipeline. Instructions carry a
;* ;[c,i] annotation -- presumably cycle-within-iteration and iteration
;* number (TODO confirm against the original scheduling notes).
;* ------------------------------------------------------------------------
.sect ".text:_sad_16x16"
|
||
|
.global _IMG_sad_16x16
|
||
|
_IMG_sad_16x16:
|
||
|
|
||
|
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
|
||
|
;* Naming used below: s = source data, r = reference data, d = SUBABS4 result
;* for the matching s/r pair; the hex digits appear to name the pixel columns
;* held in each word (TODO confirm).
;* Several physical registers carry two names (A4, A6, A7, A8, A9, B7, B8).
;* The two names are used at different points of the schedule, so reordering
;* instructions in the pipelined code can clobber a value that is still live.
.asg A0, A_p ; priming counter; accumulates are predicated on [!A_p]
|
||
|
.asg A1, A_i ; loop counter used by BDEC
|
||
|
.asg A2, A_s0 ; DOTPU4 result for A_d3210
|
||
|
.asg A3, A_k1 ; DOTPU4 multiplier, built from 0x0101 via PACK2
|
||
|
.asg A4, A_srcImg ; source block pointer on entry
|
||
|
.asg A4, A_retval ; return value (same register as A_srcImg)
|
||
|
.asg A5, A_sad ; A-side partial sum
|
||
|
.asg A6, A_pitch ; reference pitch on entry
|
||
|
.asg A6, A_s3210 ; source word from the LDDW pair (reuses A6)
|
||
|
.asg A7, A_d7654 ; SUBABS4 result for A_s7654 / A_r7654
|
||
|
.asg A7, A_s7654 ; source word from the LDDW pair (shares A7 with A_d7654)
|
||
|
.asg A8, A_r3210 ; reference word from the LDNDW pair
|
||
|
.asg A8, A_s1 ; DOTPU4 result for A_d7654 (shares A8 with A_r3210)
|
||
|
.asg A9, A_d3210 ; SUBABS4 result for A_s3210 / A_r3210
|
||
|
.asg A9, A_r7654 ; reference word from the LDNDW pair (shares A9 with A_d3210)
|
||
|
.asg B3, B_retaddr ; return address, target of the RET in the epilog
|
||
|
.asg B4, B_refImg ; reference image pointer on entry
|
||
|
.asg B5, B_pitch ; B-side copy of A_pitch
|
||
|
.asg B6, B_rBA98 ; reference word from the offset-8 LDNDW pair
|
||
|
.asg B7, B_dFEDC ; SUBABS4 result for B_sFEDC / B_rFEDC
|
||
|
.asg B7, B_rFEDC ; reference word from the offset-8 LDNDW pair (shares B7)
|
||
|
.asg B8, B_dBA98 ; SUBABS4 result for B_sBA98 / B_rBA98
|
||
|
.asg B8, B_sBA98 ; source word from the B-side LDDW pair (shares B8)
|
||
|
.asg B9, B_sFEDC ; source word from the B-side LDDW pair
|
||
|
.asg B16, B_sad ; B-side partial sum
|
||
|
.asg B17, B_srcImg ; second source pointer (A_srcImg + 8)
|
||
|
.asg B18, B_s2 ; DOTPU4 result for B_dBA98
|
||
|
.asg B19, B_s3 ; DOTPU4 result for B_dFEDC
|
||
|
* ========================================================================= *
|
||
|
* =========================== PIPE LOOP PROLOG ============================ *
|
||
|
;* Prolog packet 1: start the first reference load at offset 8 from refImg,
;* issue the branch to loop, begin building the DOTPU4 multiplier, and form a
;* second source pointer 8 past srcImg.
;* NOTE(review): the branch is issued ahead of the rest of the prolog and the
;* first kernel pass, presumably to cover the C6000 branch delay slots --
;* confirm against the CPU reference guide.
LDNDW .D2T2 *+B_refImg(8), B_rFEDC:B_rBA98 ;[ 1,1]
|
||
|
|| B .S2 loop
|
||
|
|| MVK .S1 0x0101, A_k1
|
||
|
|| ADD .L2X A_srcImg, 8, B_srcImg
|
||
|
|
||
|
;* Prolog packet 2: load one source row as two double words (both pointers
;* post-increment by 16), copy pitch to the B side, and pack the low halfword
;* of A_k1 into both halves (0x0101 -> 0x01010101).
;* A_pitch and A_s3210 are both A6, so pitch is copied to B_pitch in the same
;* packet that issues the load whose destination pair includes A6.
LDDW .D1T1 *A_srcImg++(16), A_s7654:A_s3210 ;[ 2,1]
|
||
|
|| LDDW .D2T2 *B_srcImg++(16), B_sFEDC:B_sBA98 ;[ 2,1]
|
||
|
|| MV .L2X A_pitch, B_pitch
|
||
|
|| PACK2 .L1 A_k1, A_k1, A_k1
|
||
|
|
||
|
;* Prolog packet 3: load the reference double word at refImg and post-increment
;* refImg by B_pitch, clear both partial sums, and initialise the loop counter
;* (15) and the priming counter (3).
LDNDW .D2T1 *B_refImg++(B_pitch), A_r7654:A_r3210 ;[ 3,1]
|
||
|
|| ZERO .L2 B_sad
|
||
|
|| ZERO .L1 A_sad
|
||
|
|| MVK .S1 15, A_i
|
||
|
|| MVK .D1 3, A_p
|
||
|
|
||
|
* =========================== PIPE LOOP KERNEL ============================ *
|
||
|
;* Three execute packets per pass. Per the ;[c,i] annotations, work belonging
;* to several iterations is in flight at once:
;*   LDNDW / LDDW  fetch reference / source data,
;*   SUBABS4       forms per-byte |s - r| for one 4-byte word,
;*   DOTPU4        against A_k1 (0x01010101) sums those four bytes,
;*   ADD           folds the sums into A_sad / B_sad.
;* The ADDs are guarded by [!A_p]. A_p starts at 3 and is decremented once per
;* pass by the [A_p] SUB, so the first three passes do not accumulate.
;* BDEC branches back to loop and decrements A_i; it is predicated on [A_i],
;* so it is skipped once A_i is zero.
;* Do not reorder: several symbols alias one register (see the .asg list).
loop:
|
||
|
[!A_p]ADD .L1 A_sad, A_s1, A_sad ;[13,1]
|
||
|
||[!A_p]ADD .S2 B_sad, B_s3, B_sad ;[13,1]
|
||
|
||[ A_i]BDEC .S1 loop, A_i ;[10,2]
|
||
|
|| DOTPU4 .M1 A_d3210, A_k1, A_s0 ;[10,2]
|
||
|
|| SUBABS4 .L2 B_sFEDC, B_rFEDC, B_dFEDC ;[ 7,3]
|
||
|
|| LDNDW .D2T2 *+B_refImg(8), B_rFEDC:B_rBA98 ;[ 1,5]
|
||
|
|
||
|
;* Kernel packet 2: B-side accumulate of B_s2, byte-sum of B_dFEDC, two
;* SUBABS4 operations, and the next pair of source double-word loads.
[!A_p]ADD .S2 B_sad, B_s2, B_sad ;[14,1]
|
||
|
|| DOTPU4 .M2X B_dFEDC, A_k1, B_s3 ;[ 8,3]
|
||
|
|| SUBABS4 .L1 A_s7654, A_r7654, A_d7654 ;[ 8,3]
|
||
|
|| SUBABS4 .L2 B_sBA98, B_rBA98, B_dBA98 ;[ 8,3]
|
||
|
|| LDDW .D1T1 *A_srcImg++(16), A_s7654:A_s3210 ;[ 2,5]
|
||
|
|| LDDW .D2T2 *B_srcImg++(16), B_sFEDC:B_sBA98 ;[ 2,5]
|
||
|
|
||
|
;* Kernel packet 3: count down the priming counter while it is nonzero,
;* A-side accumulate of A_s0, two byte-sums, one SUBABS4, and the reference
;* load that also advances refImg by B_pitch.
[ A_p]SUB .D1 A_p, 1, A_p ;[15,1]
|
||
|
||[!A_p]ADD .S1 A_sad, A_s0, A_sad ;[15,1]
|
||
|
|| DOTPU4 .M1 A_d7654, A_k1, A_s1 ;[ 9,3]
|
||
|
|| DOTPU4 .M2X B_dBA98, A_k1, B_s2 ;[ 9,3]
|
||
|
|| SUBABS4 .L1 A_s3210, A_r3210, A_d3210 ;[ 9,3]
|
||
|
|| LDNDW .D2T1 *B_refImg++(B_pitch), A_r7654:A_r3210 ;[ 3,5]
|
||
|
|
||
|
* =========================== PIPE LOOP EPILOG ============================ *
|
||
|
;* Drain the pipeline: the last DOTPU4 plus the unpredicated accumulates for
;* the work annotated as iterations 4 and 5. No loads are issued here.
ADD .L1 A_sad, A_s1, A_sad ;[13,4]
|
||
|
|| ADD .S2 B_sad, B_s3, B_sad ;[13,4]
|
||
|
|| DOTPU4 .M1 A_d3210, A_k1, A_s0 ;[10,5]
|
||
|
|
||
|
;* The return branch is issued here on .S2, so this B-side ADD uses .L2
;* (the matching kernel ADD uses .S2).
;* NOTE(review): five execute packets follow the RET; presumably they fill the
;* branch delay slots -- confirm against the CPU reference guide.
ADD .L2 B_sad, B_s2, B_sad ;[14,4]
|
||
|
|| RET .S2 B_retaddr
|
||
|
|
||
|
ADD .S1 A_sad, A_s0, A_sad ;[15,4]
|
||
|
|
||
|
ADD .L1 A_sad, A_s1, A_sad ;[13,5]
|
||
|
|| ADD .S2 B_sad, B_s3, B_sad ;[13,5]
|
||
|
|
||
|
ADD .S2 B_sad, B_s2, B_sad ;[14,5]
|
||
|
|
||
|
ADD .S1 A_sad, A_s0, A_sad ;[15,5]
|
||
|
|
||
|
;* Combine the A-side and B-side partial sums into the return register.
ADD .S1X A_sad, B_sad, A_retval
|
||
|
* ========================================================================= *
|
||
|
* ========================================================================= *
|
||
|
* End of file: img_sad_16x16.asm *
|
||
|
* ------------------------------------------------------------------------- *
|
||
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
||
|
* All Rights Reserved. *
|
||
|
* ========================================================================= *
|