;* img_mad_16x16.asm *;

;* ======================================================================== *;
;* TEXAS INSTRUMENTS, INC. *;
;* *;
;* IMGLIB DSP Image/Video Processing Library *;
;* *;
;* Release: Revision 1.04b *;
;* CVS Revision: 1.10 Sun Sep 29 03:32:24 2002 (UTC) *;
;* Snapshot date: 23-Oct-2003 *;
;* *;
;* This library contains proprietary intellectual property of Texas *;
;* Instruments, Inc. The library and its source code are protected by *;
;* various copyrights, and portions may also be protected by patents or *;
;* other legal protections. *;
;* *;
;* This software is licensed for use with Texas Instruments TMS320 *;
;* family DSPs. This license was provided to you prior to installing *;
;* the software. You may review this license by consulting the file *;
;* TI_license.PDF which accompanies the files in this library. *;
;* ------------------------------------------------------------------------ *;
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
;* All Rights Reserved. *;
;* ======================================================================== *;
;* ======================================================================== *;
;* Assembler compatibility shim for assembling 4.30 and later code on *;
;* tools prior to 4.30. *;
;* ======================================================================== *;
; Record the assembler version in the substitution symbol $asmver. When the
; tools do not define .ASSEMBLER_VERSION, $asmver is set to 0 so that the
; "< 430" test below is true.
.if $isdefed(".ASSEMBLER_VERSION")
.asg .ASSEMBLER_VERSION, $asmver
.else
.asg 0, $asmver
.endif
; For tools older than 4.30: make the call/return mnemonics expand to the
; plain branch mnemonics (B, or BNOP when assembling for C64x).
.if ($asmver < 430)
.asg B, CALL ; Function Call
.asg B, RET ; Return from a Function
.asg B, CALLRET ; Function call with Call / Ret chaining.
.if .TMS320C6400
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
.asg BNOP, CRNOP ; C64x Fn call w/, Call/Ret chaining via BNOP.
.endif
; NOTE(review): the first operand of the next two .asg lines is empty as
; written here -- confirm the intended replacement text for these directives.
.asg , .asmfunc ; .func equivalent for hand-assembly code
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
.endif
;* ======================================================================== *;
;* End of assembler compatibility shim. *;
;* ======================================================================== *;
* ========================================================================= *
* *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* IMG_mad_16x16 *
* *
* REVISION DATE *
* 18-Dec-2001 *
* *
* USAGE *
* This routine is C-callable and can be called as: *
* *
* void IMG_mad_16x16 *
* ( *
* const unsigned char *restrict refImg, *
* const unsigned char *restrict srcImg, *
* int pitch, *
* int h, *
* int v, *
* unsigned *restrict match *
* ) *
* *
* refImg Reference image. *
* srcImg[256] 16x16 block image to look for. *
* pitch Width of reference image. *
* h Horiz. size of search area. *
* v Vert. size of search area. Must be multiple of 2. *
* match[2] Result: *
* match[0] is packed x, y. *
* match[1] is MAD value. *
* *
* DESCRIPTION *
* This routine returns the location of the minimum absolute *
* difference between a 16x16 search block and some block in a *
* (h + 16) x (v + 16) search area. h and v are the sizes of the *
* search space for the top left coordinate of the search block. *
* refImg points to the top left pixel of the search area. *
* *
* (0,0) (h,0) (h+16,0) *
* ;--------------+--------; *
* ; search | ; *
* ; space | ; *
* ; | ; search area *
* ;--------------+ ; within reference image *
* (0,v) (h,v) ; *
* ; ; *
* ;-----------------------; *
* (0, v+16) (h+16,v+16) *
* *
* The location is returned relative to the above coordinate system *
* as x and y packed in two 16-bit quantities in a 32-bit word: *
* *
* 31 16 15 0 *
* +----------------+----------------+ *
* match[0]: | x | y | *
* +----------------+----------------+ *
* *
* 31 0 *
* +---------------------------------+ *
* match[1]: | SAD value at location x, y | *
* +---------------------------------+ *
* *
* ASSUMPTIONS *
* srcImg and refImg do not alias in memory. *
* The routine is written for Little Endian configuration. *
* Two MADs are computed together, so v (the vertical dimension) is *
* assumed to be a multiple of 2. *
* *
* MEMORY NOTE *
* There are no special alignment requirements on the arrays. *
* *
* TECHNIQUES *
* The two outer loops are merged, the two inner loops are merged. *
* The inner loop processes 2 lines of 2 search locations in parallel. *
* The search is performed in top-to-bottom, left-to-right order, *
* with the earliest match taking precedence in the case of ties. *
* Further use is made of C64x specific instructions such as SUBABS4 *
* and DOTPU4. The SUBABS4 takes the absolute difference on four 8 *
* bit quantities packed into a 32 bit word. The DOTPU4 performs four *
* 8 bit wide multiplies and adds the results together. *
* *
* C CODE *
* *
* void IMG_mad_16x16 *
* ( *
* const unsigned char *restrict refImg, *
* const unsigned char *restrict srcImg, *
* int pitch, int h, int v, *
* unsigned int *restrict match *
* ) *
* { *
* int i, j, x, y, matx, maty; *
* unsigned matpos, matval; *
* *
* matval = ~0U; *
* matx = maty = 0; *
* *
* for (x = 0; x < h; x++) *
* for (y = 0; y < v; y++) *
* { *
* unsigned acc = 0; *
* *
* for (i = 0; i < 16; i++) *
* for (j = 0; j < 16; j++) *
* acc += abs(srcImg[i*16 + j] - *
* refImg[(i+y)*pitch + x + j]); *
* *
* if (acc < matval) *
* { *
* matval = acc; *
* matx = x; *
* maty = y; *
* } *
* } *
* *
* matpos = (0xffff0000 & (matx << 16)) | *
* (0x0000ffff & maty); *
* match[0] = matpos; *
* match[1] = matval; *
* } *
* *
* CYCLES *
* 38 * h * v + 20 *
* *
* e.g. h=v= 4: 628 cycles *
* h=v=32: 38932 cycles *
* *
* CODESIZE *
* 776 bytes. *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; ==================== SYMBOLIC REGISTER ASSIGNMENTS ======================== ;
; Every symbolic name used by the routine, grouped by the physical register
; it stands for and listed in register order.  Names that appear under the
; same register share that register.
;
; ------------------------------ A datapath --------------------------------- ;
        .asg    A0,     A_best
        .asg    A0,     A_i
        .asg    A0,     A_sub
        .asg    A0,     A_w
        .asg    A1,     A_vl
        .asg    A2,     A_vl1
        .asg    A3,     A_vptch
        .asg    A4,     A_ref_img       ; argument: refImg
;       .asg    A5,     A_c40           ; (disabled in the original table)
        .asg    A6,     A_hl
        .asg    A6,     A_pitch         ; argument: pitch
        .asg    A7,     A_ffff
        .asg    A8,     A_v             ; argument: v
        .asg    A9,     A_bptch
        .asg    A10,    A_ref_ri0d
        .asg    A11,    A_ref_ri0c
        .asg    A14,    A_ptch
        .asg    A16,    A_matpos
        .asg    A17,    A_matval
        .asg    A18,    A_2diffi_1c
        .asg    A18,    A_2sumi1d
        .asg    A18,    A_diffi_1d
        .asg    A19,    A_2sumi1c
        .asg    A19,    A_sumi1c
        .asg    A20,    A_2diffi_1d
        .asg    A20,    A_2sumi0d
        .asg    A20,    A_diffi_0d
        .asg    A20,    A_diffi_1c
        .asg    A21,    A_matchi1
        .asg    A22,    A_k_ones
        .asg    A23,    A_pitch_8
        .asg    A24,    A_2sumi0c
        .asg    A24,    A_2sumi0cd
        .asg    A24,    A_srchid
        .asg    A25,    A_2sumi1cd
        .asg    A25,    A_diffi_0c
        .asg    A25,    A_srchic
        .asg    A26,    A_2srchid
        .asg    A26,    A_sumi1
        .asg    A26,    A_sumi1cd
        .asg    A27,    A_2diffi_0c
        .asg    A27,    A_2srchic
        .asg    A27,    A_sumi0cd
        .asg    A27,    A_sumi1d
        .asg    A28,    A_2diffi_0d
        .asg    A28,    A_ref_ri2d
        .asg    A28,    A_sumi0c
        .asg    A29,    A_ref_ri2c
        .asg    A29,    A_sumi0d
        .asg    A30,    A_csr           ; Copy of CSR's value (setup)
        .asg    A30,    A_ref_ri1d
        .asg    A31,    A_2sumi1
        .asg    A31,    A_ref_ri1c
;
; ------------------------------ B datapath --------------------------------- ;
        .asg    B0,     B_best
        .asg    B1,     B_no_gie        ; CSR w/ GIE bit cleared (setup)
        .asg    B1,     B_ml
        .asg    B2,     B_csr           ; CSR's value (setup)
        .asg    B3,     B_ret           ; Return address
        .asg    B4,     B_src_img       ; argument: srcImg
        .asg    B5,     B_matval
        .asg    B6,     B_h             ; argument: h
        .asg    B8,     B_match         ; argument: match
        .asg    B9,     B_2diffi_1a
        .asg    B9,     B_2sumi0
        .asg    B9,     B_diffi_1a
        .asg    B9,     B_sumi0
        .asg    B9,     B_sumi0ab
        .asg    B15,    B_SP            ; Stack pointer, B datapath
        .asg    B16,    B_ref_ri0b
        .asg    B17,    B_ref_ri0a
        .asg    B18,    B_2sumi0ab
        .asg    B18,    B_2sumi0b
        .asg    B18,    B_sumi0a
        .asg    B19,    B_diffi_0b
        .asg    B19,    B_sumi0b
        .asg    B19,    B_sumi1b
        .asg    B20,    B_src_imgcp
        .asg    B21,    B_2diffi_0b
        .asg    B21,    B_2sumi0a
        .asg    B22,    B_matchi0
        .asg    B23,    B_k_ones
        .asg    B24,    B_2sumi1a
        .asg    B24,    B_srchib
        .asg    B25,    B_2diffi_0a
        .asg    B25,    B_diffi_0a
        .asg    B25,    B_srchia
        .asg    B26,    B_2srchib
        .asg    B26,    B_sumi1ab
        .asg    B27,    B_2srchia
        .asg    B27,    B_2sumi1ab
        .asg    B27,    B_diffi_1b
        .asg    B28,    B_2diffi_1b
        .asg    B28,    B_ref_ri2b
        .asg    B29,    B_ref_ri2a
        .asg    B29,    B_sumi1a
        .asg    B30,    B_ref_ri1b
        .asg    B31,    B_2sumi1b
        .asg    B31,    B_ref_ri1a
; ============================================================================
; ============================================================================
; _IMG_mad_16x16
;
; Evaluates the sum of absolute differences between the 16x16 block at
; srcImg and the reference image for every search position, two vertically
; adjacent positions per pass, and stores the best position and its sum with
; a single STNDW to match[].
;
; In:   A4 = refImg, B4 = srcImg, A6 = pitch, B6 = h, A8 = v, B8 = match
; Out:  match[0] = packed x:y of the best position, match[1] = its sum
; Saves and restores A10, A11 and A14 in a 3-word stack frame.
; Clears GIE in CSR shortly after entry and writes the saved CSR back in the
; final execute packet of the routine.
;
; Loop bookkeeping used below:
;   B_ml    h*v, decremented by 2 per pass; the outer loop ends at 0.
;   A_vl    starts at v, decremented by 2 per pass, reloaded with v at 0.
;   A_vl1   starts at v, decremented once per evaluated position, reloaded
;           with v at 0; recorded in the low half of A_matpos on a new best.
;   A_hl    starts at 0, incremented when A_vl1 reaches 0; recorded in the
;           high half of A_matpos on a new best.
;   A_ptch  byte count subtracted from A_ref_img at the end of a pass:
;           A_bptch = 17*pitch normally, A_vptch = (v+17)*pitch - 1 on the
;           pass in which A_vl reaches 0.
;
; NOTE(review): the 3-word (12-byte) frame is pushed before GIE is cleared --
; confirm this meets the stack alignment the interrupt environment expects.
; ============================================================================
.sect ".text:_mad_16x16"
.global _IMG_mad_16x16
_IMG_mad_16x16:
; parameters: A_ref_img, B_src_img, A_pitch, B_h, A_v, B_match
; A4, B4, A6, B6, A8, B8
STW .D2T1 A14, *B_SP--[3] ; Save A14, then SP -= 3 words
|| MVC .S2 CSR, B_csr ; Remember CSR
AND B_csr, -2, B_no_gie ; Clear GIE
|| STW .D2T1 A11, *+B_SP[2] ; Save A11
|| ADD .D1 A_v, 17, A_w ; w = v + 17
|| MPY .M1 -16, A_pitch, A_bptch ; bptch = -16 * pitch
STW .D2T1 A10, *+B_SP[1] ; Save A10
|| MVC .S2 B_no_gie, CSR ; Disable ints
|| MPY .M1 A_w, A_pitch, A_vptch ; vptch = w * pitch
|| SUB .L1 A_pitch, 8, A_pitch_8 ; pitch - 8
|| MVKL .S1 0000FFFFh, A_ffff ; low half of 0x0000FFFF mask
; ===== Interrupts masked here =====
; Each reference row is read as two 8-byte loads: the first post-increments
; A_ref_img by 8, the second by pitch - 8, so the pointer moves one row.
MVKL .S1 01010101h, A_k_ones ; constant
|| MV .L1 A_v, A_vl ; vl = v
|| LDNDW .D *A_ref_img++(8), B_ref_ri0a:B_ref_ri0b ; Load ref
MVKH .S1 01010101h, A_k_ones ; constant
|| ADD .L1 -1, A_vptch, A_vptch ; vptch = w * pitch - 1
|| MPY .M2X A_v, B_h, B_ml ; ml = h*v
|| LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri0c:A_ref_ri0d ; Load ref
|| MV .D2 B_src_img, B_src_imgcp ; working copy of src pointer
MVKL .S2 01010101h, B_k_ones ; constant
|| MV .L1 A_v, A_vl1 ; vl1 = v
|| MVKH .S1 0000FFFFh, A_ffff ; high half: A_ffff = 0x0000FFFF
|| LDNDW .D2T2 *B_src_imgcp++, B_srchia:B_srchib ; Load src
|| SUB .D1 A_pitch, A_bptch, A_bptch ; bptch = pitch - (-16*pitch)
MVKH .S2 01010101h, B_k_ones ; constant
|| OR .L2 -1, B_matval, B_matval ; matval = all ones
|| MV .S1 A_bptch, A_ptch ; ptch = bptch
|| ZERO .L1 A_hl ; hl = 0 (A6; pitch is not read after this)
|| LDNDW .D *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b ; Load ref
; ============================ PIPE LOOP PROLOG ==============================
; The stage tags of the form ;[c,i] are kept from the original listing.
; NOTE(review): presumably c is the cycle within one iteration's schedule and
; i the iteration number -- confirm against the scheduler output.
;
; The branches issued on the next five packets target L_5+12, L_6+4, L_7+8,
; L_8+8 and LOOP_X, i.e. byte offsets inside the last four kernel packets
; (past their leading instructions) followed by the top of the kernel.
LDNDW .D2T1 *B_src_imgcp++, A_srchic:A_srchid ; Load src
|| ZERO .S1 A_matpos ; matpos = 0
LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,1]
LDNDW .D *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b ;[ 5,1]
LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri2c:A_ref_ri2d ;[ 6,1]
SUBABS4 .L2 B_ref_ri1b, B_srchib, B_diffi_1b ;[ 7,1]
|| LDNDW .D2T2 *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,1]
|| B .S2 L_5 + 12 ; enter kernel packet L_5 at byte offset 12
LDNDW .D2T1 *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,1]
|| B .S2 L_6 + 4 ; enter kernel packet L_6 at byte offset 4
; Start of one outer-loop pass; also the target of the branch in the epilog.
M_LOOP:
ADD .D1 -2, A_vl, A_vl ; vl-= 2
|| MPY .M1 0, A_matchi1, A_matchi1 ; matchi1 = 0 (multiply by zero)
||[!A_vl1 ]MV .S1 A_v, A_vl1 ;[16,0] reload vl1 with v when it is 0
|| SUBABS4 .L2 B_ref_ri1a, B_srchia, B_diffi_1a ;[ 9,1]
|| SUBABS4 .L1 A_ref_ri1d, A_srchid, A_diffi_1d ;[ 9,1]
|| LDNDW .D2T2 *B_src_imgcp++, B_srchia:B_srchib ;[ 1,2]
|| B .S2 L_7 + 8 ; enter kernel packet L_7 at byte offset 8
MVK .S1 7, A_i ; i = 7
|| MV .D2 B_ref_ri2b, B_ref_ri0b ;[10,1]
|| SUBABS4 .L2 B_ref_ri0b, B_srchib, B_diffi_0b ;[10,1]
|| DOTPU4 .M2 B_diffi_1a, B_k_ones, B_sumi1a ;[10,1]
|| SUBABS4 .L1 A_ref_ri1c, A_srchic, A_diffi_1c ;[10,1]
|| LDNDW .D *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b;[ 2,2]
|| B .S2 L_8 + 8 ;[ 2,2]
MV .S2 B_ref_ri2a, B_ref_ri0a ;[11,1]
|| DOTPU4 .M2 B_diffi_1b, B_k_ones, B_sumi1b ;[11,1]
|| SUBABS4 .L2 B_ref_ri0a, B_srchia, B_diffi_0a ;[11,1]
|| SUBABS4 .L1 A_ref_ri0d, A_srchid, A_diffi_0d ;[11,1]
|| DOTPU4 .M1 A_diffi_1c, A_k_ones, A_sumi1c ;[11,1]
|| LDNDW .D2T1 *B_src_imgcp++, A_srchic:A_srchid ;[ 3,2]
|| B .S1 LOOP_X ; then continue at the top of the kernel
||[!A_vl]MV .D1 A_vptch, A_ptch ; ptch = vptch when vl is 0
SUBABS4 .L1 A_ref_ri0c, A_srchic, A_diffi_0c ;[12,1]
|| DOTPU4 .M2 B_diffi_0a, B_k_ones, B_sumi0a ;[12,1]
|| SUBABS4 .L2 B_ref_ri1a, B_2srchia, B_2diffi_0a ;[12,1]
|| DOTPU4 .M1 A_diffi_0d, A_k_ones, A_sumi0d ;[12,1]
|| LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,2]
||[!A_vl]MV .S1 A_v, A_vl ; vl = v
|| ZERO .S2 B_matchi0 ; matchi0 = 0
; ============================ PIPE LOOP KERNEL ==============================
; Eight execute packets per trip.  A_i is decremented while non-zero and the
; conditional branch back to LOOP_X sits in the third packet.
; SUBABS4 forms the byte-wise absolute differences of a reference word and a
; source word; DOTPU4 against the 01010101h constant adds the four bytes of
; each difference word; the ADDs fold those partial sums into B_matchi0 and
; A_matchi1.
LOOP_X:
[ A_i]ADD .S1 A_i, -1, A_i ;[17,1]
|| ADD .S2 B_sumi1a, B_sumi1b, B_sumi1ab ;[17,1]
|| DOTPU4 .M2 B_2diffi_1b,B_k_ones, B_2sumi1b ;[17,1]
|| DOTPU4 .M1 A_2diffi_0d,A_k_ones, A_2sumi0d ;[17,1]
|| ADD .D1 A_sumi1c, A_sumi1d, A_sumi1cd ;[17,1]
|| SUBABS4 .L2 B_ref_ri1a, B_srchia, B_diffi_1a ;[ 9,2]
|| SUBABS4 .L1 A_ref_ri1d, A_srchid, A_diffi_1d ;[ 9,2]
|| LDNDW .D2T2 *B_src_imgcp++, B_srchia:B_srchib ;[ 1,3]
ADD .S2 B_sumi0a, B_sumi0b, B_sumi0ab ;[18,1]
|| DOTPU4 .M1 A_2diffi_1d,A_k_ones, A_2sumi1d ;[18,1]
|| ADD .S1 A_sumi0c, A_sumi0d, A_sumi0cd ;[18,1]
|| ADD .D2 B_ref_ri2b, 0, B_ref_ri0b ;[10,2]
|| SUBABS4 .L2 B_ref_ri0b, B_srchib, B_diffi_0b ;[10,2]
|| DOTPU4 .M2 B_diffi_1a, B_k_ones, B_sumi1a ;[10,2]
|| SUBABS4 .L1 A_ref_ri1c, A_srchic, A_diffi_1c ;[10,2]
|| LDNDW .D *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b;[ 2,3]
[ A_i]B .S1 LOOP_X ;[19,1]
|| ADD .D1X A_sumi1cd, B_sumi1ab, A_sumi1 ;[19,1]
|| ADD .S2 B_ref_ri2a, 0, B_ref_ri0a ;[11,2]
|| DOTPU4 .M2 B_diffi_1b, B_k_ones, B_sumi1b ;[11,2]
|| SUBABS4 .L2 B_ref_ri0a, B_srchia, B_diffi_0a ;[11,2]
|| SUBABS4 .L1 A_ref_ri0d, A_srchid, A_diffi_0d ;[11,2]
|| DOTPU4 .M1 A_diffi_1c, A_k_ones, A_sumi1c ;[11,2]
|| LDNDW .D2T1 *B_src_imgcp++, A_srchic:A_srchid ;[ 3,3]
ADD .S2X B_sumi0ab, A_sumi0cd, B_sumi0 ;[20,1]
|| ADD .S1 A_matchi1, A_sumi1, A_matchi1 ;[20,1]
|| SUBABS4 .L1 A_ref_ri0c, A_srchic, A_diffi_0c ;[12,2]
|| DOTPU4 .M2 B_diffi_0a, B_k_ones, B_sumi0a ;[12,2]
|| SUBABS4 .L2 B_ref_ri1a, B_2srchia, B_2diffi_0a ;[12,2]
|| DOTPU4 .M1 A_diffi_0d, A_k_ones, A_sumi0d ;[12,2]
|| LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,3]
; L_5 .. L_8 are also entered at a byte offset by the prolog/END branches, so
; the order of the instructions inside these four packets matters.
L_5: ADD .D2 B_matchi0, B_sumi0, B_matchi0 ;[21,1]
|| ADD .S2 B_2sumi1a, B_2sumi1b, B_2sumi1ab ;[21,1]
|| ADD .S1 A_2sumi0c, A_2sumi0d, A_2sumi0cd ;[21,1]
|| SUBABS4 .L2 B_ref_ri2a, B_2srchia, B_2diffi_1a ;[13,2]
|| DOTPU4 .M1 A_diffi_1d, A_k_ones, A_sumi1d ;[13,2]
|| SUBABS4 .L1 A_ref_ri2d, A_2srchid, A_2diffi_1d ;[13,2]
|| DOTPU4 .M2 B_2diffi_0a,B_k_ones, B_2sumi0a ;[13,2]
|| LDNDW .D *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b;[ 5,3]
L_6: ADD .D2 B_2sumi0a, B_2sumi0b, B_2sumi0ab ;[22,1]
|| ADD .S1 A_2sumi1c, A_2sumi1d, A_2sumi1cd ;[22,1]
|| DOTPU4 .M1 A_diffi_0c, A_k_ones, A_sumi0c ;[14,2]
|| SUBABS4 .L1 A_ref_ri2c, A_2srchic, A_2diffi_1c ;[14,2]
|| DOTPU4 .M2 B_diffi_0b, B_k_ones, B_sumi0b ;[14,2]
|| SUBABS4 .L2 B_ref_ri1b, B_2srchib, B_2diffi_0b ;[14,2]
|| LDNDW .D *A_ref_img++(A_pitch_8), A_ref_ri2c:A_ref_ri2d ;[ 6,3]
L_7: ADD .S1X A_2sumi1cd, B_2sumi1ab, A_2sumi1 ;[23,1]
|| ADD .S2X B_2sumi0ab, A_2sumi0cd, B_2sumi0 ;[23,1]
|| ADD .D1 A_ref_ri2c, 0, A_ref_ri0c ;[15,2]
|| DOTPU4 .M1 A_2diffi_1c,A_k_ones, A_2sumi1c ;[15,2]
|| SUBABS4 .L1 A_ref_ri1c, A_2srchic, A_2diffi_0c ;[15,2]
|| DOTPU4 .M2 B_2diffi_1a,B_k_ones, B_2sumi1a ;[15,2]
|| SUBABS4 .L2 B_ref_ri1b, B_srchib, B_diffi_1b ;[ 7,3]
|| LDNDW .D2T2 *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,3]
L_8: ADD .S2 B_matchi0, B_2sumi0, B_matchi0 ;[24,1]
|| ADD .S1 A_matchi1, A_2sumi1, A_matchi1 ;[24,1]
|| ADD .D1 A_ref_ri2d, 0, A_ref_ri0d ;[16,2]
|| DOTPU4 .M2 B_2diffi_0b,B_k_ones, B_2sumi0b ;[16,2]
|| SUBABS4 .L2 B_ref_ri2b, B_2srchib, B_2diffi_1b ;[16,2]
|| SUBABS4 .L1 A_ref_ri1d, A_2srchid, A_2diffi_0d ;[16,2]
|| DOTPU4 .M1 A_2diffi_0c,A_k_ones, A_2sumi0c ;[16,2]
|| LDNDW .D2T1 *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,3]
; ============================ PIPE LOOP EPILOG ==============================
; Finishes the outstanding sums of the pass while re-pointing A_ref_img and
; B_src_imgcp and issuing the first loads of the next pass.
; EPILOG:
ADD .S2 B_sumi1a, B_sumi1b, B_sumi1ab ;[17,3]
|| DOTPU4 .M2 B_2diffi_1b,B_k_ones, B_2sumi1b ;[17,3]
|| DOTPU4 .M1 A_2diffi_0d,A_k_ones, A_2sumi0d ;[17,3]
|| ADD .D1 A_sumi1c, A_sumi1d, A_sumi1cd ;[17,3]
|| SUB .S1 A_ref_img, A_ptch, A_ref_img ; step ref pointer back by ptch
ADD .S2 B_sumi0a, B_sumi0b, B_sumi0ab ;[18,3]
|| DOTPU4 .M1 A_2diffi_1d,A_k_ones, A_2sumi1d ;[18,3]
|| ADD .S1 A_sumi0c, A_sumi0d, A_sumi0cd ;[18,3]
|| MV .L2 B_src_img, B_src_imgcp ; rewind src pointer to srcImg
ADD .S1X A_sumi1cd, B_sumi1ab, A_sumi1 ;[19,3]
|| LDNDW .D *A_ref_img++(8), B_ref_ri0a:B_ref_ri0b; Load ref
ADD .S2X B_sumi0ab, A_sumi0cd, B_sumi0 ;[20,3]
|| ADD .S1 A_matchi1, A_sumi1, A_matchi1 ;[20,3]
|| LDNDW .D *A_ref_img++(A_pitch_8), A_ref_ri0c:A_ref_ri0d; Load ref
ADD .L2 B_matchi0, B_sumi0, B_matchi0 ;[21,3]
|| ADD .S2 B_2sumi1a, B_2sumi1b, B_2sumi1ab ;[21,3]
|| ADD .S1 A_2sumi0c, A_2sumi0d, A_2sumi0cd ;[21,3]
|| LDNDW .D2T2 *B_src_imgcp++, B_srchia:B_srchib ; Load src
ADD .S2 B_2sumi0a, B_2sumi0b, B_2sumi0ab ;[22,3]
|| ADD .S1 A_2sumi1c, A_2sumi1d, A_2sumi1cd ;[22,3]
|| [B_ml]ADD .L2 B_ml, -2, B_ml ; ml -= 2 (two positions done)
|| LDNDW .D *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b; Load ref
ADD .S1X A_2sumi1cd, B_2sumi1ab, A_2sumi1 ;[23,3]
|| ADD .L2X B_2sumi0ab, A_2sumi0cd, B_2sumi0 ;[23,3]
||[ B_ml]B .S2 M_LOOP ; next pass while ml != 0
|| LDNDW .D2T1 *B_src_imgcp++, A_srchic:A_srchid ; Load
ADD .L2 B_matchi0, B_2sumi0, B_matchi0 ;[24,3]
|| ADD .S1 A_matchi1, A_2sumi1, A_matchi1 ;[24,3]
|| LDNDW .D *A_ref_img++(A_pitch_8), A_ref_ri1c:A_ref_ri1d ;[ 4,1]
|| MV .L1X B_matval, A_matval ; A-side copy of matval
; ============================================================================
; Compare the two finished sums against the best so far, B_matchi0 first and
; A_matchi1 second.  CMPLTU is a strict unsigned "less than", so an equal sum
; does not replace the recorded best.
; END:
CMPLTU .L2 B_matchi0, B_matval, B_best ; best = matchi0 < matval
|| ADD .L1 -1, A_vl1, A_vl1 ; vl1--
|| MV .S1 A_bptch, A_ptch ; ptch = bptch
|| LDNDW .D *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b ;[ 5,1]
[ B_best] MV .L2 B_matchi0, B_matval ; matval = matchi0
||[ B_best] MV .L1X B_matchi0, A_matval ; keep A-side copy in step
||[ B_best] PACK2 .S1 A_hl, A_vl1, A_matpos ; matpos = hl : vl1
|| LDNDW .D *A_ref_img++(A_pitch_8),A_ref_ri2c:A_ref_ri2d ;[ 6,1]
ADD .S1 -1, A_vl1, A_vl1 ; vl1--
|| CMPLTU .L1 A_matchi1, A_matval, A_best ; XP stall
|| SUBABS4 .L2 B_ref_ri1b, B_srchib, B_diffi_1b ;[ 7,1]
|| LDNDW .D2T2 *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,1]
||[ B_ml]B .S2 L_5 + 12 ; same kernel entry as the prolog
[ A_best] PACK2 .L1 A_hl, A_vl1, A_matpos ; matpos = hl : vl1
||[!A_vl1 ] ADD .D1 A_hl, 1, A_hl ; hl++ when vl1 reached 0
||[ A_best] MV .S2X A_matchi1, B_matval ; matval = matchi1
|| LDNDW .D2T1 *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,1]
||[ B_ml] B .S1 L_6 + 4 ; same kernel entry as the prolog
;==== Branch occurs
; ========================================================================= ;
; Exit: restore A10/A11/A14, convert the recorded vl1 value in the low half
; of A_matpos into (v - 1) - vl1, and store the result pair.  The five
; execute packets after RET complete before the return takes effect.
LDW .D2T1 *+B_SP[1], A10
RET .S2 B_ret ; Return to caller
|| LDW .D2T1 *+B_SP[2], A11
MV .S1 A_v, A_sub ; sub = v
|| AND .L1 A_matpos, A_ffff, A_vl ; vl = low half of matpos
|| LDW .D2T1 *++B_SP[3], A14
SUB .L1 A_sub, 1, A_sub ; sub = v - 1
|| SHL .S1 A_ffff, 16, A_ffff ; mask = 0xFFFF0000
SUB .S1 A_sub, A_vl, A_vl ; vl = (v - 1) - vl
|| AND .L1 A_matpos, A_ffff, A_matpos ; keep high half of matpos
ADD .S1 A_matpos, A_vl, A_matpos ; matpos = high half + vl
|| MV .L1X B_matval, A_matval ; final matval to A side
STNDW .D2T1 A_matval:A_matpos, *B_match ; store matpos and matval
|| MVC .S2 B_csr, CSR ; Restore CSR
; ===== Interruptibility state restored here =====
; ===== Branch Occurs =====
* ========================================================================= *
* End of file: img_mad_16x16.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *