;* ======================================================================== *;
;*  TEXAS INSTRUMENTS, INC.                                                 *;
;*                                                                          *;
;*  IMGLIB  DSP Image/Video Processing Library                              *;
;*                                                                          *;
;*      Release:        Revision 1.04b                                      *;
;*      CVS Revision:   1.10    Sun Sep 29 03:32:24 2002 (UTC)              *;
;*      Snapshot date:  23-Oct-2003                                         *;
;*                                                                          *;
;*  This library contains proprietary intellectual property of Texas        *;
;*  Instruments, Inc. The library and its source code are protected by      *;
;*  various copyrights, and portions may also be protected by patents or    *;
;*  other legal protections.                                                *;
;*                                                                          *;
;*  This software is licensed for use with Texas Instruments TMS320         *;
;*  family DSPs. This license was provided to you prior to installing       *;
;*  the software. You may review this license by consulting the file        *;
;*  TI_license.PDF which accompanies the files in this library.             *;
;* ------------------------------------------------------------------------ *;
;*           Copyright (C) 2003 Texas Instruments, Incorporated.            *;
;*                           All Rights Reserved.                           *;
;* ======================================================================== *;

;* ======================================================================== *;
;*  Assembler compatibility shim for assembling 4.30 and later code on      *;
;*  tools prior to 4.30.                                                    *;
;* ======================================================================== *;

        ; Capture the assembler version in the substitution symbol $asmver.
        ; When .ASSEMBLER_VERSION is not defined, $asmver is set to 0 so
        ; that the "< 430" test below selects the compatibility definitions.
        .if $isdefed(".ASSEMBLER_VERSION")
        .asg    .ASSEMBLER_VERSION, $asmver
        .else
        .asg    0, $asmver
        .endif

        .if ($asmver < 430)

        ; Map the call/return mnemonics onto the plain branch instruction.
        .asg    B,    CALL     ; Function Call
        .asg    B,    RET      ; Return from a Function
        .asg    B,    CALLRET  ; Function call with Call / Ret chaining.

        ; The BNOP-based variants are only defined for C64x targets.
        .if .TMS320C6400
        .asg    BNOP, CALLNOP  ; C64x BNOP as a Fn. Call
        .asg    BNOP, RETNOP   ; C64x BNOP as a Fn. Return
        .asg    BNOP, CRNOP    ; C64x Fn call w/, Call/Ret chaining via BNOP.
        .endif

        ; Substitute an empty string for the function-boundary directives
        ; so that they expand to nothing under this branch.
        .asg    , .asmfunc     ; .func equivalent for hand-assembly code
        .asg    , .endasmfunc  ; .endfunc equivalent for hand-assembly code

        .endif

;* ======================================================================== *;
;*  End of assembler compatibility shim.
*;
;* ======================================================================== *;
* ========================================================================= *
*                                                                           *
*   TEXAS INSTRUMENTS, INC.                                                 *
*                                                                           *
*   NAME                                                                    *
*       IMG_mad_16x16                                                       *
*                                                                           *
*   REVISION DATE                                                           *
*       18-Dec-2001                                                         *
*                                                                           *
*   USAGE                                                                   *
*       This routine is C-callable and can be called as:                    *
*                                                                           *
*       void IMG_mad_16x16                                                  *
*       (                                                                   *
*           const unsigned char *restrict refImg,                           *
*           const unsigned char *restrict srcImg,                           *
*           int pitch,                                                      *
*           int h,                                                          *
*           int v,                                                          *
*           unsigned *restrict match                                        *
*       )                                                                   *
*                                                                           *
*       refImg          Reference image.                                    *
*       srcImg[256]     16x16 block image to look for.                      *
*       pitch           Width of reference image.                           *
*       h               Horiz. size of search area.                         *
*       v               Vert. size of search area. Must be multiple of 2.   *
*       match[2]        Result:                                             *
*                           match[0] is packed x, y.                        *
*                           match[1] is MAD value.                          *
*                                                                           *
*   DESCRIPTION                                                             *
*       This routine returns the location of the minimum absolute           *
*       difference between a 16x16 search block and some block in a         *
*       (h + 16) x (v + 16) search area. h and v are the sizes of the       *
*       search space for the top left coordinate of the search block.       *
*       refImg points to the top left pixel of the search area.             *
*                                                                           *
*           (0,0)          (h,0)   (h+16,0)                                 *
*             ;--------------+--------;                                     *
*             ;    search    |        ;                                     *
*             ;    space     |        ;                                     *
*             ;              |        ;        search area                  *
*             ;--------------+        ;        within reference image       *
*           (0,v)          (h,v)      ;                                     *
*             ;                       ;                                     *
*             ;-----------------------;                                     *
*         (0, v+16)              (h+16,v+16)                                *
*                                                                           *
*       The location is returned relative to the above coordinate system    *
*       as x and y packed in two 16-bit quantities in a 32-bit word:        *
*                                                                           *
*                   31             16 15             0                      *
*                   +----------------+----------------+                     *
*        match[0]:  |       x        |       y        |                     *
*                   +----------------+----------------+                     *
*                                                                           *
*                   31                               0                      *
*                   +---------------------------------+                     *
*        match[1]:  |   SAD value at location x, y    |                     *
*                   +---------------------------------+                     *
*                                                                           *
*   ASSUMPTIONS                                                             *
*       srcImg and refImg do not alias in memory.                           *
*       The routine is written for Little Endian configuration.             *
*       Two MADS are performed together and hence it is assumed that v,     *
*       the vertical dimension, is a multiple of 2.                         *
*                                                                           *
*   MEMORY NOTE                                                             *
*       No special alignment of the arrays is required.                     *
*                                                                           *
*   TECHNIQUES                                                              *
*       The two outer loops are merged, the two inner loops are merged.     *
*       The inner loop processes 2 lines of 2 search locations in parallel. *
*       The search is performed in top-to-bottom, left-to-right order,      *
*       with the earliest match taking precedence in the case of ties.      *
*       Further use is made of C64x specific instructions such as SUBABS4   *
*       and DOTPU4. The SUBABS4 takes the absolute difference on four 8     *
*       bit quantities packed into a 32 bit word. The DOTPU4 performs four  *
*       8 bit wide multiplies and adds the results together.                *
*                                                                           *
*   C CODE                                                                  *
*                                                                           *
*       void IMG_mad_16x16                                                  *
*       (                                                                   *
*           const unsigned char *restrict refImg,                           *
*           const unsigned char *restrict srcImg,                           *
*           int pitch, int h, int v,                                        *
*           unsigned int *restrict match                                    *
*       )                                                                   *
*       {                                                                   *
*           int i, j, x, y, matx, maty;                                     *
*           unsigned matpos, matval;                                        *
*                                                                           *
*           matval = ~0U;                                                   *
*           matx = maty = 0;                                                *
*                                                                           *
*           for (x = 0; x < h; x++)                                         *
*               for (y = 0; y < v; y++)                                     *
*               {                                                           *
*                   unsigned acc = 0;                                       *
*                                                                           *
*                   for (i = 0; i < 16; i++)                                *
*                       for (j = 0; j < 16; j++)                            *
*                           acc += abs(srcImg[i*16 + j] -                   *
*                                      refImg[(i+y)*pitch + x + j]);        *
*                                                                           *
*                   if (acc < matval)                                       *
*                   {                                                       *
*                       matval = acc;                                       *
*                       matx = x;                                           *
*                       maty = y;                                           *
*                   }                                                       *
*               }                                                           *
*                                                                           *
*           matpos = (0xffff0000 & (matx << 16)) |                          *
*                    (0x0000ffff & maty);                                   *
*           match[0] = matpos;                                              *
*           match[1] = matval;                                              *
*       }                                                                   *
*                                                                           *
*   CYCLES                                                                  *
*       38 * h * v + 20                                                     *
*                                                                           *
*       e.g. h=v= 4:   628 cycles                                           *
*            h=v=32: 38932 cycles                                           *
*                                                                           *
*   CODESIZE                                                                *
*       776 bytes.                                                          *
*                                                                           *
* ------------------------------------------------------------------------- *
*            Copyright (c) 2003 Texas Instruments, Incorporated.            *
*                            All Rights Reserved.
*
* ========================================================================= *

; ================= SYMBOLIC REGISTER ASSIGNMENTS: SETUP ================== ;
        .asg            B15,        B_SP        ; Stack pointer, B datapath
        .asg            B2,         B_csr       ; CSR's value
        .asg            B1,         B_no_gie    ; CSR w/ GIE bit cleared
        .asg            A30,        A_csr       ; Copy of CSR's value
        .asg            B3,         B_ret       ; Return address
; ========================================================================= ;

; ====================== SYMBOLIC REGISTER ASSIGNMENTS =======================
; Several symbolic names below are assigned to the same physical register
; (for example A0 is A_best, A_i, A_sub and A_w; A6 is both A_hl and A_pitch).
; NOTE(review): this presumably relies on those values never being live at
; the same time -- confirm before reassigning or reordering anything.
        .asg            A0,         A_best
        .asg            A0,         A_i
        .asg            A0,         A_sub
        .asg            A0,         A_w
        .asg            A1,         A_vl
        .asg            A16,        A_matpos
        .asg            A17,        A_matval
        .asg            A14,        A_ptch
        .asg            A10,        A_ref_ri0d
        .asg            A11,        A_ref_ri0c
        .asg            A18,        A_2diffi_1c
        .asg            A18,        A_2sumi1d
        .asg            A18,        A_diffi_1d
        .asg            A19,        A_2sumi1c
        .asg            A19,        A_sumi1c
        .asg            A2,         A_vl1
        .asg            A20,        A_2diffi_1d
        .asg            A20,        A_2sumi0d
        .asg            A20,        A_diffi_0d
        .asg            A20,        A_diffi_1c
        .asg            A21,        A_matchi1
        .asg            A22,        A_k_ones
        .asg            A23,        A_pitch_8
        .asg            A24,        A_2sumi0c
        .asg            A24,        A_2sumi0cd
        .asg            A24,        A_srchid
        .asg            A25,        A_2sumi1cd
        .asg            A25,        A_diffi_0c
        .asg            A25,        A_srchic
        .asg            A26,        A_2srchid
        .asg            A26,        A_sumi1
        .asg            A26,        A_sumi1cd
        .asg            A27,        A_2diffi_0c
        .asg            A27,        A_2srchic
        .asg            A27,        A_sumi0cd
        .asg            A27,        A_sumi1d
        .asg            A28,        A_2diffi_0d
        .asg            A28,        A_ref_ri2d
        .asg            A28,        A_sumi0c
        .asg            A29,        A_ref_ri2c
        .asg            A29,        A_sumi0d
        .asg            A3,         A_vptch
        .asg            A30,        A_ref_ri1d
        .asg            A31,        A_2sumi1
        .asg            A31,        A_ref_ri1c
        .asg            A4,         A_ref_img
;       .asg            A5,         A_c40
        .asg            A6,         A_hl
        .asg            A6,         A_pitch
        .asg            A7,         A_ffff
        .asg            B6,         B_h
        .asg            A9,         A_bptch
        .asg            B0,         B_best
        .asg            B1,         B_ml
        .asg            B5,         B_matval
        .asg            B16,        B_ref_ri0b
        .asg            B17,        B_ref_ri0a
        .asg            B18,        B_2sumi0ab
        .asg            B18,        B_2sumi0b
        .asg            B18,        B_sumi0a
        .asg            B19,        B_diffi_0b
        .asg            B19,        B_sumi0b
        .asg            B19,        B_sumi1b
        .asg            B20,        B_src_imgcp
        .asg            B21,        B_2diffi_0b
        .asg            B21,        B_2sumi0a
        .asg            B22,        B_matchi0
        .asg            B23,        B_k_ones
        .asg            B24,        B_2sumi1a
        .asg            B24,        B_srchib
        .asg            B25,        B_2diffi_0a
        .asg            B25,        B_diffi_0a
        .asg            B25,        B_srchia
        .asg            B26,        B_2srchib
        .asg            B26,        B_sumi1ab
        .asg            B27,        B_2srchia
        .asg            B27,        B_2sumi1ab
        .asg            B27,        B_diffi_1b
        .asg            B28,        B_2diffi_1b
        .asg            B28,        B_ref_ri2b
        .asg            B29,        B_ref_ri2a
        .asg            B29,        B_sumi1a
        .asg            B30,        B_ref_ri1b
        .asg            B31,        B_2sumi1b
        .asg            B31,        B_ref_ri1a
        .asg            B4,         B_src_img
        .asg            B8,         B_match
        .asg            A8,         A_v
        .asg            B9,         B_2diffi_1a
        .asg            B9,         B_2sumi0
        .asg            B9,         B_diffi_1a
        .asg            B9,         B_sumi0
        .asg            B9,         B_sumi0ab
; ============================================================================

; ----------------------------------------------------------------------------
; _IMG_mad_16x16
;
; Searches for the 16x16 source block in the reference image and writes the
; best position and its accumulated difference to match[0] and match[1].
;
; Inputs  : A4 = refImg, B4 = srcImg, A6 = pitch, B6 = h, A8 = v, B8 = match
; Outputs : two words stored (non-aligned) at *B_match from A_matval:A_matpos
; Saved   : A14, A11 and A10 are stored on the stack at entry and reloaded
;           before returning.
; Ints    : the GIE bit of CSR is cleared for the duration of the routine
;           and the original CSR value is written back at exit.
;
; Structure
;   setup    - computes pitch_8 = pitch - 8, bptch = 17 * pitch,
;              vptch = (v + 17) * pitch - 1, ml = h * v, and the 0x01010101
;              constants used by DOTPU4 to sum four byte differences.
;   M_LOOP   - runs while ml is non-zero; ml is decreased by 2 per pass and
;              two accumulators (B_matchi0, A_matchi1) are produced per pass.
;   LOOP_X   - software-pipelined kernel; A_i counts down from 7.
;   epilog   - drains the pipeline, rewinds A_ref_img by A_ptch and restarts
;              the source pointer from B_src_img.
;   compare  - each accumulator is compared (unsigned, strictly less) with
;              the running minimum; on a win the position is packed as
;              A_hl : A_vl1 into A_matpos.
;   return   - converts the packed countdown value into y = (v - 1) - vl1.
;
; NOTE(review): several branches use "label + byte offset" targets inside
; kernel execute packets (L_5 + 12, L_6 + 4, L_7 + 8, L_8 + 8).  This
; presumably enters those packets part-way so that the leading accumulate
; instructions are skipped on the first pass -- confirm.  Either way, the
; order of instructions inside those packets and the number of packets
; between a branch and its target must not be changed.
; ----------------------------------------------------------------------------
        .sect ".text:_mad_16x16"
        .global _IMG_mad_16x16
_IMG_mad_16x16:
; parameters: A_refImg, B_srcImg, A_pitch, B_h, A_v, B_match
;             A4,       B4,       A6,      B6,  A8,  B8

        STW     .D2T1   A14, *B_SP--[3]                   ; Save A14
||      MVC     .S2     CSR, B_csr                        ; Remember CSR

        AND             B_csr, -2, B_no_gie               ; Clear GIE
||      STW     .D2T1   A11, *+B_SP[2]                    ; Save A11
||      ADD     .D1     A_v, 17, A_w                      ; w = v + 17
||      MPY     .M1     -16, A_pitch, A_bptch             ; bptch = -16 * pitch

        STW     .D2T1   A10, *+B_SP[1]                    ; Save A10
||      MVC     .S2     B_no_gie, CSR                     ; Disable ints
||      MPY     .M1     A_w, A_pitch, A_vptch             ; vptch = w * pitch
||      SUB     .L1     A_pitch, 8, A_pitch_8             ; pitch - 8
||      MVKL    .S1     0000FFFFh, A_ffff                 ; low half of 0x0000FFFF
        ; ===== Interrupts masked here =====

        MVKL    .S1     01010101h, A_k_ones               ; constant
||      MV      .L1     A_v, A_vl                         ; vl = v
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri0a:B_ref_ri0b ; Load ref

        MVKH    .S1     01010101h, A_k_ones               ; constant
||      ADD     .L1     -1, A_vptch, A_vptch              ; vptch--
||      MPY     .M2X    A_v, B_h, B_ml                    ; ml = h*v
||      LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri0c:A_ref_ri0d ; Load
||      MV      .D2     B_src_img, B_src_imgcp            ; working copy of srcImg

        MVKL    .S2     01010101h, B_k_ones               ; constant
||      MV      .L1     A_v, A_vl1                        ; vl1 = v
||      MVKH    .S1     0000FFFFh, A_ffff                 ; high half of 0x0000FFFF
||      LDNDW   .D2T2   *B_src_imgcp++, B_srchia:B_srchib ; Load src
||      SUB     .D1     A_pitch, A_bptch, A_bptch         ; bptch = pitch - (-16*pitch)

        MVKH    .S2     01010101h, B_k_ones               ; constant
||      OR      .L2     -1, B_matval, B_matval            ; matval = all ones
||      MV      .S1     A_bptch, A_ptch                   ; ptch = bptch
||      ZERO    .L1     A_hl                              ; hl = 0 (A6, was pitch)
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b ; Load ref

; ============================ PIPE LOOP PROLOG ==============================
        LDNDW   .D2T1   *B_src_imgcp++, A_srchic:A_srchid ; Load
||      ZERO    .S1     A_matpos                          ; matpos = 0

        LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,1]

        LDNDW   .D      *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b ;[ 5,1]

        LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri2c:A_ref_ri2d ;[ 6,1]

        SUBABS4 .L2     B_ref_ri1b, B_srchib, B_diffi_1b  ;[ 7,1]
||      LDNDW   .D2T2   *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,1]
||      B       .S2     L_5 + 12                          ; offset target in kernel

        LDNDW   .D2T1   *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,1]
||      B       .S2     L_6 + 4                           ; offset target in kernel

M_LOOP:
        ADD     .D1     -2, A_vl, A_vl                    ; vl-= 2
||      MPY     .M1     0, A_matchi1, A_matchi1           ; matchi1 = 0
||[!A_vl1 ]MV   .S1     A_v, A_vl1                        ;[16,0]
||      SUBABS4 .L2     B_ref_ri1a, B_srchia, B_diffi_1a  ;[ 9,1]
||      SUBABS4 .L1     A_ref_ri1d, A_srchid, A_diffi_1d  ;[ 9,1]
||      LDNDW   .D2T2   *B_src_imgcp++, B_srchia:B_srchib ;[ 1,2]
||      B       .S2     L_7 + 8                           ; offset target in kernel

        MVK     .S1     7, A_i                            ; i = 7
||      MV      .D2     B_ref_ri2b, B_ref_ri0b            ;[10,1]
||      SUBABS4 .L2     B_ref_ri0b, B_srchib, B_diffi_0b  ;[10,1]
||      DOTPU4  .M2     B_diffi_1a, B_k_ones, B_sumi1a    ;[10,1]
||      SUBABS4 .L1     A_ref_ri1c, A_srchic, A_diffi_1c  ;[10,1]
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b;[ 2,2]
||      B       .S2     L_8 + 8                           ;[ 2,2]

        MV      .S2     B_ref_ri2a, B_ref_ri0a            ;[11,1]
||      DOTPU4  .M2     B_diffi_1b, B_k_ones, B_sumi1b    ;[11,1]
||      SUBABS4 .L2     B_ref_ri0a, B_srchia, B_diffi_0a  ;[11,1]
||      SUBABS4 .L1     A_ref_ri0d, A_srchid, A_diffi_0d  ;[11,1]
||      DOTPU4  .M1     A_diffi_1c, A_k_ones, A_sumi1c    ;[11,1]
||      LDNDW   .D2T1   *B_src_imgcp++, A_srchic:A_srchid ;[ 3,2]
||      B       .S1     LOOP_X                            ; enter kernel
||[!A_vl]MV     .D1     A_vptch, A_ptch                   ; ptch = vptch when vl == 0

        SUBABS4 .L1     A_ref_ri0c, A_srchic, A_diffi_0c  ;[12,1]
||      DOTPU4  .M2     B_diffi_0a, B_k_ones, B_sumi0a    ;[12,1]
||      SUBABS4 .L2     B_ref_ri1a, B_2srchia, B_2diffi_0a ;[12,1]
||      DOTPU4  .M1     A_diffi_0d, A_k_ones, A_sumi0d    ;[12,1]
||      LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,2]
||[!A_vl]MV     .S1     A_v, A_vl                         ; vl = v
||      ZERO    .S2     B_matchi0                         ; matchi0 = 0

; ============================ PIPE LOOP KERNEL ==============================
LOOP_X:
  [ A_i]ADD     .S1     A_i, -1, A_i                      ;[17,1]
||      ADD     .S2     B_sumi1a, B_sumi1b, B_sumi1ab     ;[17,1]
||      DOTPU4  .M2     B_2diffi_1b,B_k_ones, B_2sumi1b   ;[17,1]
||      DOTPU4  .M1     A_2diffi_0d,A_k_ones, A_2sumi0d   ;[17,1]
||      ADD     .D1     A_sumi1c, A_sumi1d, A_sumi1cd     ;[17,1]
||      SUBABS4 .L2     B_ref_ri1a, B_srchia, B_diffi_1a  ;[ 9,2]
||      SUBABS4 .L1     A_ref_ri1d, A_srchid, A_diffi_1d  ;[ 9,2]
||      LDNDW   .D2T2   *B_src_imgcp++, B_srchia:B_srchib ;[ 1,3]

        ADD     .S2     B_sumi0a, B_sumi0b, B_sumi0ab     ;[18,1]
||      DOTPU4  .M1     A_2diffi_1d,A_k_ones, A_2sumi1d   ;[18,1]
||      ADD     .S1     A_sumi0c, A_sumi0d, A_sumi0cd     ;[18,1]
||      ADD     .D2     B_ref_ri2b, 0, B_ref_ri0b         ;[10,2]
||      SUBABS4 .L2     B_ref_ri0b, B_srchib, B_diffi_0b  ;[10,2]
||      DOTPU4  .M2     B_diffi_1a, B_k_ones, B_sumi1a    ;[10,2]
||      SUBABS4 .L1     A_ref_ri1c, A_srchic, A_diffi_1c  ;[10,2]
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b;[ 2,3]

  [ A_i]B       .S1     LOOP_X                            ;[19,1]
||      ADD     .D1X    A_sumi1cd, B_sumi1ab, A_sumi1     ;[19,1]
||      ADD     .S2     B_ref_ri2a, 0, B_ref_ri0a         ;[11,2]
||      DOTPU4  .M2     B_diffi_1b, B_k_ones, B_sumi1b    ;[11,2]
||      SUBABS4 .L2     B_ref_ri0a, B_srchia, B_diffi_0a  ;[11,2]
||      SUBABS4 .L1     A_ref_ri0d, A_srchid, A_diffi_0d  ;[11,2]
||      DOTPU4  .M1     A_diffi_1c, A_k_ones, A_sumi1c    ;[11,2]
||      LDNDW   .D2T1   *B_src_imgcp++, A_srchic:A_srchid ;[ 3,3]

        ADD     .S2X    B_sumi0ab, A_sumi0cd, B_sumi0     ;[20,1]
||      ADD     .S1     A_matchi1, A_sumi1, A_matchi1     ;[20,1]
||      SUBABS4 .L1     A_ref_ri0c, A_srchic, A_diffi_0c  ;[12,2]
||      DOTPU4  .M2     B_diffi_0a, B_k_ones, B_sumi0a    ;[12,2]
||      SUBABS4 .L2     B_ref_ri1a, B_2srchia, B_2diffi_0a ;[12,2]
||      DOTPU4  .M1     A_diffi_0d, A_k_ones, A_sumi0d    ;[12,2]
||      LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri1c:A_ref_ri1d ;[ 4,3]

; L_5 .. L_8 are also referenced with byte offsets ("L_5 + 12" etc.) by the
; branches in the prolog, in M_LOOP and in the compare section.
L_5:
        ADD     .D2     B_matchi0, B_sumi0, B_matchi0     ;[21,1]
||      ADD     .S2     B_2sumi1a, B_2sumi1b, B_2sumi1ab  ;[21,1]
||      ADD     .S1     A_2sumi0c, A_2sumi0d, A_2sumi0cd  ;[21,1]
||      SUBABS4 .L2     B_ref_ri2a, B_2srchia, B_2diffi_1a ;[13,2]
||      DOTPU4  .M1     A_diffi_1d, A_k_ones, A_sumi1d    ;[13,2]
||      SUBABS4 .L1     A_ref_ri2d, A_2srchid, A_2diffi_1d ;[13,2]
||      DOTPU4  .M2     B_2diffi_0a,B_k_ones, B_2sumi0a   ;[13,2]
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b;[ 5,3]

L_6:
        ADD     .D2     B_2sumi0a, B_2sumi0b, B_2sumi0ab  ;[22,1]
||      ADD     .S1     A_2sumi1c, A_2sumi1d, A_2sumi1cd  ;[22,1]
||      DOTPU4  .M1     A_diffi_0c, A_k_ones, A_sumi0c    ;[14,2]
||      SUBABS4 .L1     A_ref_ri2c, A_2srchic, A_2diffi_1c ;[14,2]
||      DOTPU4  .M2     B_diffi_0b, B_k_ones, B_sumi0b    ;[14,2]
||      SUBABS4 .L2     B_ref_ri1b, B_2srchib, B_2diffi_0b ;[14,2]
||      LDNDW   .D      *A_ref_img++(A_pitch_8), A_ref_ri2c:A_ref_ri2d ;[ 6,3]

L_7:
        ADD     .S1X    A_2sumi1cd, B_2sumi1ab, A_2sumi1  ;[23,1]
||      ADD     .S2X    B_2sumi0ab, A_2sumi0cd, B_2sumi0  ;[23,1]
||      ADD     .D1     A_ref_ri2c, 0, A_ref_ri0c         ;[15,2]
||      DOTPU4  .M1     A_2diffi_1c,A_k_ones, A_2sumi1c   ;[15,2]
||      SUBABS4 .L1     A_ref_ri1c, A_2srchic, A_2diffi_0c ;[15,2]
||      DOTPU4  .M2     B_2diffi_1a,B_k_ones, B_2sumi1a   ;[15,2]
||      SUBABS4 .L2     B_ref_ri1b, B_srchib, B_diffi_1b  ;[ 7,3]
||      LDNDW   .D2T2   *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,3]

L_8:
        ADD     .S2     B_matchi0, B_2sumi0, B_matchi0    ;[24,1]
||      ADD     .S1     A_matchi1, A_2sumi1, A_matchi1    ;[24,1]
||      ADD     .D1     A_ref_ri2d, 0, A_ref_ri0d         ;[16,2]
||      DOTPU4  .M2     B_2diffi_0b,B_k_ones, B_2sumi0b   ;[16,2]
||      SUBABS4 .L2     B_ref_ri2b, B_2srchib, B_2diffi_1b ;[16,2]
||      SUBABS4 .L1     A_ref_ri1d, A_2srchid, A_2diffi_0d ;[16,2]
||      DOTPU4  .M1     A_2diffi_0c,A_k_ones, A_2sumi0c   ;[16,2]
||      LDNDW   .D2T1   *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,3]

; ============================ PIPE LOOP EPILOG ==============================
; EPILOG:
; Drains the remaining accumulate stages.  In parallel it rewinds the
; reference pointer by A_ptch, restarts the source pointer and issues the
; first loads for the next pass of M_LOOP.
        ADD     .S2     B_sumi1a, B_sumi1b, B_sumi1ab     ;[17,3]
||      DOTPU4  .M2     B_2diffi_1b,B_k_ones, B_2sumi1b   ;[17,3]
||      DOTPU4  .M1     A_2diffi_0d,A_k_ones, A_2sumi0d   ;[17,3]
||      ADD     .D1     A_sumi1c, A_sumi1d, A_sumi1cd     ;[17,3]
||      SUB     .S1     A_ref_img, A_ptch, A_ref_img      ; ref_img -= ptch

        ADD     .S2     B_sumi0a, B_sumi0b, B_sumi0ab     ;[18,3]
||      DOTPU4  .M1     A_2diffi_1d,A_k_ones, A_2sumi1d   ;[18,3]
||      ADD     .S1     A_sumi0c, A_sumi0d, A_sumi0cd     ;[18,3]
||      MV      .L2     B_src_img, B_src_imgcp            ; restart src pointer

        ADD     .S1X    A_sumi1cd, B_sumi1ab, A_sumi1     ;[19,3]
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri0a:B_ref_ri0b; Load ref

        ADD     .S2X    B_sumi0ab, A_sumi0cd, B_sumi0     ;[20,3]
||      ADD     .S1     A_matchi1, A_sumi1, A_matchi1     ;[20,3]
||      LDNDW   .D      *A_ref_img++(A_pitch_8), A_ref_ri0c:A_ref_ri0d; Load ref

        ADD     .L2     B_matchi0, B_sumi0, B_matchi0     ;[21,3]
||      ADD     .S2     B_2sumi1a, B_2sumi1b, B_2sumi1ab  ;[21,3]
||      ADD     .S1     A_2sumi0c, A_2sumi0d, A_2sumi0cd  ;[21,3]
||      LDNDW   .D2T2   *B_src_imgcp++, B_srchia:B_srchib ; Load src

        ADD     .S2     B_2sumi0a, B_2sumi0b, B_2sumi0ab  ;[22,3]
||      ADD     .S1     A_2sumi1c, A_2sumi1d, A_2sumi1cd  ;[22,3]
||  [B_ml]ADD   .L2     B_ml, -2, B_ml                    ; ml -= 2 while non-zero
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri1a:B_ref_ri1b; Load ref

        ADD     .S1X    A_2sumi1cd, B_2sumi1ab, A_2sumi1  ;[23,3]
||      ADD     .L2X    B_2sumi0ab, A_2sumi0cd, B_2sumi0  ;[23,3]
||[ B_ml]B      .S2     M_LOOP                            ; next pass if ml != 0
||      LDNDW   .D2T1   *B_src_imgcp++, A_srchic:A_srchid ; Load

        ADD     .L2     B_matchi0, B_2sumi0, B_matchi0    ;[24,3]
||      ADD     .S1     A_matchi1, A_2sumi1, A_matchi1    ;[24,3]
||      LDNDW   .D      *A_ref_img++(A_pitch_8), A_ref_ri1c:A_ref_ri1d ;[ 4,1]
||      MV      .L1X    B_matval, A_matval                ; A-side copy of matval

; ============================================================================
; END:
; Compares the two accumulators against the running minimum.  matchi0 is
; tested first, then matchi1; both tests are unsigned "less than", so an
; equal value does not replace the recorded minimum.
        CMPLTU  .L2     B_matchi0, B_matval, B_best       ; best = matchi0 < matval
||      ADD     .L1     -1, A_vl1, A_vl1                  ; vl1--
||      MV      .S1     A_bptch, A_ptch                   ; ptch = bptch
||      LDNDW   .D      *A_ref_img++(8), B_ref_ri2a:B_ref_ri2b ;[ 5,1]

  [ B_best] MV  .L2     B_matchi0, B_matval               ; matval = matchi0
||[ B_best] MV  .L1X    B_matchi0, A_matval               ; A-side copy as well
||[ B_best] PACK2 .S1   A_hl, A_vl1, A_matpos             ; matpos = hl : vl1
||      LDNDW   .D      *A_ref_img++(A_pitch_8),A_ref_ri2c:A_ref_ri2d ;[ 6,1]

        ADD     .S1     -1, A_vl1, A_vl1                  ; vl1--
||      CMPLTU  .L1     A_matchi1, A_matval, A_best       ; XP stall
||      SUBABS4 .L2     B_ref_ri1b, B_srchib, B_diffi_1b  ;[ 7,1]
||      LDNDW   .D2T2   *B_src_imgcp++, B_2srchia:B_2srchib ;[ 7,1]
||[ B_ml]B      .S2     L_5 + 12                          ; offset target in kernel

  [ A_best] PACK2 .L1   A_hl, A_vl1, A_matpos             ; matpos = hl : vl1
||[!A_vl1 ] ADD .D1     A_hl, 1, A_hl                     ; hl++ when vl1 == 0
||[ A_best] MV  .S2X    A_matchi1, B_matval               ; matval = matchi1
||      LDNDW   .D2T1   *B_src_imgcp++, A_2srchic:A_2srchid ;[ 8,1]
||[ B_ml] B     .S1     L_6 + 4                           ; offset target in kernel
        ;==== Branch occurs

; ========================================================================= ;
; Exit: reload the saved registers, convert the packed position and store
; the result.  The packets after RET execute before the return branch is
; taken, as marked by the "Branch Occurs" note at the end.
        LDW     .D2T1   *+B_SP[1], A10                    ; Restore A10

        RET     .S2     B_ret                             ; Return to caller
||      LDW     .D2T1   *+B_SP[2], A11                    ; Restore A11

        MV      .S1     A_v, A_sub                        ; sub = v
||      AND     .L1     A_matpos, A_ffff, A_vl            ; vl = matpos & 0xFFFF
||      LDW     .D2T1   *++B_SP[3], A14                   ; Restore A14, pop frame

        SUB     .L1     A_sub, 1, A_sub                   ; sub = v - 1
||      SHL     .S1     A_ffff, 16, A_ffff                ; mask = 0xFFFF0000

        SUB     .S1     A_sub, A_vl, A_vl                 ; y = (v - 1) - vl
||      AND     .L1     A_matpos, A_ffff, A_matpos        ; keep upper half (x)

        ADD     .S1     A_matpos, A_vl, A_matpos          ; matpos = x : y
||      MV      .L1X    B_matval, A_matval                ; final minimum value

        STNDW   .D2T1   A_matval:A_matpos, *B_match       ; store both result words
||      MVC     .S2     B_csr, CSR                        ; Restore CSR
        ; ===== Interruptibility state restored here =====
        ; ===== Branch Occurs =====

* ========================================================================= *
*   End of file: img_mad_16x16.asm                                          *
* ------------------------------------------------------------------------- *
*            Copyright (c) 2003 Texas Instruments, Incorporated.            *
*                            All Rights Reserved.                           *
* ========================================================================= *