You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

544 lines
31 KiB

;* ======================================================================== *;
;* TEXAS INSTRUMENTS, INC. *;
;* *;
;* IMGLIB DSP Image/Video Processing Library *;
;* *;
;* Release: Revision 1.04b *;
;* CVS Revision: 1.5 Sun Sep 29 03:32:19 2002 (UTC) *;
;* Snapshot date: 23-Oct-2003 *;
;* *;
;* This library contains proprietary intellectual property of Texas *;
;* Instruments, Inc. The library and its source code are protected by *;
;* various copyrights, and portions may also be protected by patents or *;
;* other legal protections. *;
;* *;
;* This software is licensed for use with Texas Instruments TMS320 *;
;* family DSPs. This license was provided to you prior to installing *;
;* the software. You may review this license by consulting the file *;
;* TI_license.PDF which accompanies the files in this library. *;
;* ------------------------------------------------------------------------ *;
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
;* All Rights Reserved. *;
;* ======================================================================== *;
;* ======================================================================== *;
;* Assembler compatibility shim for assembling 4.30 and later code on *;
;* tools prior to 4.30. *;
;* ======================================================================== *;
; ---------------------------------------------------------------------------
; Step 1: capture the assembler version in $asmver.  Tools that do not
; define .ASSEMBLER_VERSION are treated as version 0.
; ---------------------------------------------------------------------------
        .if     $isdefed(".ASSEMBLER_VERSION")
        .asg    .ASSEMBLER_VERSION, $asmver
        .else
        .asg    0, $asmver
        .endif

; ---------------------------------------------------------------------------
; Step 2: for tools older than 4.30, map the newer call/return mnemonics
; onto plain branches and turn the .asmfunc/.endasmfunc markers into empty
; strings so that code written for 4.30+ still assembles.
; ---------------------------------------------------------------------------
        .if     ($asmver < 430)
        .asg    B, CALL                 ; call           -> B
        .asg    B, RET                  ; return         -> B
        .asg    B, CALLRET              ; chained call   -> B
        .if     .TMS320C6400
        .asg    BNOP, CALLNOP           ; C64x only: call with NOPs        -> BNOP
        .asg    BNOP, RETNOP            ; C64x only: return with NOPs      -> BNOP
        .asg    BNOP, CRNOP             ; C64x only: chained call w/ NOPs  -> BNOP
        .endif
        .asg    , .asmfunc              ; function-start marker -> empty
        .asg    , .endasmfunc           ; function-end marker   -> empty
        .endif
;* ======================================================================== *;
;* End of assembler compatibility shim. *;
;* ======================================================================== *;
* ========================================================================= *
* NAME *
* IMG_conv_3x3 -- 3x3 convolution *
* *
* REVISION DATE *
* 19-May-2002 *
* *
* USAGE *
* This routine has the following C prototype: *
* *
* void IMG_conv_3x3 ( const unsigned char *restrict inptr, *
* unsigned char *restrict outptr, *
* int x_dim, *
* const char *restrict mask, *
* int shift) *
* *
* The convolution routine accepts three rows of 'x_dim' input points *
* and performs some operation on each. A total of 'x_dim' outputs *
* are written to the output array. The 'mask' array has the 3 by 3 *
* array of coefficients. *
* *
* DESCRIPTION *
* *
* The convolution kernel accepts three rows of 'x_dim' input points *
* and produces one output row of 'x_dim' points using the input mask *
* of 3 by 3. The user defined shift value is used to shift the convo- *
* lution value, down to the byte range. The convolution sum is also *
* range limited to 0..255. The shift amount is non-zero for low pass *
* filters, and zero for high pass and sharpening filters. *
* *
* *
* The following is the C code model for the algorithm: *
* *
* *
* void IMG_conv_3x3( const unsigned char *restrict inptr, *
* unsigned char *restrict outptr, *
* int x_dim, *
* const char *restrict mask, *
* int shift) *
* { *
* const unsigned char *IN1,*IN2,*IN3; *
* unsigned char *OUT; *
* *
* short pix10, pix20, pix30; *
* short mask10, mask20, mask30; *
* *
* int sum, sum00, sum11; *
* int i; *
* int sum22, j; *
* *
* IN1 = inptr; *
* IN2 = IN1 + x_dim; *
* IN3 = IN2 + x_dim; *
* OUT = outptr; *
* *
* for (j = 0; j < x_dim ; j++) *
* { *
* sum = 0; *
* *
* for (i = 0; i < 3; i++) *
* { *
* pix10 = IN1[i]; *
* pix20 = IN2[i]; *
* pix30 = IN3[i]; *
* *
* mask10 = mask[i]; *
* mask20 = mask[i + 3]; *
* mask30 = mask[i + 6]; *
* *
* sum00 = pix10 * mask10; *
* sum11 = pix20 * mask20; *
* sum22 = pix30 * mask30; *
* *
* sum += sum00 + sum11+ sum22; *
* } *
* *
* IN1++; *
* IN2++; *
* IN3++; *
* *
* sum = (sum >> shift); *
* if ( sum < 0 ) sum = 0; *
* if ( sum > 255 ) sum = 255; *
* *OUT++ = sum; *
* } *
* } *
* *
* *
* TECHNIQUES *
* The inner loop that computes the convolution sum is completely *
* unrolled and 8 output pixels are computed together. The mask *
* values are loaded and packed as double words. *
* *
* ASSUMPTIONS *
* x_dim must be a multiple of 8. *
* *
* NOTES *
* None *
* *
* CYCLES *
* 33 + x_dim/8 * 9 *
* *
* CODESIZE *
* 724 bytes *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; ===========================================================================
; _IMG_conv_3x3 -- software-pipelined 3x3 convolution, 8 output pixels per
; kernel pass (C prototype and C model are in the file header).
;
; Entry registers (see the comment on the entry label):
;   A4 = inptr   B4 = outptr   A6 = x_dim   B6 = mask   A8 = shift
; Return: branches to B3 (RET .S2 B3 in the epilog); no value is returned.
;
; Stack frame: four words are reserved below the entry SP (B15).  The
; caller's CSR, A10, A11 and B10 are stored there and reloaded in the epilog.
;   entry SP + 0   A11        (STW *B_SP--[4]    / LDW *++B_SP[4])
;   entry SP - 4   A10        (STW *-A_SP[1]     / LDW *+B_SP[3])
;   entry SP - 8   saved CSR  (STW *+B_SP[2]     / LDW *+B_SP[2])
;   entry SP - 12  B10        (STW *+B_SP[1]     / LDW *+A_SP[1])
;
; Interrupts: CSR is rewritten with bit 0 cleared (B_no_gie) right after
; entry and the saved CSR is written back in the epilog, so the whole
; routine runs with interrupts masked.
;
; Data flow per kernel pass:
;   row 0 (A_IN1) -> A_line00..05 -> x h00/h01/h02 -> A_prodA0..11
;   row 1 (B_IN2) -> *_line10..15 -> x h10/h11/h12 -> B_prodB0..11
;   row 2 (B_IN3) -> B_line20..25 -> x h20/h21/h22 -> *_prodC0..11
;   ADD2 chains sum0..sum3 -> pixel01/23/45/67 -> SHR2 by A_shift ->
;   SPACKU4 -> A_outword1:A_outword0 -> STNDW to *A_OPTR++(8).
;
; NOTE(review): the sums are accumulated with ADD2, i.e. in packed 16-bit
; lanes per the C64x ISA, whereas the C model uses 'int sum'.  Presumably the
; nine-term sum must fit in 16 bits for the results to match -- confirm.
; NOTE(review): row loads for later iterations are issued in every kernel
; pass and there is no load-free drain, so bytes beyond the nominal input
; rows appear to be read (and discarded) -- confirm callers tolerate this.
;
; Changes in this revision:
;   * Epilog: MVC B_csr, CSR now issues with the final SPACKU4 instead of one
;     cycle earlier (load-use latency of the CSR reload, see epilog comment).
;     One NOP was added to keep the cycle count; code size presumably grows
;     by one 4-byte opcode relative to the CODESIZE figure in the header.
;   * Removed an exact duplicate of ".asg B4, B_h22word".
;   * Restored indentation so mnemonics/directives no longer start in
;     column 1 (labels stay in column 1).
; ===========================================================================
        .sect   ".text:_conv_3x3"
        .global _IMG_conv_3x3
_IMG_conv_3x3: ; A_INPTR, B_OUTPTR, A_inputcols, B_mask, A_shift
; A4, B4, A6, B6, A8
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Many symbols below name the same physical register (for example A31 is
; A_mask, A_mask00, A_mask02 and A_prodA1; B9 is B_count, B_csr and
; B_no_gie).  The code relies on its cycle-exact schedule so that each value
; is consumed before an aliasing symbol is written; do not reorder
; instructions without re-checking these lifetimes.
        .asg    A0, A_h01word
        .asg    A0, A_mask01
        .asg    A0, A_mask20
        .asg    A1, A_h20word
        .asg    A10, A_line00
        .asg    A11, A_line01
        .asg    A16, A_pixel45
        .asg    A16, A_prodA6
        .asg    A16, A_sum1d
        .asg    A16, A_sum2g
        .asg    A16, A_SP
        .asg    A17, A_pixel67a
        .asg    A17, A_prodA7
        .asg    A17, A_sum3b
        .asg    A18, A_line04
        .asg    A18, A_line14
        .asg    A18, A_prodC8
        .asg    A18, A_sum0g
        .asg    A18, A_sum2h
        .asg    A19, A_line05
        .asg    A19, A_line15
        .asg    A19, A_prodC9
        .asg    A19, A_sum0e
        .asg    A19, A_sum1a
        .asg    A2, A_h02word
        .asg    A20, A_line02
        .asg    A20, A_pixel23a
        .asg    A20, A_prodA4
        .asg    A21, A_line03
        .asg    A21, A_prodA5
        .asg    A22, A_outword0
        .asg    A22, A_pixel01a
        .asg    A22, A_prodA10
        .asg    A22, A_prodA8
        .asg    A23, A_outword1
        .asg    A23, A_prodA11
        .asg    A23, A_prodA9
        .asg    A23, A_sum0f
        .asg    A24, A_pixel45a
        .asg    A24, A_prodC2
        .asg    A25, A_prodC3
        .asg    A26, A_prodC0
        .asg    A26, A_sum3a
        .asg    A27, A_prodC1
        .asg    A28, A_line13
        .asg    A28, A_prodA2
        .asg    A29, A_prodA3
        .asg    A3, A_h22word
        .asg    A30, A_prodA0
        .asg    A30, A_sum1c
        .asg    A31, A_mask
        .asg    A31, A_mask00
        .asg    A31, A_mask02
        .asg    A31, A_prodA1
        .asg    A4, A_INPTR
        .asg    A4, A_sum0h
        .asg    A4, A_sum1b
        .asg    A4, A_sum2f
        .asg    A4, A_sum3c
        .asg    A5, A_IN1
        .asg    A6, A_inputcols
        .asg    A6, A_pixel01
        .asg    A7, A_OPTR
        .asg    A8, A_shift
        .asg    A9, A_h00word
        .asg    B0, B_h21word
        .asg    B0, B_mask21
        .asg    B0, B_roundval1
        .asg    B1, B_h12word
        .asg    B1, B_mask12
        .asg    B10, B_h10word
        .asg    B15, B_SP
        .asg    B16, B_line22
        .asg    B16, B_prodC4
        .asg    B16, B_sum1g
        .asg    B16, B_sum2a
        .asg    B16, B_sum3e
        .asg    B17, B_line23
        .asg    B17, B_prodC5
        .asg    B17, B_sum0b
        .asg    B17, B_sum0d
        .asg    B18, B_line12
        .asg    B18, B_line24
        .asg    B18, B_prodB4
        .asg    B19, B_line25
        .asg    B19, B_prodB5
        .asg    B19, B_sum0a
        .asg    B2, B_h11word
        .asg    B2, B_mask11
        .asg    B20, B_line20
        .asg    B20, B_prodC10
        .asg    B20, B_sum0c
        .asg    B21, B_line21
        .asg    B21, B_prodC11
        .asg    B22, B_line10
        .asg    B22, B_sum1h
        .asg    B23, B_line11
        .asg    B23, B_sum2d
        .asg    B23, B_sum2e
        .asg    B23, B_sum3f
        .asg    B24, B_prodB0
        .asg    B24, B_prodC6
        .asg    B24, B_sum1f
        .asg    B24, B_sum3d
        .asg    B25, B_prodB1
        .asg    B25, B_prodC7
        .asg    B26, B_prodB8
        .asg    B26, B_sum2c
        .asg    B27, B_pixel23
        .asg    B27, B_prodB9
        .asg    B28, B_prodB2
        .asg    B28, B_prodB6
        .asg    B28, B_sum1e
        .asg    B29, B_prodB3
        .asg    B29, B_prodB7
        .asg    B30, B_inputcols
        .asg    B30, B_prodB10
        .asg    B31, B_mask10
        .asg    B31, B_mask22
        .asg    B31, B_prodB11
        .asg    B31, B_sum2b
        .asg    B4, B_h22word
        .asg    B4, B_OUTPTR
        .asg    B5, B_roundval
        .asg    B6, B_count1
        .asg    B6, B_mask
        .asg    B6, B_pixel67
        .asg    B6, B_sum3g
        .asg    B6, B_sum3h
        .asg    B7, B_IN3
        .asg    B8, B_IN2
        .asg    B9, B_count
        .asg    B9, B_csr
        .asg    B9, B_no_gie
* ========================================================================= *
; ---- Entry: read CSR, open the 4-word frame, copy arguments that live in
; ---- registers about to be reused (A4 -> A_IN1, A6 -> B_inputcols).
; A_SP is written in parallel with the post-decrement of B_SP; the
; *-A_SP[1] store below and the *+B_SP[3] reload in the epilog only line up
; if A_SP holds the entry SP.
        MVC     .S2     CSR, B_csr
||      STW     .D2T1   A11, *B_SP--[4]
||      MV      .L1X    B_SP, A_SP
||      MV      .S1     A_INPTR, A_IN1 ;[10,0]
||      MV      .L2X    A_inputcols, B_inputcols ;[ 7,0]
; Build the interrupts-off CSR image (AND with -2 clears bit 0), save the
; caller's CSR and A10, and form the row-1 pointer: IN2 = IN1 + x_dim.
        AND     .L2     B_csr, -2, B_no_gie
||      STW     .D2T2   B_csr, *+B_SP[2]
||      STW     .D1T1   A10, *-A_SP[1]
||      ADD     .S2X    A_IN1, B_inputcols,B_IN2 ;[12,0]
; Mask interrupts, save B10, and form the row-2 pointer: IN3 = IN2 + x_dim.
        MVC     .S2     B_no_gie, CSR
||      STW     .D2T2   B10, *+B_SP[1]
||      ADD     .L2     B_IN2, B_inputcols,B_IN3 ;[13,0]
; ---- Load the nine mask coefficients as bytes (indices 8,7,6,5,2,4,1,3,0).
; The A side uses a copy of the mask pointer so both .D units can load.
        LDB     .D2T2   *+B_mask[8], B_mask22 ;[ 3,0]
||      MV      .L1X    B_mask, A_mask ;[ 3,0]
        LDB     .D2T2   *+B_mask[7], B_mask21 ;[ 4,0]
||      LDB     .D1T1   *+A_mask[6], A_mask20 ;[ 4,0]
        LDB     .D2T2   *+B_mask[5], B_mask12 ;[ 5,0]
||      LDB     .D1T1   *+A_mask[2], A_mask02 ;[ 5,0]
; B_roundval1 is the constant 0, so the "rounding" term added to the sums
; is zero (matches the C model, which does not round).  B0 is also the
; destination of the mask[7] load still in flight.
        MVK     .S2     0, B_roundval1 ;[ 6,0]
||      LDB     .D2T2   *+B_mask[4], B_mask11 ;[ 6,0]
||      LDB     .D1T1   *+A_mask[1], A_mask01 ;[ 6,0]
        PACK2   .L2     B_roundval1,B_roundval1,B_roundval ;[ 7,0]
||      LDB     .D2T2   *+B_mask[3], B_mask10 ;[ 7,0]
||      LDB     .D1T1   *+A_mask[0], A_mask00 ;[ 7,0]
; ---- From here the first iteration's row loads are interleaved with the
; ---- coefficient packing.  Each coefficient goes through PACK2 x,x then
; ---- PACKL4 w,w, which (per the C64x definitions of those instructions)
; ---- replicates its low byte into all four byte lanes of the h??word.
; B_count1 = x_dim >> 3 (number of 8-pixel groups).  B4 is both B_OUTPTR and
; B_h22word: the output pointer is copied to A_OPTR in the same packet that
; overwrites B4.
        SHRU    .S2     B_inputcols,3, B_count1 ;[ 8,0]
||      PACK2   .L2     B_mask22, B_mask22, B_h22word ;[ 8,0]
||      MV      .D1X    B_OUTPTR, A_OPTR ;[12,0]
||      LDNDW   .D2T2   *B_IN3++(1), B_line21:B_line20 ;[ 1,1]
        PACKL4  .L2     B_h22word, B_h22word, B_h22word ;[ 9,0]
||      PACK2   .S2     B_mask21, B_mask21, B_h21word ;[ 9,0]
||      PACK2   .L1     A_mask20, A_mask20, A_h20word ;[ 9,0]
||      LDNDW   .D2T2   *B_IN2++(2), B_line11:B_line10 ;[ 2,1]
        PACKL4  .L2     B_h21word, B_h21word, B_h21word ;[10,0]
||      PACKL4  .L1     A_h20word, A_h20word, A_h20word ;[10,0]
||      PACK2   .S2     B_mask12, B_mask12, B_h12word ;[10,0]
||      PACK2   .S1     A_mask02, A_mask02, A_h02word ;[10,0]
||      LDNDW   .D2T1   *B_IN2++(6), A_line15:A_line14 ;[ 3,1]
        PACKL4  .L2     B_h12word, B_h12word, B_h12word ;[11,0]
||      PACK2   .S2     B_mask11, B_mask11, B_h11word ;[11,0]
||      PACKL4  .L1     A_h02word, A_h02word, A_h02word ;[11,0]
||      PACK2   .S1     A_mask01, A_mask01, A_h01word ;[11,0]
||      LDNDW   .D2T2   *B_IN3++(1), B_line23:B_line22 ;[ 4,1]
        PACKL4  .L2     B_h11word, B_h11word, B_h11word ;[12,0]
||      PACK2   .S2     B_mask10, B_mask10, B_h10word ;[12,0]
||      PACKL4  .L1     A_h01word, A_h01word, A_h01word ;[12,0]
||      PACK2   .S1     A_mask00, A_mask00, A_h00word ;[12,0]
||      LDNDW   .D1T1   *A_IN1++(1), A_line01:A_line00 ;[ 5,1]
; Loop counter for BDEC: B_count = x_dim/8 - 2.  The h22 coefficient word is
; also copied to the A side (A_h22word) for the cross-path multiply at L8.
        SUB     .S2     B_count1, 2, B_count ;[13,0]
||      MV      .S1X    B_h22word, A_h22word ;[13,0]
||      PACKL4  .L2     B_h10word, B_h10word, B_h10word ;[13,0]
||      PACKL4  .L1     A_h00word, A_h00word, A_h00word ;[13,0]
||      LDNDW   .D1T1   *A_IN1++(1), A_line03:A_line02 ;[ 6,1]
* =========================== PIPE LOOP PROLOG ============================ *
; The trailing ";[c,i]" tags appear to give (cycle within one iteration's
; schedule, iteration number) for each instruction.
;
; The prolog is collapsed into the kernel: instead of duplicating kernel
; packets, the B instructions below each target one kernel packet, and each
; takes effect after its branch delay slots.  Kernel instructions that
; belong to not-yet-started iterations execute on stale registers; the two
; L8 entries use byte offsets to skip the leading instructions of that
; packet so no store is issued before the first result exists
; ("L8 + 12" skips its first three instructions, "L8 + 4" skips only the
; STNDW -- assuming 4-byte opcodes).
;
; Each row pointer advances by 8 in total per pass (1+1+6 for rows 0 and 2,
; 2+6 for row 1) -- presumably bytes, matching the 8 pixels produced.
; The +3 / +4 shifted views of row 1 (line12, line13) are derived with
; SHRMB / SHLMB from the loaded words rather than loaded separately.
        MPYUS4  .M2     B_line11, B_h10word, B_prodB3:B_prodB2 ;[ 7,1]
        MPYSU4  .M1X    A_h20word, B_line20, A_prodC1:A_prodC0 ;[ 8,1]
||      MPYSU4  .M2X    B_h12word, A_line15, B_prodB11:B_prodB10 ;[ 8,1]
||      SHLMB   .S1     A_line14, A_line15, A_line13 ;[ 8,1]
||      SHRMB   .S2     B_line11, B_line10, B_line12 ;[ 8,1]
||      LDNDW   .D1T1   *A_IN1++(6), A_line05:A_line04 ;[ 8,1]
; -
        MPYUS4  .M2     B_line23, B_h21word, B_prodC7:B_prodC6 ;[ 9,1]
||      MPYSU4  .M1X    A_h20word, B_line21, A_prodC3:A_prodC2 ;[ 9,1]
||      LDNDW   .D2T2   *B_IN3++(6), B_line25:B_line24 ;[ 9,1]
||      B       .S1     L7
        MPYSU4  .M2X    B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,1]
||      MPYUS4  .M1     A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,1]
||      LDNDW   .D2T2   *B_IN3++(1), B_line21:B_line20 ;[ 1,2]
||      B       .S1     L8 + 12
        MPYUS4  .M2     B_line22, B_h21word, B_prodC5:B_prodC4 ;[11,1]
||      MPYUS4  .M1     A_line01, A_h00word, A_prodA3:A_prodA2 ;[11,1]
||      LDNDW   .D2T2   *B_IN2++(2), B_line11:B_line10 ;[ 2,2]
||      B       .S1     L9
        MPYUS4  .M2     B_line10, B_h10word, B_prodB1:B_prodB0 ;[12,1]
||      MPYUS4  .M1     A_line02, A_h01word, A_prodA5:A_prodA4 ;[12,1]
||      LDNDW   .D2T1   *B_IN2++(6), A_line15:A_line14 ;[ 3,2]
||      B       .S1     L1
        ADD2    .L2     B_roundval, B_prodC6, B_sum2a ;[13,1]
||      MPYUS4  .M2     B_line12, B_h11word, B_prodB5:B_prodB4 ;[13,1]
||      MPYUS4  .M1     A_line04, A_h02word, A_prodA9:A_prodA8 ;[13,1]
||      LDNDW   .D2T2   *B_IN3++(1), B_line23:B_line22 ;[ 4,2]
||      B       .S1     L2_P
        ADD2    .S2     B_prodB11, B_roundval, B_sum3d ;[14,1]
||      ADD2    .D2     B_sum2a, B_prodB2, B_sum2b ;[14,1]
||      ADD2    .L2     B_prodB9, B_roundval, B_sum1e ;[14,1]
||      MPYUS4  .M2X    A_line13, B_h11word, B_prodB7:B_prodB6 ;[14,1]
||      MPYUS4  .M1     A_line03, A_h01word, A_prodA7:A_prodA6 ;[14,1]
||      LDNDW   .D1T1   *A_IN1++(1), A_line01:A_line00 ;[ 5,2]
||      B       .S1     L3
; L7, L8+12, L9, L1
; L2_P is the prolog's copy of the second kernel packet: same instructions
; minus the SHR2 of B_pixel67 (no finished pixel yet) and with .S1 used for
; the last steering branch instead.
L2_P:   ADD2    .S2     B_sum3f, B_prodB7, B_sum3g ;[19,1]
||      ADD2    .D1     A_prodA11, A_prodA7, A_sum3a ;[19,1]
||      ADD2    .L2     B_sum2c, B_prodB10, B_sum2d ;[19,1]
||      ADD2    .L1     A_sum0e, A_prodC0, A_sum0f ;[19,1]
||      MPYSU4  .M2X    B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,2]
||      MPYUS4  .M1     A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,2]
||      LDNDW   .D2T2   *B_IN3++(1), B_line21:B_line20 ;[ 1,3]
||      B       .S1     L8 + 4
; L3, L4, L5, L6, L7, L8+4, L9
* =========================== PIPE LOOP KERNEL ============================ *
; Nine execute packets per pass (L1 .. L9), each pass retiring one group of
; 8 output pixels while up to four iterations are in flight.
loop:
L1:     ADD2    .D1     A_sum2f, A_prodA6, A_sum2g ;[18,2]
||      ADD2    .L2     B_sum2b, B_prodB6, B_sum2c ;[18,2]
||      ADD2    .L1     A_sum1a, A_prodC1, A_sum1b ;[18,2]
||      ADD2    .S1     A_prodA4, A_prodA8, A_sum0e ;[18,2]
||      ADD2    .S2     B_sum0c, B_prodB8, B_sum0d ;[18,2]
||      MPYUS4  .M2     B_line23, B_h21word, B_prodC7:B_prodC6 ;[ 9,3]
||      MPYSU4  .M1X    A_h20word, B_line21, A_prodC3:A_prodC2 ;[ 9,3]
||      LDNDW   .D2T2   *B_IN3++(6), B_line25:B_line24 ;[ 9,3]
; Packed 16-bit sums for output pixels 6,7 are scaled by 'shift'.
        SHR2    .S1X    B_pixel67, A_shift, A_pixel67a ;[28,1]
||      ADD2    .S2     B_sum3f, B_prodB7, B_sum3g ;[19,2]
||      ADD2    .D1     A_prodA11, A_prodA7, A_sum3a ;[19,2]
||      ADD2    .L2     B_sum2c, B_prodB10, B_sum2d ;[19,2]
||      ADD2    .L1     A_sum0e, A_prodC0, A_sum0f ;[19,2]
||      MPYSU4  .M2X    B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,3]
||      MPYUS4  .M1     A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,3]
||      LDNDW   .D2T2   *B_IN3++(1), B_line21:B_line20 ;[ 1,4]
; SPACKU4 packs four 16-bit results into four unsigned bytes with
; saturation; this is where the C model's clamp to 0..255 is realised.
L3:     SPACKU4 .S1     A_pixel67a, A_pixel45a, A_outword1 ;[29,1]
||      ADD2    .L2     B_sum3g, B_prodC11, B_sum3h ;[20,2]
||      ADD2    .L1     A_sum3a, A_prodC3, A_sum3b ;[20,2]
||      ADD2    .S2     B_sum2d, B_prodC10, B_sum2e ;[20,2]
||      ADD2    .D1     A_sum0f, A_prodC8, A_sum0g ;[20,2]
||      MPYUS4  .M2     B_line22, B_h21word, B_prodC5:B_prodC4 ;[11,3]
||      MPYUS4  .M1     A_line01, A_h00word, A_prodA3:A_prodA2 ;[11,3]
||      LDNDW   .D2T2   *B_IN2++(2), B_line11:B_line10 ;[ 2,4]
; Loop-back branch with counter decrement.  It is issued in the 4th packet
; so that the five packets that follow fill its delay slots and the branch
; lands on 'loop' right after L9.
        BDEC    .S2     loop, B_count ;[30,1]
||      SHR2    .S1X    B_pixel23, A_shift, A_pixel23a ;[30,1]
||      ADD2    .L2     B_sum1f, B_prodB1, B_sum1g ;[21,2]
||      ADD2    .D1     A_sum1b, A_prodC9, A_sum1c ;[21,2]
||      ADD2    .L1     A_sum0g, A_prodA0, A_sum0h ;[21,2]
||      MPYUS4  .M2     B_line10, B_h10word, B_prodB1:B_prodB0 ;[12,3]
||      MPYUS4  .M1     A_line02, A_h01word, A_prodA5:A_prodA4 ;[12,3]
||      LDNDW   .D2T1   *B_IN2++(6), A_line15:A_line14 ;[ 3,4]
        SHR2    .S1     A_pixel01, A_shift, A_pixel01a ;[31,1]
||      ADD2    .D1     A_sum2g, A_prodA10, A_sum2h ;[22,2]
||      ADD2    .S2     B_sum1g, B_prodB5, B_sum1h ;[22,2]
||      ADD2    .L1     A_sum1c, A_prodA1, A_sum1d ;[22,2]
||      ADD2    .L2     B_roundval, B_prodC6, B_sum2a ;[13,3]
||      MPYUS4  .M2     B_line12, B_h11word, B_prodB5:B_prodB4 ;[13,3]
||      MPYUS4  .M1     A_line04, A_h02word, A_prodA9:A_prodA8 ;[13,3]
||      LDNDW   .D2T2   *B_IN3++(1), B_line23:B_line22 ;[ 4,4]
; A-side and B-side partial sums are merged over the cross paths here and in
; the next two packets to form pixel01, pixel23/pixel45 and pixel67.
        ADD2    .S1     A_sum3b, A_prodA3, A_sum3c ;[23,2]
||      ADD2    .L1X    B_sum0d, A_sum0h, A_pixel01 ;[23,2]
||      ADD2    .S2     B_prodB11, B_roundval, B_sum3d ;[14,3]
||      ADD2    .D2     B_sum2a, B_prodB2, B_sum2b ;[14,3]
||      ADD2    .L2     B_prodB9, B_roundval, B_sum1e ;[14,3]
||      MPYUS4  .M2X    A_line13, B_h11word, B_prodB7:B_prodB6 ;[14,3]
||      MPYUS4  .M1     A_line03, A_h01word, A_prodA7:A_prodA6 ;[14,3]
||      LDNDW   .D1T1   *A_IN1++(1), A_line01:A_line00 ;[ 5,4]
L7:     SPACKU4 .S1     A_pixel23a, A_pixel01a, A_outword0 ;[33,1]
||      ADD2    .L1X    A_sum2h, B_sum2e, A_pixel45 ;[24,2]
||      ADD2    .S2X    B_sum1h, A_sum1d, B_pixel23 ;[24,2]
||      ADD2    .L2     B_sum3d, B_prodC7, B_sum3e ;[15,3]
||      ADD2    .D2     B_roundval, B_prodC4, B_sum0a ;[15,3]
||      MPYSU4  .M2     B_h22word, B_line25, B_prodC11:B_prodC10 ;[15,3]
||      MPYUS4  .M1     A_line05, A_h02word, A_prodA11:A_prodA10 ;[15,3]
||      LDNDW   .D1T1   *A_IN1++(1), A_line03:A_line02 ;[ 6,4]
; The order of the first three instructions of this packet matters: the
; prolog enters at L8 + 12 and L8 + 4 to skip them selectively.  The STNDW
; writes the 8 finished pixels (non-aligned store) and advances A_OPTR by 8.
L8:
        STNDW   .D1T1   A_outword1:A_outword0, *A_OPTR++(8) ;[34,1]
||      SHR2    .S1     A_pixel45, A_shift, A_pixel45a ;[25,2]
||      ADD2    .L2X    B_sum3h, A_sum3c, B_pixel67 ;[25,2]
||      ADD2    .L1     A_prodC2, A_prodA2, A_sum2f ;[16,3]
||      ADD2    .S2     B_sum1e, B_prodC5, B_sum1f ;[16,3]
||      ADD2    .D2     B_sum0a, B_prodB0, B_sum0b ;[16,3]
||      MPYSU4  .M1X    A_h22word, B_line24, A_prodC9:A_prodC8 ;[16,3]
||      MPYUS4  .M2     B_line11, B_h10word, B_prodB3:B_prodB2 ;[ 7,4]
L9:     ADD2    .D2     B_sum3e, B_prodB3, B_sum3f ;[17,3]
||      ADD2    .L1     A_prodA9, A_prodA5, A_sum1a ;[17,3]
||      ADD2    .L2     B_sum0b, B_prodB4, B_sum0c ;[17,3]
||      MPYSU4  .M1X    A_h20word, B_line20, A_prodC1:A_prodC0 ;[ 8,4]
||      MPYSU4  .M2X    B_h12word, A_line15, B_prodB11:B_prodB10 ;[ 8,4]
||      SHLMB   .S1     A_line14, A_line15, A_line13 ;[ 8,4]
||      SHRMB   .S2     B_line11, B_line10, B_line12 ;[ 8,4]
||      LDNDW   .D1T1   *A_IN1++(6), A_line05:A_line04 ;[ 8,4]
* =========================== PIPE LOOP EPILOG ============================ *
; Seven cycles: finish shift / pack / store for the last group of 8 pixels
; while restoring the saved state and returning.
;   cycle 1: start reloading the caller's CSR into B_csr (B9)
;   cycle 2: RET issues; cycles 3..7 are its delay slots, so the final
;            STNDW in cycle 7 still executes before control returns
;   cycle 2/3: reload A10, A11 (pre-increment releases the frame) and B10
;   cycle 6: write the reloaded CSR back (re-enables interrupts if the
;            caller had them enabled)
;
; Why MVC sits in cycle 6: B_csr shares B9 with B_count.  A load result is
; not readable until 5 cycles after issue (4 load delay slots per the C6000
; ISA), so with the LDW in cycle 1 the earliest safe read is cycle 6.  The
; previous schedule issued MVC alone in cycle 5, where B9 would still hold
; the exhausted loop counter.  The NOP keeps cycle 5 so the RET delay-slot
; count and the store timing are unchanged.
        SHR2    .S1X    B_pixel67, A_shift, A_pixel67a ;[28,4]
||      LDW     .D2T2   *+B_SP[2], B_csr
        SPACKU4 .S1     A_pixel67a, A_pixel45a, A_outword1 ;[29,4]
||      RET     .S2     B3
||      LDW     .D2T1   *+B_SP[3], A10
||      MV      .L1X    B_SP, A_SP
        SHR2    .S1X    B_pixel23, A_shift, A_pixel23a ;[30,4]
||      LDW     .D2T1   *++B_SP[4], A11
||      LDW     .D1T2   *+A_SP[1], B10
        SHR2    .S1     A_pixel01, A_shift, A_pixel01a ;[31,4]
        NOP ; cycle 5: wait for the B_csr reload to land
        SPACKU4 .S1     A_pixel23a, A_pixel01a, A_outword0 ;[33,4]
||      MVC     .S2     B_csr, CSR
        STNDW   .D1T1   A_outword1:A_outword0, *A_OPTR++(8) ;[34,4]
* ========================================================================= *
* End of file: img_conv_3x3.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *