You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
544 lines
31 KiB
544 lines
31 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* IMGLIB DSP Image/Video Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.5 Sun Sep 29 03:32:19 2002 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
; Capture the assembler version in the substitution symbol $asmver.
; Tools that do not define .ASSEMBLER_VERSION are treated as version 0.
.if $isdefed(".ASSEMBLER_VERSION")
|
|
.asg .ASSEMBLER_VERSION, $asmver
|
|
.else
|
|
.asg 0, $asmver
|
|
.endif
|
|
|
|
; For tools older than 4.30, map the newer call/return mnemonics onto the
; plain branch mnemonics and make .asmfunc / .endasmfunc expand to nothing.
.if ($asmver < 430)
|
|
|
|
.asg B, CALL ; Function Call
|
|
.asg B, RET ; Return from a Function
|
|
.asg B, CALLRET ; Function call with Call / Ret chaining.
|
|
|
|
; The BNOP-based aliases are only defined when .TMS320C6400 is set.
.if .TMS320C6400
|
|
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
|
|
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
|
|
.asg BNOP, CRNOP ; C64x Fn call w/, Call/Ret chaining via BNOP.
|
|
.endif
|
|
|
|
.asg , .asmfunc ; .func equivalent for hand-assembly code
|
|
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
|
|
|
|
.endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* NAME *
|
|
* IMG_conv_3x3 -- 3x3 convolution *
|
|
* *
|
|
* REVISION DATE *
|
|
* 19-May-2002 *
|
|
* *
|
|
* USAGE *
|
|
* This routine has the following C prototype: *
|
|
* *
|
|
* void IMG_conv_3x3 ( const unsigned char *restrict inptr, *
|
|
* unsigned char *restrict outptr, *
|
|
* int x_dim, *
|
|
* const char *restrict mask, *
|
|
* int shift) *
|
|
* *
|
|
* The convolution routine accepts three rows of 'x_dim' input points *
|
|
* and convolves them with the 3 by 3 mask. A total of 'x_dim' outputs *
|
|
* are written to the output array. The 'mask' array has the 3 by 3 *
|
|
* array of coefficients. *
|
|
* *
|
|
* DESCRIPTION *
|
|
* *
|
|
* The convolution kernel accepts three rows of 'x_dim' input points *
|
|
* and produces one output row of 'x_dim' points using the input mask *
|
|
* of 3 by 3. The user defined shift value is used to shift the convo- *
|
|
* lution value down to the byte range. The convolution sum is also *
|
|
* range limited to 0..255. The shift amount is non-zero for low pass *
|
|
* filters, and zero for high pass and sharpening filters. *
|
|
* *
|
|
* *
|
|
* The following is the C code model for the algorithm: *
|
|
* *
|
|
* *
|
|
* void IMG_conv_3x3( const unsigned char *restrict inptr, *
|
|
* unsigned char *restrict outptr, *
|
|
* int x_dim, *
|
|
* const char *restrict mask, *
|
|
* int shift) *
|
|
* { *
|
|
* const unsigned char *IN1,*IN2,*IN3; *
|
|
* unsigned char *OUT; *
|
|
* *
|
|
* short pix10, pix20, pix30; *
|
|
* short mask10, mask20, mask30; *
|
|
* *
|
|
* int sum, sum00, sum11; *
|
|
* int i; *
|
|
* int sum22, j; *
|
|
* *
|
|
* IN1 = inptr; *
|
|
* IN2 = IN1 + x_dim; *
|
|
* IN3 = IN2 + x_dim; *
|
|
* OUT = outptr; *
|
|
* *
|
|
* for (j = 0; j < x_dim ; j++) *
|
|
* { *
|
|
* sum = 0; *
|
|
* *
|
|
* for (i = 0; i < 3; i++) *
|
|
* { *
|
|
* pix10 = IN1[i]; *
|
|
* pix20 = IN2[i]; *
|
|
* pix30 = IN3[i]; *
|
|
* *
|
|
* mask10 = mask[i]; *
|
|
* mask20 = mask[i + 3]; *
|
|
* mask30 = mask[i + 6]; *
|
|
* *
|
|
* sum00 = pix10 * mask10; *
|
|
* sum11 = pix20 * mask20; *
|
|
* sum22 = pix30 * mask30; *
|
|
* *
|
|
* sum += sum00 + sum11+ sum22; *
|
|
* } *
|
|
* *
|
|
* IN1++; *
|
|
* IN2++; *
|
|
* IN3++; *
|
|
* *
|
|
* sum = (sum >> shift); *
|
|
* if ( sum < 0 ) sum = 0; *
|
|
* if ( sum > 255 ) sum = 255; *
|
|
* *OUT++ = sum; *
|
|
* } *
|
|
* } *
|
|
* *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The inner loop that computes the convolution sum is completely *
|
|
* unrolled and 8 output pixels are computed together. The mask *
|
|
* values are loaded as bytes and replicated into 32-bit words. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* x_dim must be a multiple of 8. *
|
|
* *
|
|
* NOTES *
|
|
* None *
|
|
* *
|
|
* CYCLES *
|
|
* 33 + x_dim/8 * 9 *
|
|
* *
|
|
* CODESIZE *
|
|
* 724 bytes *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
|
|
; Code section and exported entry label for the routine.
; NOTE(review): the section is named "_conv_3x3" while the label carries the
; IMG_ prefix -- presumably historical; check linker command files before
; renaming either.
.sect ".text:_conv_3x3"
|
|
.global _IMG_conv_3x3
|
|
; The comment on the entry label and the line after it pair each C argument
; with the register it arrives in.
_IMG_conv_3x3: ; A_INPTR, B_OUTPTR, A_inputcols, B_mask, A_shift
|
|
; A4, B4, A6, B6, A8
|
|
|
|
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Symbolic names for the physical registers used by the code below.
; Names starting with A_ map to A-side registers and names starting with B_
; to B-side registers. Many names share one physical register (A16, for
; example, is A_pixel45, A_prodA6, A_sum1d, A_sum2g and A_SP), so the
; instruction schedule has to keep the lifetimes of such names apart.
|
|
.asg A0, A_h01word
|
|
.asg A0, A_mask01
|
|
.asg A0, A_mask20
|
|
.asg A1, A_h20word
|
|
.asg A10, A_line00
|
|
.asg A11, A_line01
|
|
.asg A16, A_pixel45
|
|
.asg A16, A_prodA6
|
|
.asg A16, A_sum1d
|
|
.asg A16, A_sum2g
|
|
.asg A16, A_SP
|
|
.asg A17, A_pixel67a
|
|
.asg A17, A_prodA7
|
|
.asg A17, A_sum3b
|
|
.asg A18, A_line04
|
|
.asg A18, A_line14
|
|
.asg A18, A_prodC8
|
|
.asg A18, A_sum0g
|
|
.asg A18, A_sum2h
|
|
.asg A19, A_line05
|
|
.asg A19, A_line15
|
|
.asg A19, A_prodC9
|
|
.asg A19, A_sum0e
|
|
.asg A19, A_sum1a
|
|
.asg A2, A_h02word
|
|
.asg A20, A_line02
|
|
.asg A20, A_pixel23a
|
|
.asg A20, A_prodA4
|
|
.asg A21, A_line03
|
|
.asg A21, A_prodA5
|
|
.asg A22, A_outword0
|
|
.asg A22, A_pixel01a
|
|
.asg A22, A_prodA10
|
|
.asg A22, A_prodA8
|
|
.asg A23, A_outword1
|
|
.asg A23, A_prodA11
|
|
.asg A23, A_prodA9
|
|
.asg A23, A_sum0f
|
|
.asg A24, A_pixel45a
|
|
.asg A24, A_prodC2
|
|
.asg A25, A_prodC3
|
|
.asg A26, A_prodC0
|
|
.asg A26, A_sum3a
|
|
.asg A27, A_prodC1
|
|
.asg A28, A_line13
|
|
.asg A28, A_prodA2
|
|
.asg A29, A_prodA3
|
|
.asg A3, A_h22word
|
|
.asg A30, A_prodA0
|
|
.asg A30, A_sum1c
|
|
.asg A31, A_mask
|
|
.asg A31, A_mask00
|
|
.asg A31, A_mask02
|
|
.asg A31, A_prodA1
|
|
.asg A4, A_INPTR
|
|
.asg A4, A_sum0h
|
|
.asg A4, A_sum1b
|
|
.asg A4, A_sum2f
|
|
.asg A4, A_sum3c
|
|
.asg A5, A_IN1
|
|
.asg A6, A_inputcols
|
|
.asg A6, A_pixel01
|
|
.asg A7, A_OPTR
|
|
.asg A8, A_shift
|
|
.asg A9, A_h00word
|
|
.asg B0, B_h21word
|
|
.asg B0, B_mask21
|
|
.asg B0, B_roundval1
|
|
.asg B1, B_h12word
|
|
.asg B1, B_mask12
|
|
.asg B10, B_h10word
|
|
.asg B15, B_SP
|
|
.asg B16, B_line22
|
|
.asg B16, B_prodC4
|
|
.asg B16, B_sum1g
|
|
.asg B16, B_sum2a
|
|
.asg B16, B_sum3e
|
|
.asg B17, B_line23
|
|
.asg B17, B_prodC5
|
|
.asg B17, B_sum0b
|
|
.asg B17, B_sum0d
|
|
.asg B18, B_line12
|
|
.asg B18, B_line24
|
|
.asg B18, B_prodB4
|
|
.asg B19, B_line25
|
|
.asg B19, B_prodB5
|
|
.asg B19, B_sum0a
|
|
.asg B2, B_h11word
|
|
.asg B2, B_mask11
|
|
.asg B20, B_line20
|
|
.asg B20, B_prodC10
|
|
.asg B20, B_sum0c
|
|
.asg B21, B_line21
|
|
.asg B21, B_prodC11
|
|
.asg B22, B_line10
|
|
.asg B22, B_sum1h
|
|
.asg B23, B_line11
|
|
.asg B23, B_sum2d
|
|
.asg B23, B_sum2e
|
|
.asg B23, B_sum3f
|
|
.asg B24, B_prodB0
|
|
.asg B24, B_prodC6
|
|
.asg B24, B_sum1f
|
|
.asg B24, B_sum3d
|
|
.asg B25, B_prodB1
|
|
.asg B25, B_prodC7
|
|
.asg B26, B_prodB8
|
|
.asg B26, B_sum2c
|
|
.asg B27, B_pixel23
|
|
.asg B27, B_prodB9
|
|
.asg B28, B_prodB2
|
|
.asg B28, B_prodB6
|
|
.asg B28, B_sum1e
|
|
.asg B29, B_prodB3
|
|
.asg B29, B_prodB7
|
|
.asg B30, B_inputcols
|
|
.asg B30, B_prodB10
|
|
.asg B31, B_mask10
|
|
.asg B31, B_mask22
|
|
.asg B31, B_prodB11
|
|
.asg B31, B_sum2b
|
|
.asg B4, B_h22word
|
|
; NOTE(review): the next line repeats the assignment directly above it
; (B4 -> B_h22word); the second .asg is redundant.
.asg B4, B_h22word
|
|
.asg B4, B_OUTPTR
|
|
.asg B5, B_roundval
|
|
.asg B6, B_count1
|
|
.asg B6, B_mask
|
|
.asg B6, B_pixel67
|
|
.asg B6, B_sum3g
|
|
.asg B6, B_sum3h
|
|
.asg B7, B_IN3
|
|
.asg B8, B_IN2
|
|
; B9 serves three purposes in turn: saved CSR, CSR with bit 0 cleared, and
; the BDEC loop counter.
.asg B9, B_count
|
|
.asg B9, B_csr
|
|
.asg B9, B_no_gie
|
|
* ========================================================================= *
|
|
|
|
; Entry: read CSR, push A11 while reserving four words of stack, and copy
; the input pointer and x_dim into working registers.
; A_SP is copied in the same execute packet as the B_SP post-decrement, so
; it presumably holds the entry value of SP: the A10 store at *-A_SP[1]
; below then lands in the slot the epilog reloads as *+B_SP[3].
MVC .S2 CSR, B_csr
|
|
|| STW .D2T1 A11, *B_SP--[4]
|
|
|| MV .L1X B_SP, A_SP
|
|
|| MV .S1 A_INPTR, A_IN1 ;[10,0]
|
|
|| MV .L2X A_inputcols, B_inputcols ;[ 7,0]
|
|
|
|
; Form a copy of CSR with bit 0 cleared, save the original CSR and A10, and
; compute the second-row pointer IN2 = IN1 + x_dim.
AND .L2 B_csr, -2, B_no_gie
|
|
|| STW .D2T2 B_csr, *+B_SP[2]
|
|
|| STW .D1T1 A10, *-A_SP[1]
|
|
|| ADD .S2X A_IN1, B_inputcols,B_IN2 ;[12,0]
|
|
|
|
; Write the modified CSR back (presumably bit 0 is the global interrupt
; enable, as the name B_no_gie suggests), save B10, and compute the
; third-row pointer IN3 = IN2 + x_dim.
MVC .S2 B_no_gie, CSR
|
|
|| STW .D2T2 B10, *+B_SP[1]
|
|
|| ADD .L2 B_IN2, B_inputcols,B_IN3 ;[13,0]
|
|
|
|
; Load the nine mask coefficients mask[0..8] as bytes, split across the two
; data paths; A_mask is an A-side copy of the mask pointer.
LDB .D2T2 *+B_mask[8], B_mask22 ;[ 3,0]
|
|
|| MV .L1X B_mask, A_mask ;[ 3,0]
|
|
|
|
LDB .D2T2 *+B_mask[7], B_mask21 ;[ 4,0]
|
|
|| LDB .D1T1 *+A_mask[6], A_mask20 ;[ 4,0]
|
|
|
|
LDB .D2T2 *+B_mask[5], B_mask12 ;[ 5,0]
|
|
|| LDB .D1T1 *+A_mask[2], A_mask02 ;[ 5,0]
|
|
|
|
; The rounding constant is zero; PACK2 in the next packet copies it into
; both halfwords of B_roundval.
MVK .S2 0, B_roundval1 ;[ 6,0]
|
|
|| LDB .D2T2 *+B_mask[4], B_mask11 ;[ 6,0]
|
|
|| LDB .D1T1 *+A_mask[1], A_mask01 ;[ 6,0]
|
|
|
|
PACK2 .L2 B_roundval1,B_roundval1,B_roundval ;[ 7,0]
|
|
|| LDB .D2T2 *+B_mask[3], B_mask10 ;[ 7,0]
|
|
|| LDB .D1T1 *+A_mask[0], A_mask00 ;[ 7,0]
|
|
|
|
; B_count1 = x_dim >> 3. From this packet on, the first row loads of the
; loop (tags [n,1]) are interleaved with the rest of the setup.
; Each coefficient is consumed five packets after its LDB was issued.
; PACK2 followed by PACKL4 with identical source operands replicates the
; coefficient byte into all four bytes of the corresponding h??word.
SHRU .S2 B_inputcols,3, B_count1 ;[ 8,0]
|
|
|| PACK2 .L2 B_mask22, B_mask22, B_h22word ;[ 8,0]
|
|
|| MV .D1X B_OUTPTR, A_OPTR ;[12,0]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line21:B_line20 ;[ 1,1]
|
|
|
|
PACKL4 .L2 B_h22word, B_h22word, B_h22word ;[ 9,0]
|
|
|| PACK2 .S2 B_mask21, B_mask21, B_h21word ;[ 9,0]
|
|
|| PACK2 .L1 A_mask20, A_mask20, A_h20word ;[ 9,0]
|
|
|| LDNDW .D2T2 *B_IN2++(2), B_line11:B_line10 ;[ 2,1]
|
|
|
|
PACKL4 .L2 B_h21word, B_h21word, B_h21word ;[10,0]
|
|
|| PACKL4 .L1 A_h20word, A_h20word, A_h20word ;[10,0]
|
|
|| PACK2 .S2 B_mask12, B_mask12, B_h12word ;[10,0]
|
|
|| PACK2 .S1 A_mask02, A_mask02, A_h02word ;[10,0]
|
|
|| LDNDW .D2T1 *B_IN2++(6), A_line15:A_line14 ;[ 3,1]
|
|
|
|
PACKL4 .L2 B_h12word, B_h12word, B_h12word ;[11,0]
|
|
|| PACK2 .S2 B_mask11, B_mask11, B_h11word ;[11,0]
|
|
|| PACKL4 .L1 A_h02word, A_h02word, A_h02word ;[11,0]
|
|
|| PACK2 .S1 A_mask01, A_mask01, A_h01word ;[11,0]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line23:B_line22 ;[ 4,1]
|
|
|
|
PACKL4 .L2 B_h11word, B_h11word, B_h11word ;[12,0]
|
|
|| PACK2 .S2 B_mask10, B_mask10, B_h10word ;[12,0]
|
|
|| PACKL4 .L1 A_h01word, A_h01word, A_h01word ;[12,0]
|
|
|| PACK2 .S1 A_mask00, A_mask00, A_h00word ;[12,0]
|
|
|| LDNDW .D1T1 *A_IN1++(1), A_line01:A_line00 ;[ 5,1]
|
|
|
|
; Loop counter for BDEC: x_dim/8 - 2. A_h22word is an A-side copy of the
; replicated mask[8] word.
SUB .S2 B_count1, 2, B_count ;[13,0]
|
|
|| MV .S1X B_h22word, A_h22word ;[13,0]
|
|
|| PACKL4 .L2 B_h10word, B_h10word, B_h10word ;[13,0]
|
|
|| PACKL4 .L1 A_h00word, A_h00word, A_h00word ;[13,0]
|
|
|| LDNDW .D1T1 *A_IN1++(1), A_line03:A_line02 ;[ 6,1]
|
|
|
|
* =========================== PIPE LOOP PROLOG ============================ *
; Pipeline fill for the loop that follows. The bracketed tag on each
; instruction appears to be [cycle within an iteration, iteration number].
; Six branches (to L7, L8 + 12, L9, L1, L2_P and L3) are issued in six
; consecutive packets, so once the first one takes effect control hops
; through those targets one packet at a time. The two ";" lists further
; down appear to record which packets run before and after L2_P.
; NOTE(review): "L8 + 12" and "L8 + 4" presumably enter the L8 execute
; packet three and one instruction words in (4-byte opcodes), skipping its
; leading instructions, the STNDW first among them -- confirm before
; changing the order or number of instructions in L8.
|
|
MPYUS4 .M2 B_line11, B_h10word, B_prodB3:B_prodB2 ;[ 7,1]
|
|
|
|
MPYSU4 .M1X A_h20word, B_line20, A_prodC1:A_prodC0 ;[ 8,1]
|
|
|| MPYSU4 .M2X B_h12word, A_line15, B_prodB11:B_prodB10 ;[ 8,1]
|
|
|| SHLMB .S1 A_line14, A_line15, A_line13 ;[ 8,1]
|
|
|| SHRMB .S2 B_line11, B_line10, B_line12 ;[ 8,1]
|
|
|| LDNDW .D1T1 *A_IN1++(6), A_line05:A_line04 ;[ 8,1]
|
|
|
|
; -
|
|
; First of the six chained branches.
MPYUS4 .M2 B_line23, B_h21word, B_prodC7:B_prodC6 ;[ 9,1]
|
|
|| MPYSU4 .M1X A_h20word, B_line21, A_prodC3:A_prodC2 ;[ 9,1]
|
|
|| LDNDW .D2T2 *B_IN3++(6), B_line25:B_line24 ;[ 9,1]
|
|
|| B .S1 L7
|
|
|
|
MPYSU4 .M2X B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,1]
|
|
|| MPYUS4 .M1 A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,1]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line21:B_line20 ;[ 1,2]
|
|
|| B .S1 L8 + 12
|
|
|
|
MPYUS4 .M2 B_line22, B_h21word, B_prodC5:B_prodC4 ;[11,1]
|
|
|| MPYUS4 .M1 A_line01, A_h00word, A_prodA3:A_prodA2 ;[11,1]
|
|
|| LDNDW .D2T2 *B_IN2++(2), B_line11:B_line10 ;[ 2,2]
|
|
|| B .S1 L9
|
|
|
|
MPYUS4 .M2 B_line10, B_h10word, B_prodB1:B_prodB0 ;[12,1]
|
|
|| MPYUS4 .M1 A_line02, A_h01word, A_prodA5:A_prodA4 ;[12,1]
|
|
|| LDNDW .D2T1 *B_IN2++(6), A_line15:A_line14 ;[ 3,2]
|
|
|| B .S1 L1
|
|
|
|
; Accumulation starts here: the rounding constant is the first addend.
ADD2 .L2 B_roundval, B_prodC6, B_sum2a ;[13,1]
|
|
|| MPYUS4 .M2 B_line12, B_h11word, B_prodB5:B_prodB4 ;[13,1]
|
|
|| MPYUS4 .M1 A_line04, A_h02word, A_prodA9:A_prodA8 ;[13,1]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line23:B_line22 ;[ 4,2]
|
|
|| B .S1 L2_P
|
|
|
|
ADD2 .S2 B_prodB11, B_roundval, B_sum3d ;[14,1]
|
|
|| ADD2 .D2 B_sum2a, B_prodB2, B_sum2b ;[14,1]
|
|
|| ADD2 .L2 B_prodB9, B_roundval, B_sum1e ;[14,1]
|
|
|| MPYUS4 .M2X A_line13, B_h11word, B_prodB7:B_prodB6 ;[14,1]
|
|
|| MPYUS4 .M1 A_line03, A_h01word, A_prodA7:A_prodA6 ;[14,1]
|
|
|| LDNDW .D1T1 *A_IN1++(1), A_line01:A_line00 ;[ 5,2]
|
|
|| B .S1 L3
|
|
|
|
; L7, L8+12, L9, L1
|
|
|
|
; L2_P carries the same instructions as the second kernel packet except for
; that packet's SHR2 of B_pixel67, plus one more branch into L8.
L2_P: ADD2 .S2 B_sum3f, B_prodB7, B_sum3g ;[19,1]
|
|
|| ADD2 .D1 A_prodA11, A_prodA7, A_sum3a ;[19,1]
|
|
|| ADD2 .L2 B_sum2c, B_prodB10, B_sum2d ;[19,1]
|
|
|| ADD2 .L1 A_sum0e, A_prodC0, A_sum0f ;[19,1]
|
|
|| MPYSU4 .M2X B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,2]
|
|
|| MPYUS4 .M1 A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,2]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line21:B_line20 ;[ 1,3]
|
|
|| B .S1 L8 + 4
|
|
|
|
; L3, L4, L5, L6, L7, L8+4, L9
|
|
|
|
* =========================== PIPE LOOP KERNEL ============================ *
; Steady-state loop body: nine execute packets per pass, with one STNDW per
; pass that writes eight output bytes and advances A_OPTR by 8.
; Roles as far as the operands show them:
;   LDNDW          non-aligned 8-byte loads from the three input rows
;   SHLMB / SHRMB  build the remaining middle-row words from loaded ones
;   MPYUS4/MPYSU4  four byte products at a time, pixels times the
;                  replicated coefficient words
;   ADD2           packed 16-bit accumulation, seeded with B_roundval
;   SHR2           packed 16-bit shift right by the caller's 'shift'
;   SPACKU4        saturating pack of the shifted sums to unsigned bytes
; NOTE(review): the sums are accumulated with ADD2 in 16-bit lanes, whereas
; the C model in the file header sums in an int -- presumably the masks are
; expected to keep the nine-term sum within 16 bits; confirm.
; NOTE(review): L1, L3, L7, L8 and L9 are branch targets of the prolog,
; which also branches to byte offsets inside L8. Do not add, remove or
; reorder instructions here without reworking those branches.
|
|
loop:
|
|
L1: ADD2 .D1 A_sum2f, A_prodA6, A_sum2g ;[18,2]
|
|
|| ADD2 .L2 B_sum2b, B_prodB6, B_sum2c ;[18,2]
|
|
|| ADD2 .L1 A_sum1a, A_prodC1, A_sum1b ;[18,2]
|
|
|| ADD2 .S1 A_prodA4, A_prodA8, A_sum0e ;[18,2]
|
|
|| ADD2 .S2 B_sum0c, B_prodB8, B_sum0d ;[18,2]
|
|
|| MPYUS4 .M2 B_line23, B_h21word, B_prodC7:B_prodC6 ;[ 9,3]
|
|
|| MPYSU4 .M1X A_h20word, B_line21, A_prodC3:A_prodC2 ;[ 9,3]
|
|
|| LDNDW .D2T2 *B_IN3++(6), B_line25:B_line24 ;[ 9,3]
|
|
|
|
SHR2 .S1X B_pixel67, A_shift, A_pixel67a ;[28,1]
|
|
|| ADD2 .S2 B_sum3f, B_prodB7, B_sum3g ;[19,2]
|
|
|| ADD2 .D1 A_prodA11, A_prodA7, A_sum3a ;[19,2]
|
|
|| ADD2 .L2 B_sum2c, B_prodB10, B_sum2d ;[19,2]
|
|
|| ADD2 .L1 A_sum0e, A_prodC0, A_sum0f ;[19,2]
|
|
|| MPYSU4 .M2X B_h12word, A_line14, B_prodB9:B_prodB8 ;[10,3]
|
|
|| MPYUS4 .M1 A_line00, A_h00word, A_prodA1:A_prodA0 ;[10,3]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line21:B_line20 ;[ 1,4]
|
|
|
|
L3: SPACKU4 .S1 A_pixel67a, A_pixel45a, A_outword1 ;[29,1]
|
|
|| ADD2 .L2 B_sum3g, B_prodC11, B_sum3h ;[20,2]
|
|
|| ADD2 .L1 A_sum3a, A_prodC3, A_sum3b ;[20,2]
|
|
|| ADD2 .S2 B_sum2d, B_prodC10, B_sum2e ;[20,2]
|
|
|| ADD2 .D1 A_sum0f, A_prodC8, A_sum0g ;[20,2]
|
|
|| MPYUS4 .M2 B_line22, B_h21word, B_prodC5:B_prodC4 ;[11,3]
|
|
|| MPYUS4 .M1 A_line01, A_h00word, A_prodA3:A_prodA2 ;[11,3]
|
|
|| LDNDW .D2T2 *B_IN2++(2), B_line11:B_line10 ;[ 2,4]
|
|
|
|
; Loop-back branch on B_count. It is issued in the fourth packet; the five
; packets that follow complete the body before the branch takes effect.
BDEC .S2 loop, B_count ;[30,1]
|
|
|| SHR2 .S1X B_pixel23, A_shift, A_pixel23a ;[30,1]
|
|
|| ADD2 .L2 B_sum1f, B_prodB1, B_sum1g ;[21,2]
|
|
|| ADD2 .D1 A_sum1b, A_prodC9, A_sum1c ;[21,2]
|
|
|| ADD2 .L1 A_sum0g, A_prodA0, A_sum0h ;[21,2]
|
|
|| MPYUS4 .M2 B_line10, B_h10word, B_prodB1:B_prodB0 ;[12,3]
|
|
|| MPYUS4 .M1 A_line02, A_h01word, A_prodA5:A_prodA4 ;[12,3]
|
|
|| LDNDW .D2T1 *B_IN2++(6), A_line15:A_line14 ;[ 3,4]
|
|
|
|
SHR2 .S1 A_pixel01, A_shift, A_pixel01a ;[31,1]
|
|
|| ADD2 .D1 A_sum2g, A_prodA10, A_sum2h ;[22,2]
|
|
|| ADD2 .S2 B_sum1g, B_prodB5, B_sum1h ;[22,2]
|
|
|| ADD2 .L1 A_sum1c, A_prodA1, A_sum1d ;[22,2]
|
|
|| ADD2 .L2 B_roundval, B_prodC6, B_sum2a ;[13,3]
|
|
|| MPYUS4 .M2 B_line12, B_h11word, B_prodB5:B_prodB4 ;[13,3]
|
|
|| MPYUS4 .M1 A_line04, A_h02word, A_prodA9:A_prodA8 ;[13,3]
|
|
|| LDNDW .D2T2 *B_IN3++(1), B_line23:B_line22 ;[ 4,4]
|
|
|
|
ADD2 .S1 A_sum3b, A_prodA3, A_sum3c ;[23,2]
|
|
|| ADD2 .L1X B_sum0d, A_sum0h, A_pixel01 ;[23,2]
|
|
|| ADD2 .S2 B_prodB11, B_roundval, B_sum3d ;[14,3]
|
|
|| ADD2 .D2 B_sum2a, B_prodB2, B_sum2b ;[14,3]
|
|
|| ADD2 .L2 B_prodB9, B_roundval, B_sum1e ;[14,3]
|
|
|| MPYUS4 .M2X A_line13, B_h11word, B_prodB7:B_prodB6 ;[14,3]
|
|
|| MPYUS4 .M1 A_line03, A_h01word, A_prodA7:A_prodA6 ;[14,3]
|
|
|| LDNDW .D1T1 *A_IN1++(1), A_line01:A_line00 ;[ 5,4]
|
|
|
|
L7: SPACKU4 .S1 A_pixel23a, A_pixel01a, A_outword0 ;[33,1]
|
|
|| ADD2 .L1X A_sum2h, B_sum2e, A_pixel45 ;[24,2]
|
|
|| ADD2 .S2X B_sum1h, A_sum1d, B_pixel23 ;[24,2]
|
|
|| ADD2 .L2 B_sum3d, B_prodC7, B_sum3e ;[15,3]
|
|
|| ADD2 .D2 B_roundval, B_prodC4, B_sum0a ;[15,3]
|
|
|| MPYSU4 .M2 B_h22word, B_line25, B_prodC11:B_prodC10 ;[15,3]
|
|
|| MPYUS4 .M1 A_line05, A_h02word, A_prodA11:A_prodA10 ;[15,3]
|
|
|| LDNDW .D1T1 *A_IN1++(1), A_line03:A_line02 ;[ 6,4]
|
|
; The STNDW is deliberately the first instruction after L8: the prolog's
; branches to "L8 + 12" and "L8 + 4" rely on the order of this packet.
L8:
|
|
STNDW .D1T1 A_outword1:A_outword0, *A_OPTR++(8) ;[34,1]
|
|
|| SHR2 .S1 A_pixel45, A_shift, A_pixel45a ;[25,2]
|
|
|| ADD2 .L2X B_sum3h, A_sum3c, B_pixel67 ;[25,2]
|
|
|| ADD2 .L1 A_prodC2, A_prodA2, A_sum2f ;[16,3]
|
|
|| ADD2 .S2 B_sum1e, B_prodC5, B_sum1f ;[16,3]
|
|
|| ADD2 .D2 B_sum0a, B_prodB0, B_sum0b ;[16,3]
|
|
|| MPYSU4 .M1X A_h22word, B_line24, A_prodC9:A_prodC8 ;[16,3]
|
|
|| MPYUS4 .M2 B_line11, B_h10word, B_prodB3:B_prodB2 ;[ 7,4]
|
|
|
|
L9: ADD2 .D2 B_sum3e, B_prodB3, B_sum3f ;[17,3]
|
|
|| ADD2 .L1 A_prodA9, A_prodA5, A_sum1a ;[17,3]
|
|
|| ADD2 .L2 B_sum0b, B_prodB4, B_sum0c ;[17,3]
|
|
|| MPYSU4 .M1X A_h20word, B_line20, A_prodC1:A_prodC0 ;[ 8,4]
|
|
|| MPYSU4 .M2X B_h12word, A_line15, B_prodB11:B_prodB10 ;[ 8,4]
|
|
|| SHLMB .S1 A_line14, A_line15, A_line13 ;[ 8,4]
|
|
|| SHRMB .S2 B_line11, B_line10, B_line12 ;[ 8,4]
|
|
|| LDNDW .D1T1 *A_IN1++(6), A_line05:A_line04 ;[ 8,4]
|
|
|
|
* =========================== PIPE LOOP EPILOG ============================ *
; Drain of the pipelined loop: the remaining shift / pack / store steps for
; the last group of eight outputs, overlapped with restoring CSR, A10, A11,
; B10 and the stack pointer. RET is issued in the second packet; the five
; packets after it run before the return takes effect, so the number of
; packets below must not change.
|
|
; Start reloading the CSR value saved at entry.
SHR2 .S1X B_pixel67, A_shift, A_pixel67a ;[28,4]
|
|
|| LDW .D2T2 *+B_SP[2], B_csr
|
|
|
|
; A_SP is copied before the stack pointer is moved back in the next packet.
SPACKU4 .S1 A_pixel67a, A_pixel45a, A_outword1 ;[29,4]
|
|
|| RET .S2 B3
|
|
|| LDW .D2T1 *+B_SP[3], A10
|
|
|| MV .L1X B_SP, A_SP
|
|
|
|
; *++B_SP[4] releases the four-word frame and reloads A11 from the slot it
; was pushed to; A_SP still addresses the frame and is used for B10.
SHR2 .S1X B_pixel23, A_shift, A_pixel23a ;[30,4]
|
|
|| LDW .D2T1 *++B_SP[4], A11
|
|
|| LDW .D1T2 *+A_SP[1], B10
|
|
|
|
SHR2 .S1 A_pixel01, A_shift, A_pixel01a ;[31,4]
|
|
|
|
; Pack the low four output bytes in this slot (one packet ahead of its
; kernel tag) so that the CSR write can take the following one. Both
; sources were written by the SHR2 packets above.
SPACKU4 .S1 A_pixel23a, A_pixel01a, A_outword0 ;[33,4]
|
|
|
|
; Restore the caller's CSR. This packet is five cycles after the LDW of
; B_csr in the first epilog packet, the same load-to-use distance the setup
; code keeps between each LDB and its PACK2. One packet earlier, B_csr (B9)
; would presumably still hold the BDEC loop counter instead of the saved
; CSR.
MVC .S2 B_csr, CSR
|
|
|
|
STNDW .D1T1 A_outword1:A_outword0, *A_OPTR++(8) ;[34,4]
|
|
|
|
|
|
* ========================================================================= *
|
|
* End of file: img_conv_3x3.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|