You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
327 lines
19 KiB
327 lines
19 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* IMGLIB DSP Image/Video Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.3 Sun Sep 29 03:32:32 2002 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
;* ------------------------------------------------------------------------ *;
;*  $asmver is set to the assembler's version number when the predefined   *;
;*  symbol .ASSEMBLER_VERSION exists, and to 0 otherwise.                   *;
;*                                                                          *;
;*  Note: directives are indented on purpose.  Anything that starts in     *;
;*  column 1 is taken as a label by the assembler.                          *;
;* ------------------------------------------------------------------------ *;
        .if $isdefed(".ASSEMBLER_VERSION")
        .asg    .ASSEMBLER_VERSION, $asmver
        .else
        .asg    0, $asmver
        .endif

;* ------------------------------------------------------------------------ *;
;*  When $asmver is below 430 the call/return pseudo-ops used by the code   *;
;*  below are not available, so each one is aliased to the plain branch     *;
;*  mnemonic it stands for.                                                 *;
;* ------------------------------------------------------------------------ *;
        .if ($asmver < 430)

        .asg    B,    CALL     ; Function Call
        .asg    B,    RET      ; Return from a Function
        .asg    B,    CALLRET  ; Function call with Call / Ret chaining.

        ; The BNOP-based aliases are only defined for C64x targets.
        .if .TMS320C6400
        .asg    BNOP, CALLNOP  ; C64x BNOP as a Fn. Call
        .asg    BNOP, RETNOP   ; C64x BNOP as a Fn. Return
        .asg    BNOP, CRNOP    ; C64x Fn. call w/ Call/Ret chaining via BNOP.
        .endif

        ; Empty substitution strings: these two names expand to nothing.
        .asg    , .asmfunc     ; .func equivalent for hand-assembly code
        .asg    , .endasmfunc  ; .endfunc equivalent for hand-assembly code

        .endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* TEXAS INSTRUMENTS, INC. *
|
|
* *
|
|
* NAME *
|
|
* IMG_yc_demux_le16 -- De-interleave a 4:2:2 LITTLE ENDIAN video stream *
|
|
* into three separate LITTLE ENDIAN 16-bit planes *
|
|
* *
|
|
* REVISION DATE *
|
|
* 15-Mar-2002 *
|
|
* *
|
|
* USAGE *
|
|
* This function is C callable, and is called as follows: *
|
|
* *
|
|
* void IMG_yc_demux_le16 *
|
|
* ( *
|
|
* int n, // Number of luma pixels // *
|
|
* const unsigned char * yc, // Interleaved luma/chroma // *
|
|
* short *restrict y, // Luma plane (16-bit) // *
|
|
* short *restrict cr, // Cr chroma plane (16-bit) // *
|
|
* short *restrict cb // Cb chroma plane (16-bit) // *
|
|
* ); *
|
|
* *
|
|
* The input array 'yc' is expected to be an interleaved 4:2:2 *
|
|
* video stream. The input is expected in LITTLE ENDIAN byte *
|
|
* order within each 4-byte word. This is consistent with reading *
|
|
* the video stream from a word-oriented LITTLE ENDIAN device *
|
|
* while the C6000 device is in a LITTLE ENDIAN configuration. *
|
|
* *
|
|
* In other words, the expected pixel order is: *
|
|
* *
|
|
*                    Word 0          Word 1          Word 2                 *
*               +---------------+---------------+---------------+--         *
*       Byte#   |  0   1  2   3 |  4   5  6   7 |  8   9 10  11 |...        *
*               | y0 cr0 y1 cb0 | y2 cr2 y3 cb2 | y4 cr4 y5 cb4 |...        *
*               +---------------+---------------+---------------+--         *
|
|
* *
|
|
* The output arrays 'y', 'cr', and 'cb' are expected to not *
|
|
* overlap. The de-interleaved pixels are written as half-words *
|
|
* in LITTLE ENDIAN order. *
|
|
* *
|
|
* Please see the IMGLIB function IMG_yc_demux_be16 for code which *
|
|
* handles input coming from a BIG ENDIAN device. *
|
|
* *
|
|
* DESCRIPTION *
|
|
* This function reads the byte-oriented pixel data, zero-extends *
|
|
* it, and then writes it to the appropriate result array. Both *
|
|
* the luma and chroma values are expected to be unsigned. *
|
|
* *
|
|
* The data is expected to be in an order consistent with reading *
|
|
* byte oriented data from a word-oriented peripheral that is *
|
|
* operating in LITTLE ENDIAN mode, while the CPU is in LITTLE *
|
|
* ENDIAN mode. This function unpacks the byte-oriented data *
|
|
* so that further processing may proceed in LITTLE ENDIAN mode. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* Input and output arrays are double-word aligned. *
|
|
* The input must be a multiple of 16 luma pixels long. *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The loop has been unrolled a total of 16 times to allow for *
|
|
* processing 8 pixels in each datapath. *
|
|
* *
|
|
* Double-word loads and stores maximize memory bandwidth *
|
|
* utilization. *
|
|
* *
|
|
* This code uses _gmpy4() to ease the L/S/D unit bottleneck on *
|
|
* ANDs. The _gmpy4(value, 0x00010001) is equivalent to *
|
|
* value & 0x00FF00FF, as long as the size field of GFPGFR is *
|
|
* equal to 7. (The polynomial does not matter.) *
|
|
* *
|
|
* NOTES *
|
|
* This code is fully interruptible. *
|
|
* *
|
|
* CYCLES *
|
|
* cycles = 3 * (num_luma / 8) + 18. *
|
|
* For num_luma = 1024, cycles = 402 *
|
|
* *
|
|
* This number includes 6 cycles of function call overhead. The *
|
|
* exact overhead will vary depending on compiler options used. *
|
|
* *
|
|
* CODESIZE *
|
|
* 352 bytes *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
|
|
* ========================================================================= *
*  _IMG_yc_demux_le16
*
*  Splits an interleaved "y cr y cb" byte stream into three planes of
*  zero-extended 16-bit values.  Each pass of the software-pipelined loop
*  consumes 32 input bytes (16 luma + 8 cr + 8 cb) and is split across the
*  two datapaths: the A side handles input bytes 0..15 of the group, the
*  B side handles bytes 16..31.
*
*  Register roles on entry (as bound by the .asg list below):
*      A4 = n (loop-count source)   B4 = yc    A6 = y
*      B6 = cr                      A8 = cb    B3 = return address
*
*  Side effects visible in this block:
*      - GFPGFR is saved in B17 on entry, overwritten with 0x07000000 for
*        the duration of the loop (needed by GMPY4), and restored in the
*        last execute packet before the return takes effect.
*      - No stack accesses; scratch registers used are A1-A3, A16-A31,
*        B8, B9 and B17-B31.
*
*  NOTE(review): the last kernel pass still issues its stage-1 loads for a
*  non-existent following group, so it appears to read up to 32 bytes past
*  the last input byte actually consumed.  The data is never used, but the
*  addresses are touched -- confirm the caller's buffer/padding contract.
*
*  NOTE(review): the trip count is (n >> 4) with a minimum of one kernel
*  pass, so n < 16 still processes one full 16-pixel group -- confirm that
*  callers honour the "multiple of 16" assumption with n >= 16.
*
*  Formatting matters here: mnemonics must not start in column 1, and
*  every "||" line belongs to the same execute packet as the line above it.
* ========================================================================= *

        .sect ".text:_yc_demux_le16"
        .global _IMG_yc_demux_le16
_IMG_yc_demux_le16

* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
*  Name key: each letter pair names a 16-bit or 8-bit field, most
*  significant first (e.g. A_b0y1r0y0 = cb0:y1:cr0:y0 as loaded, A_y1y0 =
*  two zero-extended luma halfwords).  Several names deliberately share one
*  physical register (e.g. A27 = A_b4r4 / A_b6b4 / A_y7y6); their live
*  ranges are interleaved by the schedule, so instruction order inside the
*  loop must not be changed without re-checking every shared register.
        ; ---- arguments / return address ----
        .asg            A4,         A_i             ; n, then loop counter
        .asg            B4,         B_yc            ; input pointer (B side)
        .asg            A6,         A_y             ; luma output (A side)
        .asg            B6,         B_cr            ; cr output (B-side ptr)
        .asg            A8,         A_cb            ; cb output (A side)
        .asg            B3,         B_ret_addr

        ; ---- A-side working registers ----
        .asg            A1,         A_p             ; prolog-collapse predicate
        .asg            A2,         A_b0y1r0y0
        .asg            A3,         A_b2y3r2y2
        .asg            A16,        A_r0b0
        .asg            A17,        A_b0r0
        .asg            A18,        A_b6r6
        .asg            A19,        A_r2b2
        .asg            A20,        A_yc            ; input pointer (A side)
        .asg            A21,        A_r6b6
        .asg            A22,        A_cr            ; cr output + 8 bytes
        .asg            A23,        A_00010001      ; GMPY4 multiplier
        .asg            A24,        A_r2r0
        .asg            A25,        A_b2r2
        .asg            A25,        A_r6r4
        .asg            A26,        A_b2b0
        .asg            A26,        A_y5y4
        .asg            A27,        A_b4r4
        .asg            A27,        A_b6b4
        .asg            A27,        A_y7y6
        .asg            A28,        A_b4y5r4y4
        .asg            A29,        A_b6y7r6y6
        .asg            A30,        A_r4b4
        .asg            A30,        A_y1y0
        .asg            A31,        A_y3y2

        ; ---- B-side working registers ----
        .asg            B8,         B_bCyDrCyC
        .asg            B9,         B_bEyFrEyE
        .asg            B17,        B_gfpgfr        ; caller's GFPGFR
        .asg            B18,        B_rAbA
        .asg            B19,        B_rCbC
        .asg            B20,        B_rEbE
        .asg            B21,        B_cb            ; cb output + 8 bytes
        .asg            B22,        B_00FF00FF      ; low-byte mask
        .asg            B23,        B_y             ; luma output + 16 bytes
        .asg            B24,        B_rAr8
        .asg            B24,        B_y9y8
        .asg            B25,        B_rErC
        .asg            B25,        B_yByA
        .asg            B26,        B_b8r8
        .asg            B26,        B_bAb8
        .asg            B26,        B_bErE
        .asg            B27,        B_bArA
        .asg            B27,        B_bEbC
        .asg            B28,        B_b8y9r8y8
        .asg            B29,        B_bAyBrAyA
        .asg            B29,        B_bCrC
        .asg            B30,        B_r8b8
        .asg            B30,        B_yDyC
        .asg            B31,        B_yFyE
* ========================================================================= *

* =========================== PIPE LOOP PROLOG ============================ *
        ; Packet 1: start loading input bytes 8..15, clear A_p, and derive
        ; the second input pointer and second luma pointer (+8 bytes each).
        LDDW    .D2T1   *B_yc[1],       A_b6y7r6y6:A_b4y5r4y4   ;[ 1,1]
||      ZERO    .S1     A_p
||      ADD     .L1X    B_yc,   8,      A_yc
||      ADD     .L2X    A_y,    8,      B_y

        ; Packet 2: A_p = 0x07000000 (non-zero => "still in prolog", and
        ; also the value written to GFPGFR below).  A_yc = yc + 16 bytes,
        ; B_y = y + 16 bytes.  Start building the 0x00FF00FF mask.
        MVKLH   .S1     0x0700, A_p
||      ADD     .L1     A_yc,   8,      A_yc
||      ADD     .L2     B_y,    8,      B_y
||      MVK     .S2     0x00FF, B_00FF00FF

        ; Packet 3: load input bytes 0..7 (then advance B_yc by 4
        ; doublewords = 32 bytes) and bytes 24..31; finish the mask by
        ; replicating 0x00FF into both halfwords.
        LDDW    .D2T1   *B_yc++[4],     A_b2y3r2y2:A_b0y1r0y0   ;[ 3,1]
||      LDDW    .D1T2   *A_yc[1],       B_bEyFrEyE:B_bCyDrCyC   ;[ 3,1]
||      PACK2   .L2     B_00FF00FF,     B_00FF00FF,     B_00FF00FF

        ; Packet 4: save the caller's GFPGFR, derive the second chroma
        ; pointers (+8 bytes each), and turn n into a group count (n / 16).
        MVC     .S2     GFPGFR, B_gfpgfr
||      ADD     .L1X    B_cr,   8,      A_cr
||      ADD     .L2X    A_cb,   8,      B_cb
||      SHRU    .S1     A_i,    4,      A_i

        ; Packet 5: install the GFPGFR value GMPY4 relies on, build the
        ; 0x00010001 multiplier (each 0x00FF halfword shifted right by 7),
        ; and bias the counter by 2 for the BDEC-controlled loop below.
        MVC     .S2X    A_p,    GFPGFR
||      SHRU2   .S1X    B_00FF00FF,     7,      A_00010001
||      SUB     .L1     A_i,    2,      A_i

        ; Packet 6: first use of the data loaded in packet 1 -- split the
        ; chroma bytes (ANDN) from the luma bytes (AND).
        ANDN    .L1X    A_b6y7r6y6,     B_00FF00FF,     A_b6r6  ;[ 6,1]
||      AND     .S1X    A_b4y5r4y4,     B_00FF00FF,     A_y5y4  ;[ 6,1]
        ; ===== 1 prolog stage collapsed
* =========================== PIPE LOOP KERNEL ============================ *
*  Six execute packets per pass.  The ";[c,i]" tags are the schedule's own
*  cycle/iteration annotations.  Because one prolog stage was collapsed
*  into the kernel, the oldest-stage stores are predicated on [!A_p]: A_p
*  is non-zero on the first pass and is cleared by the DOTPN2 in packet 3.
loop:
        ; Kernel packet 1
        BDEC    .S1     loop,   A_i                             ;[13,1]
||      ROTL    .M1     A_b4r4, 8,      A_r4b4                  ;[13,1]
||      ROTL    .M2     B_b8r8, 8,      B_r8b8                  ;[13,1]
||      AND     .L2     B_b8y9r8y8,     B_00FF00FF,     B_y9y8  ;[13,1]
||      AND     .S2     B_bAyBrAyA,     B_00FF00FF,     B_yByA  ;[13,1]
||      AND     .L1X    A_b6y7r6y6,     B_00FF00FF,     A_y7y6  ;[ 7,2]
||      LDDW    .D1T2   *A_yc++[4],     B_bAyBrAyA:B_b8y9r8y8   ;[ 7,2]
||      LDDW    .D2T1   *B_yc[1],       A_b6y7r6y6:A_b4y5r4y4   ;[ 1,3]

        ; Kernel packet 2: GMPY4 by 0x00010001 keeps bytes 0 and 2 (the
        ; luma bytes), i.e. it acts as an AND with 0x00FF00FF on the .M unit.
  [!A_p]STDW    .D1T1   A_y3y2:A_y1y0,  *A_y++[4]               ;[14,1]
||[!A_p]STDW    .D2T2   B_yFyE:B_yDyC,  *B_y[1]                 ;[14,1]
||      PACK2   .S2     B_rEbE, B_rCbC, B_bEbC                  ;[14,1]
||      ROTL    .M2     B_bArA, 8,      B_rAbA                  ;[14,1]
||      ANDN    .L1X    A_b2y3r2y2,     B_00FF00FF,     A_b2r2  ;[ 8,2]
||      ANDN    .L2     B_bEyFrEyE,     B_00FF00FF,     B_bErE  ;[ 8,2]
||      GMPY4   .M1     A_b0y1r0y0,     A_00010001,     A_y1y0  ;[ 8,2]
||      AND     .S1X    A_b2y3r2y2,     B_00FF00FF,     A_y3y2  ;[ 8,2]

        ; Kernel packet 3: the predicated DOTPN2 computes 1*1 - 1*1 = 0
        ; into A_p, which ends the collapsed-prolog phase; once A_p is 0
        ; the instruction no longer executes.
        PACKH2  .L1     A_r2b2, A_r0b0, A_r2r0                  ;[15,1]
||      ANDN    .S1X    A_b0y1r0y0,     B_00FF00FF,     A_b0r0  ;[ 9,2]
||      ANDN    .S2     B_bCyDrCyC,     B_00FF00FF,     B_bCrC  ;[ 9,2]
||      AND     .L2     B_bEyFrEyE,     B_00FF00FF,     B_yFyE  ;[ 9,2]
||      LDDW    .D2T1   *B_yc++[4],     A_b2y3r2y2:A_b0y1r0y0   ;[ 3,3]
||      LDDW    .D1T2   *A_yc[1],       B_bEyFrEyE:B_bCyDrCyC   ;[ 3,3]
||[ A_p]DOTPN2  .M1     A_00010001,     A_00010001,     A_p     ; pro collapse

        ; Kernel packet 4: the STDW to *A_y[1] belongs to the middle stage
        ; and is therefore unconditional.
  [!A_p]STDW    .D2T2   B_yByA:B_y9y8,  *B_y++[4]               ;[16,1]
||      PACK2   .S1     A_r2b2, A_r0b0, A_b2b0                  ;[16,1]
||      PACK2   .L1     A_r6b6, A_r4b4, A_b6b4                  ;[16,1]
||      PACK2   .S2     B_rAbA, B_r8b8, B_bAb8                  ;[16,1]
||      PACKH2  .L2     B_rAbA, B_r8b8, B_rAr8                  ;[16,1]
||      STDW    .D1T1   A_y7y6:A_y5y4,  *A_y[1]                 ;[10,2]
||      ROTL    .M1     A_b6r6, 8,      A_r6b6                  ;[10,2]
||      ROTL    .M2     B_bErE, 8,      B_rEbE                  ;[10,2]

        ; Kernel packet 5: cb stores (4 halfwords per side, each pointer
        ; advances by 2 doublewords = 16 bytes).
  [!A_p]STDW    .D1T1   A_b6b4:A_b2b0,  *A_cb++[2]              ;[17,1]
||[!A_p]STDW    .D2T2   B_bEbC:B_bAb8,  *B_cb++[2]              ;[17,1]
||      PACKH2  .L1     A_r6b6, A_r4b4, A_r6r4                  ;[17,1]
||      PACKH2  .S2     B_rEbE, B_rCbC, B_rErC                  ;[17,1]
||      ROTL    .M1     A_b2r2, 8,      A_r2b2                  ;[11,2]
||      ROTL    .M2     B_bCrC, 8,      B_rCbC                  ;[11,2]
||      ANDN    .S1X    A_b4y5r4y4,     B_00FF00FF,     A_b4r4  ;[11,2]
||      AND     .L2     B_bCyDrCyC,     B_00FF00FF,     B_yDyC  ;[11,2]

        ; Kernel packet 6: cr stores.  Note the crossed data paths: A-side
        ; data goes out through the B-side pointer and vice versa.
  [!A_p]STDW    .D2T1   A_r6r4:A_r2r0,  *B_cr++[2]              ;[18,1]
||[!A_p]STDW    .D1T2   B_rErC:B_rAr8,  *A_cr++[2]              ;[18,1]
||      ROTL    .M1     A_b0r0, 8,      A_r0b0                  ;[12,2]
||      ANDN    .L2     B_b8y9r8y8,     B_00FF00FF,     B_b8r8  ;[12,2]
||      ANDN    .S2     B_bAyBrAyA,     B_00FF00FF,     B_bArA  ;[12,2]
||      ANDN    .L1X    A_b6y7r6y6,     B_00FF00FF,     A_b6r6  ;[ 6,3]
||      AND     .S1X    A_b4y5r4y4,     B_00FF00FF,     A_y5y4  ;[ 6,3]
* =========================== PIPE LOOP EPILOG ============================ *
        ; ===== 1 epilog stage collapsed
*  Drains the last stage of the final group.  The return branch is issued
*  in the first epilog packet; the five packets after it occupy the branch
*  delay slots, so the stores and the GFPGFR restore all complete before
*  control reaches the caller.  Stores here are unconditional and use no
*  post-increment.

        ; Epilog packet 1: same work as kernel packet 1's oldest stage,
        ; but the second AND runs on .D2 instead of .S2 (presumably to
        ; leave .S2 free for the return branch).
        ROTL    .M1     A_b4r4, 8,      A_r4b4                  ;[13,3]
||      ROTL    .M2     B_b8r8, 8,      B_r8b8                  ;[13,3]
||      AND     .L2     B_b8y9r8y8,     B_00FF00FF,     B_y9y8  ;[13,3]
||      AND     .D2     B_bAyBrAyA,     B_00FF00FF,     B_yByA  ;[13,3]
||      RET             B_ret_addr

        ; Epilog packet 2 (delay slot 1)
        STDW    .D1T1   A_y3y2:A_y1y0,  *A_y                    ;[14,3]
||      STDW    .D2T2   B_yFyE:B_yDyC,  *B_y[1]                 ;[14,3]
||      PACK2   .S2     B_rEbE, B_rCbC, B_bEbC                  ;[14,3]
||      ROTL    .M2     B_bArA, 8,      B_rAbA                  ;[14,3]

        ; Epilog packet 3 (delay slot 2)
        PACKH2  .L1     A_r2b2, A_r0b0, A_r2r0                  ;[15,3]

        ; Epilog packet 4 (delay slot 3)
        STDW    .D2T2   B_yByA:B_y9y8,  *B_y                    ;[16,3]
||      PACK2   .S1     A_r2b2, A_r0b0, A_b2b0                  ;[16,3]
||      PACK2   .L1     A_r6b6, A_r4b4, A_b6b4                  ;[16,3]
||      PACK2   .S2     B_rAbA, B_r8b8, B_bAb8                  ;[16,3]
||      PACKH2  .L2     B_rAbA, B_r8b8, B_rAr8                  ;[16,3]

        ; Epilog packet 5 (delay slot 4)
        STDW    .D1T1   A_b6b4:A_b2b0,  *A_cb                   ;[17,3]
||      STDW    .D2T2   B_bEbC:B_bAb8,  *B_cb                   ;[17,3]
||      PACKH2  .L1     A_r6b6, A_r4b4, A_r6r4                  ;[17,3]
||      PACKH2  .S2     B_rEbE, B_rCbC, B_rErC                  ;[17,3]

        ; Epilog packet 6 (delay slot 5): final cr stores and restore of
        ; the caller's GFPGFR value saved in the prolog.
        STDW    .D2T1   A_r6r4:A_r2r0,  *B_cr                   ;[18,3]
||      STDW    .D1T2   B_rErC:B_rAr8,  *A_cr                   ;[18,3]
||      MVC     .S2     B_gfpgfr,       GFPGFR
|
|
|
|
* ========================================================================= *
|
|
* End of file: img_yc_demux_le16.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
|