You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
286 lines
18 KiB
286 lines
18 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* IMGLIB DSP Image/Video Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.10 Sun Sep 29 03:32:30 2002 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
;* ------------------------------------------------------------------------ *;
;*  $asmver is set to .ASSEMBLER_VERSION when the assembler defines that    *;
;*  symbol, and to 0 otherwise.  When $asmver < 430 the following names     *;
;*  are defined as substitution symbols:                                    *;
;*                                                                          *;
;*      CALL, RET, CALLRET          ->  B                                   *;
;*      CALLNOP, RETNOP, CRNOP      ->  BNOP   (only if .TMS320C6400)       *;
;*      .asmfunc, .endasmfunc       ->  empty text                          *;
;*                                                                          *;
;*  For $asmver >= 430 nothing is defined here.                             *;
;* ------------------------------------------------------------------------ *;
        .if $isdefed(".ASSEMBLER_VERSION")
        .asg    .ASSEMBLER_VERSION, $asmver
        .else
        .asg    0, $asmver
        .endif

        .if ($asmver < 430)

        .asg    B,    CALL     ; Function Call
        .asg    B,    RET      ; Return from a Function
        .asg    B,    CALLRET  ; Function call with Call / Ret chaining.

        .if .TMS320C6400
        .asg    BNOP, CALLNOP  ; C64x BNOP as a Fn. Call
        .asg    BNOP, RETNOP   ; C64x BNOP as a Fn. Return
        .asg    BNOP, CRNOP    ; C64x Fn call w/, Call/Ret chaining via BNOP.
        .endif

        .asg    , .asmfunc     ; .func equivalent for hand-assembly code
        .asg    , .endasmfunc  ; .endfunc equivalent for hand-assembly code

        .endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* TEXAS INSTRUMENTS, INC. *
|
|
* *
|
|
* NAME *
|
|
* IMG_thr_gt2max *
|
|
* *
|
|
* *
|
|
* REVISION DATE *
|
|
* 13-Mar-2002 *
|
|
* *
|
|
* USAGE *
|
|
* This routine has the following C prototype: *
|
|
* *
|
|
* void IMG_thr_gt2max *
|
|
* ( *
|
|
* const unsigned char *in_data, /* Input image data */ *
|
|
* unsigned char *restrict out_data, /* Output image data */ *
|
|
* short cols, short rows, /* Image dimensions */ *
|
|
* unsigned char threshold /* Threshold value */ *
|
|
* ) *
|
|
* *
|
|
* This routine performs a thresholding operation on an input *
|
|
* image in in_data[] whose dimensions are given in the arguments *
|
|
* 'cols' and 'rows'. The thresholded pixels are written to the *
|
|
* output image pointed to by out_data[]. The input and output *
|
|
* are exactly the same dimensions. *
|
|
* *
|
|
* DESCRIPTION *
|
|
* Pixels that are at or below the threshold value are written to the *
|
|
* output unmodified. Pixels that are greater than the threshold *
|
|
* are set to 255 in the output image. *
|
|
* *
|
|
* The exact thresholding function performed is described by *
|
|
* the following transfer function diagram: *
|
|
* *
|
|
* *
|
|
*                 255_|          _________                                  *
*                     |         |                                           *
*                     |         |                                           *
*            O        |         |                                           *
*            U        |         |                                           *
*            T    th _|. . . . .|                                           *
*            P        |        /.                                           *
*            U        |      /  .                                           *
*            T        |    /    .                                           *
*                     |  /      .                                           *
*                   0_|/________.__________                                 *
*                     |         |         |                                 *
*                     0        th        255                                *
*                                                                           *
*                             INPUT                                         *
|
|
* *
|
|
* Please see the IMGLIB functions IMG_thr_gt2thr, IMG_thr_le2thr *
|
|
* and IMG_thr_le2min for other thresholding functions. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* The input and output buffers do not alias. *
|
|
* *
|
|
* The input and output buffers must be double-word aligned. *
|
|
* *
|
|
* The total number of pixels rows*cols must be at least *
|
|
* 16 and a multiple of 16. *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The loop is unrolled 16x. Packed-data processing techniques *
|
|
* allow us to process all 16 pixels in parallel. *
|
|
* *
|
|
* Two LDDW instructions load in 16 pixels, designated p0 thru p9 and *
|
|
* pA thru pF. These pixels are packed in the four registers pFEDC, *
|
|
* pBA98, p7654, and p3210 as follows: *
|
|
* *
|
|
* Upper double-word: *
|
|
* *
|
|
* 31 24 16 8 0 31 24 16 8 0 *
|
|
* +----+----+----+----+ +----+----+----+----+ *
|
|
* pFEDC | pF | pE | pD | pC | pBA98 | pB | pA | p9 | p8 | *
|
|
* +----+----+----+----+ +----+----+----+----+ *
|
|
* *
|
|
* Lower double-word: *
|
|
* *
|
|
* 31 24 16 8 0 31 24 16 8 0 *
|
|
* +----+----+----+----+ +----+----+----+----+ *
|
|
* p7654 | p7 | p6 | p5 | p4 | p3210 | p3 | p2 | p1 | p0 | *
|
|
* +----+----+----+----+ +----+----+----+----+ *
|
|
* *
|
|
* (Note that this diagram assumes a little endian memory *
|
|
* configuration, although this kernel works equally well in *
|
|
* either endian mode.) *
|
|
* *
|
|
* We compare these four words against a packed copy of the threshold *
|
|
* value. The packed threshold contains four copies of the threshold *
|
|
* value, one in each byte, like so: *
|
|
* *
|
|
* 31 24 16 8 0 *
|
|
* +----+----+----+----+ *
|
|
* thththth | th | th | th | th | *
|
|
* +----+----+----+----+ *
|
|
* *
|
|
* We compare using CMPGTU4. The comparison results (stored in *
|
|
* cFEDC, cBA98, c7654, and c3210) are expanded to masks using XPND4. *
|
|
* The results are a set of four masks (xFEDC, xBA98, x7654, and *
|
|
* x3210) which contain 0xFF in bytes that are greater than the *
|
|
* threshold, and 0x00 in bytes that are less than or equal to the *
|
|
* threshold. *
|
|
* *
|
|
* To complete the thresholding process, we compute the logical OR *
|
|
* between our original pixel values and the mask. This forces *
|
|
* values above the threshold to 0xFF, and leaves the other values *
|
|
* unmodified. *
|
|
* *
|
|
* The 16 results are then written with two STDWs. *
|
|
* *
|
|
* MEMORY NOTE *
|
|
* This code is ENDIAN NEUTRAL. *
|
|
* *
|
|
* The input and output arrays must be double-word aligned. *
|
|
* *
|
|
* No bank conflicts occur, regardless of the relative alignment of *
|
|
* in_data[] and out_data[]. *
|
|
* *
|
|
* CYCLES *
|
|
* cycles = 0.1875 * rows * cols + 22. *
|
|
* For rows = 32 and cols = 32, cycles = 214. *
|
|
* *
|
|
* This number includes 6 cycles of function call overhead. The *
|
|
* exact overhead will vary depending on compiler options used. *
|
|
* *
|
|
* CODESIZE *
|
|
* 164 bytes. *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
|
|
* ========================================================================= *
*   _IMG_thr_gt2max                                                         *
*                                                                           *
*   For each of the rows*cols input bytes:                                  *
*       out = (in > threshold) ? 0xFF : in        (unsigned byte compare)   *
*   Sixteen bytes are handled per kernel pass (two LDDW, two STDW).         *
*                                                                           *
*   ENTRY                                                                   *
*       A4 = in_data      B4 = out_data                                     *
*       A6 = cols         B6 = rows                                         *
*       A8 = threshold    (only the low 8 bits reach the packed threshold)  *
*       B3 = return address                                                 *
*   EXIT                                                                    *
*       No return value.  No stack use.                                     *
*   REGISTERS WRITTEN                                                       *
*       A3-A6, A8, A9, A16-A18, A20, A21                                    *
*       B0, B4, B5, B7-B9, B16, B17, B20, B21                               *
*                                                                           *
*   SCHEDULE NOTES                                                          *
*       The kernel is 3 cycles long.  Prolog and epilog stages are folded   *
*       into it, so with n = rows*cols/16 the kernel body executes n + 2    *
*       times.  B_p counts down from 3 and predicates the two STDWs off     *
*       during the first three passes, while the registers they would       *
*       store do not yet hold results.                                      *
*                                                                           *
*       One LDDW pair is issued before the loop and one in every pass, so   *
*       n + 3 pairs are loaded but only n are stored: in_data is read up    *
*       to 48 bytes beyond rows*cols.  That data is never written out.      *
*                                                                           *
*       The ;[c,i] tags are the schedule annotations carried over from the  *
*       original source (they appear to give the cycle within one           *
*       iteration's schedule and the iteration number).                     *
*                                                                           *
*   NAMING                                                                  *
*       p = pixels as loaded          p..._ = copy held for the final OR    *
*       c = CMPGTU4 result bits       x     = XPND4 byte mask               *
*       t = thresholded result        3210/7654/BA98/FEDC = pixel indices   *
* ========================================================================= *
        .sect ".text:_thr_gt2max"
        .global _IMG_thr_gt2max
_IMG_thr_gt2max:
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
        ; ---- live on entry ----
        .asg            A4,         A_i_ptr     ; in_data      (pixels 0-7 of each 16)
        .asg            B4,         B_o_ptr     ; out_data     (pixels 0-7 of each 16)
        .asg            A6,         A_cols      ; 'cols' argument
        .asg            B6,         B_rows      ; 'rows' argument
        .asg            A8,         A_th        ; 'threshold' argument
        .asg            B3,         B_ret_addr  ; return address

        ; ---- working registers ----
        .asg            A3,         A_i         ; loop counter for BDEC
        .asg            A5,         A_o_ptr     ; out_data + 8 (pixels 8-F of each 16)
        .asg            A6,         A_c7654     ; reuses A6 once cols has been read
        .asg            A6,         A_x7654
        .asg            A8,         A_thththth  ; threshold replicated into 4 bytes
        .asg            A9,         A_p3210_
        .asg            A16,        A_t3210
        .asg            A16,        A_x3210
        .asg            A17,        A_p7654_
        .asg            A17,        A_t7654
        .asg            A18,        A_c3210
        .asg            A20,        A_p3210
        .asg            A21,        A_p7654
        .asg            B0,         B_p         ; store-suppress countdown
        .asg            B5,         B_pFEDC_
        .asg            B7,         B_cBA98
        .asg            B7,         B_xBA98
        .asg            B8,         B_pBA98_
        .asg            B8,         B_tBA98
        .asg            B9,         B_tFEDC
        .asg            B9,         B_xFEDC
        .asg            B16,        B_i_ptr     ; in_data + 8  (pixels 8-F of each 16)
        .asg            B17,        B_cFEDC
        .asg            B20,        B_pBA98
        .asg            B21,        B_pFEDC
* ========================================================================= *
* =========================== PIPE LOOP PROLOG ============================ *
        ; Pixel count = cols * rows; begin replicating the threshold
        ; (PACK2: low halfword of A_th into both halfwords).
        MPY     .M1X    A_cols,     B_rows,     A_i
||      PACK2   .L1     A_th,       A_th,       A_thththth

        ; Branch that re-enters the kernel for its second pass (the first
        ; pass is reached by fall-through).  PACKL4 keeps the low byte of
        ; each halfword, leaving the threshold in all four bytes.
        B       .S1     loop                                    ;[ 8,1]
||      PACKL4  .L1     A_thththth, A_thththth, A_thththth

        ; A_i = pixel count / 16; B side reads from in_data + 8.
        SHR     .S1     A_i,        4,          A_i
||      ADD     .L2X    A_i_ptr,    8,          B_i_ptr

        ; First 16 pixels; both input pointers advance by 16 bytes per load.
        ; A side writes to out_data + 8.  A_i is pre-decremented for BDEC.
        ; B_p = 3 keeps the first three kernel passes from storing.
        LDDW    .D1T1   *A_i_ptr++[2],          A_p7654:A_p3210 ;[ 1,1]
||      LDDW    .D2T2   *B_i_ptr++[2],          B_pFEDC:B_pBA98 ;[ 1,1]
||      ADD     .S1X    B_o_ptr,    8,          A_o_ptr
||      SUB     .L1     A_i,        1,          A_i
||      MVK     .L2     3,          B_p
;-
; ===== 3 prolog stages collapsed
* =========================== PIPE LOOP KERNEL ============================ *
loop:
        ; Store the finished 16 pixels once B_p has reached zero; until then
        ; only the countdown runs.  Expand the 7654/BA98 compare bits to byte
        ; masks and take copies of those pixels, because a later LDDW rewrites
        ; A_p7654/B_pBA98 before the OR that merges them executes.
  [ B_p]SUB     .L2     B_p,        1,          B_p             ;[11,1]
||[!B_p]STDW    .D2T1   A_t7654:A_t3210,        *B_o_ptr++[2]   ;[11,1]
||[!B_p]STDW    .D1T2   B_tFEDC:B_tBA98,        *A_o_ptr++[2]   ;[11,1]
||      BDEC    .S1     loop,       A_i                         ;[ 8,2]
||      XPND4   .M1     A_c7654,    A_x7654                     ;[ 8,2]
||      XPND4   .M2     B_cBA98,    B_xBA98                     ;[ 8,2]
||      MV      .L1     A_p7654,    A_p7654_                    ;[ 8,2]
||      MV      .S2     B_pBA98,    B_pBA98_                    ;[ 8,2]

        ; Merge masks into the held 3210/FEDC pixels (mask 0xFF forces 255).
        ; Compare the newly arrived 3210/FEDC pixels against the threshold
        ; and hold a copy of them for their own OR.
        OR      .D1     A_p3210_,   A_x3210,    A_t3210         ;[ 9,2]
||      OR      .D2     B_pFEDC_,   B_xFEDC,    B_tFEDC         ;[ 9,2]
||      CMPGTU4 .S1     A_p3210,    A_thththth, A_c3210         ;[ 6,3]
||      CMPGTU4 .S2X    B_pFEDC,    A_thththth, B_cFEDC         ;[ 6,3]
||      MV      .L1     A_p3210,    A_p3210_                    ;[ 6,3]
||      MV      .L2     B_pFEDC,    B_pFEDC_                    ;[ 6,3]

        ; Merge masks into the held 7654/BA98 pixels, expand the 3210/FEDC
        ; compare bits, compare the 7654/BA98 pixels, and issue the next
        ; pair of loads.
        OR      .L1     A_p7654_,   A_x7654,    A_t7654         ;[10,2]
||      OR      .L2     B_pBA98_,   B_xBA98,    B_tBA98         ;[10,2]
||      XPND4   .M1     A_c3210,    A_x3210                     ;[ 7,3]
||      XPND4   .M2     B_cFEDC,    B_xFEDC                     ;[ 7,3]
||      CMPGTU4 .S1     A_p7654,    A_thththth, A_c7654         ;[ 7,3]
||      CMPGTU4 .S2X    B_pBA98,    A_thththth, B_cBA98         ;[ 7,3]
||      LDDW    .D1T1   *A_i_ptr++[2],          A_p7654:A_p3210 ;[ 1,5]
||      LDDW    .D2T2   *B_i_ptr++[2],          B_pFEDC:B_pBA98 ;[ 1,5]

* =========================== PIPE LOOP EPILOG ============================ *
; ===== 3 epilog stages collapsed.
        ; Final 16 results are stored (no pointer update) in parallel with
        ; the return branch.  RETNOP is either the assembler's own mnemonic
        ; or, on pre-4.30 tools, the BNOP alias from the compatibility shim.
        STDW    .D2T1   A_t7654:A_t3210,        *B_o_ptr        ;[11,5]
||      STDW    .D1T2   B_tFEDC:B_tBA98,        *A_o_ptr        ;[11,5]
||      RETNOP  .S2     B_ret_addr, 5
|
|
|
|
* ========================================================================= *
|
|
* End of file: img_thr_gt2max.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|