You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

259 lines
16 KiB

;* ======================================================================== *;
;* TEXAS INSTRUMENTS, INC. *;
;* *;
;* IMGLIB DSP Image/Video Processing Library *;
;* *;
;* Release: Revision 1.04b *;
;* CVS Revision: 1.4 Sun Sep 29 03:32:30 2002 (UTC) *;
;* Snapshot date: 23-Oct-2003 *;
;* *;
;* This library contains proprietary intellectual property of Texas *;
;* Instruments, Inc. The library and its source code are protected by *;
;* various copyrights, and portions may also be protected by patents or *;
;* other legal protections. *;
;* *;
;* This software is licensed for use with Texas Instruments TMS320 *;
;* family DSPs. This license was provided to you prior to installing *;
;* the software. You may review this license by consulting the file *;
;* TI_license.PDF which accompanies the files in this library. *;
;* ------------------------------------------------------------------------ *;
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
;* All Rights Reserved. *;
;* ======================================================================== *;
;* ======================================================================== *;
;* Assembler compatibility shim for assembling 4.30 and later code on *;
;* tools prior to 4.30. *;
;* ======================================================================== *;
; Step 1: capture the assembler version in the substitution symbol $asmver.
; If the .ASSEMBLER_VERSION symbol is not defined, $asmver is set to 0 so
; that the comparison below treats the tools as "older than 4.30".
.if $isdefed(".ASSEMBLER_VERSION")
.asg .ASSEMBLER_VERSION, $asmver
.else
.asg 0, $asmver
.endif
; Step 2: when the version is below 430, define the call/return mnemonics
; used by this file as plain substitution symbols for branch instructions.
.if ($asmver < 430)
.asg B, CALL ; Function Call
.asg B, RET ; Return from a Function
.asg B, CALLRET ; Function call with Call / Ret chaining.
; The *NOP variants map onto BNOP and are only defined for C64x targets.
.if .TMS320C6400
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
.asg BNOP, CRNOP ; C64x Fn call w/, Call/Ret chaining via BNOP.
.endif
; .asmfunc / .endasmfunc are substituted with an empty string, so any use
; of them in the code below assembles to nothing on these older tools.
.asg , .asmfunc ; .func equivalent for hand-assembly code
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
.endif
;* ======================================================================== *;
;* End of assembler compatibility shim. *;
;* ======================================================================== *;
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* IMG_thr_le2thr *
* *
* *
* REVISION DATE *
* 13-Sep-2002 *
* *
* USAGE *
* This routine has the following C prototype: *
* *
* void IMG_thr_le2thr *
* ( *
* const unsigned char *in_data, /* Input image data */ *
* unsigned char *restrict out_data, /* Output image data */ *
* short cols, short rows, /* Image dimensions */ *
* unsigned char threshold /* Threshold value */ *
* ) *
* *
* This routine performs a thresholding operation on an input *
* image in in_data[] whose dimensions are given in the arguments *
* 'cols' and 'rows'. The thresholded pixels are written to the *
* output image pointed to by out_data[]. The input and output *
* are exactly the same dimensions. *
* *
* DESCRIPTION *
* Pixels that are above the threshold value are written to the *
* output unmodified. Pixels that are less than or equal to the *
* threshold are set to the threshold value in the output image. *
* *
* The exact thresholding function performed is described by *
* the following transfer function diagram: *
* *
* *
* 255_| *
* | / *
* | / *
* O | / *
* U | / *
* T th _|_________ / *
* P | . *
* U | . *
* T | . *
* | . *
* 0_|_________.__________ *
* | | | *
* 0 th 255 *
* *
* INPUT *
* *
* Please see the IMGLIB functions IMG_thr_gt2thr, IMG_thr_le2min *
* and IMG_thr_gt2max for other thresholding functions. *
* *
* ASSUMPTIONS *
* The input and output buffers do not alias. *
* *
* The input and output buffers must be double-word aligned. *
* *
* The total number of pixels rows*cols must be at least *
* 16 and a multiple of 16. *
* *
* TECHNIQUES *
* The loop is unrolled 16x. Packed-data processing techniques *
* allow us to process all 16 pixels in parallel. *
* *
* Two LDDW instructions load in 16 pixels, designated p0 thru p9 *
* and pA thru pF. These pixels are packed in the four registers *
* pFEDC, pBA98, p7654, and p3210 as follows: *
* *
* Upper double-word: *
* *
* 31 24 16 8 0 31 24 16 8 0 *
* +----+----+----+----+ +----+----+----+----+ *
* pFEDC | pF | pE | pD | pC | pBA98 | pB | pA | p9 | p8 | *
* +----+----+----+----+ +----+----+----+----+ *
* *
* Lower double-word: *
* *
* 31 24 16 8 0 31 24 16 8 0 *
* +----+----+----+----+ +----+----+----+----+ *
* p7654 | p7 | p6 | p5 | p4 | p3210 | p3 | p2 | p1 | p0 | *
* +----+----+----+----+ +----+----+----+----+ *
* *
* (Note that this diagram assumes a little endian memory *
* configuration, although this kernel works equally well in *
* either endian mode.) *
* *
* We compare these four words against a packed copy of the threshold *
* value. The packed threshold contains four copies of the threshold *
* value, one in each byte, like so: *
* *
* 31 24 16 8 0 *
* +----+----+----+----+ *
* thththth | th | th | th | th | *
* +----+----+----+----+ *
* *
* We compare using MAXU4. This instruction will select the larger *
* value between our input pixels and the threshold value. The *
* result is that input values below the threshold are clamped to the *
* threshold value. *
* *
* The 16 results are then written with two STDWs. *
* *
* MEMORY NOTE *
* This code is ENDIAN NEUTRAL. *
* *
* The input and output arrays must be double-word aligned. *
* *
* No bank conflicts occur, regardless of the relative alignment of *
* in_data[] and out_data[]. *
* *
* CYCLES *
* cycles = 0.125 * rows * cols + 20. *
* For rows = 32 and cols = 32, cycles = 148. *
* *
* This number includes 6 cycles of function call overhead. The *
* exact overhead will vary depending on compiler options used. *
* *
* CODESIZE *
* 108 bytes. *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; ---------------------------------------------------------------------------
; _IMG_thr_le2thr
;
; Clamps every input pixel up to the threshold: each output byte is the
; unsigned maximum (MAXU4) of the input byte and the threshold byte.
; Sixteen pixels are processed per loop iteration: two LDDW loads feed four
; MAXU4 operations, whose results are written back with two STDW stores.
;
; The loop is software pipelined with a two-cycle kernel.  The prolog and
; epilog stages are collapsed into the kernel, so the kernel also runs while
; the pipeline is still filling; B_p gates the stores during those passes.
;
; NOTE(review): the kernel LDDWs are not predicated, so the collapsed
; pipeline appears to issue loads beyond the last 16-pixel group of
; in_data -- confirm that callers can tolerate this over-read.
; ---------------------------------------------------------------------------
.sect ".text:_thr_le2thr"
.global _IMG_thr_le2thr
_IMG_thr_le2thr:
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Incoming arguments and the return address.
; NOTE(review): the prototype lists cols before rows, so the A_rows / B_cols
; names look swapped relative to the argument order -- confirm.  Only the
; product of the two values is used below, so the result is unaffected.
.asg A4, A_i_ptr
.asg B4, B_o_ptr
.asg A6, A_rows
.asg B6, B_cols
.asg A8, A_th
.asg B3, B_ret_addr
; Working registers.  A6, B6 and A8 are given second names here: the
; argument values in them are consumed in the prolog before the registers
; are written under their new names (A_t3210, B_tBA98, A_thththth).
;   A_i            loop counter for BDEC
;   A_o_ptr        second output pointer (B_o_ptr + 8)
;   B_i_ptr        second input pointer  (A_i_ptr + 8)
;   *_p....        loaded pixel words, *_t.... thresholded result words
;   B_p            countdown that suppresses stores while the pipe fills
.asg A3, A_i
.asg A5, A_o_ptr
.asg A6, A_t3210
.asg A7, A_t7654
.asg A8, A_thththth
.asg A16, A_p3210
.asg A17, A_p7654
.asg B0, B_p
.asg B5, B_i_ptr
.asg B6, B_tBA98
.asg B7, B_tFEDC
.asg B8, B_pBA98
.asg B9, B_pFEDC
* ========================================================================= *
* =========================== PIPE LOOP PROLOG ============================ *
MPY .M1X A_rows, B_cols, A_i ; A_i = rows * cols (total pixel count)
PACK2 A_th, A_th, A_thththth ; first step of replicating th: copy into both halfwords
B .S2 loop ;[ 3,1]
|| SHR A_i, 4, A_i ; pixel count / 16 = number of 16-pixel groups
ADD A_i_ptr, 8, B_i_ptr ; B side reads the upper double-word of each group
|| PACKL4 A_thththth, A_thththth, A_thththth ; second step: th now in all four byte lanes
|| SUB A_i, 2, A_i ; bias the count for the BDEC loop control
BDEC .S1 loop, A_i ;[ 3,2]
|| LDDW .D1T1 *A_i_ptr++[2], A_p7654:A_p3210 ;[ 1,1]
|| LDDW .D2T2 *B_i_ptr++[2], B_pFEDC:B_pBA98 ;[ 1,1]
MVK 2, B_p ; two kernel passes run with stores disabled
; ===== 2 prolog stages collapsed
* =========================== PIPE LOOP KERNEL ============================ *
; Kernel, cycle 1: threshold p7654 / pBA98, issue the next loop branch and
; start the next pair of 8-byte loads (each pointer advances by 16 bytes).
loop:
MAXU4 .L1 A_p7654, A_thththth, A_t7654 ;[ 7,1]
|| MAXU4 .L2X B_pBA98, A_thththth, B_tBA98 ;[ 7,1]
|| BDEC .S1 loop, A_i ;[ 3,3]
|| LDDW .D1T1 *A_i_ptr++[2], A_p7654:A_p3210 ;[ 1,4]
|| LDDW .D2T2 *B_i_ptr++[2], B_pFEDC:B_pBA98 ;[ 1,4]
; Kernel, cycle 2: threshold p3210 / pFEDC.  While B_p is non-zero it is
; decremented and A_o_ptr is (re)derived from B_o_ptr instead of storing;
; once B_p reaches zero both STDWs execute and post-increment by 16 bytes.
[ B_p]SUB .S2 B_p, 1, B_p ;[ 8,1]
||[ B_p]ADD .S1X B_o_ptr, 8, A_o_ptr ; A_o_ptr = B_o_ptr + 8 (upper double-word)
||[!B_p]STDW .D2T1 A_t7654:A_t3210, *B_o_ptr++[2] ;[ 8,1]
||[!B_p]STDW .D1T2 B_tFEDC:B_tBA98, *A_o_ptr++[2] ;[ 8,1]
|| MAXU4 .L1 A_p3210, A_thththth, A_t3210 ;[ 6,2]
|| MAXU4 .L2X B_pFEDC, A_thththth, B_tFEDC ;[ 6,2]
* =========================== PIPE LOOP EPILOG ============================ *
; Drain the last group: finish its two remaining MAXU4s, start the return,
; then issue the final pair of stores in the packet that follows RETNOP.
; ===== 2 epilog stages collapsed
MAXU4 .L1 A_p7654, A_thththth, A_t7654 ;[ 7,4]
|| MAXU4 .L2X B_pBA98, A_thththth, B_tBA98 ;[ 7,4]
|| RETNOP B_ret_addr, 4 ; return to caller; 4 NOP cycles are folded into the branch
STDW .D2T1 A_t7654:A_t3210, *B_o_ptr ;[ 8,4]
|| STDW .D1T2 B_tFEDC:B_tBA98, *A_o_ptr ;[ 8,4]
* ========================================================================= *
* End of file: img_thr_le2thr.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *