You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

507 lines
28 KiB

;* ======================================================================== *;
;* TEXAS INSTRUMENTS, INC. *;
;* *;
;* IMGLIB DSP Image/Video Processing Library *;
;* *;
;* Release: Revision 1.04b *;
;* CVS Revision: 1.7 Sun Sep 29 07:07:22 2002 (UTC) *;
;* Snapshot date: 23-Oct-2003 *;
;* *;
;* This library contains proprietary intellectual property of Texas *;
;* Instruments, Inc. The library and its source code are protected by *;
;* various copyrights, and portions may also be protected by patents or *;
;* other legal protections. *;
;* *;
;* This software is licensed for use with Texas Instruments TMS320 *;
;* family DSPs. This license was provided to you prior to installing *;
;* the software. You may review this license by consulting the file *;
;* TI_license.PDF which accompanies the files in this library. *;
;* ------------------------------------------------------------------------ *;
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
;* All Rights Reserved. *;
;* ======================================================================== *;
;* ======================================================================== *;
;* Assembler compatibility shim for assembling 4.30 and later code on *;
;* tools prior to 4.30. *;
;* ======================================================================== *;
; Probe the assembler version: $asmver takes the value of the predefined
; symbol .ASSEMBLER_VERSION when the assembler defines it, otherwise 0.
.if $isdefed(".ASSEMBLER_VERSION")
.asg .ASSEMBLER_VERSION, $asmver
.else
.asg 0, $asmver
.endif
; For tools older than 4.30, alias the newer call/return mnemonics onto plain
; branches so that source written with them (e.g. "RET .S2 B_ret" further
; down in this file) still assembles.
.if ($asmver < 430)
.asg B, CALL ; Function Call
.asg B, RET ; Return from a Function
.asg B, CALLRET ; Function call with Call / Ret chaining.
; The BNOP-based variants are only aliased when assembling for C64x.
.if .TMS320C6400
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
.asg BNOP, CRNOP ; C64x Fn call w/, Call/Ret chaining via BNOP.
.endif
; Substitute an empty string for the function-bracketing directives.
.asg , .asmfunc ; .func equivalent for hand-assembly code
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
.endif
;* ======================================================================== *;
;* End of assembler compatibility shim. *;
;* ======================================================================== *;
*============================================================================*
* *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* Perimeter: Detection of the boundary of a binary image *
* *
* REVISION DATE *
* 21-Aug-2001 *
* *
* USAGE *
* This routine is C callable, and has the following C prototype: *
* *
* int IMG_perimeter *
* ( *
* const unsigned char *restrict in, /* Input image */ *
* int cols, /* Width of input */ *
* unsigned char *restrict out /* Output image */ *
* ); *
* *
* DESCRIPTION *
* This routine produces the boundary of a binary image. It echoes *
* the boundary pixels with a value of 0xFF and sets the other pixels *
* as 0. Detection of the boundary of a binary image is a segmentation *
* problem and is done by examining spatial locality of the neighboring *
* pixels. This is done by using the four connectivity algorithm *
* *
* pix_up *
* pix_lft pix_cent pix_rgt *
* pix_dn *
* *
* The output pixel at location pix_cent is echoed as a boundary pixel *
* if pix_cent is non-zero and any one of its four neighbors is zero. *
* The four neighbors shown above stand for the following: *
* *
* pix_up: top pixel *
* pix_lft: left pixel *
* pix_rgt: right pixel *
* pix_dn: bottom pixel *
* *
* ASSUMPTIONS *
* input image must be double-word aligned *
* cols must be a multiple of 16 *
* *
* MEMORY NOTE *
* No bank conflicts are expected for this kernel. *
* *
* TECHNIQUES *
* *
* Use double-word wide loads to bring in pixels along three lines, which *
* we shall call top, mid and bot. Use split compares to test whether *
* pixels are equal to zero (top, bot) or greater than zero (mid). Each *
* split compare yields a 4-bit result; prepare an 8-bit mask using the *
* results of 2 such split compares. Perform this operation for the top, *
* middle and bottom rows. Logically invert the result of mid, shift it *
* left and right, and add the context information by setting the 8th *
* bit or the 1st bit. Use XPND4 and BITC4 to perform expansion and bit *
* count. Store output pixels as double words. The actual hand-assembly *
* code is unrolled once and computes 16 output pixels in 10 cycles. *
* *
* CYCLES *
* 10 * cols/16 + 55 *
* cols = 720, 505 cycles *
* *
* CodeSize: 600 bytes *
* *
*============================================================================*
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
*============================================================================*
; ----------------------------------------------------------------------------
; IMG_perimeter entry point (C-callable).
;   In :  A4 = in   pointer to the row being processed (the "mid" row)
;         B4 = cols number of pixels in the row
;         A6 = out  pointer to the output row
;   Out:  A4 = accumulated boundary-pixel count (A_sum) after the first and
;              last output pixels have been discounted and forced to zero.
; The rows at (in - cols) and (in + cols) are read as the "top" and "bot"
; neighbour rows.
; Interrupts are disabled (GIE bit of CSR cleared) shortly after entry and
; the saved CSR is written back in the delay slots of the final RET.
; NOTE(review): the lane/bit bookkeeping below (low byte = first pixel)
; appears to assume little-endian byte order -- confirm.
; ----------------------------------------------------------------------------
.sect ".text:_perimeter"
.global _IMG_perimeter
_IMG_perimeter:
*================== SYMBOLIC REGISTER ASSIGNMENTS: SETUP ======================*
.asg B15, B_SP ; Stack pointer, B datapath
.asg A3, A_SP ; Stack pointer, A datapath
.asg B0, B_csr ; CSR's value
.asg B1, B_no_gie ; CSR w/ GIE bit cleared
.asg A0, A_csr ; Copy of CSR's value
.asg B3, B_ret ; Return address
*==============================================================================*
; Stack frame. 14 words: A10..A15, B10..B14, B3, CSR, pad
; A_SP holds a copy of the stack pointer taken in parallel with the
; post-decrementing store below; A-side registers are stored at negative
; offsets from that copy and B-side registers at positive offsets from the
; decremented B_SP, so the .D1 and .D2 units can store in the same cycle.
;-
STW .D2T1 A15, *B_SP--[14] ; Save A15
|| MV .S1X B_SP, A_SP ; Twin Stack Pointer
STW .D1T1 A14, *-A_SP[ 2] ; Save A14
|| STW .D2T2 B14, *+B_SP[11] ; Save B14
|| MVC .S2 CSR, B_csr ; Capture CSR
STW .D1T1 A13, *-A_SP[ 4] ; Save A13
|| STW .D2T2 B13, *+B_SP[ 9] ; Save B13
|| AND .L2 B_csr, -2, B_no_gie ; Clear GIE
;-
STW .D1T1 A12, *-A_SP[ 6] ; Save A12
|| STW .D2T2 B12, *+B_SP[ 7] ; Save B12
STW .D1T1 A11, *-A_SP[ 8] ; Save A11
|| STW .D2T2 B11, *+B_SP[ 5] ; Save B11
|| MV .L1X B_csr, A_csr ; Partitioning MV
STW .D1T1 A10, *-A_SP[10] ; Save A10
|| STW .D2T2 B10, *+B_SP[ 3] ; Save B10
|| MVC .S2 B_no_gie, CSR ; Dis interrupts
; ===== Interrupts masked here =====
;-
; ====================== SYMBOLIC REGISTER ASSIGNMENTS =======================
; Argument registers, row pointers (one per datapath; the B-side pointer is
; the A-side pointer + 8 bytes) and loop constants.
.asg A4, A_img_in ;
.asg B4, B_incols ;
.asg A6, A_img_out ;
.asg A11, A_top_ptr ;
.asg A10, A_mid_ptr ;
.asg A2, A_bot_ptr ;
.asg B5, B_top_ptr ;
.asg B6, B_mid_ptr ;
.asg B16, B_bot_ptr ;
.asg A19, A_bot_zero ;
.asg B19, B_bot_zero ;
.asg A20, A_k_16 ;
.asg B20, B_k_16 ;
.asg A21, A_FF ;
.asg B21, B_FF ;
.asg B1, B_lt0 ;
.asg A3, A_sum ;
.asg A5, A_optr ;
.asg B17, B_optr ;
.asg B2, B_pd ;
.asg A16, A_byte_ptr ;
.asg A22, A_mid_rt_const ;
.asg B22, B_mid_lf_const ;
.asg A23, A_mid_lf_const ;
.asg B23, B_mid_rt_const ;
.asg B18, B_i ;
; ============================================================================
; Setup. The CSR copy (A0) and return address (B3) are written to the frame
; here because both registers are reused as loop temporaries later.
; Values established:
;   A/B_mid_rt_const = 0x80, A/B_mid_lf_const = 1, A/B_k_16 = 16
;   A_optr = out,            B_optr    = out + 8
;   A_mid_ptr = in,          B_mid_ptr = in + 8
;   A_bot_ptr = in + cols,   B_bot_ptr = in + cols + 8
;   A_top_ptr = in - cols,   B_top_ptr = in - cols + 8
;   A_byte_ptr = in + 16
;   B_i  = cols >> 4 (loop counter for BDEC)
;   B_pd = 3 (pipeline priming counter), B_lt0 = 0, A/B_bot_zero = 0
MVK .S1 080h, A_mid_rt_const ;[ 1,0]
|| STW .D1T1 A_csr, *-A_SP[12] ; Save CSR
|| STW .D2T2 B_ret, *+B_SP[ 1] ; return address
MV .L1 A_img_out, A_optr ;[ 2,0]
MV .D2X A_mid_rt_const, B_mid_rt_const ;[ 3,0]
|| MVK .S1 16, A_k_16 ;[ 3,0]
SHRU .S2 B_incols, 4, B_i ;[ 4,0]
|| ADD .L2X A_optr, 8, B_optr ;[ 4,0]
|| ADD .D1X A_img_in, B_incols, A_bot_ptr ;[ 4,0]
MVK .L2 01h, B_mid_lf_const ;[ 5,0]
|| MV .D2X A_k_16, B_k_16 ;[ 5,0]
|| ADD .S1 A_img_in, 0, A_mid_ptr ;[ 5,0]
ADD .D2X A_bot_ptr, 8, B_bot_ptr ;[ 6,0]
|| SUB .L1X A_img_in, B_incols, A_top_ptr ;[ 6,0]
MV .S1X B_mid_lf_const, A_mid_lf_const ;[ 7,0]
|| ADD .D1 A_img_in, 16, A_byte_ptr ;[ 7,0]
|| ZERO .D2 B_lt0 ;[ 7,0]
|| ADD .L2X A_mid_ptr, 8, B_mid_ptr ;[ 7,0]
|| MVK .S2 3, B_pd
ZERO .L2 B_bot_zero ;[ 8,0]
|| ZERO .D1 A_bot_zero ;[ 8,0]
|| ADD .D2X A_top_ptr, 8, B_top_ptr ;[ 8,0]
;==== End of setup; execution falls through into the loop prolog
; ====================== SYMBOLIC REGISTER ASSIGNMENTS =======================
; Register map for the loop. The first 23 entries repeat the setup
; assignments (all but the three argument registers); the remainder name
; loop temporaries. Several names deliberately share one physical register
; (for example B3 = B_ret / B_midval03_s / B_midval_w1, A0 = A_csr / A_rt1,
; B29 = B_bot_word3 / B_mid_word3c), so their live ranges depend on the
; exact instruction schedule below.
.asg A11, A_top_ptr
.asg A10, A_mid_ptr
.asg A2, A_bot_ptr
.asg B5, B_top_ptr
.asg B6, B_mid_ptr
.asg B16, B_bot_ptr
.asg A19, A_bot_zero
.asg B19, B_bot_zero
.asg A20, A_k_16
.asg B20, B_k_16
.asg A21, A_FF
.asg B21, B_FF
.asg B1, B_lt0
.asg A3, A_sum
.asg A5, A_optr
.asg B17, B_optr
.asg B2, B_pd
.asg A22, A_mid_rt_const
.asg B22, B_mid_lf_const
.asg A23, A_mid_lf_const
.asg B23, B_mid_rt_const
.asg A16, A_byte_ptr
.asg B18, B_i
.asg A27, A_top_word1
.asg A26, A_top_word0
.asg A31, A_mid_word1
.asg A30, A_mid_word0
.asg A29, A_bot_word1
.asg A28, A_bot_word0
.asg B27, B_top_word3
.asg B26, B_top_word2
.asg B31, B_mid_word3
.asg B30, B_mid_word2
.asg B29, B_bot_word3
.asg B28, B_bot_word2
.asg A17, A_mid_word0c
.asg A9, A_mid_word1c
.asg B12, B_mid_word2c
.asg B29, B_mid_word3c
.asg A25, A_midval00
.asg A7, A_midval01
.asg B7, B_midval02
.asg B8, B_midval03
.asg A8, A_midval01_s
.asg B3, B_midval03_s
.asg A18, A_midval_w0
.asg B3, B_midval_w1
.asg A26, A_upval00
.asg A24, A_upval01
.asg B26, B_upval02
.asg B9, B_upval03
.asg A24, A_upval01_s
.asg B25, B_upval03_s
.asg A8, A_upval_w0
.asg B10, B_upval_w1
.asg A8, A_botval00
.asg A7, A_botval01
.asg B28, B_botval02
.asg B8, B_botval03
.asg A24, A_botval01_s
.asg B9, B_botval03_s
.asg A27, A_botval_w0
.asg B27, B_botval_w1
.asg A30, A_midval_k0
.asg B8, B_midval_k1
.asg A9, A_mid_rt_val0
.asg B8, B_mid_rt_val1
.asg A27, A_mid_lt_val0
.asg B7, B_mid_lt_val1
.asg B0, B_rt0
.asg A1, A_lt1
.asg A0, A_rt1
.asg A12, A_updnval0
.asg A7, A_rgtlft0
.asg A7, A_udrl0
.asg A25, A_udrlm0
.asg B11, B_updnval1
.asg B7, B_rgtlft1
.asg B7, B_udrl1
.asg B8, B_udrlm1
.asg A30, A_count0
.asg B7, B_count1
.asg A25, A_count
.asg A24, A_pixel0
.asg A7, A_udrlm_s
.asg A25, A_pixel1
.asg B24, B_pixel2
.asg B14, B_udrl_r
.asg B25, B_pixel3
; ============================================================================
; START:
; ============================ PIPE LOOP PROLOG ==============================
; PROLOG:
; Issues the first top/bot row loads, clears the running count (A_sum) and
; sets A_FF / B_FF to 0xFF. The NOP 3 covers the remaining load delay slots
; before B_top_word3 is compared.
LDDW .D2T2 *B_top_ptr++[2], B_top_word3:B_top_word2 ;[ 1,1]
|| LDDW .D1T1 *A_bot_ptr++[2], A_bot_word1:A_bot_word0 ;[ 1,1]
|| ZERO .L1 A_sum ;[ 8,0]
|| MVK .S2 0FFh, B_FF ;[ 8,0]
|| MVK .S1 0FFh, A_FF ;[ 8,0]
LDDW .D2T2 *B_bot_ptr++[2], B_bot_word3:B_bot_word2 ;[ 2,1]
NOP 3 ;
CMPEQ4 .S2 B_top_word3, B_bot_zero, B_upval03 ;[ 6,1]
MPY .M2 B_upval03, B_k_16, B_upval03_s ;[ 7,1]
; ============================ PIPE LOOP KERNEL ==============================
; The kernel is 10 execute packets (10 cycles) and handles 16 pixels per
; pass: 8 on the A datapath (words 0/1) and 8 on the B datapath (words 2/3).
; Instructions within one packet belong to different pipeline stages, so a
; packet's inputs are in general results of earlier packets or of the
; previous pass. Do not reorder.
;
; Data flow for each 8-pixel half (names use suffix 0 on A, 1 on B):
;   upval_w / botval_w : 8-bit masks of top/bot pixels equal to zero
;                        (CMPEQ4 per word; upper nibble moved up with MPY by
;                        16, then ADDed to the lower nibble).
;   midval_w           : 8-bit mask of mid pixels greater than zero (CMPGTU4).
;   midval_k           : NOT midval_w, ANDed with 0xFF.
;   mid_rt_val         : midval_k >> 1;  mid_lt_val : (midval_k * 2) & 0xFF.
;                        The bit shifted in at either end is supplied
;                        by the predicated ADDs of 0x80 / 1 in packet 10.
;   udrl               : upval_w | botval_w | mid_rt_val | mid_lt_val
;   udrlm              : udrl & midval_w  (boundary mask)
;   pixel              : XPND4 of each nibble of udrlm, ANDed with the saved
;                        copy of the mid word (*_word?c), so an output byte
;                        is either the original mid pixel byte or 0.
;   count              : BITC4 of udrlm, A and B halves added into A_count.
;
; B_pd starts at 3 and is decremented once per pass while non-zero; the two
; STNDW stores and the A_sum update are predicated on B_pd == 0, so they
; first take effect on the third pass.
; NOTE(review): loads are issued passes ahead of the stores they feed, so the
; kernel appears to read beyond in[cols-1] on all three rows -- confirm that
; callers provide readable memory past the row ends.
LOOP:
; -- kernel cycle 1: merge left/right masks; extract the bytes used as
;    cross-half context (B_rt0 = low byte of B_mid_word2, A_lt1 = top byte
;    of A_mid_word1).
OR .D1 A_mid_rt_val0, A_mid_lt_val0, A_rgtlft0 ;
|| OR .L2 B_mid_rt_val1, B_mid_lt_val1, B_rgtlft1 ;
|| AND .D2 B_mid_word2, B_FF, B_rt0 ;
|| ADD .L1 A_botval01_s, A_botval00, A_botval_w0 ;
|| SHRU .S1 A_mid_word1, 24, A_lt1 ;
|| CMPGTU4 .S2 B_mid_word3, B_bot_zero, B_midval03 ;
|| MPY .M1 A_midval01, A_k_16, A_midval01_s ;
; -- kernel cycle 2: merge up/down with left/right; count down the priming
;    counter; issue the A-side top-row load.
OR .L1 A_updnval0, A_rgtlft0, A_udrl0 ;
|| OR .L2 B_updnval1, B_rgtlft1, B_udrl1 ;
||[ B_pd]SUB .D2 B_pd, 1, B_pd ;
|| CMPEQ4 .S1 A_top_word0, A_bot_zero, A_upval00 ;
|| MPY .M2 B_midval03, B_k_16, B_midval03_s ;
|| LDDW .D1T1 *A_top_ptr++[2], A_top_word1:A_top_word0 ;
|| CMPEQ4 .S2 B_bot_word3, B_bot_zero, B_botval03 ;
; -- kernel cycle 3: form the boundary masks (udrlm); keep a copy of
;    A_mid_word0 for the final pixel masking; issue B-top and A-bot loads.
AND .L1 A_udrl0, A_midval_w0, A_udrlm0 ;
|| AND .L2 B_udrl1, B_midval_w1, B_udrlm1 ;
|| MVD .M1 A_mid_word0, A_mid_word0c ;
|| CMPGTU4 .S2 B_mid_word2, B_bot_zero, B_midval02 ;
|| ADD .S1 A_midval01_s, A_midval00, A_midval_w0 ;
|| MPY .M2 B_botval03, B_k_16, B_botval03_s ;
|| LDDW .D2T2 *B_top_ptr++[2], B_top_word3:B_top_word2 ;
|| LDDW .D1T1 *A_bot_ptr++[2], A_bot_word1:A_bot_word0 ;
; -- kernel cycle 4: expand the low nibble of each boundary mask and shift
;    the high nibble down for the next XPND4; issue A-mid and B-bot loads.
SHRU .S1 A_udrlm0, 4, A_udrlm_s ;
|| XPND4 .M1 A_udrlm0, A_pixel0 ;
|| XPND4 .M2 B_udrlm1, B_pixel2 ;
|| SHRU .S2 B_udrlm1, 4, B_udrl_r ;
|| ADD .L2 B_midval03_s, B_midval02, B_midval_w1 ;
|| NOT .L1 A_midval_w0, A_midval_k0 ;
|| LDDW .D1T1 *A_mid_ptr++[2], A_mid_word1:A_mid_word0 ;
|| LDDW .D2T2 *B_bot_ptr++[2], B_bot_word3:B_bot_word2 ;
; -- kernel cycle 5: loop branch (BDEC on B_i); it is placed here so that
;    its five delay slots are the remaining five packets of the kernel.
BDEC .S2 LOOP, B_i ;
|| XPND4 .M1 A_udrlm_s, A_pixel1 ;
|| BITC4 .M2 B_udrlm1, B_count1 ;
|| ADD .L1 A_upval01_s, A_upval00, A_upval_w0 ;
|| OR .D2 B_upval_w1, B_botval_w1, B_updnval1 ;
|| NOT .L2 B_midval_w1, B_midval_k1 ;
|| AND .D1 A_midval_k0, A_FF, A_midval_k0 ;
|| CMPEQ4 .S1 A_bot_word1, A_bot_zero, A_botval01 ;
; -- kernel cycle 6: bit-count the A-side mask; start masking output pixels
;    with the saved mid words; issue the B-side mid-row load.
BITC4 .M1 A_udrlm0, A_count0 ;
|| AND .D1 A_pixel0, A_mid_word0c, A_pixel0 ;
|| XPND4 .M2 B_udrl_r, B_pixel3 ;
|| OR .L1 A_upval_w0, A_botval_w0, A_updnval0 ;
|| AND .L2 B_midval_k1, B_FF, B_midval_k1 ;
|| CMPEQ4 .S1 A_bot_word0, A_bot_zero, A_botval00 ;
|| CMPEQ4 .S2 B_top_word2, B_bot_zero, B_upval02 ;
|| LDDW .D2T2 *B_mid_ptr++[2], B_mid_word3:B_mid_word2 ;
; -- kernel cycle 7: derive shifted neighbour masks (>>1, and *2 via MPY);
;    fetch one byte through A_byte_ptr (starts at in + 16, advances by 16)
;    into A_rt1.
AND .L1 A_pixel1, A_mid_word1c, A_pixel1 ;
|| AND .L2 B_pixel2, B_mid_word2c, B_pixel2 ;
|| SHRU .S1 A_midval_k0, 1, A_mid_rt_val0 ;
|| MPY .M2 B_midval_k1, 2, B_mid_lt_val1 ;
|| MPY .M1 A_midval_k0, 2, A_mid_lt_val0 ;
|| LDBU .D1T1 *A_byte_ptr++[16], A_rt1 ;
|| ADD .D2 B_upval03_s, B_upval02, B_upval_w1 ;
|| CMPEQ4 .S2 B_bot_word2, B_bot_zero, B_botval02 ;
; -- kernel cycle 8: once primed (B_pd == 0) store the A-side 8 output
;    bytes (non-aligned double-word store); add the two half counts; save
;    copies of mid words 1 and 2 for later pixel masking.
[!B_pd]STNDW .D1T1 A_pixel1:A_pixel0, *A_optr++[2] ;
|| ADD .L1X A_count0, B_count1, A_count ;
|| AND .D2 B_pixel3, B_mid_word3c, B_pixel3 ;
|| MVD .M2 B_mid_word2, B_mid_word2c ;
|| MVD .M1 A_mid_word1, A_mid_word1c ;
|| ADD .L2 B_botval03_s, B_botval02, B_botval_w1 ;
|| CMPEQ4 .S1 A_top_word1, A_bot_zero, A_upval01 ;
|| CMPEQ4 .S2 B_top_word3, B_bot_zero, B_upval03 ;
; -- kernel cycle 9: once primed, accumulate the count into A_sum and store
;    the B-side 8 output bytes; clip the *2 products to 8 bits.
[!B_pd]ADD .L1 A_sum, A_count, A_sum ;
||[!B_pd]STNDW .D2T2 B_pixel3:B_pixel2, *B_optr++[2] ;
|| SHRU .S2 B_midval_k1, 1, B_mid_rt_val1 ;
|| AND .D1 A_mid_lt_val0, A_FF, A_mid_lt_val0 ;
|| AND .L2 B_mid_lt_val1, B_FF, B_mid_lt_val1 ;
|| CMPGTU4 .S1 A_mid_word0, A_bot_zero, A_midval00 ;
|| MPY .M1 A_botval01, A_k_16, A_botval01_s ;
|| MPY .M2 B_upval03, B_k_16, B_upval03_s ;
; -- kernel cycle 10: add cross-half context. When the adjacent byte held in
;    B_rt0 / A_rt1 / B_lt0 / A_lt1 is zero, 0x80 (right masks) or 1 (left
;    masks) is added. The target bit is clear after the >>1 or (*2 & 0xFF)
;    step, so the ADD acts as a bit set. B_lt0 is refreshed from the top
;    byte of B_mid_word3 for use by a later pass.
MVD .M2 B_mid_word3, B_mid_word3c ;
||[!B_rt0]ADD .D1 A_mid_rt_val0, A_mid_rt_const, A_mid_rt_val0 ;
||[!A_rt1]ADD .L2 B_mid_rt_val1, B_mid_rt_const, B_mid_rt_val1 ;
||[!B_lt0]ADD .L1 A_mid_lt_val0, A_mid_lf_const, A_mid_lt_val0 ;
||[!A_lt1]ADD .D2 B_mid_lt_val1, B_mid_lf_const, B_mid_lt_val1 ;
|| SHRU .S2 B_mid_word3, 24, B_lt0 ;
|| MPY .M1 A_upval01, A_k_16, A_upval01_s ;
|| CMPGTU4 .S1 A_mid_word1, A_bot_zero, A_midval01 ;
; ============================ PIPE LOOP EPILOG ==============================
; EPILOG:
; ============================================================================
; END:
; ====================== SYMBOLIC REGISTER ASSIGNMENTS =======================
; Epilogue register map. A_SP is re-bound from A3 to A15 because A3 now
; holds A_sum; A5 (A_optr in the loop) is reused as A_offset; A0 is used
; first as A_p0 / A_p1 and afterwards as A_csr.
.asg A19, A_bot_zero ;
.asg A6, A_img_out ;
.asg B4, B_incols ;
.asg A3, A_sum ;
.asg A4, A_ret_val ;
.asg A0, A_p0
.asg A5, A_offset
.asg A0, A_p1
.asg B15, B_SP ; Stack pointer, B datapath
.asg A15, A_SP ; Stack pointer, A datapath
.asg A0, A_csr ; CSR value to restore
.asg B3, B_ret ; Return address
.asg B0, B_p1
.asg B14, B_udrl_r
;============================================================================
; End-pixel fix-up and register restore, interleaved.
;   A_p0     = out[0] as just written by the loop
;   A_offset = cols - 1
;   B_p1     = B_udrl_r >> 3, i.e. bit 3 of the last upper-nibble B-side
;              boundary mask left by the loop
; A_sum is decremented once for each of A_p0 / B_p1 that is non-zero, and
; out[0] and out[cols-1] are overwritten with zero further down.
; A0 carries A_p0 first and the reloaded CSR value afterwards: the LDBU
; result is consumed by the predicated SUB before the later-issued LDW into
; A0 completes. This depends on the load delay slots -- do not reorder.
LDBU .D1T1 *A_img_out, A_p0 ;
|| SUB .S1X B_incols, 1, A_offset ;
SHRU .S2 B_udrl_r, 3, B_p1
|| MV .S1X B_SP, A_SP ; Twin Stack Pointer
LDW .D1T2 *+A_SP[ 1], B_ret ; Get return address
|| LDW .D2T1 *+B_SP[ 2], A_csr ; Get CSR's value
LDW .D1T2 *+A_SP[ 3], B10 ; Restore B10
|| LDW .D2T1 *+B_SP[ 4], A10 ; Restore A10
LDW .D1T2 *+A_SP[ 5], B11 ; Restore B11
|| LDW .D2T1 *+B_SP[ 6], A11 ; Restore A11
[ A_p0]SUB .L1 A_sum, 1, A_sum ;
|| LDW .D1T2 *+A_SP[ 7], B12 ; Restore B12
|| LDW .D2T1 *+B_SP[ 8], A12 ; Restore A12
[ B_p1]SUB .S1 A_sum, 1, A_sum ;
|| LDW .D1T2 *+A_SP[ 9], B13 ; Restore B13
|| LDW .D2T1 *+B_SP[10], A13 ; Restore A13
;==== Return sequence follows (RET is issued in the next packet)
*================== SYMBOLIC REGISTER ASSIGNMENTS: SETUP ======================*
.asg B15, B_SP ; Stack pointer, B datapath
.asg A15, A_SP ; Stack pointer, A datapath
.asg A0, A_csr ; CSR value to restore
.asg B3, B_ret ; Return address
*==============================================================================*
; RET is issued here; the five cycles after it (CSR/A15 restore, the two
; zeroing stores with the return-value move, and NOP 2) are its delay slots.
; A15 is reloaded last, with the pre-increment that releases the 14-word
; frame, because A15 is also serving as A_SP.
LDW .D1T2 *+A_SP[11], B14 ; Restore B14
|| LDW .D2T1 *+B_SP[12], A14 ; Restore A14
|| RET .S2 B_ret ; Return to caller
MVC .S2X A_csr, CSR ; Restore CSR
|| LDW .D2T1 *++B_SP[14],A15 ; Restore A15
*====== Interruptibility state restored here ==================================*
; Force out[cols-1] and out[0] to zero and move the count into the return
; register.
STB .D1T1 A_bot_zero, *+A_img_out[A_offset] ;
MV .L1 A_sum, A_ret_val ;
|| STB .D1T1 A_bot_zero, *A_img_out ;
NOP 2
;====== Branch Occurs =====
*==============================================================================*
*= End of file: img_perimeter.asm =*
*==============================================================================*
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
*==============================================================================*