You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

361 lines
20 KiB

;* ======================================================================== *;
;* TEXAS INSTRUMENTS, INC. *;
;* *;
;* IMGLIB DSP Image/Video Processing Library *;
;* *;
;* Release: Revision 1.04b *;
;* CVS Revision: 1.6 Sun Sep 29 03:32:19 2002 (UTC) *;
;* Snapshot date: 23-Oct-2003 *;
;* *;
;* This library contains proprietary intellectual property of Texas *;
;* Instruments, Inc. The library and its source code are protected by *;
;* various copyrights, and portions may also be protected by patents or *;
;* other legal protections. *;
;* *;
;* This software is licensed for use with Texas Instruments TMS320 *;
;* family DSPs. This license was provided to you prior to installing *;
;* the software. You may review this license by consulting the file *;
;* TI_license.PDF which accompanies the files in this library. *;
;* ------------------------------------------------------------------------ *;
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
;* All Rights Reserved. *;
;* ======================================================================== *;
;* ======================================================================== *;
;* Assembler compatibility shim for assembling 4.30 and later code on *;
;* tools prior to 4.30. *;
;* ======================================================================== *;
; Capture the assembler version in the substitution symbol $asmver.
; If the assembler does not define .ASSEMBLER_VERSION, $asmver is set to 0,
; so it takes the "older than 4.30" branch below.
.if $isdefed(".ASSEMBLER_VERSION")
.asg .ASSEMBLER_VERSION, $asmver
.else
.asg 0, $asmver
.endif
; For assemblers older than 4.30, map the newer call/return pseudo-mnemonics
; onto the plain branch instructions so that the code below still assembles.
.if ($asmver < 430)
.asg B, CALL ; Function Call
.asg B, RET ; Return from a Function
.asg B, CALLRET ; Function call with Call / Ret chaining.
; The BNOP-based variants are only defined when .TMS320C6400 is set.
.if .TMS320C6400
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
.asg BNOP, CRNOP ; C64x Fn. call with Call/Ret chaining via BNOP.
.endif
; .asmfunc / .endasmfunc are replaced by an empty string, so they expand to
; nothing on these older assemblers.
.asg , .asmfunc ; .func equivalent for hand-assembly code
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
.endif
;* ======================================================================== *;
;* End of assembler compatibility shim. *;
;* ======================================================================== *;
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* IMG_dilate_bin--This code performs 3x3 binary dilation *
* *
* REVISION DATE *
* 19-May-2002 *
* *
* USAGE *
* This routine is C callable, and has the following C prototype: *
* *
* void IMG_dilate_bin *
* ( *
* const unsigned char *restrict in_data, /* Incoming image */ *
* unsigned char *restrict out_data, /* Filtered output */ *
* const char *restrict mask, /* Filter mask */ *
* int cols /* Number of columns to process, in bytes. */ *
* ); *
* *
* DESCRIPTION *
* The function IMG_dilate_bin() implements binary dilation using an *
* arbitrary 3x3 mask. The dilation operator generates output pixels *
* by ORing the pixels under the input mask together to generate *
* the output pixel. The input mask specifies whether one or more *
* pixels from the input are to be ignored. *
* *
* In pseudo-code, the filtering operation for a pixel at (x, y) *
* works like so: *
* *
* result = 0; *
* if (mask[0][0] != DONT_CARE) result |= input[y + 0][x + 0]; *
* if (mask[0][1] != DONT_CARE) result |= input[y + 0][x + 1]; *
* if (mask[0][2] != DONT_CARE) result |= input[y + 0][x + 2]; *
* if (mask[1][0] != DONT_CARE) result |= input[y + 1][x + 0]; *
* if (mask[1][1] != DONT_CARE) result |= input[y + 1][x + 1]; *
* if (mask[1][2] != DONT_CARE) result |= input[y + 1][x + 2]; *
* if (mask[2][0] != DONT_CARE) result |= input[y + 2][x + 0]; *
* if (mask[2][1] != DONT_CARE) result |= input[y + 2][x + 1]; *
* if (mask[2][2] != DONT_CARE) result |= input[y + 2][x + 2]; *
* output[y][x] = result; *
* *
* For this code, "DONT_CARE" is specified by a negative value *
* in the input mask. Non-negative values in the mask cause the *
* corresponding pixel to be included in the dilation operation. *
* *
* Note that this code operates on a bitmap where each pixel is *
* represented as a single bit within a byte or word. Although *
* the pseudo-code above operates only on one pixel at a time, *
* with a single pixel in each array element, this implementation *
* operates on a bitmap which contains 8 pixels in each byte. *
* *
* Pixels are organized within each byte such that the pixel with *
* the smallest index is in the LSB position, and the pixel with *
* the largest index is in the MSB position. (That is, the code *
* assumes a LITTLE ENDIAN bit ordering.) *
* *
* Note that the "cols" argument actually specifies the number of *
* BYTES in the output, not the number of columns. The number of *
* columns is 8 times this argument. *
* *
* TECHNIQUES *
* The 3x3 dilation mask is applied to 32 output pixels *
* simultaneously. This is done with 32-bit-wide bitwise *
* operators in the register file. In order to do this, the code *
* reads in a 34-bit-wide input window, and 40-bit operations *
* are used to manipulate the pixels initially. *
* *
* Because the code reads a 34-bit context for each 32-bits of *
* output, the input needs to be one byte longer than the output *
* in order to make the rightmost two pixels well-defined. *
* *
* ASSUMPTIONS *
* 1. Negative values in the mask specify "DONT_CARE", and non- *
* negative values specify that pixels are included in the *
* dilation operation. *
* *
* 2. The input image needs to have a multiple of 64 pixels(bits) *
* per row. Therefore, "cols" must be a multiple of 8. *
* *
* NOTES *
* Little Endian *
* *
* CYCLES *
* cycles = (cols / 8) * 7 + 25 (Note: Number of pixels = 8*cols) *
* For 640 pixels, cols = 80 and cycles = 95. *
* This corresponds to 0.14844 cycles/pixel. *
* *
* CODESIZE *
* 328 bytes. *
* *
* BIBLIOGRAPHY *
* "Digital Image Processing: Principles and Applications" *
* by Gregory A. Baxes, Chapter 5 *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
.sect ".text:_dilate_bin"
.global _IMG_dilate_bin
_IMG_dilate_bin:
; ---------------------------------------------------------------------------
; IMG_dilate_bin: 3x3 binary dilation on a packed bitmap.  Each kernel
; iteration produces 8 output bytes (two 32-bit words).
;
; Arguments on entry (see the .asg table that follows):
;   A4 = in_data, B4 = out_data, A6 = mask (9 bytes), B6 = cols (bytes)
; Control returns through B3 (see "RET B3" in the epilog).
;
; Row pointers are derived as in_data, in_data + cols, in_data + 2*cols, so
; the three input rows are taken to be "cols" bytes apart.
;
; Work split: the B-side registers (B_p*, B_r*) process the word at byte
; offset +0 of each 8-byte group, the A-side registers (A_p*, A_r*) process
; the word at offset +4.  The A-side result is copied to B_rslt_ so that the
; pair B_rslt_:B_rslt can be written with one STNDW.
;
; Naming used below (per side):
;   p0/p1/p2 = row 0 shifted right by 0/1/2 bits   -> accumulated in rC
;   p3/p4/p5 = row 1 shifted right by 0/1/2 bits   -> accumulated in rB
;   p6/p7/p8 = row 2 shifted right by 0/1/2 bits   -> accumulated in rA
;   rD = rA | rB,  rslt = rC | rD
;   m0..m8   = per-tap flags built from mask[0..8]: all ones when the mask
;              byte is negative (tap ignored), zero otherwise.
; The trailing ";[c,i]" tags appear to give the cycle of the instruction in
; the single-iteration schedule and the pipeline stage it belongs to.
; ---------------------------------------------------------------------------
* =============== SYMBOLIC REGISTER ASSIGNMENTS: ARGUMENTS ================ *
.asg A4, A_in_data
.asg B4, B_out_data
.asg A6, A_mask
.asg B6, B_cols
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Several symbolic names below share one physical register (for example
; A_q2 and A_rB are both A19, B_cols and B_rA are both B6, A_in_data and
; A_ir0 are both A4).  The instruction schedule must keep the live ranges of
; such aliases from overlapping, so do not reorder instructions casually.
; A_m4, A_m2, A_m1, B_m8, B_m7 and B_m5 are the flags used as instruction
; predicates ([!reg]) in the kernel; m0, m3 and m6 are used as ANDN operands.
.asg A0, A_m4
.asg A1, A_m2
.asg A2, A_m1
.asg A2, A_m3210
.asg A3, A_m7654
.asg A4, A_ir0
.asg A8, A_ir2
.asg A9, A_rC
.asg A16, A_out_data
.asg A17, A_ir1
.asg A18, A_p2
.asg A19, A_q2
.asg A19, A_rB
.asg A20, A_m0
.asg A21, A_m3
.asg A22, A_p4
.asg A22, A_p7
.asg A22, A_p8
.asg A23, A_q4
.asg A23, A_q7
.asg A23, A_q8
.asg A23, A_rslt
.asg A24, A_p1
.asg A24, A_p5
.asg A25, A_q1
.asg A25, A_q5
.asg A25, A_rA
.asg A25, A_rD
.asg A26, A_p6
.asg A27, A_p6h
.asg A28, A_p3
.asg A29, A_p3h
.asg A30, A_p0
.asg A31, A_p0h
.asg B0, B_m8
.asg B1, B_m7
.asg B2, B_m5
.asg B5, B_m7654
.asg B5, B_m4
.asg B6, B_rA
.asg B7, B_ir2
.asg B8, B_rB
.asg B9, B_m6
.asg B16, B_p1
.asg B16, B_p2
.asg B16, B_p5
.asg B17, B_q1
.asg B17, B_q2
.asg B17, B_q5
.asg B18, B_ir1
.asg B19, B_i
.asg B20, B_ir0
.asg B21, B_m3
.asg B22, B_p4
.asg B23, B_q4
.asg B23, B_rC
.asg B24, B_p7
.asg B24, B_p8
.asg B24, B_rD
.asg B24, B_rslt
.asg B25, B_q7
.asg B25, B_q8
.asg B25, B_rslt_
.asg B26, B_p6
.asg B27, B_p6h
.asg B28, B_p3
.asg B29, B_p3h
.asg B30, B_p0
.asg B31, B_p0h
* ========================================================================= *
; Setup.  Load the nine mask bytes and derive the row pointers.
; B_m8 <- mask[8] (LDB sign-extends the byte).
LDB .D1T2 *A_mask[8], B_m8
; A_m3210 <- mask[0..3], A_m7654 <- mask[4..7] (non-aligned 8-byte load;
; byte order as stated under NOTES in the header: little endian).
LDNDW .D1T1 *A_mask, A_m7654:A_m3210
; B_ir1 = in_data + cols          (row 1, word +0)
ADD .L2X A_ir0, B_cols, B_ir1
; B_ir0 = in_data + 4             (row 0, word +4; feeds the A-side loads)
ADD .D2X A_ir0, 4, B_ir0
; B_ir2 = in_data + 2*cols        (row 2, word +0)
ADD .L2 B_ir1, B_cols, B_ir2
; A_ir1 = in_data + cols + 4      (row 1, word +4)
ADD .L1X B_ir1, 4, A_ir1
* =========================== PIPE LOOP PROLOG ============================ *
; The prolog issues the first iteration's loads (cycles [1..7,1]) and, in the
; otherwise idle slots, turns each mask byte into an all-ones / all-zeros
; flag by replicating its sign bit (SHR by 31, or EXT with a left shift that
; brings the wanted byte to the top followed by an arithmetic shift of 31).
; Packet: load row 0 (+4) for A side; B_m8 = sign of mask[8]; copy m7654 to B.
LDNDW .D *B_ir0++(8), A_p0h:A_p0 ;[ 1,1]
|| SHR .S2 B_m8, 31, B_m8
|| MV .L2X A_m7654, B_m7654
; Packet: load row 1 (+4); B_m7 = sign of mask[7]; A_m3 = sign of mask[3].
LDNDW .D *A_ir1++(8), A_p3h:A_p3 ;[ 2,1]
|| SHR .S2 B_m7654, 31, B_m7
|| SHR .S1 A_m3210, 31, A_m3
; Packet: load row 1 (+0); A_ir2 = in_data + 2*cols + 4;
; B_m6 = sign of mask[6]; A_m2 = sign of mask[2].
LDNDW .D *B_ir1++(8), B_p3h:B_p3 ;[ 3,1]
|| ADD B_ir2, 4, A_ir2
|| EXT .S2 B_m7654, 8, 31, B_m6
|| EXT .S1 A_m3210, 8, 31, A_m2
; Packet: load row 2 (+4); B_m5 = sign of mask[5]; A_m0 = sign of mask[0].
LDNDW .D *A_ir2++(8), A_p6h:A_p6 ;[ 4,1]
|| EXT .S2 B_m7654, 16, 31, B_m5
|| EXT .S1 A_m3210, 24, 31, A_m0
; Packet: load row 0 (+0) for B side; B_m4 = sign of mask[4] (overwrites
; B_m7654, same register); A_m1 = sign of mask[1] (overwrites A_m3210).
LDNDW .D *A_ir0++(8), B_p0h:B_p0 ;[ 5,1]
|| EXT .S2 B_m7654, 24, 31, B_m4
|| EXT .S1 A_m3210, 16, 31, A_m1
; Packet: A-side p2 = row 0 >> 2 (40-bit shift pulls in bits from p0h);
; move the output pointer to the A side; B_i = cols / 8.
SHR .S1 A_p0h:A_p0, 2, A_q2:A_p2 ;[ 6,1]
|| MV .L1X B_out_data, A_out_data
|| SHR .S2 B_cols, 3, B_i
; Packet: start rB = p3 & ~m3; p5 = row 1 >> 2; load row 2 (+0);
; copy m4 to the A side and m3 to the B side (ROTL by 0 acts as a move);
; B_i = cols/8 - 2 is the BDEC loop counter.
; The branch targets loop_5 + 4, i.e. the second instruction of the loop_5
; packet.  NOTE(review): this looks intended to skip that packet's STNDW on
; the first pass, when no result has been computed yet; the packets at
; loop .. loop_4 execute in the branch delay slots before it lands.
ANDN .D1 A_p3, A_m3, A_rB ;[ 7,1]
|| SHR .S1 A_p3h:A_p3, 2, A_q5:A_p5 ;[ 7,1]
|| LDNDW .D2T2 *B_ir2++(8), B_p6h:B_p6 ;[ 7,1]
|| MV .L1X B_m4, A_m4
|| ROTL .M2X A_m3, 0, B_m3
|| SUB .L2 B_i, 2, B_i
|| B loop_5 + 4
* =========================== PIPE LOOP KERNEL ============================ *
; Seven execute packets per pass; each pass overlaps up to three iterations
; (stages 1, 2 and 3 in the ";[c,i]" tags).
; NOTE(review): every pass also issues the loads for the following
; iteration, so the final pass appears to read one 8-byte group per row
; beyond the last group that is stored -- confirm the input buffers allow it.
loop:
; B: rA |= p7 unless m7 set.  A: rB |= p5 unless m5 set; rC = p0 & ~m0;
; p1 = row 0 >> 1.  B: p5 = row 1 >> 2.  Load next row 0 (+4).
[!B_m7]OR .L2 B_p7, B_rA, B_rA ;[15,1]
||[!B_m5]OR .L1 A_p5, A_rB, A_rB ;[ 8,2]
|| ANDN .D1 A_p0, A_m0, A_rC ;[ 8,2]
|| SHR .S1 A_p0h:A_p0, 1, A_q1:A_p1 ;[ 8,2]
|| SHR .S2 B_p3h:B_p3, 2, B_q5:B_p5 ;[ 8,2]
|| LDNDW .D *B_ir0++(8), A_p0h:A_p0 ;[ 1,3]
;loop_1:
; Loop-back branch (BDEC also decrements B_i); copy the A-side result into
; B_rslt_; B: rD = rA | rB.  A: rA = p6 & ~m6; p4 = row 1 >> 1.
; B: rB = p3 & ~m3.  Load next row 1 (+4).
BDEC .S2 loop, B_i ;[16,1]
|| ROTL .M2X A_rslt, 0, B_rslt_ ;[16,1]
|| OR .L2 B_rA, B_rB, B_rD ;[16,1]
|| ANDN .L1X A_p6, B_m6, A_rA ;[ 9,2]
|| SHR .S1 A_p3h:A_p3, 1, A_q4:A_p4 ;[ 9,2]
|| ANDN .D2 B_p3, B_m3, B_rB ;[ 9,2]
|| LDNDW .D *A_ir1++(8), A_p3h:A_p3 ;[ 2,3]
;loop_2:
; B: rslt = rC | rD.  A: rC |= p2 unless m2 set; rB |= p4 unless m4 set;
; p8 = row 2 >> 2.  B: p4 = row 1 >> 1.  Load next row 1 (+0).
OR .L2 B_rC, B_rD, B_rslt ;[17,1]
||[!A_m2]OR .D1 A_p2, A_rC, A_rC ;[10,2]
||[!A_m4]OR .L1 A_p4, A_rB, A_rB ;[10,2]
|| SHR .S1 A_p6h:A_p6, 2, A_q8:A_p8 ;[10,2]
|| SHR .S2 B_p3h:B_p3, 1, B_q4:B_p4 ;[10,2]
|| LDNDW .D *B_ir1++(8), B_p3h:B_p3 ;[ 3,3]
;loop_3:
; A: rA |= p8 unless m8 set; p7 = row 2 >> 1.  B: rB |= p5 unless m5 set;
; rC = p0 & ~m0; p2 = row 0 >> 2.  Load next row 2 (+4).
[!B_m8]OR .L1 A_p8, A_rA, A_rA ;[11,2]
|| SHR .S1 A_p6h:A_p6, 1, A_q7:A_p7 ;[11,2]
||[!B_m5]OR .L2 B_p5, B_rB, B_rB ;[11,2]
|| ANDN .D2X B_p0, A_m0, B_rC ;[11,2]
|| SHR .S2 B_p0h:B_p0, 2, B_q2:B_p2 ;[11,2]
|| LDNDW .D *A_ir2++(8), A_p6h:A_p6 ;[ 4,3]
;loop_4:
; A: rC |= p1 unless m1 set; rA |= p7 unless m7 set.  B: rC |= p2 unless m2
; set; rA = p6 & ~m6; p1 = row 0 >> 1.  Load next row 0 (+0).
[!A_m1]OR .L1 A_p1, A_rC, A_rC ;[12,2]
||[!B_m7]OR .S1 A_p7, A_rA, A_rA ;[12,2]
||[!A_m2]OR .L2 B_p2, B_rC, B_rC ;[12,2]
|| ANDN .D2 B_p6, B_m6, B_rA ;[12,2]
|| SHR .S2 B_p0h:B_p0, 1, B_q1:B_p1 ;[12,2]
|| LDNDW .D *A_ir0++(8), B_p0h:B_p0 ;[ 5,3]
loop_5:
; Store the finished 8 output bytes and advance the output pointer.
; A: rD = rA | rB.  B: rC |= p1 unless m1 set; rB |= p4 unless m4 set;
; p8 = row 2 >> 2.  A: p2 = row 0 >> 2 for the next iteration.
; The prolog branch enters this packet at its second instruction.
STNDW .D B_rslt_:B_rslt, *A_out_data++(8);[20,1]
|| OR .L1 A_rA, A_rB, A_rD ;[13,2]
||[!A_m1]OR .D2 B_p1, B_rC, B_rC ;[13,2]
||[!A_m4]OR .L2 B_p4, B_rB, B_rB ;[13,2]
|| SHR .S2 B_p6h:B_p6, 2, B_q8:B_p8 ;[13,2]
|| SHR .S1 A_p0h:A_p0, 2, A_q2:A_p2 ;[ 6,3]
;loop_6:
; A: rslt = rC | rD.  B: rA |= p8 unless m8 set; p7 = row 2 >> 1.
; A (next iteration): rB = p3 & ~m3; p5 = row 1 >> 2.  Load next row 2 (+0).
OR .L1 A_rC, A_rD, A_rslt ;[14,2]
||[!B_m8]OR .L2 B_p8, B_rA, B_rA ;[14,2]
|| SHR .S2 B_p6h:B_p6, 1, B_q7:B_p7 ;[14,2]
|| ANDN .D1 A_p3, A_m3, A_rB ;[ 7,3]
|| SHR .S1 A_p3h:A_p3, 2, A_q5:A_p5 ;[ 7,3]
|| LDNDW .D *B_ir2++(8), B_p6h:B_p6 ;[ 7,3]
* =========================== PIPE LOOP EPILOG ============================ *
; Return to the caller through B3.  The five single-instruction packets that
; follow occupy the branch delay slots and finish the last iteration's
; B-side combine (cycles [15..17,3]) before the final store, which does not
; advance the pointer.
RET B3
[!B_m7]OR .L2 B_p7, B_rA, B_rA ;[15,3]
ROTL .M2X A_rslt, 0, B_rslt_ ;[16,3]
OR .L2 B_rA, B_rB, B_rD ;[16,3]
OR .L2 B_rC, B_rD, B_rslt ;[17,3]
STNDW .D B_rslt_:B_rslt, *A_out_data ;[20,3]
* ========================================================================= *
* End of file: img_dilate_bin.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *