You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
361 lines
20 KiB
361 lines
20 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* IMGLIB DSP Image/Video Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.6 Sun Sep 29 03:32:19 2002 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
; Capture the assembler version in the substitution symbol $asmver.  When the
; symbol .ASSEMBLER_VERSION is not defined, $asmver is set to 0, so the
; "< 430" test below is true and the aliases are installed.
.if $isdefed(".ASSEMBLER_VERSION")
|
|
.asg .ASSEMBLER_VERSION, $asmver
|
|
.else
|
|
.asg 0, $asmver
|
|
.endif
|
|


; Everything up to the matching .endif is assembled only when $asmver is
; below 430: the call/return mnemonics are aliased to the plain branch
; mnemonic B.
.if ($asmver < 430)
|
|


.asg B, CALL ; Function Call
|
|
.asg B, RET ; Return from a Function
|
|
.asg B, CALLRET ; Function call with Call / Ret chaining.
|
|


; The BNOP-based aliases are installed only when .TMS320C6400 is non-zero.
.if .TMS320C6400
|
|
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
|
|
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
|
|
.asg BNOP, CRNOP ; C64x Fn call w/ Call/Ret chaining via BNOP.
|
|
.endif
|
|


; .asmfunc / .endasmfunc are given empty substitution text, so uses of these
; names expand to nothing when this branch of the shim is active.
.asg , .asmfunc ; .func equivalent for hand-assembly code
|
|
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
|
|


.endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* TEXAS INSTRUMENTS, INC. *
|
|
* *
|
|
* NAME *
|
|
* IMG_dilate_bin--This code performs 3x3 binary dilation *
|
|
* *
|
|
* REVISION DATE *
|
|
* 19-May-2002 *
|
|
* *
|
|
* USAGE *
|
|
* This routine is C callable, and has the following C prototype: *
|
|
* *
|
|
* void IMG_dilate_bin *
|
|
* ( *
|
|
* const unsigned char *restrict in_data, /* Incoming image */ *
|
|
* unsigned char *restrict out_data, /* Filtered output */ *
|
|
* const char *restrict mask, /* Filter mask */ *
|
|
* int cols /* Number of columns to process, in bytes. */ *
|
|
* ); *
|
|
* *
|
|
* DESCRIPTION *
|
|
* The function IMG_dilate_bin() implements binary dilation using an *
|
|
* arbitrary 3x3 mask. The dilation operator generates output pixels *
|
|
* by ORing the pixels under the input mask together to generate *
|
|
* the output pixel. The input mask specifies whether one or more *
|
|
* pixels from the input are to be ignored. *
|
|
* *
|
|
* In pseudo-code, the filtering operation for a pixel at (x, y) *
|
|
* works like so: *
|
|
* *
|
|
* result = 0; *
|
|
* if (mask[0][0] != DONT_CARE) result |= input[y + 0][x + 0]; *
|
|
* if (mask[0][1] != DONT_CARE) result |= input[y + 0][x + 1]; *
|
|
* if (mask[0][2] != DONT_CARE) result |= input[y + 0][x + 2]; *
|
|
* if (mask[1][0] != DONT_CARE) result |= input[y + 1][x + 0]; *
|
|
* if (mask[1][1] != DONT_CARE) result |= input[y + 1][x + 1]; *
|
|
* if (mask[1][2] != DONT_CARE) result |= input[y + 1][x + 2]; *
|
|
* if (mask[2][0] != DONT_CARE) result |= input[y + 2][x + 0]; *
|
|
* if (mask[2][1] != DONT_CARE) result |= input[y + 2][x + 1]; *
|
|
* if (mask[2][2] != DONT_CARE) result |= input[y + 2][x + 2]; *
|
|
* output[y][x] = result; *
|
|
* *
|
|
* For this code, "DONT_CARE" is specified by a negative value *
|
|
* in the input mask. Non-negative values in the mask cause the *
|
|
* corresponding pixel to be included in the dilation operation. *
|
|
* *
|
|
* Note that this code operates on a bitmap where each pixel is *
|
|
* represented as a single bit within a byte or word. Although *
|
|
* the pseudo-code above operates only on one pixel at a time, *
|
|
* with a single pixel in each array element, this implementation *
|
|
* operates on a bitmap which contains 8 pixels in each byte. *
|
|
* *
|
|
* Pixels are organized within each byte such that the pixel with *
|
|
* the smallest index is in the LSB position, and the pixel with *
|
|
* the largest index is in the MSB position. (That is, the code *
|
|
* assumes a LITTLE ENDIAN bit ordering.) *
|
|
* *
|
|
* Note that the "cols" argument actually specifies the number of *
|
|
* BYTES in the output, not the number of columns. The number of *
|
|
* columns is 8 times this argument. *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The 3x3 dilation mask is applied to 32 output pixels *
|
|
* simultaneously. This is done with 32-bit-wide bitwise *
|
|
* operators in the register file. In order to do this, the code *
|
|
* reads in a 34-bit-wide input window, and 40-bit operations *
|
|
* are used to manipulate the pixels initially. *
|
|
* *
|
|
* Because the code reads a 34-bit context for each 32-bits of *
|
|
* output, the input needs to be one byte longer than the output *
|
|
* in order to make the rightmost two pixels well-defined. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* 1. Negative values in the mask specify "DONT_CARE", and non- *
|
|
* negative values specify that pixels are included in the *
|
|
* dilation operation. *
|
|
* *
|
|
* 2. The input image needs to have a multiple of 64 pixels(bits) *
|
|
* per row. Therefore, "cols" must be a multiple of 8. *
|
|
* *
|
|
* NOTES *
|
|
* Little Endian *
|
|
* *
|
|
* CYCLES *
|
|
* cycles = (cols / 8) * 7 + 25 (Note: Number of pixels = 8*cols) *
|
|
* For 640 pixels, cols = 80 and cycles = 95. *
|
|
* This corresponds to 0.14844 cycles/pixel. *
|
|
* *
|
|
* CODESIZE *
|
|
* 328 bytes. *
|
|
* *
|
|
* BIBLIOGRAPHY *
|
|
* "Digital Image Processing: Principles and Applications" *
|
|
* by Gregory A. Baxes, Chapter 5 *
|
|
* *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
|
|
; Entry point of IMG_dilate_bin.  The code is placed in its own subsection
; ".text:_dilate_bin" and exported as the global symbol _IMG_dilate_bin.
.sect ".text:_dilate_bin"
|
|
.global _IMG_dilate_bin
|
|
_IMG_dilate_bin:
|
|
; Incoming arguments, in the order of the C prototype documented in the file
; header: in_data, out_data, mask, cols.
* =============== SYMBOLIC REGISTER ASSIGNMENTS: ARGUMENTS ================ *
|
|
.asg A4, A_in_data
|
|
.asg B4, B_out_data
|
|
.asg A6, A_mask
|
|
.asg B6, B_cols
|
|
; Working registers.  Naming used by the code that follows:
;   A_* / B_*    register lives in the A or B register file
;   m0..m8       per-tap mask flags derived from the 9 mask bytes
;   m3210/m7654  packed mask bytes 0..3 and 4..7 as loaded from memory
;   ir0/ir1/ir2  input pointers for the three image rows
;   p0/p3/p6     loaded row words; the "h" name is the odd (upper) register of
;                the pair filled by LDNDW
;   p1,p2,p4,... row words shifted right by 1 or 2 bit positions
;   q*           odd register of the pair written by the long SHR; not read
;                under the q* name anywhere in this routine
;   rA..rD, rslt partial and final OR results
; Several names deliberately map to the same physical register (for example
; A22 = A_p4/A_p7/A_p8, A4 = A_in_data/A_ir0, B6 = B_cols/B_rA).  The
; instruction schedule depends on this exact allocation, so do not change an
; alias without re-checking every use.
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
|
|
.asg A0, A_m4
|
|
.asg A1, A_m2
|
|
.asg A2, A_m1
|
|
.asg A2, A_m3210
|
|
.asg A3, A_m7654
|
|
.asg A4, A_ir0
|
|
.asg A8, A_ir2
|
|
.asg A9, A_rC
|
|
.asg A16, A_out_data
|
|
.asg A17, A_ir1
|
|
.asg A18, A_p2
|
|
.asg A19, A_q2
|
|
.asg A19, A_rB
|
|
.asg A20, A_m0
|
|
.asg A21, A_m3
|
|
.asg A22, A_p4
|
|
.asg A22, A_p7
|
|
.asg A22, A_p8
|
|
.asg A23, A_q4
|
|
.asg A23, A_q7
|
|
.asg A23, A_q8
|
|
.asg A23, A_rslt
|
|
.asg A24, A_p1
|
|
.asg A24, A_p5
|
|
.asg A25, A_q1
|
|
.asg A25, A_q5
|
|
.asg A25, A_rA
|
|
.asg A25, A_rD
|
|
.asg A26, A_p6
|
|
.asg A27, A_p6h
|
|
.asg A28, A_p3
|
|
.asg A29, A_p3h
|
|
.asg A30, A_p0
|
|
.asg A31, A_p0h
|
|
.asg B0, B_m8
|
|
.asg B1, B_m7
|
|
.asg B2, B_m5
|
|
.asg B5, B_m7654
|
|
.asg B5, B_m4
|
|
.asg B6, B_rA
|
|
.asg B7, B_ir2
|
|
.asg B8, B_rB
|
|
.asg B9, B_m6
|
|
.asg B16, B_p1
|
|
.asg B16, B_p2
|
|
.asg B16, B_p5
|
|
.asg B17, B_q1
|
|
.asg B17, B_q2
|
|
.asg B17, B_q5
|
|
.asg B18, B_ir1
|
|
.asg B19, B_i
|
|
.asg B20, B_ir0
|
|
.asg B21, B_m3
|
|
.asg B22, B_p4
|
|
.asg B23, B_q4
|
|
.asg B23, B_rC
|
|
.asg B24, B_p7
|
|
.asg B24, B_p8
|
|
.asg B24, B_rD
|
|
.asg B24, B_rslt
|
|
.asg B25, B_q7
|
|
.asg B25, B_q8
|
|
.asg B25, B_rslt_
|
|
.asg B26, B_p6
|
|
.asg B27, B_p6h
|
|
.asg B28, B_p3
|
|
.asg B29, B_p3h
|
|
.asg B30, B_p0
|
|
.asg B31, B_p0h
|
|
* ========================================================================= *
|
|
|
|
; ---- Setup: fetch the 3x3 mask and derive the row pointers ----------------
; Load mask byte 8 (the last of the nine mask bytes) into B_m8.
LDB .D1T2 *A_mask[8], B_m8
|
|


; Load mask bytes 0..7 with one non-aligned double-word load:
; bytes 0..3 land in A_m3210, bytes 4..7 in A_m7654.
LDNDW .D1T1 *A_mask, A_m7654:A_m3210
|
|


; B_ir1 = in_data + cols : start of the second input row.
ADD .L2X A_ir0, B_cols, B_ir1
|
|


; B_ir0 = in_data + 4 : first row, second 32-bit word.  A_ir0 (the same
; register as A_in_data) keeps pointing at the first word.
ADD .D2X A_ir0, 4, B_ir0
|
|


; B_ir2 = in_data + 2*cols : start of the third input row.
ADD .L2 B_ir1, B_cols, B_ir2
|
|


; A_ir1 = second row + 4.
ADD .L1X B_ir1, 4, A_ir1
|
|


; ---- Prolog: first loads of the pipeline plus mask decoding ---------------
; The bracketed tags in the trailing comments look like [cycle, iteration]
; markers from the original software-pipelined schedule (not verified here).
; Mask decoding: each mask byte is reduced to its sign, giving all ones for a
; negative (DONT_CARE) byte and zero otherwise.  m0, m3 and m6 are later used
; as ANDN operands; the other flags are used as [!flag] predicates.
* =========================== PIPE LOOP PROLOG ============================ *
|
|
; Row 0 word at offset +4 for the A side; B_m8 becomes the sign of mask[8];
; copy the packed mask bytes 4..7 to the B side.
LDNDW .D *B_ir0++(8), A_p0h:A_p0 ;[ 1,1]
|
|
|| SHR .S2 B_m8, 31, B_m8
|
|
|| MV .L2X A_m7654, B_m7654
|
|


; Row 1 word at offset +4 for the A side; m7 and m3 come from the sign bit of
; the top byte of each packed mask word.
LDNDW .D *A_ir1++(8), A_p3h:A_p3 ;[ 2,1]
|
|
|| SHR .S2 B_m7654, 31, B_m7
|
|
|| SHR .S1 A_m3210, 31, A_m3
|
|


; Row 1 word at offset +0 for the B side; A_ir2 = third row + 4; m6 and m2
; are extracted from byte 2 of each packed mask word.
LDNDW .D *B_ir1++(8), B_p3h:B_p3 ;[ 3,1]
|
|
|| ADD B_ir2, 4, A_ir2
|
|
|| EXT .S2 B_m7654, 8, 31, B_m6
|
|
|| EXT .S1 A_m3210, 8, 31, A_m2
|
|


; Row 2 word at offset +4 for the A side; m5 from byte 1 of the upper mask
; word, m0 from byte 0 of the lower mask word.
LDNDW .D *A_ir2++(8), A_p6h:A_p6 ;[ 4,1]
|
|
|| EXT .S2 B_m7654, 16, 31, B_m5
|
|
|| EXT .S1 A_m3210, 24, 31, A_m0
|
|


; Row 0 word at offset +0 for the B side; m4 and m1 are extracted last because
; they overwrite the registers that held the packed mask words (B5 and A2).
LDNDW .D *A_ir0++(8), B_p0h:B_p0 ;[ 5,1]
|
|
|| EXT .S2 B_m7654, 24, 31, B_m4
|
|
|| EXT .S1 A_m3210, 16, 31, A_m1
|
|


; A_p2 = row 0 shifted right by 2; move the output pointer to the A side;
; B_i = cols / 8 (each loop pass produces 8 output bytes).
SHR .S1 A_p0h:A_p0, 2, A_q2:A_p2 ;[ 6,1]
|
|
|| MV .L1X B_out_data, A_out_data
|
|
|| SHR .S2 B_cols, 3, B_i
|
|


; Last prolog packet: start the row-1 accumulation on the A side, issue the
; row-2 load for the B side, and copy m4 / m3 across register files (ROTL by 0
; acts as a copy).  B_i is biased by -2 for the BDEC in the kernel.  The branch
; targets loop_5 + 4; this presumably skips the STNDW that heads the loop_5
; packet on the first pass, when no result exists yet -- confirm against the
; C64x branch-into-packet rules.
ANDN .D1 A_p3, A_m3, A_rB ;[ 7,1]
|
|
|| SHR .S1 A_p3h:A_p3, 2, A_q5:A_p5 ;[ 7,1]
|
|
|| LDNDW .D2T2 *B_ir2++(8), B_p6h:B_p6 ;[ 7,1]
|
|
|| MV .L1X B_m4, A_m4
|
|
|| ROTL .M2X A_m3, 0, B_m3
|
|
|| SUB .L2 B_i, 2, B_i
|
|
|| B loop_5 + 4
|
|
|
|
* =========================== PIPE LOOP KERNEL ============================ *
|
|
; Software-pipelined kernel: seven execute packets per pass, each pass storing
; 8 output bytes (64 pixels) with a single STNDW.
;   - The A side computes the output word at byte offset +4, the B side the
;     word at offset +0; the A result is copied to B_rslt_ so both words can be
;     stored as the pair B_rslt_:B_rslt.
;   - Per side: rC collects the row-0 taps (p0,p1,p2), rB the row-1 taps
;     (p3,p4,p5), rA the row-2 taps (p6,p7,p8); rD = rA | rB; rslt = rC | rD.
;   - ANDN x, m, r keeps x only when the mask flag m is zero; [!m] OR adds a
;     shifted tap only when its mask flag is zero.
;   - Each packet mixes work from different loop iterations, and several
;     symbolic names share one physical register.  Do not reorder instructions
;     or move them between packets.
loop:
|
|
; Finish row 2 on the B side (tap 7); row-1 tap 5 and row-0 tap 0 on the A
; side; shift row 0 (A) by 1 and row 1 (B) by 2; load the next row-0 word for
; the A side.
[!B_m7]OR .L2 B_p7, B_rA, B_rA ;[15,1]
|
|
||[!B_m5]OR .L1 A_p5, A_rB, A_rB ;[ 8,2]
|
|
|| ANDN .D1 A_p0, A_m0, A_rC ;[ 8,2]
|
|
|| SHR .S1 A_p0h:A_p0, 1, A_q1:A_p1 ;[ 8,2]
|
|
|| SHR .S2 B_p3h:B_p3, 2, B_q5:B_p5 ;[ 8,2]
|
|
|| LDNDW .D *B_ir0++(8), A_p0h:A_p0 ;[ 1,3]
|
|


;loop_1:
|
|
; Loop-control branch (BDEC on the counter B_i); copy the A-side result to
; B_rslt_; B_rD = rows 2|1 on the B side; start row 2 on the A side and row 1
; on the B side; load the next row-1 word for the A side.
BDEC .S2 loop, B_i ;[16,1]
|
|
|| ROTL .M2X A_rslt, 0, B_rslt_ ;[16,1]
|
|
|| OR .L2 B_rA, B_rB, B_rD ;[16,1]
|
|
|| ANDN .L1X A_p6, B_m6, A_rA ;[ 9,2]
|
|
|| SHR .S1 A_p3h:A_p3, 1, A_q4:A_p4 ;[ 9,2]
|
|
|| ANDN .D2 B_p3, B_m3, B_rB ;[ 9,2]
|
|
|| LDNDW .D *A_ir1++(8), A_p3h:A_p3 ;[ 2,3]
|
|


;loop_2:
|
|
; Final B-side result; A-side taps 2 and 4; shift row 2 (A) by 2 and row 1 (B)
; by 1; load the next row-1 word for the B side.
OR .L2 B_rC, B_rD, B_rslt ;[17,1]
|
|
||[!A_m2]OR .D1 A_p2, A_rC, A_rC ;[10,2]
|
|
||[!A_m4]OR .L1 A_p4, A_rB, A_rB ;[10,2]
|
|
|| SHR .S1 A_p6h:A_p6, 2, A_q8:A_p8 ;[10,2]
|
|
|| SHR .S2 B_p3h:B_p3, 1, B_q4:B_p4 ;[10,2]
|
|
|| LDNDW .D *B_ir1++(8), B_p3h:B_p3 ;[ 3,3]
|
|


;loop_3:
|
|
; A-side tap 8; shift row 2 (A) by 1; B-side tap 5 and row-0 tap 0; shift
; row 0 (B) by 2; load the next row-2 word for the A side.
[!B_m8]OR .L1 A_p8, A_rA, A_rA ;[11,2]
|
|
|| SHR .S1 A_p6h:A_p6, 1, A_q7:A_p7 ;[11,2]
|
|
||[!B_m5]OR .L2 B_p5, B_rB, B_rB ;[11,2]
|
|
|| ANDN .D2X B_p0, A_m0, B_rC ;[11,2]
|
|
|| SHR .S2 B_p0h:B_p0, 2, B_q2:B_p2 ;[11,2]
|
|
|| LDNDW .D *A_ir2++(8), A_p6h:A_p6 ;[ 4,3]
|
|


;loop_4:
|
|
; A-side taps 1 and 7; B-side tap 2 and row-2 tap 6; shift row 0 (B) by 1;
; load the next row-0 word for the B side.
[!A_m1]OR .L1 A_p1, A_rC, A_rC ;[12,2]
|
|
||[!B_m7]OR .S1 A_p7, A_rA, A_rA ;[12,2]
|
|
||[!A_m2]OR .L2 B_p2, B_rC, B_rC ;[12,2]
|
|
|| ANDN .D2 B_p6, B_m6, B_rA ;[12,2]
|
|
|| SHR .S2 B_p0h:B_p0, 1, B_q1:B_p1 ;[12,2]
|
|
|| LDNDW .D *A_ir0++(8), B_p0h:B_p0 ;[ 5,3]
|
|


; The prolog branches to loop_5 + 4, i.e. past the STNDW that heads this
; packet.
loop_5:
|
|
; Store the finished 64 output pixels and advance the output pointer;
; A_rD = rows 2|1 on the A side; B-side taps 1 and 4; shift row 2 (B) by 2 and
; the newly loaded row 0 (A) by 2.
STNDW .D B_rslt_:B_rslt, *A_out_data++(8);[20,1]
|
|
|| OR .L1 A_rA, A_rB, A_rD ;[13,2]
|
|
||[!A_m1]OR .D2 B_p1, B_rC, B_rC ;[13,2]
|
|
||[!A_m4]OR .L2 B_p4, B_rB, B_rB ;[13,2]
|
|
|| SHR .S2 B_p6h:B_p6, 2, B_q8:B_p8 ;[13,2]
|
|
|| SHR .S1 A_p0h:A_p0, 2, A_q2:A_p2 ;[ 6,3]
|
|


;loop_6:
|
|
; Final A-side result; B-side tap 8; shift row 2 (B) by 1; restart the row-1
; accumulation on the A side with the newly loaded word; load the next row-2
; word for the B side.
OR .L1 A_rC, A_rD, A_rslt ;[14,2]
|
|
||[!B_m8]OR .L2 B_p8, B_rA, B_rA ;[14,2]
|
|
|| SHR .S2 B_p6h:B_p6, 1, B_q7:B_p7 ;[14,2]
|
|
|| ANDN .D1 A_p3, A_m3, A_rB ;[ 7,3]
|
|
|| SHR .S1 A_p3h:A_p3, 2, A_q5:A_p5 ;[ 7,3]
|
|
|| LDNDW .D *B_ir2++(8), B_p6h:B_p6 ;[ 7,3]
|
|
|
|
* =========================== PIPE LOOP EPILOG ============================ *
|
|
; Epilog: return through B3 while finishing the last result.  The five
; instructions after RET mirror the kernel steps tagged [15]..[20] and are
; expected to execute in the branch delay slots (C6000 branches have five) --
; keep exactly five execute packets here.
RET B3
|
|


; Last B-side row-2 tap (tap 7).
[!B_m7]OR .L2 B_p7, B_rA, B_rA ;[15,3]
|
|


; Copy the final A-side result word to the B side for the paired store.
ROTL .M2X A_rslt, 0, B_rslt_ ;[16,3]
|
|


; B_rD = rows 2|1 on the B side.
OR .L2 B_rA, B_rB, B_rD ;[16,3]
|
|


; Final B-side result word.
OR .L2 B_rC, B_rD, B_rslt ;[17,3]
|
|


; Store the last 8 output bytes; no post-increment is needed any more.
STNDW .D B_rslt_:B_rslt, *A_out_data ;[20,3]
|
|
|
|
* ========================================================================= *
|
|
* End of file: img_dilate_bin.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|