c6416_sdk/imglib/dilate_bin.asm


								;* ======================================================================== *;

								;*  TEXAS INSTRUMENTS, INC.                                                 *;

								;*                                                                          *;

								;*  IMGLIB  DSP Image/Video Processing Library                              *;

								;*                                                                          *;

								;*      Release:        Revision 1.04b                                      *;

								;*      CVS Revision:   1.6     Sun Sep 29 03:32:19 2002 (UTC)              *;

								;*      Snapshot date:  23-Oct-2003                                         *;

								;*                                                                          *;

								;*  This library contains proprietary intellectual property of Texas        *;

								;*  Instruments, Inc.  The library and its source code are protected by     *;

								;*  various copyrights, and portions may also be protected by patents or    *;

								;*  other legal protections.                                                *;

								;*                                                                          *;

								;*  This software is licensed for use with Texas Instruments TMS320         *;

								;*  family DSPs.  This license was provided to you prior to installing      *;

								;*  the software.  You may review this license by consulting the file       *;

								;*  TI_license.PDF which accompanies the files in this library.             *;

								;* ------------------------------------------------------------------------ *;

								;*          Copyright (C) 2003 Texas Instruments, Incorporated.             *;

								;*                          All Rights Reserved.                            *;

								;* ======================================================================== *;


								;* ======================================================================== *;

								;*  Assembler compatibility shim for assembling 4.30 and later code on      *;

								;*  tools prior to 4.30.                                                    *;

								;* ======================================================================== *;


								        ; Record the assembler version in the substitution symbol $asmver.
								        ; When .ASSEMBLER_VERSION is not defined, $asmver is set to 0, so the
								        ; version test further down then selects the pre-4.30 aliases.
								        .if $isdefed(".ASSEMBLER_VERSION")

								        .asg    .ASSEMBLER_VERSION, $asmver

								        .else

								        .asg    0,    $asmver

								        .endif


								        ; Tools older than 4.30: define the newer call/return mnemonics as
								        ; aliases of the plain branch mnemonics.
								        .if ($asmver < 430)


								        .asg    B,    CALL     ; Function Call

								        .asg    B,    RET      ; Return from a Function

								        .asg    B,    CALLRET  ; Function call with Call / Ret chaining.


								        ; The BNOP-based aliases are only defined when .TMS320C6400 is set.
								        .if .TMS320C6400

								        .asg    BNOP, CALLNOP  ; C64x BNOP as a Fn. Call

								        .asg    BNOP, RETNOP   ; C64x BNOP as a Fn. Return

								        .asg    BNOP, CRNOP    ; C64x Fn call w/ Call/Ret chaining via BNOP.

								        .endif


								        ; Both symbols are assigned an empty string, so a use of either
								        ; directive name expands to nothing on these older tools.
								        .asg    , .asmfunc     ; .func equivalent for hand-assembly code

								        .asg    , .endasmfunc  ; .endfunc equivalent for hand-assembly code


								        .endif


								;* ======================================================================== *;

								;*  End of assembler compatibility shim.                                    *;

								;* ======================================================================== *;


								* ========================================================================= *

								*   TEXAS INSTRUMENTS, INC.                                                 *

								*                                                                           *

								*   NAME                                                                    *

								*       IMG_dilate_bin--This code performs 3x3 binary dilation              *

								*                                                                           *

								*   REVISION DATE                                                           *

								*       19-May-2002                                                         *

								*                                                                           *

								*   USAGE                                                                   *

								*       This routine is C callable, and has the following C prototype:      *

								*                                                                           *

								*       void IMG_dilate_bin                                                 *

								*       (                                                                   *

								*           const unsigned char *restrict in_data,   /* Incoming image  */  *

								*           unsigned char       *restrict out_data,  /* Filtered output */  *

								*           const char          *restrict mask,      /* Filter mask     */  *

								*           int cols  /* Number of columns to process, in bytes.        */  *

								*       );                                                                  *

								*                                                                           *

								*   DESCRIPTION                                                             *

								*       The function IMG_dilate_bin() implements binary dilation using an   *

								*       arbitrary 3x3 mask.  The dilation operator generates each output    *

								*       pixel by ORing together the input pixels that lie under the mask.   *

								*       The mask specifies which of the nine input pixels are included      *

								*       and which are ignored.                                              *

								*                                                                           *

								*       In pseudo-code, the filtering operation for a pixel at (x, y)       *

								*       works like so:                                                      *

								*                                                                           *

								*           result = 0;                                                     *

								*           if (mask[0][0] != DONT_CARE) result |= input[y + 0][x + 0];     *

								*           if (mask[0][1] != DONT_CARE) result |= input[y + 0][x + 1];     *

								*           if (mask[0][2] != DONT_CARE) result |= input[y + 0][x + 2];     *

								*           if (mask[1][0] != DONT_CARE) result |= input[y + 1][x + 0];     *

								*           if (mask[1][1] != DONT_CARE) result |= input[y + 1][x + 1];     *

								*           if (mask[1][2] != DONT_CARE) result |= input[y + 1][x + 2];     *

								*           if (mask[2][0] != DONT_CARE) result |= input[y + 2][x + 0];     *

								*           if (mask[2][1] != DONT_CARE) result |= input[y + 2][x + 1];     *

								*           if (mask[2][2] != DONT_CARE) result |= input[y + 2][x + 2];     *

								*           output[y][x] = result;                                          *

								*                                                                           *

								*       For this code, "DONT_CARE" is specified by a negative value         *

								*       in the input mask.  Non-negative values in the mask cause the       *

								*       corresponding pixel to be included in the dilation operation.       *

								*                                                                           *

								*       Note that this code operates on a bitmap where each pixel is        *

								*       represented as a single bit within a byte or word.  Although        *

								*       the pseudo-code above operates only on one pixel at a time,         *

								*       with a single pixel in each array element, this implementation      *

								*       operates on a bitmap which contains 8 pixels in each byte.          *

								*                                                                           *

								*       Pixels are organized within each byte such that the pixel with      *

								*       the smallest index is in the LSB position, and the pixel with       *

								*       the largest index is in the MSB position.  (That is, the code       *

								*       assumes a LITTLE ENDIAN bit ordering.)                              *

								*                                                                           *

								*       Note that the "cols" argument actually specifies the number of      *

								*       BYTES in the output, not the number of columns.  The number of      *

								*       columns is 8 times this argument.                                   *

								*                                                                           *

								*   TECHNIQUES                                                              *

								*       The 3x3 dilation mask is applied to 32 output pixels                *

								*       simultaneously.  This is done with 32-bit-wide bitwise              *

								*       operators in the register file.  In order to do this, the code      *

								*       reads in a 34-bit-wide input window, and 40-bit operations          *

								*       are used to manipulate the pixels initially.                        *

								*                                                                           *

								*       Because the code reads a 34-bit context for each 32-bits of         *

								*       output, the input needs to be one byte longer than the output       *

								*       in order to make the rightmost two pixels well-defined.             *

								*                                                                           *

								*   ASSUMPTIONS                                                             *

								*       1.  Negative values in the mask specify "DONT_CARE", and non-       *

								*           negative values specify that pixels are included in the         *

								*           dilation operation.                                             *

								*                                                                           *

								*       2.  The input image needs to have a multiple of 64 pixels (bits)    *

								*           per row.  Therefore, "cols" must be a multiple of 8.  It must   *

								*           also be at least 8: one 8-byte output is always stored.         *

								*                                                                           *

								*   NOTES                                                                   *

								*       Little Endian                                                       *

								*                                                                           *

								*   CYCLES                                                                  *

								*       cycles = (cols / 8) * 7 + 25  (Note: Number of pixels = 8*cols)     *

								*       For 640 pixels, cols = 80 and cycles = 95.                          *

								*       This corresponds to 0.14844 cycles/pixel.                           *

								*                                                                           *

								*   CODESIZE                                                                *

								*       328 bytes.                                                          *

								*                                                                           *

								*   BIBLIOGRAPHY                                                            *

								*       "Digital Image Processing: Principles and Applications"             *

								*       by Gregory A. Baxes, Chapter 5                                      *

								*                                                                           *

								* ------------------------------------------------------------------------- *

								*             Copyright (c) 2003 Texas Instruments, Incorporated.           *

								*                            All Rights Reserved.                           *

								* ========================================================================= *


								        ; Entry point of IMG_dilate_bin.  Everything from here to the end of
								        ; the symbolic register assignments only binds names to registers
								        ; with .asg; no instructions are emitted until the mask loads.
								        .sect ".text:_dilate_bin"

								        .global _IMG_dilate_bin

								_IMG_dilate_bin:

								* =============== SYMBOLIC REGISTER ASSIGNMENTS: ARGUMENTS ================ *
								; Incoming arguments in prototype order: in_data, out_data, mask, cols.

								        .asg            A4,         A_in_data

								        .asg            B4,         B_out_data

								        .asg            A6,         A_mask

								        .asg            B6,         B_cols

								* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
								; Naming scheme (A_* lives in register file A, B_* in register file B):
								;   m0..m8         per-tap flags derived from mask[0..8]
								;   m3210, m7654   the packed mask words as loaded from memory
								;   ir0, ir1, ir2  read pointers into input rows 0, 1 and 2
								;   p0, p3, p6     raw data of rows 0, 1, 2; pNh is the upper register
								;                  of the loaded pair
								;   p1,p2 / p4,p5 / p7,p8   row data shifted right by 1 and by 2 bits
								;   qN             upper register of the pair written by those shifts
								;   rA, rB, rC     partial ORs for rows 2, 1 and 0; rD = rA | rB
								;   rslt           finished 32-bit result of one side; B_rslt_ receives
								;                  the A-side result so both halves can be stored as
								;                  one register pair
								;   i              loop counter
								;
								; Many names deliberately share a physical register, for example
								; A_m1 / A_m3210 (A2), B_m4 / B_m7654 (B5), A_ir0 / A_in_data (A4) and
								; B_rA / B_cols (B6).  Names that share a register must never be live
								; at the same time, so any change to the instruction schedule has to
								; re-check these overlaps.
								;
								; A0, A1, A2 (m4, m2, m1) and B0, B1, B2 (m8, m7, m5) hold the flags
								; that are used as [!reg] predicates in the kernel.
								;
								; None of A10-A15 / B10-B15 (the registers a callee must preserve in
								; the C6000 C calling convention) is named here, and the routine
								; contains no register save or restore.

								        .asg            A0,         A_m4

								        .asg            A1,         A_m2

								        .asg            A2,         A_m1

								        .asg            A2,         A_m3210

								        .asg            A3,         A_m7654

								        .asg            A4,         A_ir0

								        .asg            A8,         A_ir2

								        .asg            A9,         A_rC

								        .asg            A16,        A_out_data

								        .asg            A17,        A_ir1

								        .asg            A18,        A_p2

								        .asg            A19,        A_q2

								        .asg            A19,        A_rB

								        .asg            A20,        A_m0

								        .asg            A21,        A_m3

								        .asg            A22,        A_p4

								        .asg            A22,        A_p7

								        .asg            A22,        A_p8

								        .asg            A23,        A_q4

								        .asg            A23,        A_q7

								        .asg            A23,        A_q8

								        .asg            A23,        A_rslt

								        .asg            A24,        A_p1

								        .asg            A24,        A_p5

								        .asg            A25,        A_q1

								        .asg            A25,        A_q5

								        .asg            A25,        A_rA

								        .asg            A25,        A_rD

								        .asg            A26,        A_p6

								        .asg            A27,        A_p6h

								        .asg            A28,        A_p3

								        .asg            A29,        A_p3h

								        .asg            A30,        A_p0

								        .asg            A31,        A_p0h

								        .asg            B0,         B_m8

								        .asg            B1,         B_m7

								        .asg            B2,         B_m5

								        .asg            B5,         B_m7654

								        .asg            B5,         B_m4

								        .asg            B6,         B_rA

								        .asg            B7,         B_ir2

								        .asg            B8,         B_rB

								        .asg            B9,         B_m6

								        .asg            B16,        B_p1

								        .asg            B16,        B_p2

								        .asg            B16,        B_p5

								        .asg            B17,        B_q1

								        .asg            B17,        B_q2

								        .asg            B17,        B_q5

								        .asg            B18,        B_ir1

								        .asg            B19,        B_i

								        .asg            B20,        B_ir0

								        .asg            B21,        B_m3

								        .asg            B22,        B_p4

								        .asg            B23,        B_q4

								        .asg            B23,        B_rC

								        .asg            B24,        B_p7

								        .asg            B24,        B_p8

								        .asg            B24,        B_rD

								        .asg            B24,        B_rslt

								        .asg            B25,        B_q7

								        .asg            B25,        B_q8

								        .asg            B25,        B_rslt_

								        .asg            B26,        B_p6

								        .asg            B27,        B_p6h

								        .asg            B28,        B_p3

								        .asg            B29,        B_p3h

								        .asg            B30,        B_p0

								        .asg            B31,        B_p0h

								* ========================================================================= *


								        ; Setup: fetch the nine mask bytes and derive the row pointers.
								        ;   B_m8            <- mask[8]     (signed byte load)
								        ;   A_m7654:A_m3210 <- mask[0..7]  (non-aligned 8-byte load)
								        ; The loaded values are first read in the prolog; the four ADDs sit
								        ; between the loads and those first uses (C64x loads have four
								        ; delay slots), so they must not be removed without adding NOPs.
								        ;
								        ; Input rows are `cols` bytes apart:
								        ;   B_ir1 = in_data + cols        B_ir2 = B_ir1 + cols
								        ;   B_ir0 = in_data + 4           A_ir1 = B_ir1 + 4
								        ; A_ir0 is in_data itself; A_ir2 = B_ir2 + 4 is formed in the prolog.
								        ; Pointers at offset +4 feed the A_p* registers and pointers at
								        ; offset 0 feed the B_p* registers, whichever file the pointer is in.
								        LDB     .D1T2   *A_mask[8], B_m8


								        LDNDW   .D1T1   *A_mask,    A_m7654:A_m3210


								        ADD     .L2X    A_ir0,      B_cols,     B_ir1


								        ADD     .D2X    A_ir0,      4,          B_ir0


								        ADD     .L2     B_ir1,      B_cols,     B_ir2


								        ADD     .L1X    B_ir1,      4,          A_ir1


								* =========================== PIPE LOOP PROLOG ============================ *
								; The prolog issues the loads of the first iteration (tags [1..7,1]) and,
								; in the same packets, turns every mask byte into a flag: 0 when the byte
								; is non-negative (tap included), all ones when it is negative (DONT_CARE).
								;   SHR x, 31       sign of the top byte of a mask word (m3, m7), or of
								;                   the sign-extended byte (m8)
								;   EXT x, n, 31    shift left n, then arithmetic shift right 31;
								;                   n = 8 / 16 / 24 selects byte 2 / 1 / 0 of the word

								        LDNDW   .D      *B_ir0++(8),            A_p0h:A_p0      ;[ 1,1]

								||      SHR     .S2     B_m8,       31,         B_m8

								||      MV      .L2X    A_m7654,    B_m7654


								        LDNDW   .D      *A_ir1++(8),            A_p3h:A_p3      ;[ 2,1]

								||      SHR     .S2     B_m7654,    31,         B_m7

								||      SHR     .S1     A_m3210,    31,         A_m3


								        LDNDW   .D      *B_ir1++(8),            B_p3h:B_p3      ;[ 3,1]

								||      ADD             B_ir2,      4,          A_ir2

								||      EXT     .S2     B_m7654,    8,  31,     B_m6

								||      EXT     .S1     A_m3210,    8,  31,     A_m2


								        LDNDW   .D      *A_ir2++(8),            A_p6h:A_p6      ;[ 4,1]

								||      EXT     .S2     B_m7654,    16, 31,     B_m5

								||      EXT     .S1     A_m3210,    24, 31,     A_m0


								; A_m1 and B_m4 overwrite the registers that held the packed mask words
								; (A2 and B5); they are extracted last for that reason.
								        LDNDW   .D      *A_ir0++(8),            B_p0h:B_p0      ;[ 5,1]

								||      EXT     .S2     B_m7654,    24, 31,     B_m4

								||      EXT     .S1     A_m3210,    16, 31,     A_m1


								; The store pointer is moved to the A side.  B_i starts as cols / 8; the
								; SUB in the next packet makes it cols / 8 - 2 for the kernel's BDEC.
								        SHR     .S1     A_p0h:A_p0, 2,          A_q2:A_p2       ;[ 6,1]

								||      MV      .L1X    B_out_data,             A_out_data

								||      SHR     .S2     B_cols,     3,          B_i


								; A_m4 and B_m3 are cross-file copies of B_m4 and A_m3 (ROTL by 0 is a
								; plain copy issued on the .M unit).
								; `B loop_5 + 4` takes effect after the five kernel packets loop .. loop_4
								; have executed, and enters the loop_5 execute packet one instruction
								; (4 bytes) in.  The STNDW that heads that packet is therefore skipped on
								; the first pass, when no finished result exists yet.
								        ANDN    .D1     A_p3,       A_m3,       A_rB            ;[ 7,1]

								||      SHR     .S1     A_p3h:A_p3, 2,          A_q5:A_p5       ;[ 7,1]

								||      LDNDW   .D2T2   *B_ir2++(8),            B_p6h:B_p6      ;[ 7,1]

								||      MV      .L1X    B_m4,       A_m4

								||      ROTL    .M2X    A_m3,       0,          B_m3

								||      SUB     .L2     B_i,        2,          B_i

								||      B               loop_5 + 4


								* =========================== PIPE LOOP KERNEL ============================ *
								; Steady-state kernel: seven execute packets per pass, one 8-byte store
								; (64 output pixels) per pass.  The ;[c,i] tag on each instruction gives
								; its cycle c within one iteration's schedule and the overlapped
								; iteration i it belongs to; three iterations are in flight at once.
								;
								; Dataflow, identical on both sides (the A side works on the word loaded
								; from input offset +4, the B side on the word from offset 0):
								;   rC = (p0 & ~m0) | (p1 if m1 == 0) | (p2 if m2 == 0)      row 0
								;   rB = (p3 & ~m3) | (p4 if m4 == 0) | (p5 if m5 == 0)      row 1
								;   rA = (p6 & ~m6) | (p7 if m7 == 0) | (p8 if m8 == 0)      row 2
								;   rD = rA | rB ;  rslt = rC | rD
								; p1/p4/p7 are the 40-bit row pairs shifted right by 1 bit and p2/p5/p8
								; by 2 bits, which brings the right-hand neighbours into place.  Each
								; flag is 0 or all ones, so ANDN passes or clears the unshifted row,
								; and a `[!flag] OR` executes only when the flag is 0.
								;
								; Do not reorder: several results are read in the very packet in which
								; an aliased register is rewritten (for example the STNDW at loop_5
								; reads B25:B24 while the SHR in the same packet writes B_q8:B_p8).
								;
								; NOTE(review): every pass also issues the loads for a further 8-byte
								; step (tags [1..7,3]).  The loads issued on the final pass are never
								; consumed, so the routine reads input memory beyond the bytes that
								; contribute to the output - by my reading up to 12 bytes past offset
								; `cols` in each of the three rows.  Confirm before relying on it.

								loop:

								  [!B_m7]OR     .L2     B_p7,       B_rA,       B_rA            ;[15,1]

								||[!B_m5]OR     .L1     A_p5,       A_rB,       A_rB            ;[ 8,2]

								||      ANDN    .D1     A_p0,       A_m0,       A_rC            ;[ 8,2]

								||      SHR     .S1     A_p0h:A_p0, 1,          A_q1:A_p1       ;[ 8,2]

								||      SHR     .S2     B_p3h:B_p3, 2,          B_q5:B_p5       ;[ 8,2]

								||      LDNDW   .D      *B_ir0++(8),            A_p0h:A_p0      ;[ 1,3]


								;loop_1:
								; BDEC branches back to `loop` while B_i >= 0 and decrements B_i; the
								; branch takes effect after the loop_6 packet.  The ROTL by 0 copies the
								; finished A-side result into B_rslt_ so that it can be stored as the
								; upper half of the B register pair.

								        BDEC    .S2     loop,       B_i                         ;[16,1]

								||      ROTL    .M2X    A_rslt,     0,          B_rslt_         ;[16,1]

								||      OR      .L2     B_rA,       B_rB,       B_rD            ;[16,1]

								||      ANDN    .L1X    A_p6,       B_m6,       A_rA            ;[ 9,2]

								||      SHR     .S1     A_p3h:A_p3, 1,          A_q4:A_p4       ;[ 9,2]

								||      ANDN    .D2     B_p3,       B_m3,       B_rB            ;[ 9,2]

								||      LDNDW   .D      *A_ir1++(8),            A_p3h:A_p3      ;[ 2,3]


								;loop_2:

								        OR      .L2     B_rC,       B_rD,       B_rslt          ;[17,1]

								||[!A_m2]OR     .D1     A_p2,       A_rC,       A_rC            ;[10,2]

								||[!A_m4]OR     .L1     A_p4,       A_rB,       A_rB            ;[10,2]

								||      SHR     .S1     A_p6h:A_p6, 2,          A_q8:A_p8       ;[10,2]

								||      SHR     .S2     B_p3h:B_p3, 1,          B_q4:B_p4       ;[10,2]

								||      LDNDW   .D      *B_ir1++(8),            B_p3h:B_p3      ;[ 3,3]


								;loop_3:

								  [!B_m8]OR     .L1     A_p8,       A_rA,       A_rA            ;[11,2]

								||      SHR     .S1     A_p6h:A_p6, 1,          A_q7:A_p7       ;[11,2]

								||[!B_m5]OR     .L2     B_p5,       B_rB,       B_rB            ;[11,2]

								||      ANDN    .D2X    B_p0,       A_m0,       B_rC            ;[11,2]

								||      SHR     .S2     B_p0h:B_p0, 2,          B_q2:B_p2       ;[11,2]

								||      LDNDW   .D      *A_ir2++(8),            A_p6h:A_p6      ;[ 4,3]


								;loop_4:

								  [!A_m1]OR     .L1     A_p1,       A_rC,       A_rC            ;[12,2]

								||[!B_m7]OR     .S1     A_p7,       A_rA,       A_rA            ;[12,2]

								||[!A_m2]OR     .L2     B_p2,       B_rC,       B_rC            ;[12,2]

								||      ANDN    .D2     B_p6,       B_m6,       B_rA            ;[12,2]

								||      SHR     .S2     B_p0h:B_p0, 1,          B_q1:B_p1       ;[12,2]

								||      LDNDW   .D      *A_ir0++(8),            B_p0h:B_p0      ;[ 5,3]


								; loop_5 is referenced by the prolog (`B loop_5 + 4`): the first pass
								; enters this packet just after the STNDW.  The STNDW must therefore stay
								; the first instruction of the packet.  It stores the pair
								; B_rslt_:B_rslt (A-side word in the upper half, B-side word in the
								; lower half) and advances the output pointer by 8 bytes.
								loop_5:

								        STNDW   .D      B_rslt_:B_rslt,         *A_out_data++(8);[20,1]

								||      OR      .L1     A_rA,       A_rB,       A_rD            ;[13,2]

								||[!A_m1]OR     .D2     B_p1,       B_rC,       B_rC            ;[13,2]

								||[!A_m4]OR     .L2     B_p4,       B_rB,       B_rB            ;[13,2]

								||      SHR     .S2     B_p6h:B_p6, 2,          B_q8:B_p8       ;[13,2]

								||      SHR     .S1     A_p0h:A_p0, 2,          A_q2:A_p2       ;[ 6,3]


								;loop_6:

								        OR      .L1     A_rC,       A_rD,       A_rslt          ;[14,2]

								||[!B_m8]OR     .L2     B_p8,       B_rA,       B_rA            ;[14,2]

								||      SHR     .S2     B_p6h:B_p6, 1,          B_q7:B_p7       ;[14,2]

								||      ANDN    .D1     A_p3,       A_m3,       A_rB            ;[ 7,3]

								||      SHR     .S1     A_p3h:A_p3, 2,          A_q5:A_p5       ;[ 7,3]

								||      LDNDW   .D      *B_ir2++(8),            B_p6h:B_p6      ;[ 7,3]


								* =========================== PIPE LOOP EPILOG ============================ *
								; RET B3 returns to the caller (on tools older than 4.30 the shim at the
								; top of this file maps RET to B).  The five instructions that follow it
								; execute before the return takes effect and finish the last iteration:
								; the remaining row-2 OR on the B side, the copy of the A-side result
								; into B_rslt_, the two combining ORs, and the final 8-byte store.  That
								; store is unconditional and uses no pointer post-increment.
								; Exactly five instructions must follow the RET; adding or removing one
								; changes what runs before control leaves the routine.

								        RET             B3


								 [!B_m7]OR      .L2     B_p7,       B_rA,       B_rA            ;[15,3]


								        ROTL    .M2X    A_rslt,     0,          B_rslt_         ;[16,3]


								        OR      .L2     B_rA,       B_rB,       B_rD            ;[16,3]


								        OR      .L2     B_rC,       B_rD,       B_rslt          ;[17,3]


								        STNDW   .D      B_rslt_:B_rslt,         *A_out_data     ;[20,3]


								* ========================================================================= *

								*   End of file:  img_dilate_bin.asm                                        *

								* ------------------------------------------------------------------------- *

								*             Copyright (c) 2003 Texas Instruments, Incorporated.           *

								*                            All Rights Reserved.                           *

								* ========================================================================= *