;* ======================================================================== *;
;*  TEXAS INSTRUMENTS, INC.                                                 *;
;*                                                                          *;
;*  IMGLIB  DSP Image/Video Processing Library                              *;
;*                                                                          *;
;*      Release:        Revision 1.04b                                      *;
;*      CVS Revision:   1.11    Sun Sep 29 03:32:26 2002 (UTC)              *;
;*      Snapshot date:  23-Oct-2003                                         *;
;*                                                                          *;
;*  This library contains proprietary intellectual property of Texas       *;
;*  Instruments, Inc.  The library and its source code are protected by    *;
;*  various copyrights, and portions may also be protected by patents or   *;
;*  other legal protections.                                                *;
;*                                                                          *;
;*  This software is licensed for use with Texas Instruments TMS320        *;
;*  family DSPs.  This license was provided to you prior to installing     *;
;*  the software.  You may review this license by consulting the file      *;
;*  TI_license.PDF which accompanies the files in this library.            *;
;* ------------------------------------------------------------------------ *;
;*            Copyright (C) 2003 Texas Instruments, Incorporated.           *;
;*                           All Rights Reserved.                           *;
;* ======================================================================== *;


;* ======================================================================== *;
;*  Assembler compatibility shim for assembling 4.30 and later code on     *;
;*  tools prior to 4.30.                                                    *;
;* ======================================================================== *;

; Resolve the assembler version into the substitution symbol $asmver:
; it takes the value of .ASSEMBLER_VERSION when that symbol is defined,
; and 0 otherwise (so tools that do not define it take the "< 430" path).
        .if $isdefed(".ASSEMBLER_VERSION")
        .asg    .ASSEMBLER_VERSION, $asmver
        .else
        .asg    0, $asmver
        .endif

; On tools older than 4.30 the call/return mnemonics used by this file are
; substituted with plain branch instructions, and .asmfunc/.endasmfunc are
; substituted with nothing.
        .if ($asmver < 430)

        .asg    B, CALL     ; Function Call
        .asg    B, RET      ; Return from a Function
        .asg    B, CALLRET  ; Function call with Call / Ret chaining.

; The BNOP-based variants are only defined when assembling for C6400.
        .if .TMS320C6400
        .asg    BNOP, CALLNOP   ; C64x BNOP as a Fn. Call
        .asg    BNOP, RETNOP    ; C64x BNOP as a Fn. Return
        .asg    BNOP, CRNOP     ; C64x Fn call w/, Call/Ret chaining via BNOP.
        .endif

        .asg    , .asmfunc      ; .func equivalent for hand-assembly code
        .asg    , .endasmfunc   ; .endfunc equivalent for hand-assembly code

        .endif

;* ======================================================================== *;
;*  End of assembler compatibility shim.
*; ;* ======================================================================== *; * ========================================================================= * * TEXAS INSTRUMENTS, INC. * * * * NAME * * IMG_mpeg2_vld_inter * * * * PLATFORM * * C6400 * * * * REVISION DATE * * 23-May-2002 * * * * DESCRIPTION * * This routine takes a bitstream of an MPEG-2 non-intra coded * * macroblock and returns the decoded IDCT coefficients. The routine * * is implemented as specified in the MPEG-2 standard text (ISO/IEC * * 13818-2). The routine checks the coded block pattern (cbp), * * performs coefficient decoding including variable length decode, * * run-length expansion, inverse zigzag, dequantization, saturation * * and mismatch control. * * * * USAGE * * This routine is C callable, and has the following C prototype: * * * * void IMG_mpeg2_vld_inter * * ( * * const short *restrict Wptr, * * short *restrict outi, * * IMG_mpeg2_vld *restrict Mpeg2v, * * int mode_12Q4, * * int num_blocks, * * int bsbuf_words * * ); * * * * Wptr: Pointer to array that contains quantization matrix. The * * elements of the quantization matrix in *Wptr must be * * ordered according to the scan pattern used (zigzag or * * alternate scan). Video format 4:2:0 requires one * * quantization matrix (64 array elements). For formats * * 4:2:2 and 4:4:4 two quantization matrices (one for luma * * and one for chroma) must be specified in the array (128 * * array elements). * * * * outi: Pointer to the IDCT coefficients output array * * (6*64 elements), elements must be set to zero prior to * * function call. * * * * Mpeg2v: Pointer to the context object containing the coding * * parameters of the MB to be decoded and the current state * * of the bitstream buffer. The structure is described * * below. * * * * mode_12Q4: 0: Coefficients are returned in normal 16-bit integer * * format. * * Otherwise: Coefficients are returned in 12Q4 format * * (normal 16-bit integer format left shifted by 4). 
This * * mode is useful for directly passing the coefficients * * into the IMG_idct_8x8 routine. * * * * num_blocks: Number of blocks that the MB contains. Valid values are * * 6 for 4:2:0, 8 for 4:2:2 and 12 for 4:4:4 format. * * * * bsbuf_words: Size of bitstream buffer in words. Must be a power of 2. * * Bitstream buffer must be aligned at an address boundary * * equal to its size in bytes (bitstream buffer is * * addressed circularly by this routine.) * * * * The structure Mpeg2v is defined as follows: * * * *C #ifndef IMG_MPEG2_VLD_STRUCT_ * *C #define IMG_MPEG2_VLD_STRUCT_ 1 * *C * *C typedef struct { * *C unsigned int *bsbuf; // pointer to bitstream buffer * *C unsigned int next_wptr; // next word to read from buffer * *C unsigned int bptr; // bit position within word * *C unsigned int word1; // word aligned buffer * *C unsigned int word2; // word aligned buffer * *C unsigned int top0; // top 32 bits of bitstream * *C unsigned int top1; // next 32 bits of bitstream * *C const unsigned char *scan; // inverse zigzag scan matrix * *C unsigned int intravlc; // intra_vlc_format * *C unsigned int quant_scale; // quant_scale * *C unsigned int dc_prec; // intra_dc_precision * *C unsigned int cbp; // coded_block_pattern * *C unsigned int fault; // fault condition (returned) * *C unsigned int reserved; // reserved * *C } IMG_mpeg2_vld; * *C * *C #endif * * * * The Mpeg2v variables should have a fixed layout since they are * * accessed by this routine. If the layout is changed, the * * corresponding changes have to be made in the assembly code too. * * * * The routine sets the fault flag Mpeg2v.fault to 1 if an invalid * * VLC code was encountered or the total run went beyond 63. In * * these cases the decoder has to resynchronize. * * * * The required lookup tables for this routine are provided in * * IMGLIB and are linked in automatically when linking against * * IMGLIB. * * * * Before calling the routine the bitstream variables in Mpeg2v * * have to be initialized. 
If bsbuf is a circular buffer and bsptr * * contains the number of bits in the buffer that already have * * been consumed, then next_wptr, bptr, word1, word2, top0 and * * top1 are initialized as follows: * * * * 1. next_wptr: bsptr may not be a multiple of 32, therefore obtain * * the next lower multiple of 32. * * * * next_wptr = (bsptr >> 5); * * * * 2. bptr: bptr is the bit pointer which points to the current * * bit WITHIN the word pointed to by next_wptr. * * * * bptr = bsptr & 31; * * bptr_cmpl = 32 - bptr; * * * * 3. word1 and word2: read next 3 words from the bitstream buffer * * (word0 is a temporary variable). bsbuf_words is the size of the * * bitstream buffer in words. * * * * word0 = bsbuf[next_wptr]; * * next_wptr = (next_wptr + 1) & (bsbuf_words-1); * * * * word1 = bsbuf[next_wptr]; * * next_wptr = (next_wptr + 1) & (bsbuf_words-1); * * * * word2 = bsbuf[next_wptr]; * * next_wptr = (next_wptr + 1) & (bsbuf_words-1); * * * * 4. top0 and top1: Shift words word0, word1, word2 by bptr to the * * left so that the current bit becomes the MSB in word0. word0 can * * simply be shifted by bptr; the then empty LSBs of word0 have to be * * filled with the MSBs of word1. To do that the required MSBs are * * brought into the position of empty LSBs of word0 by shifting word1 * * to the right by (32-bptr). The result is then copied into word0 by * * an addition. Rather than overwriting word0, top0 is used to hold * * the new bit aligned word. The same procedure is used to obtain * * top1. top0 and top1 contain the next 64 bits of the bitstream. * * * * s1 = word0 << bptr; * * s2 = word1 >> bptr_cmpl; /* unsigned right-shift */ * * top0 = s1 + s2; * * * * s3 = word1 << bptr; * * s4 = word2 >> bptr_cmpl; /* unsigned right-shift */ * * top1 = s3 + s4; * * * * Note that the routine returns the updated state of the bitstream * * buffer variables, top0, top1, word1, word2, bptr and next_wptr. 
If * * all other functions which access the bitstream in a decoder system * * maintain the buffer variables in the same way, then the above * * initialization procedure has to be performed only once at the * * beginning. * * * * * * TECHNIQUES * * The instruction NORM is used to detect the number of leading zeros * * or ones in a code word. This value together with additional bits * * extracted from the codeword is then used as an index into look-up * * tables to determine the length, run, level and sign. Escape code * * sequences are directly extracted from the code word. * * * * ASSUMPTIONS * * The bitstream must be stored in memory in 32-bit words which are * * in little endian byte order. * * * * Wptr is allowed to overrun once (to detect total run overrun), so * * maximum overrun that can occur is 66 (Error mark). Therefore, * * in memory 66+1 halfwords behind the weighting matrix should be * * valid (e.g. peripherals). No memory is overwritten, * * only loads occur. * * * * Note that the AMR register is set to zero on exit. * * * * NOTES * * This code is little ENDIAN. * * This code is interrupt-tolerant but not interruptible. * * * * MEMORY NOTES * * No bank conflicts * * * * CYCLES * * 10 * (S - CB) + 37 * CB + 15 * NCB + 34 * * where S: Number of symbols in MB, CB: Number of coded blocks, * * NCB: Number of not-coded blocks, and CB+NCB=6 * * * * CODE SIZE * * 1248 bytes * * * * MEMORY REQUIREMENTS * * 1792 bytes for the lookup tables * * (can be shared with mpeg2_vld_intra) * * * * * * ------------------------------------------------------------------------- * * Copyright (c) 2003 Texas Instruments, Incorporated. * * All Rights Reserved. 
* ========================================================================= *
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ====================== *
; Symbolic names for the physical registers used by the routine.  Several
; names deliberately alias one register (for example A1 is A_bptr1, A_qw,
; A_test1 and A_test2), so a write through one name changes all its aliases.
        .asg            A0,         A_neg
        .asg            A1,         A_bptr1
        .asg            A1,         A_qw
        .asg            A1,         A_test1
        .asg            A1,         A_test2
        .asg            A16,        A_len_c
        .asg            A17,        A_bptr
        .asg            A18,        A_len_tbl_adr
        .asg            A19,        A_const31
        .asg            A2,         A_top0l
        .asg            A20,        A_const32
        .asg            A22,        A_const36
        .asg            A23,        A_qscl
        .asg            A24,        A_level4
        .asg            A24,        A_t2
        .asg            A24,        A_top0_bk
        .asg            A25,        A_empty
        .asg            A25,        A_len
        .asg            A26,        A_nrm
        .asg            A26,        A_t1l
        .asg            A26,        A_t4l
        .asg            A27,        A_t1h
        .asg            A27,        A_t4h
        .asg            A3,         A_top0h
        .asg            A4,         A_ptop0l
        .asg            A5,         A_level_f
        .asg            A5,         A_level5
        .asg            A5,         A_ptop0h
        .asg            A6,         A_W
        .asg            A7,         A_top1
        .asg            A8,         A_word1
        .asg            A9,         A_t3
        .asg            A9,         A_t7
        .asg            A9,         A_t8
        .asg            B0,         B_eob
        .asg            B1,         B_run
        .asg            B1,         B_test3
        .asg            B1,         B_12Q4          ; 12Q4 MERGE
        .asg            B16,        B_level2
        .asg            B16,        B_rld_left
        .asg            B17,        B_bptr_cmpl
        .asg            B17,        B_t14
        .asg            B17,        B_t9
        .asg            B19,        B_word2
        .asg            B20,        B_Wptr_end
        .asg            B21,        B_Zptr
        .asg            B22,        B_outi
        .asg            B23,        B_sum
        .asg            B24,        B_top0_bk
        .asg            B26,        B_level3
        .asg            B3,         B_const63
        .asg            B4,         B_rld_table_adr
        .asg            B5,         B_const32
        .asg            B6,         B_rld_table_adr_1
        .asg            B7,         B_bsbuf_circ
        .asg            B8,         B_Wptr
        .asg            B9,         B_level
        .asg            B9,         B_t12
        .asg            B9,         B_t13
        .asg            B9,         B_t15
        .asg            B9,         B_t16
        .asg            A11,        A_outi          ; 12Q4
        .asg            A12,        A_cnum          ; 12Q4
        .asg            A13,        A_const16       ; 12Q4
        .asg            B18,        B_constFFF0     ; 12Q4
* ========================================================================= *

; Lookup tables referenced by address below; they are defined outside this file.
        .global _IMG_len_tbl0
        .global _IMG_rld_table0

; Mpeg2v structure:
; Field indices into the IMG_mpeg2_vld context.  They are used as the [ ]
; index of LDW/STW accesses relative to A_Mpeg2v / B_Mpeg2v, and follow the
; member order of the C structure shown in the file header
; (bsbuf, next_wptr, bptr, word1, word2, top0, top1, scan, ..., quant_scale,
; ..., cbp, fault).
BSBUF_M2OFF         .set  0x0
NEXTWPTR_M2OFF      .set  0x1
BPTR_M2OFF          .set  0x2
WORD1_M2OFF         .set  0x3
WORD2_M2OFF         .set  0x4
TOP0_M2OFF          .set  0x5
TOP1_M2OFF          .set  0x6
ZPTR_M2OFF          .set  0x7
QSCL_M2OFF          .set  0x9
CBP_M2OFF           .set  0xB
FAULT_M2OFF         .set  0xC

        .sect ".text:_mpeg2_vld_inter"
        .global _IMG_mpeg2_vld_inter

; -------------------------------------------------------------------------
; _IMG_mpeg2_vld_inter -- entry point.
;
; Stack frame used by this routine (word index relative to B_SP after the
; prolog has post-decremented it by 9 words):
;   [1] saved CSR        [2] return address (B3)   [3] A11   [4] A12
;   [5] A13              [6] A14                   [7] Wptr argument
;   [8] num_blocks       [9] A10 (stored at the incoming SP)
;
; Side effects visible in this code: interrupts are disabled by clearing
; bit 0 of CSR for the duration of the routine and the saved CSR is written
; back at exit; AMR is loaded during setup and written with zero at exit;
; the bitstream state and the fault word of *Mpeg2v are stored at exit.
; -------------------------------------------------------------------------
_IMG_mpeg2_vld_inter:

; parameters: B_Wptr, B_outi, A_Mpeg2v, B_12Q4, A_num_blocks, B_bsbuf_words
;             A4,     B4,     A6,       B6,     A8,           B8

* ========================================================================= *
*  Setup
* ========================================================================= *
        .asg            B15,        B_SP            ; Stack pointer, B datapath
        .asg            A16,        A_SP            ; Stack pointer, A datapath
        .asg            B0,         B_csr           ; CSR's value
        .asg            B1,         B_no_gie        ; CSR w/ GIE bit cleared
        .asg            B3,         B_ret           ; Return address
        .asg            A29,        A_Mpeg2v
        .asg            B18,        B_Mpeg2v
        .asg            B2,         B_cnt
        .asg            A10,        A_amr_arg       ; AMR arg
        .asg            B9,         B_amr_arg       ; AMR arg

; Prolog: save A10 and allocate 9 words, move the arguments into their
; working registers and read CSR.  B8 (bsbuf_words) is copied to A_amr_arg
; here, before B8 is reused as B_Wptr in the next packet.
        STW     .D2T1   A10,        *B_SP--[9]              ; RWD, MERGE, 2 W-mat
||      MVC     .S2     CSR,        B_csr                   ; Get CSR's state
||      MV      .L2     B4,         B_outi
||      MV      .L1     A6,         A_Mpeg2v
||      MV      .S1X    B8,         A_amr_arg               ; AMR arg

; A_amr_arg becomes 32 - LMBD(1, bsbuf_words) over this packet and the next.
        STW     .D2T2   B_csr,      *+B_SP[1]               ; Save CSR
||      AND     .L2     B_csr,      -2,         B_no_gie    ; Clear GIE
||      MV      .S2X    A4,         B_Wptr
||      MV      .D1X    B_SP,       A_SP                    ; 12Q4 MERGE
||      LMBD    .L1     1,          A_amr_arg,  A_amr_arg; AMR arg
||      MVK     .S1     32,         A_const32               ; AMR arg

        STW     .D2T2   B_ret,      *+B_SP[2]               ; Save return addr.
||      STW     .D1T1   A14,        *+A_SP[6]               ; MERGE
||      MV      .L2X    A6,         B_Mpeg2v
||      MVC     .S2     B_no_gie,   CSR                     ; Disable ints.
||      SUB     .L1     A_const32,  A_amr_arg,  A_amr_arg; AMR arg
        ; ===== Interrupts masked here =====

* ========================================================================= *
*  Get bitstream info
*  Setup circular bitstream buffer
*  Load table addresses and constants
*  Block loop setup
* ========================================================================= *
        .asg            B31,        B_bsbuf
        .asg            B29,        B_next_wptr
        .asg            B27,        B_cbp
        .asg            B0,         B0_amr_config
        .asg            A21,        A_const1
        .asg            A9,         A_tbs1
        .asg            A4,         A_tbs2
        .asg            B17,        B_tbs3
        .asg            B3,         B_const126
        .asg            B3,         B_const128
        .asg            A14,        A_constSHR      ; 12Q4 MERGE

; Load the bitstream state from *Mpeg2v.  B_12Q4 receives the mode_12Q4
; argument (B6) and from here on predicates the 12Q4 / non-12Q4 constants.
        LDW     .D2T2   *+B_Mpeg2v[BSBUF_M2OFF],    B_bsbuf
||      LDW     .D1T1   *+A_Mpeg2v[TOP0_M2OFF],     A_top0_bk
||      MVK     .S2     128,        B_const128
||      MV      .L2     B6,         B_12Q4                  ; 12Q4 MERGE

; B_Wptr_end = Wptr + 128 bytes.  A_constSHR is the final right-shift amount
; applied to the coefficient: 20 here, overridden with 16 below in 12Q4 mode.
        LDW     .D2T2   *+B_Mpeg2v[NEXTWPTR_M2OFF], B_next_wptr
||      LDW     .D1T1   *+A_Mpeg2v[TOP1_M2OFF],     A_top1
||      ADD     .L2     B_Wptr,     B_const128,     B_Wptr_end
||[!B_12Q4]MVK  .S1     20,         A_constSHR              ; non-12Q4 MERGE
||      SHL     .S2X    A_amr_arg,  16,             B_amr_arg; AMR arg

; The AMR value is (32 - LMBD(1, bsbuf_words)) << 16 with bit 14 set.
; NOTE(review): per the C6000 AMR layout this presumably selects circular
; addressing for B7 (B_bsbuf_circ) with the block size taken from BK0 --
; confirm against the CPU reference guide.
        LDW     .D1T1   *+A_Mpeg2v[BPTR_M2OFF],     A_bptr
||      STW     .D2T2   B_Wptr,     *+B_SP[7]               ; 2 W-mat
||      MV      .L2X    A8,         B_cnt                   ; 2 W-mat
||      SET     .S2     B_amr_arg,  14, 14,         B_amr_arg ; AMR arg

        LDW     .D1T1   *+A_Mpeg2v[WORD1_M2OFF],    A_word1
||      LDW     .D2T2   *+B_Mpeg2v[WORD2_M2OFF],    B_word2
||      MVC     .S2     B_amr_arg,  AMR                     ; AMR arg
||      MVK     .S1     31,         A_const31

        LDW     .D1T1   *+A_Mpeg2v[QSCL_M2OFF],     A_qscl
||      LDW     .D2T2   *+B_Mpeg2v[CBP_M2OFF],      B_cbp
||[B_12Q4]MVK   .S1     16,         A_constSHR              ; 12Q4 MERGE
        ; B_constFFF0 and B_Mpeg2v share the same register

* ========================================================================= *
*  Setup bitstream pointers: top0h:top0l, top1 contain top bitstream
* ========================================================================= *
        .asg            B25,        B_word2_bk
        .asg            A28,        A_word1_bk
        .asg            A10,        A_word1_rw              ; RWD
        .asg            A21,        A_word1_rw_bk           ; RWD
        .asg            A31,        A_top0h_bk
        .asg            A30,        A_top0l_bk
        .asg            B28,        B_bptr_bk
        .asg            B30,        B_bsbuf_circ_bk

; Build the register pair A_top0h_bk:A_top0l_bk from top0 and the upper
; byte of top1:
;   top0l_bk = (top0 << 8) + (top1 >> 24),   top0h_bk = top0 >> 24
; A11..A13 are saved to the stack in the free .D2 slots.  B_constFFF0 is the
; mask ANDed onto each coefficient in the kernel (0xFFF0 or 0xFFFF via MVKL).
        SHL     .S1     A_top0_bk,  8,          A_tbs1
||      STW     .D2T1   A11,        *+B_SP[3]
||[B_12Q4]MVKL  .S2     0xFFF0,     B_constFFF0             ; 12Q4 MERGE

        SHRU    .S1     A_top1,     24,         A_tbs2
||      STW     .D2T1   A12,        *+B_SP[4]
||[!B_12Q4]MVKL .S2     0xFFFF,     B_constFFF0             ; non-12Q4 MERGE

; The bit pointer is advanced by the same 8 bits (undone again at exit).
        ADD     .L1     A_tbs1,     A_tbs2,     A_top0l_bk
||      ADD     .S1     A_bptr,     8,          A_bptr1
||      STW     .D2T1   A13,        *+B_SP[5]

; bptr = (bptr + 8) & 31; A_test2 flags that the advance crossed a word
; boundary.  B_bsbuf_circ = &bsbuf[next_wptr].
        CMPGT   .L1     A_bptr1,    A_const31,  A_test2
||      AND     .S1     A_bptr1,    A_const31,  A_bptr
||      MVK     .S2     32,         B_const32
||      ADDAW   .D2     B_bsbuf,    B_next_wptr,B_bsbuf_circ

; On a word crossing: keep the old word1 in A_word1_rw, shift word2 into
; word1 and fetch the next word from the bitstream buffer into word2.
  [A_test2]MV   .S1     A_word1,    A_word1_rw              ; RWD
||[A_test2]MV   .L1X    B_word2,    A_word1
||[A_test2]LDW  .D2T2   *B_bsbuf_circ++,        B_word2
||      SUB     .S2     B_const32,  A_bptr,     B_bptr_cmpl

        MVKL    .S1     _IMG_len_tbl0,          A_len_tbl_adr
||      MVKL    .S2     _IMG_rld_table0,        B_rld_table_adr
||      MV      .L2X    A_bptr,     B_bptr_bk
||      STW     .D2T2   B_cnt,      *+B_SP[8]               ; 2 W-mat

        MVKH    .S1     _IMG_len_tbl0,          A_len_tbl_adr
||      MVKH    .S2     _IMG_rld_table0,        B_rld_table_adr

; A_const16 is the bit tested in the coefficient sum and toggled in the last
; coefficient by the mismatch code: 16 in 12Q4 mode, 1 otherwise.
  [B_12Q4]MVK   .S1     16,         A_const16               ; 12Q4

  [!B_12Q4]MVK  .S1     1,          A_const16               ; non-12Q4

; top1 = (word1 << bptr) + (word2 >> (32 - bptr))
        SHL     .S1     A_word1,    A_bptr,     A_tbs1
||      SHRU    .S2     B_word2,    B_bptr_cmpl,B_tbs3

; Snapshot word1, word2 and the circular pointer into their *_bk copies.
        ADD     .L1X    A_tbs1,     B_tbs3,     A_top1
||      SHRU    .S1     A_top0_bk,  24,         A_top0h_bk
||      MV      .D1     A_word1,    A_word1_bk
||      MV      .D2     B_word2,    B_word2_bk
||      MV      .L2     B_bsbuf_circ,           B_bsbuf_circ_bk

block_loop:
* ------------------------------------------------------------------------- *
*  check cbp, etc.
* ------------------------------------------------------------------------- *
        .asg            B17,        B_cbp_mask
        .asg            B0,         B_coded
        .asg            A5,         A_last_coeff
        .asg            A2,         A2_odd
        .asg            B31,        B_run_bk
        .asg            B26,        B_num_blocks            ; 2 W-mat

; One pass per block: cnt--, then coded = cbp & (1 << cnt).
; The "not coded" defaults (sum = 0, run_bk = 0, last_coeff = 0,
; Wptr = Wptr_end) are set up unconditionally before the test.
        SUB     .S2     B_cnt,      1,          B_cnt       ; cbp, cnt--
||      ZERO    .L2     B_sum
||      ZERO    .D2     B_run_bk                            ; not coded
||      ZERO    .L1     A2_odd                              ; not coded
||      MVK     .S1     1,          A_const1                ; cbp

        SHL     .S2X    A_const1,   B_cnt,      B_cbp_mask  ; cbp
||      MV      .L2     B_Wptr_end, B_Wptr                  ; not coded
||      MVK     .S1     0,          A_last_coeff            ; not coded

        AND     .D2     B_cbp_mask, B_cbp,      B_coded     ; cbp
||      MVK     .S2     126,        B_const126              ; const

; Block not coded: advance outi by 126 bytes and branch to mismatch, which
; adds the remaining 2 bytes.  Block coded: reload num_blocks from the stack.
  [!B_coded]B   .S1     mismatch                            ; not coded
||[!B_coded]ADD .L2     B_outi,     B_const126, B_outi      ; not coded
||[B_coded]LDW  .D2T2   *+B_SP[8],  B_num_blocks            ; 2 W-mat

* =========================== PIPE LOOP PROLOG ============================ *
; Software-pipeline prolog.  The ;[c,i] tags on each instruction are kept
; from the original schedule (presumably cycle-within-iteration and
; iteration number -- confirm).  Lines tagged "table mod", "restore",
; "reset", "const" and "cc for 2 W-mat" are set-up work interleaved into
; free slots of the prolog.
        .asg            A0,         A_tm
        .asg            B0,         B_tm_neg

; the added lines below calculate cc which is required for weighting
; matrix selection in 4:2:2 and 4:4:4 mode
; the following additional registers are required: B_block, B_flag, B_cc
        .asg            B31,        B_block
        .asg            B1,         B_cc
        .asg            B0,         B_flag

        NORM    .L1     A_top0h_bk:A_top0l_bk,  A_nrm       ;[ 1,1]
||      SHRU    .S1     A_top0h_bk, 7,          A_tm        ;table mod

        MPY     .M1     A_nrm,      -16,        A_t2        ;[ 2,1]
||      SHL     .S1     A_top0h_bk:A_top0l_bk,  A_nrm,      A_t1h:A_t1l;[ 2,1]

        MVK     .S1     36,         A_const36               ;const

        SHRU    .S1     A_t1h:A_t1l,            A_const36,  A_t4h:A_t4l ;[ 4,1]
||      SUB     .L1     A_len_tbl_adr,          A_t2,       A_t3 ;[ 4,1]
||[B_coded]LDW  .D2T2   *+B_SP[7],  B_Wptr                  ;get W-mat base adr

  [!A_tm]LDBU   .D1T1   *A_t3[A_t4l],           A_len       ;[ 5,1]
||[B_coded] SUB .L2     B_num_blocks,           1,          B_num_blocks;2 W-mat
        ; branch occurs if not coded MB

        SUB     .S2     B_num_blocks,           B_cnt,      B_block ;cc for 2 W-mat
||      CMPGT   .L2     B_num_blocks,           6,          B_flag ;prevent 2 W-mat if 4:2:0

        SHRU    .S1     A_top0h_bk:A_top0l_bk,  8,          A_empty:A_top0_bk;[ 8,1]
||[B_flag]CMPGT .L2     B_block,    3,          B_flag      ;cc for 2 W-mat
||      ZERO    .S2     B_cc                                ;cc for 2 W-mat

  [A_tm]MVK     .L1     2,          A_len                   ;table mod
||[B_flag] AND  .D2     B_block,    1,          B_cc        ;cc for 2 W-mat

        MV      .L1X    B_bptr_bk,  A_bptr                  ;restore
||      MVK     .S2     128,        B_const128              ;const
||[B_flag] ADD  .D2     B_cc,       1,          B_cc        ;cc for 2 W-mat

        SUB     .S2X    A_len,      5,          B_rld_left  ;[10,1]
||      CMPLT   .L2X    A_len,      5,          B_test3     ;[10,1]
||      ADD     .L1     A_bptr,     A_len,      A_bptr1     ;[10,1]
||      SHL     .S1     A_top0h_bk:A_top0l_bk,  A_len,      A_ptop0h:A_ptop0l;[10,1]
||[!A_tm]SUB    .D1     A_const32,  A_len,      A_len_c     ;[10,1]
||[B_cc]ADD     .D2     B_Wptr,     B_const128, B_Wptr      ;if cc!=0 select 2nd W-mat

  [B_test3]MPY  .M2     B_rld_left, 0,          B_rld_left  ;[11,1]
||      MV      .L2X    A_top0_bk,  B_top0_bk               ;[11,1]
||      AND     .S1     A_const31,  A_bptr1,    A_bptr      ;[11,1]
||      MV      .D1     A_ptop0h,   A_top0h                 ;[11,1]
||      NORM    .L1     A_ptop0h:A_ptop0l,      A_nrm       ;[ 1,2]

        CMPGT   .L1     A_bptr1,    A_const31,  A_test2     ;[12,1]
||      MPY     .M1     A_nrm,      -16,        A_t2        ;[ 2,2]
||      SHL     .S1     A_ptop0h:A_ptop0l,      A_nrm,      A_t1h:A_t1l ;[ 2,2]
||      ADD     .L2     B_Wptr,     B_const128, B_Wptr_end  ;reset

        SHL     .S2     B_top0_bk,  B_rld_left, B_t13       ;[13,1]
||      MPY     .M2X    B_const32,  A_len,      B_t12       ;[13,1]
||[A_tm]MVK     .S1     30,         A_len_c                 ;table mod
||      MV      .L1     A_word1_bk, A_word1                 ;restore
||      MV      .L2     B_word2_bk, B_word2                 ;restore
||      MV      .D2     B_bsbuf_circ_bk,        B_bsbuf_circ ;restore

        SHRU    .S2     B_t13,      27,         B_t14       ;[14,1]
||[ A_test2]LDW .D2T2   *B_bsbuf_circ++,        B_word2     ;[14,1]
||      SHRU    .S1     A_t1h:A_t1l,            A_const36,  A_t4h:A_t4l ;[ 4,2]
||      SUB     .L1     A_len_tbl_adr,          A_t2,       A_t3;[ 4,2]

        ADD     .L2     B_t14,      B_t12,      B_t15       ;[15,1]
||      SUB     .S2X    B_const32,  A_bptr,     B_bptr_cmpl ;[15,1]
||[ A_test2]MV  .L1X    B_word2,    A_word1                 ;[15,1]
|| [A_test2]MV  .S1     A_word1,    A_word1_rw              ; RWD
||      LDBU    .D1T1   *A_t3[A_t4l],           A_len       ;[ 5,2]
||      ZERO    .D2     B_tm_neg                            ;table mod

        ADD     .L2     B_t15,      B_t15,      B_t16       ;[16,1]
||[!A_tm]SUB    .D1     A_len,      24,         A_test1     ;[16,1]
||[A_tm]ZERO    .L1     A_test1                             ;table mod
||      SHRU    .S1     A_top1,     A_len_c,    A_t7        ;[16,1]
||[A_tm]EXTU    .S2     B_top0_bk,  1, 31,      B_tm_neg    ;table mod

  [ A_test1]LDB .D2T2   *B_rld_table_adr[B_t16],B_level     ;[17,1]
||      ADD     .D1     A_ptop0l,   A_t7,       A_top0l     ;[17,1]
||      ADD     .L2     B_rld_table_adr,        1,          B_rld_table_adr_1;const
||[A_tm]MVK     .S2     1,          B_level                 ;table mod

  [ A_test1]LDB .D2T2   *B_rld_table_adr_1[B_t16],          B_run ;[18,1]
||[!A_tm]EXT    .S2     B_top0_bk,  12, 20,     B_level     ;[18,1]
||      SHRU    .S1     A_top0h:A_top0l,        8,          A_empty:A_top0_bk;[ 8,2]
||[B_tm_neg]NEG .L2     B_level,    B_level                 ;table mod

        SHRU    .S2     B_word2,    B_bptr_cmpl,B_t9        ;[19,1]
||      SHL     .S1     A_word1,    A_bptr,     A_t8        ;[19,1]

        SUB     .S2X    A_len,      5,          B_rld_left  ;[10,2]
||      CMPLT   .L2X    A_len,      5,          B_test3     ;[10,2]
||      ADD     .L1     A_bptr,     A_len,      A_bptr1     ;[10,2]
||      SHL     .S1     A_top0h:A_top0l,        A_len,      A_ptop0h:A_ptop0l;[10,2]
||      SUB     .D1     A_const32,  A_len,      A_len_c     ;[10,2]

  [!A_tm]EXTU   .S2     B_top0_bk,  6, 26,      B_run       ;[21,1]
||[A_tm] ZERO   .D2     B_run                               ;table mod
||[B_test3]MPY  .M2     B_rld_left, 0,          B_rld_left  ;[11,2]
||      MV      .L2X    A_top0_bk,  B_top0_bk               ;[11,2]
||      AND     .S1     A_const31,  A_bptr1,    A_bptr      ;[11,2]
||      MV      .D1     A_ptop0h,   A_top0h                 ;[11,2]
||      NORM    .L1     A_ptop0h:A_ptop0l,      A_nrm       ;[ 1,3]

        MPY     .M2     B_level,    2,          B_level2    ;[22,1]
||      CMPGT   .L1     A_bptr1,    A_const31,  A_test2     ;[12,2]
||      MPY     .M1     A_nrm,      -16,        A_t2        ;[ 2,3]
||      SHL     .S1     A_ptop0h:A_ptop0l,      A_nrm,      A_t1h:A_t1l ;[ 2,3]
||      LDW     .D1T2   *+A_Mpeg2v[ZPTR_M2OFF], B_Zptr      ;reset
||      MVK     .S2     63,         B_const63               ;const

        LDH     .D2T1   *++B_Wptr[B_run],       A_W         ;[23,1]
||      CMPLT   .L1X    B_level,    0,          A_neg       ;[23,1]
||      SHL     .S2     B_top0_bk,  B_rld_left, B_t13       ;[13,2]
||      MPY     .M2X    B_const32,  A_len,      B_t12       ;[13,2]

        ADD     .L2     B_Wptr,     2,          B_Wptr      ;[24,1]
||      ADD     .D1X    A_t8,       B_t9,       A_top1      ;[24,1]
||      SHRU    .S2     B_t13,      27,         B_t14       ;[14,2]
||[ A_test2]LDW .D2T2   *B_bsbuf_circ++,        B_word2     ;[14,2]
||      SHRU    .S1     A_t1h:A_t1l,            A_const36,  A_t4h:A_t4l;[ 4,3]
||      SUB     .L1     A_len_tbl_adr,          A_t2,       A_t3;[ 4,3]

  [ A_neg]SUB   .D2     B_level2,   1,          B_level3    ;[25,1]
||      ADD     .L2     B_t14,      B_t12,      B_t15       ;[15,2]
||      SUB     .S2X    B_const32,  A_bptr,     B_bptr_cmpl ;[15,2]
||[ A_test2]MV  .L1X    B_word2,    A_word1                 ;[15,2]
|| [A_test2]MV  .S1     A_word1,    A_word1_rw              ; RWD
||      LDBU    .D1T1   *A_t3[A_t4l],           A_len       ;[ 5,3]

  [!A_neg]ADD   .L2     B_level2,   1,          B_level3    ;[26,1]
||      ADD     .S2     B_t15,      B_t15,      B_t16       ;[16,2]
||      SUB     .D1     A_len,      24,         A_test1     ;[16,2]
||      SHRU    .S1     A_top1,     A_len_c,    A_t7        ;[16,2]

        CMPGT   .L2     B_run,      B_const63,  B_eob       ;[27,1]
||[ A_test1]LDB .D2T2   *B_rld_table_adr[B_t16],B_level     ;[17,2]
||      ADD     .D1     A_ptop0l,   A_t7,       A_top0l     ;[17,2]
||      MV      .L1X    B_outi,     A_outi                  ; 12Q4

* =========================== PIPE LOOP KERNEL ============================ *
; Kernel: ten execute packets per pass.  Per decoded symbol it computes
;   qw = qscl * W;  level3 = 2*level + 1 (or - 1 when level < 0);
;   level4 = qw * level3 (+31 when negative);
;   level_f = (SSHL(level4, 15) >> A_constSHR) & B_constFFF0
; and stores level_f at A_outi[A_cnum], where A_cnum is the byte loaded
; through B_Zptr, accumulating level_f into B_sum.
; B_eob is set when run > 63 or when Wptr has moved past Wptr_end.  While
; B_eob is clear the "preserve" lines refresh the *_bk copies of the
; bitstream state; once it is set the "mismatch" lines advance outi by
; 63 + 63 bytes and fetch the last coefficient of the block.
        .asg            A2,         A2_top0l
loop:
        MPY     .M1     A_qscl,     A_W,        A_qw        ;[28,1]
||[!B_eob]CMPGT .L2     B_Wptr,     B_Wptr_end, B_eob       ;[28,1]
||[A_test1]LDB  .D2T2   *B_rld_table_adr_1[B_t16],          B_run ;[18,2]
||      EXT     .S2     B_top0_bk,  12, 20,     B_level     ;[18,2]
||      SHRU    .S1     A_top0h:A_top0l,        8,          A_empty:A_top0_bk;[ 8,3]
||[B_eob]MPY    .M2     0,          B_Wptr,     B_Wptr      ;err det

        LDB     .D2T1   *++B_Zptr[B_run],       A_cnum      ;[29,1]
||      SHRU    .S2     B_word2,    B_bptr_cmpl,B_t9        ;[19,2]
||      SHL     .S1     A_word1,    A_bptr,     A_t8        ;[19,2]
||[!B_eob]MV    .L1     A_top0h:A_top0l,        A_top0h_bk:A_top0l_bk ;preserve
||[B_eob]ADD    .L2     B_outi,     B_const63,  B_outi      ;mismatch
||[!B_eob]MPY   .M2X    1,          A_bptr,     B_bptr_bk   ;preserve

        ADD     .D2     B_Zptr,     1,          B_Zptr      ;[30,1]
||      MPY     .M1X    A_qw,       B_level3,   A_level4    ;[30,1]
||      SUB     .S2X    A_len,      5,          B_rld_left  ;[10,3]
||      CMPLT   .L2X    A_len,      5,          B_test3     ;[10,3]
||      ADD     .L1     A_bptr,     A_len,      A_bptr1     ;[10,3]
||      SHL     .S1     A_top0h:A_top0l,        A_len,      A_ptop0h:A_ptop0l;[10,3]
||      SUB     .D1     A_const32,  A_len,      A_len_c     ;[10,3]
||[B_eob]MPY    .M2     1,          B_run,      B_run_bk    ;preserve

        EXTU    .S2     B_top0_bk,  6, 26,      B_run       ;[21,2]
||[B_test3]MPY  .M2     B_rld_left, 0,          B_rld_left  ;[11,3]
||      MV      .L2X    A_top0_bk,  B_top0_bk               ;[11,3]
||      AND     .S1     A_const31,  A_bptr1,    A_bptr      ;[11,3]
||      MV      .D1     A_ptop0h,   A_top0h                 ;[11,3]
||      NORM    .L1     A_ptop0h:A_ptop0l,      A_nrm       ;[ 1,4]
||[B_eob]ADD    .D2     B_outi,     B_const63,  B_outi      ;mismatch

  [!B_eob]B     .S2     loop                                ;[32,1]
||[ A_neg]ADD   .D1     A_level4,   A_const31,  A_level4    ;[32,1]
||      MPY     .M2     B_level,    2,          B_level2    ;[22,2]
||      CMPGT   .L1     A_bptr1,    A_const31,  A_test2     ;[12,3]
||      MPY     .M1     A_nrm,      -16,        A_t2        ;[ 2,4]
||      SHL     .S1     A_ptop0h:A_ptop0l,      A_nrm,      A_t1h:A_t1l ;[ 2,4]
||[!B_eob]MV    .L2     B_bsbuf_circ,           B_bsbuf_circ_bk ;preserve
||[B_eob]LDH    .D2T1   *B_outi,    A_last_coeff            ;mismatch

        SSHL    .S1     A_level4,   15,         A_level5    ;[33,1]
||[!B_eob]LDH   .D2T1   *++B_Wptr[B_run],       A_W         ;[23,2]
||      CMPLT   .L1X    B_level,    0,          A_neg       ;[23,2]
||      SHL     .S2     B_top0_bk,  B_rld_left, B_t13       ;[13,3]
||      MPY     .M2X    B_const32,  A_len,      B_t12       ;[13,3]
||[!B_eob]MV    .L2     B_word2,    B_word2_bk              ;preserve
||[!B_eob]MV    .D1     A_word1,    A_word1_bk              ;preserve
||[!B_eob]MVD   .M1     A_word1_rw, A_word1_rw_bk           ;preserve

  [!B_eob]ADD   .L2     B_Wptr,     2,          B_Wptr      ;[24,2]
||[!B_eob]ADD   .D1X    A_t8,       B_t9,       A_top1      ;[24,2]
||      SHRU    .S2     B_t13,      27,         B_t14       ;[14,3]
||[ A_test2]LDW .D2T2   *B_bsbuf_circ++,        B_word2     ;[14,3]
||      SHRU    .S1     A_t1h:A_t1l,            A_const36,  A_t4h:A_t4l ;[ 4,4]
||      SUB     .L1     A_len_tbl_adr,          A_t2,       A_t3;[ 4,4]

        SHR     .S1     A_level5,   A_constSHR, A_level_f   ;[35,1] 12Q4
||[ A_neg]SUB   .D2     B_level2,   1,          B_level3    ;[25,2]
||      ADD     .L2     B_t14,      B_t12,      B_t15       ;[15,3]
||      SUB     .S2X    B_const32,  A_bptr,     B_bptr_cmpl ;[15,3]
||[ A_test2]MV  .L1X    B_word2,    A_word1                 ;[15,3]
|| [A_test2]MVD .M1     A_word1,    A_word1_rw              ; RWD
||      LDBU    .D1T1   *A_t3[A_t4l],           A_len       ;[ 5,4]

  [!A_neg]ADD   .L2     B_level2,   1,          B_level3    ;[26,2]
||      ADD     .S2     B_t15,      B_t15,      B_t16       ;[16,3]
||      SUB     .D1     A_len,      24,         A_test1     ;[16,3]
||      SHRU    .S1     A_top1,     A_len_c,    A_t7        ;[16,3]
||[!B_eob]AND   .L1X    B_constFFF0,A_level_f,  A_level_f   ; 12Q4

  [!B_eob]STH   .D1T1   A_level_f,  *+A_outi[A_cnum]        ;[36,1] BC
||[!B_eob]ADD   .S2X    B_sum,      A_level_f,  B_sum       ;[37,1]
||      CMPGT   .L2     B_run,      B_const63,  B_eob       ;[27,2]
||[ A_test1]LDB .D2T2   *B_rld_table_adr[B_t16],B_level     ;[17,3]
||[!B_eob]ADD   .L1     A_ptop0l,   A_t7,       A2_top0l    ;[17,3]
||[B_eob] XOR   .S1     A_const16,  A_last_coeff,           A_last_coeff ;mismatch 12Q4
||[B_eob] MVD   .M1     A_word1_rw_bk,          A_word1_rw  ; RWD

* =========================== PIPE LOOP EPILOG ============================ *
* ========================================================================= *
; live-out: top0h:top0l, top1, word1, word2, bsbuf_circ, run, Wptr, Wptr_end,
; sum, bptr

        .asg            B0,         B_err           ; same reg as B_eob
        .asg            A29,        A_Mpeg2v
        .asg            B31,        B_bsbuf
        .asg            B3,         B_ret           ; Return address
        .asg            B15,        B_SP            ; Stack pointer, B datapath
        .asg            B1,         B_const65

; End of block.  A2_odd = sum & A_const16; when it is zero the (already
; toggled) last coefficient is stored at *B_outi, then outi moves on by 2
; bytes.  B_err is set when Wptr ended beyond Wptr_end or when the preserved
; run is greater than 65.  If blocks remain (B_cnt != 0) control returns to
; block_loop; if B_err is set the later branch to exit overrides it.
mismatch:
  [B_cnt] B     .S1     block_loop                          ; -- BRANCH --
||      MVK     .S2     65,         B_const65               ; invalid VLC
||      CMPGTU  .L2     B_Wptr,     B_Wptr_end, B_err       ; overrun
||      AND     .L1X    A_const16,  B_sum,      A2_odd      ; mismatch 12Q4

; NOTE(review): B_bsbuf and B_run_bk are both B31.  When the branch to exit
; is taken by way of block_loop (fault with B_cnt != 0), the ZERO of
; B_run_bk in block_loop's first packet appears to execute after this load
; has completed, which would leave B_bsbuf = 0 for the next_wptr computation
; at exit -- confirm.
  [!B_err]CMPGT .L2     B_run_bk,   B_const65,  B_err       ; invalid VLC
||      ADD     .S2     B_outi,     2,          B_outi
||      LDW     .D1T2   *+A_Mpeg2v[BSBUF_M2OFF],            B_bsbuf ; exit
||[!A2_odd]STH  .D2T1   A_last_coeff,           *B_outi     ; mismatch

  [B_err]B      .S2     exit                                ; -- BRANCH --
||      LDW     .D2T2   *+B_SP[2],  B_ret                   ; exit
||      MV      .L2     B_bsbuf_circ_bk,        B_bsbuf_circ ; AMR arg

        NOP             3                                   ; MERGE
        ; branch occurs to block_loop
        ; branch occurs to exit occurs after 2 cycles in block_loop
        ; (preserve B0_err for exit)

        ; this will execute only if B_cnt was 0
        NOP             2                                   ; MERGE

* =================================== EXIT =============================== *
        .asg            B26,        B_csr           ; CSR value to restore
        .asg            B22,        B_byte_diff
        .asg            B29,        B_next_wptr
        .asg            B1,         B_lz
        .asg            B27,        B_amr_config
        .asg            B4,         B_constBUFMASK

; Write the bitstream state back to *Mpeg2v and undo the 8-bit advance made
; during setup:
;   next_wptr = (bsbuf_circ - bsbuf) >> 2
;   top1      = (top1 >> 8) + (top0l_bk << 24)
;   top0      = low word of (top0h_bk:top0l_bk >> 8)
;   bptr      = bptr_bk - 8
; If bptr went negative (B_lz): bptr += 32, word1 is taken from A_word1_rw,
; word2 from the previous word1, bsbuf_circ steps back one word and
; next_wptr is recomputed and stored a second time.
; Saved registers A10..A14, the return address and CSR are reloaded from the
; stack in the free .D2 slots.
exit:
        SUB     .L2     B_bsbuf_circ,           B_bsbuf,    B_byte_diff
||      SHRU    .S1     A_top1,     8,          A_t2
||      SUB     .S2     B_bptr_bk,  8,          B_bptr_bk
||      LDW     .D2T1   *+B_SP[6],  A14                     ; MERGE

        SHR     .S2     B_byte_diff,            2,          B_next_wptr
||      SHL     .S1     A_top0l_bk, 24,         A_t3
||      CMPLT   .L2     B_bptr_bk,  0,          B_lz
||      LDW     .D2T1   *+B_SP[3],  A11

; The fault word of *Mpeg2v receives B_err.
        ADD     .L1     A_t2,       A_t3,       A_top1
||      SHRU    .S1     A_top0h_bk:A_top0l_bk,  8,          A_empty:A_top0_bk
||      STW     .D1T2   B_err,      *+A_Mpeg2v[FAULT_M2OFF]
||[B_lz]MVD     .M1     A_word1_rw, A_word1_bk              ; RWD
||[B_lz]MV      .L2X    A_word1_bk, B_word2_bk
||      LDW     .D2T1   *+B_SP[4],  A12

        LDW     .D2T2   *+B_SP[1],  B_csr                   ; Get CSR's value
||      STW     .D1T1   A_top1,     *+A_Mpeg2v[TOP1_M2OFF]
||[B_lz]ADD     .L2     B_bptr_bk,  A_const32,  B_bptr_bk

        STW     .D1T2   B_bptr_bk,  *+A_Mpeg2v[BPTR_M2OFF]
||      LDW     .D2T1   *+B_SP[5],  A13

; The remaining packets execute in the delay slots of the return branch.
        RET     .S2     B_ret                               ; Return to caller
||      STW     .D1T1   A_top0_bk,  *+A_Mpeg2v[TOP0_M2OFF]
||[B_lz]SUBAW   .D2     B_bsbuf_circ,           1,          B_bsbuf_circ ; AMR arg

; Release the 9-word frame and reload A10 with one pre-incremented load.
        STW     .D1T2   B_next_wptr,            *+A_Mpeg2v[NEXTWPTR_M2OFF]
||      ZERO    .L2     B_amr_config
||      LDW     .D2T1   *++B_SP[9], A10                     ; MERGE, 2 W-mat
||[B_lz]SUB     .S2     B_bsbuf_circ,           B_bsbuf,    B_byte_diff ; AMR arg

        STW     .D1T1   A_word1_bk, *+A_Mpeg2v[WORD1_M2OFF]
||[B_lz]SHR     .S2     B_byte_diff,            2,          B_next_wptr ; AMR arg

        STW     .D1T2   B_word2_bk, *+A_Mpeg2v[WORD2_M2OFF]
        ; ===== Interruptibility state restored here =====

; AMR is written with zero; CSR gets the value saved at entry.
        STW     .D1T2   B_next_wptr,            *+A_Mpeg2v[NEXTWPTR_M2OFF]
||      MVC     .S2     B_amr_config,           AMR

        MVC     .S2     B_csr,      CSR                     ; Restore CSR
        ; Branch occurs

* ========================================================================= *
*   End of file:  img_mpeg2_vld_inter.asm                                   *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2003 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *