You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
839 lines
47 KiB
839 lines
47 KiB
;* ======================================================================== *;
|
|
;* TEXAS INSTRUMENTS, INC. *;
|
|
;* *;
|
|
;* IMGLIB DSP Image/Video Processing Library *;
|
|
;* *;
|
|
;* Release: Revision 1.04b *;
|
|
;* CVS Revision: 1.11 Sun Sep 29 03:32:26 2002 (UTC) *;
|
|
;* Snapshot date: 23-Oct-2003 *;
|
|
;* *;
|
|
;* This library contains proprietary intellectual property of Texas *;
|
|
;* Instruments, Inc. The library and its source code are protected by *;
|
|
;* various copyrights, and portions may also be protected by patents or *;
|
|
;* other legal protections. *;
|
|
;* *;
|
|
;* This software is licensed for use with Texas Instruments TMS320 *;
|
|
;* family DSPs. This license was provided to you prior to installing *;
|
|
;* the software. You may review this license by consulting the file *;
|
|
;* TI_license.PDF which accompanies the files in this library. *;
|
|
;* ------------------------------------------------------------------------ *;
|
|
;* Copyright (C) 2003 Texas Instruments, Incorporated. *;
|
|
;* All Rights Reserved. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
;* ======================================================================== *;
|
|
;* Assembler compatibility shim for assembling 4.30 and later code on *;
|
|
;* tools prior to 4.30. *;
|
|
;* ======================================================================== *;
|
|
|
|
; Record the assembler version in the substitution symbol $asmver.
; Tools that do not define .ASSEMBLER_VERSION are treated as version 0.
.if $isdefed(".ASSEMBLER_VERSION")
|
|
.asg .ASSEMBLER_VERSION, $asmver
|
|
.else
|
|
.asg 0, $asmver
|
|
.endif
|
|
|
|
; For tools older than 4.30, substitute the newer call/return mnemonics
; with plain branch mnemonics so that code written for 4.30+ (such as the
; RET used in the exit sequence of this file) still assembles.
.if ($asmver < 430)
|
|
|
|
.asg B, CALL ; Function Call
|
|
.asg B, RET ; Return from a Function
|
|
.asg B, CALLRET ; Function call with Call / Ret chaining.
|
|
|
|
; The BNOP-based variants are only substituted when .TMS320C6400 is set.
.if .TMS320C6400
|
|
.asg BNOP, CALLNOP ; C64x BNOP as a Fn. Call
|
|
.asg BNOP, RETNOP ; C64x BNOP as a Fn. Return
|
|
.asg BNOP, CRNOP ; C64x Fn. call w/ Call/Ret chaining via BNOP.
|
|
.endif
|
|
|
|
; .asmfunc / .endasmfunc are substituted with empty text on old tools.
.asg , .asmfunc ; .func equivalent for hand-assembly code
|
|
.asg , .endasmfunc ; .endfunc equivalent for hand-assembly code
|
|
|
|
.endif
|
|
|
|
;* ======================================================================== *;
|
|
;* End of assembler compatibility shim. *;
|
|
;* ======================================================================== *;
|
|
|
|
|
|
* ========================================================================= *
|
|
* TEXAS INSTRUMENTS, INC. *
|
|
* *
|
|
* NAME *
|
|
* IMG_mpeg2_vld_inter *
|
|
* *
|
|
* PLATFORM *
|
|
* C6400 *
|
|
* *
|
|
* REVISION DATE *
|
|
* 23-May-2002 *
|
|
* *
|
|
* DESCRIPTION *
|
|
* This routine takes a bitstream of an MPEG-2 non-intra coded *
|
|
* macroblock and returns the decoded IDCT coefficients. The routine *
|
|
* is implemented as specified in the MPEG-2 standard text (ISO/IEC *
|
|
* 13818-2). The routine checks the coded block pattern (cbp), *
|
|
* performs coefficient decoding, including variable length decode, *
|
|
* run-length expansion, inverse zigzag, dequantization, saturation *
|
|
* and mismatch control. *
|
|
* *
|
|
* USAGE *
|
|
* This routine is C callable, and has the following C prototype: *
|
|
* *
|
|
* void IMG_mpeg2_vld_inter *
|
|
* ( *
|
|
* const short *restrict Wptr, *
|
|
* short *restrict outi, *
|
|
* IMG_mpeg2_vld *restrict Mpeg2v, *
|
|
* int mode_12Q4, *
|
|
* int num_blocks, *
|
|
* int bsbuf_words *
|
|
* ); *
|
|
* *
|
|
* Wptr: Pointer to array that contains quantization matrix. The *
|
|
* elements of the quantization matrix in *Wptr must be *
|
|
* ordered according to the scan pattern used (zigzag or *
|
|
* alternate scan). Video format 4:2:0 requires one *
|
|
* quantization matrix (64 array elements). For formats *
|
|
* 4:2:2 and 4:4:4 two quantization matrices (one for luma *
|
|
* and one for chroma) must be specified in the array (128 *
|
|
* array elements). *
|
|
* *
|
|
* outi: Pointer to the IDCT coefficients output array *
|
|
* (6*64 elements), elements must be set to zero prior to *
|
|
* function call. *
|
|
* *
|
|
* Mpeg2v: Pointer to the context object containing the coding *
|
|
* parameters of the MB to be decoded and the current state *
|
|
* of the bitstream buffer. The structure is described *
|
|
* below. *
|
|
* *
|
|
* mode_12Q4: 0: Coefficients are returned in normal 16-bit integer *
|
|
* format. *
|
|
* Otherwise: Coefficients are returned in 12Q4 format *
|
|
* (normal 16-bit integer format left shifted by 4). This *
|
|
* mode is useful for directly passing the coefficients *
|
|
* into the IMG_idct_8x8 routine. *
|
|
* *
|
|
* num_blocks: Number of blocks that the MB contains. Valid values are *
|
|
* 6 for 4:2:0, 8 for 4:2:2 and 12 for 4:4:4 format. *
|
|
* *
|
|
* bsbuf_words: Size of bitstream buffer in words. Must be a power of 2. *
|
|
* Bitstream buffer must be aligned at an address boundary *
|
|
* equal to its size in bytes (bitstream buffer is *
|
|
* addressed circularly by this routine.) *
|
|
* *
|
|
* The structure Mpeg2v is defined as follows: *
|
|
* *
|
|
*C #ifndef IMG_MPEG2_VLD_STRUCT_ *
|
|
*C #define IMG_MPEG2_VLD_STRUCT_ 1 *
|
|
*C *
|
|
*C typedef struct { *
|
|
*C unsigned int *bsbuf; // pointer to bitstream buffer *
|
|
*C unsigned int next_wptr; // next word to read from buffer *
|
|
*C unsigned int bptr; // bit position within word *
|
|
*C unsigned int word1; // word aligned buffer *
|
|
*C unsigned int word2; // word aligned buffer *
|
|
*C unsigned int top0; // top 32 bits of bitstream *
|
|
*C unsigned int top1; // next 32 bits of bitstream *
|
|
*C const unsigned char *scan; // inverse zigzag scan matrix *
|
|
*C unsigned int intravlc; // intra_vlc_format *
|
|
*C unsigned int quant_scale; // quant_scale *
|
|
*C unsigned int dc_prec; // intra_dc_precision *
|
|
*C unsigned int cbp; // coded_block_pattern *
|
|
*C unsigned int fault; // fault condition (returned) *
|
|
*C unsigned int reserved; // reserved *
|
|
*C } IMG_mpeg2_vld; *
|
|
*C *
|
|
*C #endif *
|
|
* *
|
|
* The Mpeg2v variables should have a fixed layout since they are *
|
|
* accessed by this routine. If the layout is changed, the *
|
|
* corresponding changes have to be made in the assembly code too. *
|
|
* *
|
|
* The routine sets the fault flag Mpeg2v.fault to 1 if an invalid *
|
|
* VLC code was encountered or the total run went beyond 63. In *
|
|
* these cases the decoder has to resynchronize. *
|
|
* *
|
|
* The required lookup tables for this routine are provided in *
|
|
* IMGLIB and are linked in automatically when linking against *
|
|
* IMGLIB. *
|
|
* *
|
|
* Before calling the routine the bitstream variables in Mpeg2v *
|
|
* have to be initialized. If bsbuf is a circular buffer and bsptr *
|
|
* contains the number of bits in the buffer that already have *
|
|
* been consumed, then next_wptr, bptr, word1, word2, top0 and *
|
|
* top1 are initialized as follows: *
|
|
* *
|
|
* 1. next_wptr: bsptr may not be a multiple of 32, therefore obtain *
|
|
* the next lower multiple of 32. *
|
|
* *
|
|
* next_wptr = (bsptr >> 5); *
|
|
* *
|
|
* 2. bptr: bptr is the bit pointer which points to the current *
|
|
* bit WITHIN the word pointed to by next_wptr. *
|
|
* *
|
|
* bptr = bsptr & 31; *
|
|
* bptr_cmpl = 32 - bptr; *
|
|
* *
|
|
* 3. word1 and word2: read next 3 words from the bitstream buffer *
|
|
* (word0 is a temporary variable). bsbuf_words is the size of the *
|
|
* bitstream buffer in words. *
|
|
* *
|
|
* word0 = bsbuf[next_wptr]; *
|
|
* next_wptr = (next_wptr + 1) & (bsbuf_words-1); *
|
|
* *
|
|
* word1 = bsbuf[next_wptr]; *
|
|
* next_wptr = (next_wptr + 1) & (bsbuf_words-1); *
|
|
* *
|
|
* word2 = bsbuf[next_wptr]; *
|
|
* next_wptr = (next_wptr + 1) & (bsbuf_words-1); *
|
|
* *
|
|
* 4. top0 and top1: Shift words word0, word1, word2 by bptr to the *
|
|
* left so that the current bit becomes the MSB in word0. word0 can *
|
|
* simply be shifted by bptr; the then empty LSBs of word0 have to be *
|
|
* filled with the MSBs of word1. To do that the required MSBs are *
|
|
* brought into the position of empty LSBs of word0 by shifting word1 *
|
|
* to the right by (32-bptr). The result is then copied into word0 by *
|
|
* an addition. Rather than overwriting word0, top0 is used to hold *
|
|
* the new bit aligned word. The same procedure is used to obtain *
|
|
* top1. top0 and top1 contain the next 64 bits of the bitstream. *
|
|
* *
|
|
* s1 = word0 << bptr; *
|
|
* s2 = word1 >> bptr_cmpl; /* unsigned right-shift */ *
|
|
* top0 = s1 + s2; *
|
|
* *
|
|
* s3 = word1 << bptr; *
|
|
* s4 = word2 >> bptr_cmpl; /* unsigned right-shift */ *
|
|
* top1 = s3 + s4; *
|
|
* *
|
|
* Note that the routine returns the updated state of the bitstream *
|
|
* buffer variables, top0, top1, word1, word2, bptr and next_wptr. If *
|
|
* all other functions which access the bitstream in a decoder system *
|
|
* maintain the buffer variables in the same way, then the above *
|
|
* initialization procedure has to be performed only once at the *
|
|
* beginning. *
|
|
* *
|
|
* *
|
|
* TECHNIQUES *
|
|
* The instruction NORM is used to detect the number of leading zeros *
|
|
* or ones in a code word. This value together with additional bits *
|
|
* extracted from the codeword is then used as an index into look-up *
|
|
* tables to determine the length, run, level and sign. Escape code *
|
|
* sequences are directly extracted from the code word. *
|
|
* *
|
|
* ASSUMPTIONS *
|
|
* The bitstream must be stored in memory in 32-bit words which are *
|
|
* in little endian byte order. *
|
|
* *
|
|
* Wptr is allowed to overrun once (to detect total run overrun), so *
|
|
* maximum overrun that can occur is 66 (Error mark). Therefore, *
|
|
* in memory 66+1 halfwords behind the weighting matrix should be *
|
|
* valid (e.g. peripherals). No memory is overwritten, *
|
|
* only loads occur. *
|
|
* *
|
|
* Note that the AMR register is set to zero on exit. *
|
|
* *
|
|
* NOTES *
|
|
* This code is little ENDIAN. *
|
|
* This code is interrupt-tolerant but not interruptible. *
|
|
* *
|
|
* MEMORY NOTES *
|
|
* No bank conflicts *
|
|
* *
|
|
* CYCLES *
|
|
* 10 * (S - CB) + 37 * CB + 15 * NCB + 34 *
|
|
* where S: Number of symbols in MB, CB: Number of coded blocks, *
|
|
* NCB: Number of not-coded blocks, and CB+NCB=6 *
|
|
* *
|
|
* CODE SIZE *
|
|
* 1248 bytes *
|
|
* *
|
|
* MEMORY REQUIREMENTS *
|
|
* 1792 bytes for the lookup tables *
|
|
* (can be shared with mpeg2_vld_intra) *
|
|
* *
|
|
* *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|
|
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ====================== *
|
|
; Symbolic names for the physical registers used by the decode loop.
; .asg is a text substitution, so several names below resolve to the SAME
; physical register (for example A1 is A_bptr1, A_qw, A_test1 and A_test2,
; and B9 is B_level, B_t12, B_t13, B_t15 and B_t16). A write through one
; name therefore overwrites every other name that shares the register.
; Further .asg lines later in the file add more aliases (and rebind some
; registers) for the setup, block-loop and exit code.
.asg A0, A_neg
|
|
.asg A1, A_bptr1
|
|
.asg A1, A_qw
|
|
.asg A1, A_test1
|
|
.asg A1, A_test2
|
|
.asg A16, A_len_c
|
|
.asg A17, A_bptr
|
|
.asg A18, A_len_tbl_adr
|
|
.asg A19, A_const31
|
|
.asg A2, A_top0l
|
|
.asg A20, A_const32
|
|
.asg A22, A_const36
|
|
.asg A23, A_qscl
|
|
.asg A24, A_level4
|
|
.asg A24, A_t2
|
|
.asg A24, A_top0_bk
|
|
.asg A25, A_empty
|
|
.asg A25, A_len
|
|
.asg A26, A_nrm
|
|
.asg A26, A_t1l
|
|
.asg A26, A_t4l
|
|
.asg A27, A_t1h
|
|
.asg A27, A_t4h
|
|
.asg A3, A_top0h
|
|
.asg A4, A_ptop0l
|
|
.asg A5, A_level_f
|
|
.asg A5, A_level5
|
|
.asg A5, A_ptop0h
|
|
.asg A6, A_W
|
|
.asg A7, A_top1
|
|
.asg A8, A_word1
|
|
.asg A9, A_t3
|
|
.asg A9, A_t7
|
|
.asg A9, A_t8
|
|
.asg B0, B_eob
|
|
.asg B1, B_run
|
|
.asg B1, B_test3
|
|
.asg B1, B_12Q4 ; 12Q4 MERGE
|
|
.asg B16, B_level2
|
|
.asg B16, B_rld_left
|
|
.asg B17, B_bptr_cmpl
|
|
.asg B17, B_t14
|
|
.asg B17, B_t9
|
|
.asg B19, B_word2
|
|
.asg B20, B_Wptr_end
|
|
.asg B21, B_Zptr
|
|
.asg B22, B_outi
|
|
.asg B23, B_sum
|
|
.asg B24, B_top0_bk
|
|
.asg B26, B_level3
|
|
.asg B3, B_const63
|
|
.asg B4, B_rld_table_adr
|
|
.asg B5, B_const32
|
|
.asg B6, B_rld_table_adr_1
|
|
.asg B7, B_bsbuf_circ
|
|
.asg B8, B_Wptr
|
|
.asg B9, B_level
|
|
.asg B9, B_t12
|
|
.asg B9, B_t13
|
|
.asg B9, B_t15
|
|
.asg B9, B_t16
|
|
; Registers added for the merged 12Q4 / normal output modes.
.asg A11, A_outi ; 12Q4
|
|
.asg A12, A_cnum ; 12Q4
|
|
.asg A13, A_const16 ; 12Q4
|
|
.asg B18, B_constFFF0 ; 12Q4
|
|
|
|
* ========================================================================= *
|
|
|
|
; Lookup tables referenced by this routine; they are defined elsewhere
; (the file header states they are provided by IMGLIB).
.global _IMG_len_tbl0
|
|
.global _IMG_rld_table0
|
|
|
|
; Mpeg2v structure:
; Field positions within IMG_mpeg2_vld, counted in 32-bit words (they are
; used as the index of word-sized LDW/STW accesses). The values follow the
; field order of the C typedef in the file header. Indices 0x8 (intravlc)
; and 0xA (dc_prec) have no symbol here and are not accessed in this file.
|
|
BSBUF_M2OFF .set 0x0 ; bsbuf
|
|
NEXTWPTR_M2OFF .set 0x1 ; next_wptr
|
|
BPTR_M2OFF .set 0x2 ; bptr
|
|
WORD1_M2OFF .set 0x3 ; word1
|
|
WORD2_M2OFF .set 0x4 ; word2
|
|
TOP0_M2OFF .set 0x5 ; top0
|
|
TOP1_M2OFF .set 0x6 ; top1
|
|
ZPTR_M2OFF .set 0x7 ; scan (inverse scan matrix pointer)
|
|
QSCL_M2OFF .set 0x9 ; quant_scale
|
|
CBP_M2OFF .set 0xB ; cbp
|
|
FAULT_M2OFF .set 0xC ; fault (written on exit)
|
|
|
|
.sect ".text:_mpeg2_vld_inter"
|
|
.global _IMG_mpeg2_vld_inter
|
|
; Entry point of IMG_mpeg2_vld_inter (C prototype in the file header).
; Incoming argument registers, as consumed by the moves below:
;   A4 = Wptr, B4 = outi, A6 = Mpeg2v, B6 = mode_12Q4,
;   A8 = num_blocks, B8 = bsbuf_words
_IMG_mpeg2_vld_inter:
|
|
; parameters: B_Wptr, B_outi, A_Mpeg2v, B_12Q4, A_num_blocks, B_bsbuf_words
|
|
; A4, B4, A6, B6, A8, B8
|
|
|
|
* ========================================================================= *
|
|
* Setup
|
|
* ========================================================================= *
|
|
; Stack frame: SP is moved down by 9 words. Word slots relative to the
; new SP, as written by this file:
;   [1] CSR   [2] return address (B3)   [3] A11   [4] A12   [5] A13
;   [6] A14   [7] Wptr (matrix base)    [8] num_blocks   [9] A10
; Slots [3]-[5], [7] and [8] are filled later, during the setup code.
.asg B15, B_SP ; Stack pointer, B datapath
|
|
.asg A16, A_SP ; Stack pointer, A datapath
|
|
.asg B0, B_csr ; CSR's value
|
|
.asg B1, B_no_gie ; CSR w/ GIE bit cleared
|
|
.asg B3, B_ret ; Return address
|
|
.asg A29, A_Mpeg2v
|
|
.asg B18, B_Mpeg2v
|
|
.asg B2, B_cnt
|
|
.asg A10, A_amr_arg ; AMR arg
|
|
.asg B9, B_amr_arg ; AMR arg
|
|
|
|
; Packet 1: save A10 at the old SP and allocate the frame (post-decrement
; by 9 words), read CSR, and copy the outi / Mpeg2v / bsbuf_words
; arguments into their working registers.
STW .D2T1 A10, *B_SP--[9] ; RWD, MERGE, 2 W-mat
|
|
|| MVC .S2 CSR, B_csr ; Get CSR's state
|
|
|| MV .L2 B4, B_outi
|
|
|| MV .L1 A6, A_Mpeg2v
|
|
|| MV .S1X B8, A_amr_arg ; AMR arg
|
|
|
|
; Packet 2: save CSR, prepare a copy with bit 0 (GIE) cleared, mirror SP
; into the A side, and start deriving the AMR argument with
; LMBD(1, bsbuf_words).
STW .D2T2 B_csr, *+B_SP[1] ; Save CSR
|
|
|| AND .L2 B_csr, -2, B_no_gie ; Clear GIE
|
|
|| MV .S2X A4, B_Wptr
|
|
|| MV .D1X B_SP, A_SP ; 12Q4 MERGE
|
|
|| LMBD .L1 1, A_amr_arg, A_amr_arg; AMR arg
|
|
|| MVK .S1 32, A_const32 ; AMR arg
|
|
|
|
; Packet 3: save the return address and A14, disable interrupts, and
; finish A_amr_arg = 32 - LMBD(1, bsbuf_words).
STW .D2T2 B_ret, *+B_SP[2] ; Save return addr.
|
|
|| STW .D1T1 A14, *+A_SP[6] ; MERGE
|
|
|| MV .L2X A6, B_Mpeg2v
|
|
|| MVC .S2 B_no_gie, CSR ; Disable ints.
|
|
|| SUB .L1 A_const32, A_amr_arg, A_amr_arg; AMR arg
|
|
; ===== Interrupts masked here =====
|
|
|
|
* ========================================================================= *
|
|
* Get bitstream info
|
|
* Setup circular bitstream buffer
|
|
* Load table addresses and constants
|
|
* Block loop setup
|
|
* ========================================================================= *
|
|
; Load the bitstream state and coding parameters from the Mpeg2v context,
; program AMR for circular addressing of the bitstream buffer, and set
; the mode-dependent shift constant.
; Note: B_const126 / B_const128 reuse B3; the return address held there
; was already stored to the stack during setup.
.asg B31, B_bsbuf
|
|
.asg B29, B_next_wptr
|
|
.asg B27, B_cbp
|
|
.asg B0, B0_amr_config
|
|
.asg A21, A_const1
|
|
.asg A9, A_tbs1
|
|
.asg A4, A_tbs2
|
|
.asg B17, B_tbs3
|
|
.asg B3, B_const126
|
|
.asg B3, B_const128
|
|
.asg A14, A_constSHR ; 12Q4 MERGE
|
|
|
|
LDW .D2T2 *+B_Mpeg2v[BSBUF_M2OFF], B_bsbuf
|
|
|| LDW .D1T1 *+A_Mpeg2v[TOP0_M2OFF], A_top0_bk
|
|
|| MVK .S2 128, B_const128
|
|
|| MV .L2 B6, B_12Q4 ; 12Q4 MERGE
|
|
|
|
; B_Wptr_end = Wptr + 128 bytes. A_constSHR is 20 in normal mode here and
; 16 in 12Q4 mode (set a few packets below). The AMR value starts as
; A_amr_arg shifted to bit 16.
LDW .D2T2 *+B_Mpeg2v[NEXTWPTR_M2OFF], B_next_wptr
|
|
|| LDW .D1T1 *+A_Mpeg2v[TOP1_M2OFF], A_top1
|
|
|| ADD .L2 B_Wptr, B_const128, B_Wptr_end
|
|
||[!B_12Q4]MVK .S1 20, A_constSHR ; non-12Q4 MERGE
|
|
|| SHL .S2X A_amr_arg, 16, B_amr_arg; AMR arg
|
|
|
|
; Save the matrix base address in stack slot [7], take num_blocks (A8) as
; the block counter, and set bit 14 of the AMR value.
; NOTE(review): per the C6000 AMR layout this should select circular
; addressing for B7 (B_bsbuf_circ) using the block size field at bit 16 --
; confirm against the CPU reference guide.
LDW .D1T1 *+A_Mpeg2v[BPTR_M2OFF], A_bptr
|
|
|| STW .D2T2 B_Wptr, *+B_SP[7] ; 2 W-mat
|
|
|| MV .L2X A8, B_cnt ; 2 W-mat
|
|
|| SET .S2 B_amr_arg, 14, 14, B_amr_arg ; AMR arg
|
|
|
|
LDW .D1T1 *+A_Mpeg2v[WORD1_M2OFF], A_word1
|
|
|| LDW .D2T2 *+B_Mpeg2v[WORD2_M2OFF], B_word2
|
|
|| MVC .S2 B_amr_arg, AMR ; AMR arg
|
|
|| MVK .S1 31, A_const31
|
|
|
|
LDW .D1T1 *+A_Mpeg2v[QSCL_M2OFF], A_qscl
|
|
|| LDW .D2T2 *+B_Mpeg2v[CBP_M2OFF], B_cbp
|
|
||[B_12Q4]MVK .S1 16, A_constSHR ; 12Q4 MERGE
|
|
; B_constFFF0 and B_Mpeg2v share the same register
|
|
|
|
* ========================================================================= *
|
|
* Setup bitstream pointers: top0h:top0l, top1 contain top bitstream
|
|
* ========================================================================= *
|
|
; Build the working bitstream window from the context values:
;   A_top0h_bk:A_top0l_bk = top0 extended by the upper 8 bits of top1
;                           (top0l = (top0 << 8) + (top1 >> 24),
;                            top0h = top0 >> 24)
;   bptr is advanced by 8 to account for those 8 extra bits, and top1 is
;   rebuilt from word1/word2 at the new bit position.
; The *_bk registers hold the state that is restored at the start of each
; block and written back to the context on exit. The remaining
; callee-saved registers A11-A13 are stored to the stack in the spare .D2
; slots, and the table addresses and mode constants are loaded.
.asg B25, B_word2_bk
|
|
.asg A28, A_word1_bk
|
|
.asg A10, A_word1_rw ; RWD
|
|
.asg A21, A_word1_rw_bk ; RWD
|
|
.asg A31, A_top0h_bk
|
|
.asg A30, A_top0l_bk
|
|
.asg B28, B_bptr_bk
|
|
.asg B30, B_bsbuf_circ_bk
|
|
|
|
; B_constFFF0 is loaded with 0xFFF0 in 12Q4 mode, 0xFFFF otherwise.
SHL .S1 A_top0_bk, 8, A_tbs1
|
|
|| STW .D2T1 A11, *+B_SP[3]
|
|
||[B_12Q4]MVKL .S2 0xFFF0, B_constFFF0 ; 12Q4 MERGE
|
|
|
|
SHRU .S1 A_top1, 24, A_tbs2
|
|
|| STW .D2T1 A12, *+B_SP[4]
|
|
||[!B_12Q4]MVKL .S2 0xFFFF, B_constFFF0 ; non-12Q4 MERGE
|
|
|
|
ADD .L1 A_tbs1, A_tbs2, A_top0l_bk
|
|
|| ADD .S1 A_bptr, 8, A_bptr1
|
|
|| STW .D2T1 A13, *+B_SP[5]
|
|
|
|
; A_test2 = (bptr + 8 > 31): the bit pointer crossed a word boundary.
; B_bsbuf_circ = &bsbuf[next_wptr].
CMPGT .L1 A_bptr1, A_const31, A_test2
|
|
|| AND .S1 A_bptr1, A_const31, A_bptr
|
|
|| MVK .S2 32, B_const32
|
|
|| ADDAW .D2 B_bsbuf, B_next_wptr,B_bsbuf_circ
|
|
|
|
; On a word crossing: keep the old word1 in A_word1_rw, shift word2 into
; word1, and fetch the next word2 through the circular pointer.
; NOTE(review): the SUB below reads A_bptr on .S2 without the X suffix
; that the equivalent kernel instructions use (.S2X) -- confirm the
; assembler accepts this form.
[A_test2]MV .S1 A_word1, A_word1_rw ; RWD
|
|
||[A_test2]MV .L1X B_word2, A_word1
|
|
||[A_test2]LDW .D2T2 *B_bsbuf_circ++, B_word2
|
|
|| SUB .S2 B_const32, A_bptr, B_bptr_cmpl
|
|
|
|
; Load the table addresses, keep the adjusted bit pointer, and store the
; block count (num_blocks) in stack slot [8].
MVKL .S1 _IMG_len_tbl0, A_len_tbl_adr
|
|
|| MVKL .S2 _IMG_rld_table0, B_rld_table_adr
|
|
|| MV .L2X A_bptr, B_bptr_bk
|
|
|| STW .D2T2 B_cnt, *+B_SP[8] ; 2 W-mat
|
|
|
|
MVKH .S1 _IMG_len_tbl0, A_len_tbl_adr
|
|
|| MVKH .S2 _IMG_rld_table0, B_rld_table_adr
|
|
|
|
; A_const16 = 16 in 12Q4 mode, 1 otherwise (used for mismatch control).
[B_12Q4]MVK .S1 16, A_const16 ; 12Q4
|
|
|
|
[!B_12Q4]MVK .S1 1, A_const16 ; non-12Q4
|
|
|
|
; top1 = (word1 << bptr) + (word2 >> (32 - bptr))
SHL .S1 A_word1, A_bptr, A_tbs1
|
|
|| SHRU .S2 B_word2, B_bptr_cmpl,B_tbs3
|
|
|
|
ADD .L1X A_tbs1, B_tbs3, A_top1
|
|
|| SHRU .S1 A_top0_bk, 24, A_top0h_bk
|
|
|| MV .D1 A_word1, A_word1_bk
|
|
|| MV .D2 B_word2, B_word2_bk
|
|
|| MV .L2 B_bsbuf_circ, B_bsbuf_circ_bk
|
|
|
|
|
|
; Start of one block. Decrements the block counter and tests bit B_cnt of
; the coded block pattern. For a block that is not coded the code branches
; to mismatch with values preset so that no fault is raised there
; (Wptr == Wptr_end, run_bk == 0) and a zero last coefficient is used.
block_loop:
|
|
* ------------------------------------------------------------------------- *
|
|
* check cbp, etc.
|
|
* ------------------------------------------------------------------------- *
|
|
; Note: B_run_bk shares B31 with B_bsbuf, and B_coded shares B0 with
; B_eob / B_err.
.asg B17, B_cbp_mask
|
|
.asg B0, B_coded
|
|
.asg A5, A_last_coeff
|
|
.asg A2, A2_odd
|
|
.asg B31, B_run_bk
|
|
.asg B26, B_num_blocks ; 2 W-mat
|
|
|
|
SUB .S2 B_cnt, 1, B_cnt ; cbp, cnt--
|
|
|| ZERO .L2 B_sum
|
|
|| ZERO .D2 B_run_bk ; not coded
|
|
|| ZERO .L1 A2_odd ; not coded
|
|
|| MVK .S1 1, A_const1 ; cbp
|
|
|
|
; B_cbp_mask = 1 << cnt
SHL .S2X A_const1, B_cnt, B_cbp_mask ; cbp
|
|
|| MV .L2 B_Wptr_end, B_Wptr ; not coded
|
|
|| MVK .S1 0, A_last_coeff ; not coded
|
|
|
|
; B_coded = cbp & mask (non-zero when this block is coded)
AND .D2 B_cbp_mask, B_cbp, B_coded ; cbp
|
|
|| MVK .S2 126, B_const126 ; const
|
|
|
|
; Not coded: advance outi by 126 bytes (the mismatch code adds 2 more)
; and branch to mismatch. Coded: reload num_blocks from stack slot [8].
[!B_coded]B .S1 mismatch ; not coded
|
|
||[!B_coded]ADD .L2 B_outi, B_const126, B_outi ; not coded
|
|
||[B_coded]LDW .D2T2 *+B_SP[8], B_num_blocks ; 2 W-mat
|
|
|
|
* =========================== PIPE LOOP PROLOG ============================ *
|
|
; Prolog of the software-pipelined symbol loop. It starts decoding the
; first symbols of the block from the saved bitstream state (*_bk
; registers), selects the quantization matrix for the block, and restores
; the working bitstream registers.
; The ";[ s,i]" tags appear to identify pipeline stage s of loop
; iteration i (TODO confirm against the scheduling tool's convention).
; "table mod" marks the special handling of the first symbol: when the
; most significant bit of the window is set (A_tm != 0) the table lookup
; is bypassed and len = 2, run = 0, level = +1 or -1 are used directly.
.asg A0, A_tm
|
|
.asg B0, B_tm_neg
|
|
|
|
; the added lines below calculate cc which is required for weighting
|
|
; matrix selection in 4:2:2 and 4:4:4 mode
|
|
; the following additional registers are required: B_block, B_flag, B_cc
|
|
; Matrix selection as implemented below:
;   block = (num_blocks - 1) - cnt        (index of the current block)
;   flag  = (num_blocks - 1) > 6, and if set flag = (block > 3)
;   cc    = flag ? (block & 1) + 1 : 0
;   if cc != 0 then Wptr += 128 bytes     (second matrix)
.asg B31, B_block
|
|
.asg B1, B_cc
|
|
.asg B0, B_flag
|
|
|
|
; A_tm = most significant bit of the 8-bit top0h part of the window.
NORM .L1 A_top0h_bk:A_top0l_bk, A_nrm ;[ 1,1]
|
|
|| SHRU .S1 A_top0h_bk, 7, A_tm ;table mod
|
|
|
|
MPY .M1 A_nrm, -16, A_t2 ;[ 2,1]
|
|
|| SHL .S1 A_top0h_bk:A_top0l_bk, A_nrm, A_t1h:A_t1l;[ 2,1]
|
|
|
|
MVK .S1 36, A_const36 ;const
|
|
|
|
SHRU .S1 A_t1h:A_t1l, A_const36, A_t4h:A_t4l ;[ 4,1]
|
|
|| SUB .L1 A_len_tbl_adr, A_t2, A_t3 ;[ 4,1]
|
|
||[B_coded]LDW .D2T2 *+B_SP[7], B_Wptr ;get W-mat base adr
|
|
|
|
; Code length lookup: len = len_tbl[16 * nrm + t4l]
[!A_tm]LDBU .D1T1 *A_t3[A_t4l], A_len ;[ 5,1]
|
|
||[B_coded] SUB .L2 B_num_blocks, 1, B_num_blocks;2 W-mat
|
|
; branch occurs if not coded MB
|
|
|
|
SUB .S2 B_num_blocks, B_cnt, B_block ;cc for 2 W-mat
|
|
|| CMPGT .L2 B_num_blocks, 6, B_flag
|
|
;prevent 2 W-mat if 4:2:0
|
|
|
|
SHRU .S1 A_top0h_bk:A_top0l_bk, 8, A_empty:A_top0_bk;[ 8,1]
|
|
||[B_flag]CMPGT .L2 B_block, 3, B_flag ;cc for 2 W-mat
|
|
|| ZERO .S2 B_cc ;cc for 2 W-mat
|
|
|
|
[A_tm]MVK .L1 2, A_len ;table mod
|
|
||[B_flag] AND .D2 B_block, 1, B_cc ;cc for 2 W-mat
|
|
|
|
MV .L1X B_bptr_bk, A_bptr ;restore
|
|
|| MVK .S2 128, B_const128 ;const
|
|
||[B_flag] ADD .D2 B_cc, 1, B_cc ;cc for 2 W-mat
|
|
|
|
; Consume len bits: shift the window left by len and advance the bit
; pointer. B_test3 shares B1 with B_cc; the [B_cc] predicate below reads
; the value from before this packet.
SUB .S2X A_len, 5, B_rld_left ;[10,1]
|
|
|| CMPLT .L2X A_len, 5, B_test3 ;[10,1]
|
|
|| ADD .L1 A_bptr, A_len, A_bptr1 ;[10,1]
|
|
|| SHL .S1 A_top0h_bk:A_top0l_bk, A_len, A_ptop0h:A_ptop0l;[10,1]
|
|
||[!A_tm]SUB .D1 A_const32, A_len, A_len_c ;[10,1]
|
|
||[B_cc]ADD .D2 B_Wptr, B_const128, B_Wptr
|
|
;if cc!=0 select 2nd W-mat
|
|
|
|
[B_test3]MPY .M2 B_rld_left, 0, B_rld_left ;[11,1]
|
|
|| MV .L2X A_top0_bk, B_top0_bk ;[11,1]
|
|
|| AND .S1 A_const31, A_bptr1, A_bptr ;[11,1]
|
|
|| MV .D1 A_ptop0h, A_top0h ;[11,1]
|
|
|| NORM .L1 A_ptop0h:A_ptop0l, A_nrm ;[ 1,2]
|
|
|
|
; B_Wptr_end = (selected matrix base) + 128 bytes
CMPGT .L1 A_bptr1, A_const31, A_test2 ;[12,1]
|
|
|| MPY .M1 A_nrm, -16, A_t2 ;[ 2,2]
|
|
|| SHL .S1 A_ptop0h:A_ptop0l, A_nrm, A_t1h:A_t1l ;[ 2,2]
|
|
|| ADD .L2 B_Wptr, B_const128, B_Wptr_end ;reset
|
|
|
|
; Restore the working word buffers and circular pointer from the backups.
SHL .S2 B_top0_bk, B_rld_left, B_t13 ;[13,1]
|
|
|| MPY .M2X B_const32, A_len, B_t12 ;[13,1]
|
|
||[A_tm]MVK .S1 30, A_len_c ;table mod
|
|
|| MV .L1 A_word1_bk, A_word1 ;restore
|
|
|| MV .L2 B_word2_bk, B_word2 ;restore
|
|
|| MV .D2 B_bsbuf_circ_bk, B_bsbuf_circ ;restore
|
|
|
|
; When the bit pointer crossed a word boundary (A_test2) the next word is
; fetched through the circular pointer.
SHRU .S2 B_t13, 27, B_t14 ;[14,1]
|
|
||[ A_test2]LDW .D2T2 *B_bsbuf_circ++, B_word2 ;[14,1]
|
|
|| SHRU .S1 A_t1h:A_t1l, A_const36, A_t4h:A_t4l ;[ 4,2]
|
|
|| SUB .L1 A_len_tbl_adr, A_t2, A_t3;[ 4,2]
|
|
|
|
ADD .L2 B_t14, B_t12, B_t15 ;[15,1]
|
|
|| SUB .S2X B_const32, A_bptr, B_bptr_cmpl ;[15,1]
|
|
||[ A_test2]MV .L1X B_word2, A_word1 ;[15,1]
|
|
|| [A_test2]MV .S1 A_word1, A_word1_rw ; RWD
|
|
|| LDBU .D1T1 *A_t3[A_t4l], A_len ;[ 5,2]
|
|
|| ZERO .D2 B_tm_neg ;table mod
|
|
|
|
; A_test1 = len - 24: zero selects the directly extracted run/level
; (EXT/EXTU below) instead of the run/level table lookup.
ADD .L2 B_t15, B_t15, B_t16 ;[16,1]
|
|
||[!A_tm]SUB .D1 A_len, 24, A_test1 ;[16,1]
|
|
||[A_tm]ZERO .L1 A_test1 ;table mod
|
|
|| SHRU .S1 A_top1, A_len_c, A_t7 ;[16,1]
|
|
||[A_tm]EXTU .S2 B_top0_bk, 1, 31, B_tm_neg ;table mod
|
|
|
|
; B_rld_table_adr_1 = table base + 1: the table interleaves level (even
; byte index) and run (odd byte index) entries.
[ A_test1]LDB .D2T2 *B_rld_table_adr[B_t16],B_level ;[17,1]
|
|
|| ADD .D1 A_ptop0l, A_t7, A_top0l ;[17,1]
|
|
|| ADD .L2 B_rld_table_adr, 1, B_rld_table_adr_1;const
|
|
||[A_tm]MVK .S2 1, B_level ;table mod
|
|
|
|
[ A_test1]LDB .D2T2 *B_rld_table_adr_1[B_t16], B_run ;[18,1]
|
|
||[!A_tm]EXT .S2 B_top0_bk, 12, 20, B_level ;[18,1]
|
|
|| SHRU .S1 A_top0h:A_top0l, 8, A_empty:A_top0_bk;[ 8,2]
|
|
||[B_tm_neg]NEG .L2 B_level, B_level ;table mod
|
|
|
|
SHRU .S2 B_word2, B_bptr_cmpl,B_t9 ;[19,1]
|
|
|| SHL .S1 A_word1, A_bptr, A_t8 ;[19,1]
|
|
|
|
SUB .S2X A_len, 5, B_rld_left ;[10,2]
|
|
|| CMPLT .L2X A_len, 5, B_test3 ;[10,2]
|
|
|| ADD .L1 A_bptr, A_len, A_bptr1 ;[10,2]
|
|
|| SHL .S1 A_top0h:A_top0l, A_len, A_ptop0h:A_ptop0l;[10,2]
|
|
|| SUB .D1 A_const32, A_len, A_len_c ;[10,2]
|
|
|
|
[!A_tm]EXTU .S2 B_top0_bk, 6, 26, B_run ;[21,1]
|
|
||[A_tm] ZERO .D2 B_run ;table mod
|
|
||[B_test3]MPY .M2 B_rld_left, 0, B_rld_left ;[11,2]
|
|
|| MV .L2X A_top0_bk, B_top0_bk ;[11,2]
|
|
|| AND .S1 A_const31, A_bptr1, A_bptr ;[11,2]
|
|
|| MV .D1 A_ptop0h, A_top0h ;[11,2]
|
|
|| NORM .L1 A_ptop0h:A_ptop0l, A_nrm ;[ 1,3]
|
|
|
|
; Reload the inverse scan table pointer from the context for this block.
MPY .M2 B_level, 2, B_level2 ;[22,1]
|
|
|| CMPGT .L1 A_bptr1, A_const31, A_test2 ;[12,2]
|
|
|| MPY .M1 A_nrm, -16, A_t2 ;[ 2,3]
|
|
|| SHL .S1 A_ptop0h:A_ptop0l, A_nrm, A_t1h:A_t1l ;[ 2,3]
|
|
|| LDW .D1T2 *+A_Mpeg2v[ZPTR_M2OFF], B_Zptr ;reset
|
|
|| MVK .S2 63, B_const63 ;const
|
|
|
|
; Advance the matrix pointer by run entries and load the weight.
LDH .D2T1 *++B_Wptr[B_run], A_W ;[23,1]
|
|
|| CMPLT .L1X B_level, 0, A_neg ;[23,1]
|
|
|| SHL .S2 B_top0_bk, B_rld_left, B_t13 ;[13,2]
|
|
|| MPY .M2X B_const32, A_len, B_t12 ;[13,2]
|
|
|
|
ADD .L2 B_Wptr, 2, B_Wptr ;[24,1]
|
|
|| ADD .D1X A_t8, B_t9, A_top1 ;[24,1]
|
|
|| SHRU .S2 B_t13, 27, B_t14 ;[14,2]
|
|
||[ A_test2]LDW .D2T2 *B_bsbuf_circ++, B_word2 ;[14,2]
|
|
|| SHRU .S1 A_t1h:A_t1l, A_const36, A_t4h:A_t4l;[ 4,3]
|
|
|| SUB .L1 A_len_tbl_adr, A_t2, A_t3;[ 4,3]
|
|
|
|
; level3 = 2*level - 1 for negative levels, 2*level + 1 otherwise.
[ A_neg]SUB .D2 B_level2, 1, B_level3 ;[25,1]
|
|
|| ADD .L2 B_t14, B_t12, B_t15 ;[15,2]
|
|
|| SUB .S2X B_const32, A_bptr, B_bptr_cmpl ;[15,2]
|
|
||[ A_test2]MV .L1X B_word2, A_word1 ;[15,2]
|
|
|| [A_test2]MV .S1 A_word1, A_word1_rw ; RWD
|
|
|| LDBU .D1T1 *A_t3[A_t4l], A_len ;[ 5,3]
|
|
|
|
[!A_neg]ADD .L2 B_level2, 1, B_level3 ;[26,1]
|
|
|| ADD .S2 B_t15, B_t15, B_t16 ;[16,2]
|
|
|| SUB .D1 A_len, 24, A_test1 ;[16,2]
|
|
|| SHRU .S1 A_top1, A_len_c, A_t7 ;[16,2]
|
|
|
|
; B_eob = (run > 63); A_outi receives the output base for this block.
CMPGT .L2 B_run, B_const63, B_eob ;[27,1]
|
|
||[ A_test1]LDB .D2T2 *B_rld_table_adr[B_t16],B_level ;[17,2]
|
|
|| ADD .D1 A_ptop0l, A_t7, A_top0l ;[17,2]
|
|
|| MV .L1X B_outi, A_outi ; 12Q4
|
|
|
|
* =========================== PIPE LOOP KERNEL ============================ *
|
|
; Software-pipelined kernel: 10 execute packets per pass, with work from
; several overlapped iterations in each packet (see the ";[ s,i]" tags).
; Per decoded symbol, as implemented by the tagged instructions:
;   qw      = quant_scale * W                         [28]
;   cnum    = byte loaded from Zptr advanced by run   [29]
;   level4  = qw * level3, +31 for negative levels    [30],[32]
;   level5  = saturating level4 << 15                 [33]
;   level_f = (level5 >> A_constSHR) & B_constFFF0    [35]
;   outi[cnum] = level_f; sum += level_f              [36],[37]
; B_eob ends the loop. It is set when run > 63 or when Wptr has passed
; Wptr_end. While B_eob is clear, the bitstream state is copied into the
; *_bk registers each pass ("preserve"). Once it is set, outi is advanced
; by 2 * 63 bytes and the halfword there is loaded and XORed with
; A_const16 for the mismatch code ("mismatch").
.asg A2, A2_top0l
|
|
|
|
loop:
|
|
MPY .M1 A_qscl, A_W, A_qw ;[28,1]
|
|
||[!B_eob]CMPGT .L2 B_Wptr, B_Wptr_end, B_eob ;[28,1]
|
|
||[A_test1]LDB .D2T2 *B_rld_table_adr_1[B_t16], B_run ;[18,2]
|
|
|| EXT .S2 B_top0_bk, 12, 20, B_level ;[18,2]
|
|
|| SHRU .S1 A_top0h:A_top0l, 8, A_empty:A_top0_bk;[ 8,3]
|
|
||[B_eob]MPY .M2 0, B_Wptr, B_Wptr ;err det
|
|
|
|
LDB .D2T1 *++B_Zptr[B_run], A_cnum ;[29,1]
|
|
|| SHRU .S2 B_word2, B_bptr_cmpl,B_t9 ;[19,2]
|
|
|| SHL .S1 A_word1, A_bptr, A_t8 ;[19,2]
|
|
||[!B_eob]MV .L1 A_top0h:A_top0l, A_top0h_bk:A_top0l_bk ;preserve
|
|
||[B_eob]ADD .L2 B_outi, B_const63, B_outi ;mismatch
|
|
||[!B_eob]MPY .M2X 1, A_bptr, B_bptr_bk ;preserve
|
|
|
|
ADD .D2 B_Zptr, 1, B_Zptr ;[30,1]
|
|
|| MPY .M1X A_qw, B_level3, A_level4 ;[30,1]
|
|
|| SUB .S2X A_len, 5, B_rld_left ;[10,3]
|
|
|| CMPLT .L2X A_len, 5, B_test3 ;[10,3]
|
|
|| ADD .L1 A_bptr, A_len, A_bptr1 ;[10,3]
|
|
|| SHL .S1 A_top0h:A_top0l, A_len, A_ptop0h:A_ptop0l;[10,3]
|
|
|| SUB .D1 A_const32, A_len, A_len_c ;[10,3]
|
|
||[B_eob]MPY .M2 1, B_run, B_run_bk ;preserve
|
|
|
|
EXTU .S2 B_top0_bk, 6, 26, B_run ;[21,2]
|
|
||[B_test3]MPY .M2 B_rld_left, 0, B_rld_left ;[11,3]
|
|
|| MV .L2X A_top0_bk, B_top0_bk ;[11,3]
|
|
|| AND .S1 A_const31, A_bptr1, A_bptr ;[11,3]
|
|
|| MV .D1 A_ptop0h, A_top0h ;[11,3]
|
|
|| NORM .L1 A_ptop0h:A_ptop0l, A_nrm ;[ 1,4]
|
|
||[B_eob]ADD .D2 B_outi, B_const63, B_outi ;mismatch
|
|
|
|
; Loop branch: issued here so that the remaining five packets of the
; kernel execute before control returns to "loop".
[!B_eob]B .S2 loop ;[32,1]
|
|
||[ A_neg]ADD .D1 A_level4, A_const31, A_level4 ;[32,1]
|
|
|| MPY .M2 B_level, 2, B_level2 ;[22,2]
|
|
|| CMPGT .L1 A_bptr1, A_const31, A_test2 ;[12,3]
|
|
|| MPY .M1 A_nrm, -16, A_t2 ;[ 2,4]
|
|
|| SHL .S1 A_ptop0h:A_ptop0l, A_nrm, A_t1h:A_t1l ;[ 2,4]
|
|
||[!B_eob]MV .L2 B_bsbuf_circ, B_bsbuf_circ_bk ;preserve
|
|
||[B_eob]LDH .D2T1 *B_outi, A_last_coeff ;mismatch
|
|
|
|
SSHL .S1 A_level4, 15, A_level5 ;[33,1]
|
|
||[!B_eob]LDH .D2T1 *++B_Wptr[B_run], A_W ;[23,2]
|
|
|| CMPLT .L1X B_level, 0, A_neg ;[23,2]
|
|
|| SHL .S2 B_top0_bk, B_rld_left, B_t13 ;[13,3]
|
|
|| MPY .M2X B_const32, A_len, B_t12 ;[13,3]
|
|
||[!B_eob]MV .L2 B_word2, B_word2_bk ;preserve
|
|
||[!B_eob]MV .D1 A_word1, A_word1_bk ;preserve
|
|
||[!B_eob]MVD .M1 A_word1_rw, A_word1_rw_bk ;preserve
|
|
|
|
[!B_eob]ADD .L2 B_Wptr, 2, B_Wptr ;[24,2]
|
|
||[!B_eob]ADD .D1X A_t8, B_t9, A_top1 ;[24,2]
|
|
|| SHRU .S2 B_t13, 27, B_t14 ;[14,3]
|
|
||[ A_test2]LDW .D2T2 *B_bsbuf_circ++, B_word2 ;[14,3]
|
|
|| SHRU .S1 A_t1h:A_t1l, A_const36, A_t4h:A_t4l ;[ 4,4]
|
|
|| SUB .L1 A_len_tbl_adr, A_t2, A_t3;[ 4,4]
|
|
|
|
SHR .S1 A_level5, A_constSHR, A_level_f ;[35,1] 12Q4
|
|
||[ A_neg]SUB .D2 B_level2, 1, B_level3 ;[25,2]
|
|
|| ADD .L2 B_t14, B_t12, B_t15 ;[15,3]
|
|
|| SUB .S2X B_const32, A_bptr, B_bptr_cmpl ;[15,3]
|
|
||[ A_test2]MV .L1X B_word2, A_word1 ;[15,3]
|
|
|| [A_test2]MVD .M1 A_word1, A_word1_rw ; RWD
|
|
|| LDBU .D1T1 *A_t3[A_t4l], A_len ;[ 5,4]
|
|
|
|
[!A_neg]ADD .L2 B_level2, 1, B_level3 ;[26,2]
|
|
|| ADD .S2 B_t15, B_t15, B_t16 ;[16,3]
|
|
|| SUB .D1 A_len, 24, A_test1 ;[16,3]
|
|
|| SHRU .S1 A_top1, A_len_c, A_t7 ;[16,3]
|
|
||[!B_eob]AND .L1X B_constFFF0,A_level_f, A_level_f ; 12Q4
|
|
|
|
[!B_eob]STH .D1T1 A_level_f, *+A_outi[A_cnum] ;[36,1] BC
|
|
||[!B_eob]ADD .S2X B_sum, A_level_f, B_sum ;[37,1]
|
|
|| CMPGT .L2 B_run, B_const63, B_eob ;[27,2]
|
|
||[ A_test1]LDB .D2T2 *B_rld_table_adr[B_t16],B_level ;[17,3]
|
|
||[!B_eob]ADD .L1 A_ptop0l, A_t7, A2_top0l ;[17,3]
|
|
||[B_eob] XOR .S1 A_const16, A_last_coeff, A_last_coeff ;mismatch 12Q4
|
|
||[B_eob] MVD .M1 A_word1_rw_bk, A_word1_rw ; RWD
|
|
|
|
* =========================== PIPE LOOP EPILOG ============================ *
|
|
* ========================================================================= *
|
|
; live-out: top0h:top0l, top1, word1, word2, bsbuf_circ, run, Wptr, Wptr_end,
|
|
; sum, bptr
|
|
|
|
; End-of-block handling, reached from the symbol loop and directly from
; block_loop for blocks that are not coded:
;   - mismatch control: when (sum & A_const16) == 0, the toggled last
;     coefficient is stored at *B_outi; outi then advances by 2 bytes
;   - fault detection: B_err is set when Wptr > Wptr_end (unsigned) or
;     when the preserved run value is greater than 65
;   - branches back to block_loop while B_cnt != 0, or to exit on a fault
.asg B0, B_err ; same reg as B_eob
|
|
.asg A29, A_Mpeg2v
|
|
.asg B31, B_bsbuf
|
|
.asg B3, B_ret ; Return address
|
|
.asg B15, B_SP ; Stack pointer, B datapath
|
|
.asg B1, B_const65
|
|
|
|
mismatch:
|
|
[B_cnt] B .S1 block_loop ; -- BRANCH --
|
|
|| MVK .S2 65, B_const65 ; invalid VLC
|
|
|| CMPGTU .L2 B_Wptr, B_Wptr_end, B_err ; overrun
|
|
|| AND .L1X A_const16, B_sum, A2_odd ; mismatch 12Q4
|
|
|
|
; The STH and the ADD on B_outi are in the same execute packet, so the
; store is expected to use the B_outi value from before the increment.
; B_bsbuf (B31) is reloaded here because B31 is reused as B_run_bk and
; B_block inside the block loop.
[!B_err]CMPGT .L2 B_run_bk, B_const65, B_err ; invalid VLC
|
|
|| ADD .S2 B_outi, 2, B_outi
|
|
|| LDW .D1T2 *+A_Mpeg2v[BSBUF_M2OFF], B_bsbuf ; exit
|
|
||[!A2_odd]STH .D2T1 A_last_coeff, *B_outi ; mismatch
|
|
|
|
; Reload the return address (B3 was reused for constants) and restore the
; circular pointer from its backup before a possible exit.
[B_err]B .S2 exit ; -- BRANCH --
|
|
|| LDW .D2T2 *+B_SP[2], B_ret ; exit
|
|
|| MV .L2 B_bsbuf_circ_bk, B_bsbuf_circ ; AMR arg
|
|
|
|
NOP 3 ; MERGE
|
|
; branch to block_loop occurs here (if B_cnt != 0)
|
|
; branch to exit (if B_err) occurs after 2 cycles in block_loop
|
|
; (preserve B0_err for exit)
|
|
; NOTE(review): B_bsbuf and B_run_bk are both B31. When both branches are
; taken, the first block_loop packet (ZERO B_run_bk) executes before exit
; computes next_wptr from B_bsbuf -- confirm whether next_wptr is meant to
; be valid on this fault path.
|
|
|
|
; this will execute only if B_cnt was 0
|
|
NOP 2 ; MERGE
|
|
|
|
* =================================== EXIT =============================== *
|
|
; Exit sequence: writes the bitstream state and the fault flag back to the
; Mpeg2v context, restores A10-A14, the stack pointer, AMR (set to zero)
; and CSR, and returns to the caller.
; The 8-bit advance applied to bptr during setup is undone here. If that
; makes bptr negative (B_lz), the state is stepped back by one word:
; bptr += 32, word2 <- word1, word1 <- word1_rw, circular pointer -= 1
; word, and next_wptr is recomputed.
; B_constBUFMASK is not referenced by the code in this file.
.asg B26, B_csr ; CSR value to restore
|
|
.asg B22, B_byte_diff
|
|
.asg B29, B_next_wptr
|
|
.asg B1, B_lz
|
|
.asg B27, B_amr_config
|
|
.asg B4, B_constBUFMASK
|
|
|
|
exit:
|
|
SUB .L2 B_bsbuf_circ, B_bsbuf, B_byte_diff
|
|
|| SHRU .S1 A_top1, 8, A_t2
|
|
|| SUB .S2 B_bptr_bk, 8, B_bptr_bk
|
|
|| LDW .D2T1 *+B_SP[6], A14 ; MERGE
|
|
|
|
; next_wptr = byte offset into bsbuf / 4; B_lz = (bptr - 8 < 0)
SHR .S2 B_byte_diff, 2, B_next_wptr
|
|
|| SHL .S1 A_top0l_bk, 24, A_t3
|
|
|| CMPLT .L2 B_bptr_bk, 0, B_lz
|
|
|| LDW .D2T1 *+B_SP[3], A11
|
|
|
|
; top1 = (top1 >> 8) + (top0l << 24); top0 = (top0h:top0l) >> 8.
; Both B_lz moves are in one execute packet, so B_word2_bk is expected to
; receive the A_word1_bk value from before this packet.
ADD .L1 A_t2, A_t3, A_top1
|
|
|| SHRU .S1 A_top0h_bk:A_top0l_bk, 8, A_empty:A_top0_bk
|
|
|| STW .D1T2 B_err, *+A_Mpeg2v[FAULT_M2OFF]
|
|
||[B_lz]MVD .M1 A_word1_rw, A_word1_bk ; RWD
|
|
||[B_lz]MV .L2X A_word1_bk, B_word2_bk
|
|
|| LDW .D2T1 *+B_SP[4], A12
|
|
|
|
; NOTE(review): the ADD below reads A_const32 on .L2 without an X
; cross-path suffix -- confirm the assembler accepts this form.
LDW .D2T2 *+B_SP[1], B_csr ; Get CSR's value
|
|
|| STW .D1T1 A_top1, *+A_Mpeg2v[TOP1_M2OFF]
|
|
||[B_lz]ADD .L2 B_bptr_bk, A_const32, B_bptr_bk
|
|
|
|
STW .D1T2 B_bptr_bk, *+A_Mpeg2v[BPTR_M2OFF]
|
|
|| LDW .D2T1 *+B_SP[5], A13
|
|
|
|
; The return branch is issued here; the packets that follow it complete
; the write-back before the branch takes effect.
RET .S2 B_ret ; Return to caller
|
|
|| STW .D1T1 A_top0_bk, *+A_Mpeg2v[TOP0_M2OFF]
|
|
||[B_lz]SUBAW .D2 B_bsbuf_circ, 1, B_bsbuf_circ ; AMR arg
|
|
|
|
; First next_wptr store; it is written again below after the B_lz
; adjustment. The pre-increment load releases the 9-word stack frame
; and restores A10.
STW .D1T2 B_next_wptr, *+A_Mpeg2v[NEXTWPTR_M2OFF]
|
|
|| ZERO .L2 B_amr_config
|
|
|| LDW .D2T1 *++B_SP[9], A10 ; MERGE, 2 W-mat
|
|
||[B_lz]SUB .S2 B_bsbuf_circ, B_bsbuf, B_byte_diff ; AMR arg
|
|
|
|
STW .D1T1 A_word1_bk, *+A_Mpeg2v[WORD1_M2OFF]
|
|
||[B_lz]SHR .S2 B_byte_diff, 2, B_next_wptr ; AMR arg
|
|
|
|
STW .D1T2 B_word2_bk, *+A_Mpeg2v[WORD2_M2OFF]
|
|
; ===== Interruptibility state restored here =====
|
|
|
|
; Final next_wptr store (includes the B_lz correction); AMR is cleared.
STW .D1T2 B_next_wptr, *+A_Mpeg2v[NEXTWPTR_M2OFF]
|
|
|| MVC .S2 B_amr_config, AMR
|
|
|
|
MVC .S2 B_csr, CSR ; Restore CSR
|
|
; Branch occurs
|
|
|
|
* ========================================================================= *
|
|
* End of file: img_mpeg2_vld_inter.asm *
|
|
* ------------------------------------------------------------------------- *
|
|
* Copyright (c) 2003 Texas Instruments, Incorporated. *
|
|
* All Rights Reserved. *
|
|
* ========================================================================= *
|
|
|