@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
 * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,8 @@
.globl eret
.globl smc
.globl zero_normalmem
.globl zeromem
.globl zeromem16
.globl memcpy16
@@ -80,31 +82,358 @@ endfunc smc
 *
 * Initialise a memory region to 0.
 * The memory address must be 16-byte aligned.
 * NOTE: This function is deprecated and zeromem should be used instead.
 * -----------------------------------------------------------------------
 */
func zeromem16
.equ	zeromem16, zeromem
/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is mainly intended to be used from C
 *       code, where the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva
/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and the MMU are enabled, zero_normalmem can usually
 *       be used instead, for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Use the fallback path, which does not use the DC ZVA instruction
	 * and therefore does not require the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem
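/*
 * Illustrative usage sketch from C, assuming only the prototypes documented
 * above; the buffer name "scratch" is hypothetical:
 *
 *   extern void zero_normalmem(void *mem, unsigned int length);
 *   extern void zeromem(void *mem, unsigned int length);
 *
 *   static char scratch[256];
 *
 *   zero_normalmem(scratch, sizeof(scratch)); // MMU on, normal memory only
 *   zeromem(scratch, sizeof(scratch));        // also safe with the MMU off
 */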
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement for the MMU to be enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva
	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time, or using the DC ZVA instruction to
	 * zero aligned blocks, which are assumed to be larger than 16 bytes.
	 * In the case where the DC ZVA instruction cannot be used or if the
	 * first 16 bytes loop would overflow, there is a fallback path that
	 * does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function, which
	 *       branches to it directly.
	 *
	 *               +---------+   zeromem_dczva
	 *               |  entry  |
	 *               +----+----+
	 *                    |
	 *                    v
	 *               +---------+
	 *               | checks  |>o-------+ (If any check fails, fallback)
	 *               +----+----+         |
	 *                    |              |---------------+
	 *                    v              | Fallback path |
	 *             +------+------+       |---------------+
	 *             | 1 byte loop |       |
	 *             +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                    |              |
	 *                    v              |
	 *            +-------+-------+      |
	 *            | 16 bytes loop |      |
	 *            +-------+-------+      |
	 *                    |              |
	 *                    v              |
	 *             +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *             | DC ZVA loop |       |
	 *             +------+------+       |
	 *        +--------+  |              |
	 *        |        |  |              |
	 *        |        v  v              |
	 *        |  +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *        |  | 16 bytes loop |       |
	 *        |  +-------+-------+       |
	 *        |           |              |
	 *        |           v              |
	 *        |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *        |    | 1 byte loop |       |
	 *        |    +-------------+       |
	 *        |           |              |
	 *        |           v              |
	 *        |       +---+--+           |
	 *        |       | exit |           |
	 *        |       +------+           |
	 *        |                          |
	 *        |     +--------------------+    +------------------+   zeromem
	 *        |     |  +----------------------| zeromem function |
	 *        |     |  |                      +------------------+
	 *        |     v  v
	 *        |   +-------------+ .Lzeromem_dczva_fallback_entry
	 *        |   | 1 byte loop |
	 *        |   +------+------+
	 *        |          |
	 *        +----------+
	 */
	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem, which branches
	 * into the fallback path directly, so cursor, length and stop_address
	 * should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5
#if ASM_ASSERTION
	tst	x0, #0xf
	ASM_ASSERT(eq)
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif
	add	x2, x0, x1
/* zero 16 bytes at a time */
z_loop16:
	sub	x3, x2, x0
	cmp	x3, #16
	b.lt	z_loop1
	stp	xzr, xzr, [x0], #16
	b	z_loop16
/* zero byte per byte */
z_loop1:
	cmp	x0, x2
	b.eq	z_end
	strb	wzr, [x0], #1
	b	z_loop1
z_end:
	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ASM_ASSERTION */
	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length
	/*
	 * Get the block size from dczid_el0: its 4 lowest bits encode
	 * log2(<block size in words>), i.e. log2(<block size in bytes>) - 2.
	 */
	mrs	block_size, dczid_el0
	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
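	/*
	 * Worked example (illustrative): DCZID_EL0[3:0] is the log2 of the
	 * block size in 4-byte words, so block_size = 4 << DCZID_EL0[3:0].
	 * A common value of 4 gives 4 << 4 = 64 bytes; the architectural
	 * maximum of 9 gives 4 << 9 = 2048 bytes (2kB).
	 */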
#if ASM_ASSERTION
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * Not worth doing all the setup for a region smaller than a block.
	 * This also protects against zeroing a whole block when the area to
	 * zero is smaller than that. Also, as it is assumed that the block
	 * size is at least 16 bytes, this protects the initial aligning loops
	 * from trying to zero 16 bytes when length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry
	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1
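	/*
	 * Illustrative note: block_size is a power of two, so with e.g.
	 * block_size = 64, block_mask is 0x3f and the "tst cursor, block_mask"
	 * below sets the Z flag exactly when cursor is a multiple of 64.
	 */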
	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length
	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned
	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1
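	/*
	 * Worked example (illustrative): with block_size = 64 (block_mask =
	 * 0x3f) and cursor = 0x1010, (0x1010 | 0x3f) + 1 = 0x1040, i.e. the
	 * next 64-byte boundary. The alignment test above guarantees this
	 * path is only taken when cursor is not already block-aligned.
	 */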
	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry
	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end
	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */
	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:
	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:
	/*
	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
	 * instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask
	cmp	cursor, tmp1
	b.hs	2f
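	/*
	 * Illustrative example of the rounding above: "bic" clears the low
	 * bits, so with block_mask = 0x3f and stop_address = 0x2078,
	 * tmp1 = 0x2078 & ~0x3f = 0x2040, the last 64-byte boundary that does
	 * not lie past the stop address.
	 */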
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:
	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:
	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret
endfunc zeromem16
	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned
	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned
	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b
	b	.Lzeromem_dczva_final_16bytes_aligned
	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva
/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)