/*
* Copyright (c) 2011 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*
* This file implements the following functions for the Swift micro-arch:
*
* void bzero(void * destination,
* size_t length);
*
* void __bzero(void * destination,
* size_t length);
*
* zeros out a buffer length bytes long, beginning at the address destination.
*
* void *memset(void * destination,
* int value,
* size_t n);
*
* writes value converted to an unsigned char to n successive bytes, beginning
* at destination.
*/
#include <arm/arch.h>
#if defined _ARM_ARCH_7
// Assembler setup: unified ARM/Thumb syntax, ARM (32-bit) instruction encoding.
.syntax unified
.code 32
// Export the three entry points defined below.
.globl ___bzero$VARIANT$Swift
.globl _bzero$VARIANT$Swift
.globl _memset$VARIANT$Swift
// Emit the code into the text section and align the first entry point.
// NOTE(review): the .align operand is presumably a power of two here
// (2^4 = 16 bytes) -- confirm against the assembler in use.
.text
.align 4
//
// void bzero(void *destination, size_t length)
// void __bzero(void *destination, size_t length)
//
//   In:  r0 = destination, r1 = length
//
// Both names share this stub.  It rearranges the arguments into the layout
// used by the memset entry point that follows (r1 = fill value, r2 = byte
// count) and then falls straight through into it; there is no branch.
//
___bzero$VARIANT$Swift:
_bzero$VARIANT$Swift:
    mov     r2,     r1              // Byte count goes where memset expects n.
    mov     r1,     #0              // Fill value is zero.
//
// void *memset(void *destination, int value, size_t n)
//
//   In:       r0 = destination, r1 = value (the low byte is used), r2 = n
//   Out:      r0 = destination (r0 is never written here)
//   Scratch:  r2, r3, ip, lr, q0, q1 and the condition flags
//
// The bzero entry points directly above fall through to this label.
//
// Every path finishes by backing the write pointer up so that its last store
// ends exactly at destination + n.  Bytes that were already written may be
// written a second time; this replaces a separate remainder loop.
//
_memset$VARIANT$Swift:
    push    {r7,lr}                 // Establish a frame, and make a copy of the
    mov     r7,     sp              // pointer to increment so that we can
    mov     ip,     r0              // return the original pointer unmodified.
    vdup.8  q0,     r1              // Splat the low byte of value across q0.

    subs    r3,     r2, #64         // r3 = n - 64.  If n < 64, jump to a
    blo     L_lengthLessThan64      // dedicated path for small buffers.

    vmov    q1,     q0              // Copy the splatted value to q1.
    orr     lr,     r2, r0          // If the length is not a multiple of 16 or
    ands    lr,     #0xf            // the buffer is not 16-byte aligned, then
    bne     L_edgingNeeded          // some edging is needed.

//  Aligned path: destination and n are both multiples of 16, so every store
//  below (including the backtracked tail) may carry the :128 alignment hint.
0:  subs    r3,     #64             // Write 64 bytes at a time to the 16-byte
    vst1.8  {q0,q1}, [ip,:128]!     // aligned buffer.  Terminate this loop when
    vst1.8  {q0,q1}, [ip,:128]!     // 64 or fewer bytes remain to be written.
    bhi     0b

    add     ip,     r3              // r3 = remaining - 64 (<= 0).  Backtrack the
    vst1.8  {q0,q1}, [ip,:128]!     // destination pointer by 64 - remaining
    vst1.8  {q0,q1}, [ip,:128]      // bytes and write 64 bytes there.  This
    pop     {r7,pc}                 // takes us precisely to the buffer end.

//  Unaligned start and/or n not a multiple of 16 (n >= 64 is still known).
L_edgingNeeded:
    vst1.8  {q0},   [ip]            // Write 16 bytes to the [possibly unaligned]
    and     lr,     ip, #0xf        // buffer, then advance the pointer to the
    bic     ip,     #0xf            // next aligned location, and adjust the
    add     r3,     lr              // length accordingly.  Note that this means
    add     ip,     #16             // that the first write in the loop may
    subs    r3,     #16             // overlap with the write we just performed.
    blo     1f                      // r3 = (bytes left from ip) - 64.  If fewer
                                    // than 64 bytes are left, the loop must be
                                    // skipped: one pass would store beyond the
                                    // end of the buffer.
    nop                             // Padding ahead of the loop.
                                    // NOTE(review): appears to be a branch
                                    // alignment tuning for Swift -- confirm.
0:  subs    r3,     #64             // Write 64 bytes at a time to the 16-byte
    vst1.8  {q0,q1}, [ip,:128]!     // aligned buffer.  Terminate this loop when
    vst1.8  {q0,q1}, [ip,:128]!     // 64 or fewer bytes remain to be written.
    bhi     0b

1:  add     ip,     r3              // Backtrack the destination pointer by
    vst1.8  {q0,q1}, [ip]!          // 64 - remaining bytes, and write 64 bytes
    vst1.8  {q0,q1}, [ip]           // to that address (no alignment hint: the
    pop     {r7,pc}                 // tail address may be unaligned).

//  Small buffers: n < 64.
L_lengthLessThan64:
    subs    r3,     r2, #8          // If the length is smaller than eight, jump
    blo     1f                      // into a dedicated byte store loop.

0:  subs    r3,     #8              // Write 8 bytes at a time to the destination
    vst1.8  {d0},   [ip]!           // buffer, terminating when eight or fewer
    bhi     0b                      // bytes remain to be written.

    add     ip,     r3              // Backtrack the destination pointer by
    vst1.8  {d0},   [ip]            // 8 - remaining bytes, and write 8 bytes
    pop     {r7,pc}                 // to that address, then return.

1:  subs    r2,     #1              // Store one byte at a time to the destination
    strbhs  r1,     [ip], #1        // buffer, until we exhaust the length.  The
    bhi     1b                      // hs condition suppresses the store for n = 0.
    pop     {r7,pc}
#endif // defined _ARM_ARCH_7