diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc index 09bf7c8f251e..b1369841bc74 100644 --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -3,17 +3,28 @@ MDSRCS+= \ bcmp.S \ memchr.S \ memcmp.S \ + memccpy.S \ memcpy.S \ memmove.S \ + memrchr.S \ memset.S \ stpcpy.S \ + stpncpy.S \ strcat.S \ strchrnul.S \ strcmp.S \ strcpy.c \ strcspn.S \ + strlcat.c \ + strlcpy.S \ strlen.S \ + strncat.c \ + strncmp.S \ + strncpy.c \ strnlen.c \ + strpbrk.c \ + strrchr.S \ + strsep.c \ strspn.S \ timingsafe_bcmp.S \ timingsafe_memcmp.S diff --git a/lib/libc/amd64/string/memccpy.S b/lib/libc/amd64/string/memccpy.S new file mode 100644 index 000000000000..a2d9e33b3d36 --- /dev/null +++ b/lib/libc/amd64/string/memccpy.S @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + + .weak memccpy + .set memccpy, __memccpy +ARCHFUNCS(__memccpy) + ARCHFUNC(__memccpy, scalar) + ARCHFUNC(__memccpy, baseline) +ENDARCHFUNCS(__memccpy) + +ARCHENTRY(__memccpy, scalar) + push %rbp # establish stack frame + mov %rsp, %rbp + push %rax # dummy push for alignment + push %rbx + push %rdi + push %rsi + + mov %rsi, %rdi + mov %edx, %esi + mov %rcx, %rdx + mov %rcx, %rbx + call CNAME(__memchr) # ptr = memchr(src, c, len) + + pop %rsi + pop %rdi + lea 1(%rax), %rdx + sub %rsi, %rdx # size = ptr - src + 1 + mov %rbx, %rcx + lea (%rdi, %rdx, 1), %rbx # res = dest + size + test %rax, %rax # if (ptr == NULL) + cmovz %rcx, %rdx # size = len + cmovz %rax, %rbx # res = NULL + call CNAME(memcpy) + + mov %rbx, %rax # return (res) + pop %rbx + leave + ret +ARCHEND(__memccpy, scalar) + +ARCHENTRY(__memccpy, baseline) + sub $1, %rcx # RCX refers to last character in buffer + jb .L0 # go to special code path if len was 0 + + movd %edx, %xmm4 + mov %rcx, %rdx + punpcklbw %xmm4, %xmm4 # c -> cc + mov %esi, %ecx + punpcklwd %xmm4, %xmm4 # cc -> cccc + mov %rsi, %r9 # stash a copy of the source pointer for later + pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc + and $~0xf, %rsi + movdqa %xmm4, %xmm1 + pcmpeqb (%rsi), %xmm1 # NUL found in head? + mov $-1, %r8d + and $0xf, %ecx + shl %cl, %r8d # mask of bytes in the string + pmovmskb %xmm1, %eax + and %r8d, %eax + jnz .Lhead_nul + + movdqa 16(%rsi), %xmm3 # load second string chunk + movdqu (%r9), %xmm2 # load unaligned string head + mov $32, %r8d + sub %ecx, %r8d # head length + length of second chunk + movdqa %xmm4, %xmm1 + pcmpeqb %xmm3, %xmm1 # NUL found in second chunk? + + sub %r8, %rdx # enough space left for the second chunk? + jb .Lhead_buf_end + + /* process second chunk */ + pmovmskb %xmm1, %eax + test %eax, %eax + jnz .Lsecond_nul + + /* string didn't end in second chunk and neither did buffer -- not a runt! */ + movdqa 32(%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + movdqu %xmm2, (%rdi) # deposit head into buffer + sub %rcx, %rdi # adjust RDI to correspond to RSI + movdqu %xmm3, 16(%rdi) # deposit second chunk + sub %rsi, %rdi # express RDI as distance from RSI + add $32, %rsi # advance RSI past first two chunks + sub $16, %rdx # enough left for another round? + jb 1f + + /* main loop unrolled twice */ + ALIGN_TEXT +0: pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 3f + + movdqu %xmm0, (%rsi, %rdi) + movdqa 16(%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + cmp $16, %rdx # more than a full chunk left? + jb 2f + + add $32, %rsi # advance pointers to next chunk + pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 4f + + movdqu %xmm0, -16(%rsi, %rdi) + movdqa (%rsi), %xmm0 # load next string chunk + movdqa %xmm4, %xmm1 + sub $32, %rdx + jae 0b + +1: sub $16, %rsi # undo second advancement + add $16, %edx + + /* 1--16 bytes left in the buffer but string has not ended yet */ +2: pcmpeqb %xmm1, %xmm0 # NUL byte encountered? + pmovmskb %xmm0, %r8d + mov %r8d, %ecx + bts %edx, %r8d # treat end of buffer as end of string + or $0x10000, %eax # ensure TZCNT finds a set bit + tzcnt %r8d, %r8d # find tail length + add %rsi, %rdi # restore RDI + movdqu 1(%rsi, %r8, 1), %xmm0 # load string tail + movdqu %xmm0, 1(%rdi, %r8, 1) # store string tail + lea 17(%rdi, %r8, 1), %rsi # return value if terminator encountered + xor %eax, %eax # return value if no terminator encountered + bt %r8d, %ecx # terminator encountered inside buffer? + cmovc %rsi, %rax # if yes, return pointer, else NULL + ret + +4: sub $16, %rsi # undo second advancement + add $16, %rdx # restore number of remaining bytes + + /* string has ended but buffer has not */ +3: tzcnt %eax, %eax # find length of string tail + movdqu -15(%rsi, %rax, 1), %xmm0 # load string tail (incl. NUL) + add %rsi, %rdi # restore destination pointer + movdqu %xmm0, -15(%rdi, %rax, 1) # store string tail (incl. NUL) + lea 1(%rdi, %rax, 1), %rax # compute return value + ret + +.Lhead_buf_end: + pmovmskb %xmm1, %r8d + add $32, %edx # restore edx to (len-1) + ecx + shl $16, %r8d # place 2nd chunk NUL mask into bits 16--31 + mov %r8d, %r10d + bts %rdx, %r8 # treat end of buffer as if terminator present + xor %eax, %eax # return value if terminator not found + tzcnt %r8, %rdx # find string/buffer len from alignment boundary + lea 1(%rdi, %rdx, 1), %r8 # return value if terminator found + rcx + sub %rcx, %r8 # subtract rcx + bt %rdx, %r10 # was the terminator present? + cmovc %r8, %rax # if yes, return pointer, else NULL + sub %ecx, %edx # find actual string/buffer len + jmp .L0132 + +.Lsecond_nul: + add %r8, %rdx # restore buffer length + tzcnt %eax, %r8d # where is the NUL byte? + lea -16(%rcx), %eax + sub %eax, %r8d # string length + lea 1(%rdi, %r8, 1), %rax # return value if NUL before end of buffer + xor %ecx, %ecx # return value if not + cmp %r8, %rdx # is the string shorter than the buffer? + cmova %r8, %rdx # copy only min(buflen, srclen) bytes + cmovb %rcx, %rax # return NUL if buffer ended before string +.L0132: cmp $16, %rdx # at least 17 bytes to copy (not incl NUL)? + jb .L0116 + + /* copy 17--32 bytes */ + movdqu (%r9), %xmm0 # load first 16 bytes + movdqu -15(%r9, %rdx, 1), %xmm1 # load last 16 bytes + movdqu %xmm0, (%rdi) + movdqu %xmm1, -15(%rdi, %rdx, 1) + ret + +.Lhead_nul: + tzcnt %eax, %r8d # where is the NUL byte? + sub %ecx, %r8d # ... from the beginning of the string? + lea 1(%rdi, %r8, 1), %rax # return value if NUL before end of buffer + xor %ecx, %ecx # return value if not + cmp %r8, %rdx # is the string shorter than the buffer? + cmova %r8, %rdx # copy only min(buflen, srclen) bytes + cmovb %rcx, %rax # return NUL if buffer ended before string + + /* process strings of 1--16 bytes (rdx: min(buflen, srclen), rax: srclen) */ +.L0116: cmp $8, %rdx # at least 9 bytes to copy? + jae .L0916 + + cmp $4, %rdx # at least 5 bytes to copy? + jae .L0508 + + cmp $2, %rdx # at least 3 bytes to copy? + jae .L0304 + + /* copy one or two bytes */ + movzbl (%r9), %ecx # load first byte from src + movzbl (%r9, %rdx, 1), %esi # load last byte from src + mov %cl, (%rdi) # deposit into destination + mov %sil, (%rdi, %rdx, 1) + ret + +.L0304: movzwl (%r9), %ecx + movzwl -1(%r9, %rdx, 1), %esi + mov %cx, (%rdi) + mov %si, -1(%rdi, %rdx, 1) + ret + +.L0508: mov (%r9), %ecx + mov -3(%r9, %rdx, 1), %esi + mov %ecx, (%rdi) + mov %esi, -3(%rdi, %rdx, 1) + ret + +.L0916: mov (%r9), %rcx + mov -7(%r9, %rdx, 1), %rsi + mov %rcx, (%rdi) + mov %rsi, -7(%rdi, %rdx, 1) + ret + + /* length zero destination: return null pointer */ +.L0: xor %eax, %eax + ret +ARCHEND(__memccpy, baseline) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S new file mode 100644 index 000000000000..4f6c5a238daa --- /dev/null +++ b/lib/libc/amd64/string/memrchr.S @@ -0,0 +1,166 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Robert Clausecker + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + +ARCHFUNCS(memrchr) + ARCHFUNC(memrchr, scalar) + ARCHFUNC(memrchr, baseline) +ENDARCHFUNCS(memrchr) + +ARCHENTRY(memrchr, scalar) + xor %eax, %eax # prospective return value + sub $4, %rdx # 4 bytes left to process? + jb 1f + + ALIGN_TEXT +0: xor %r8, %r8 + lea 2(%rdi), %r10 + cmp %sil, 2(%rdi) + cmovne %r8, %r10 # point to null if no match + + cmp %sil, (%rdi) + cmove %rdi, %r8 # point to first char if match + + lea 1(%rdi), %r9 + cmp %sil, 1(%rdi) + cmovne %r8, %r9 # point to first result if no match in second + + lea 3(%rdi), %r11 + cmp %sil, 3(%rdi) + cmovne %r10, %r11 + + test %r11, %r11 + cmovz %r9, %r11 # take first pair match if none in second + + test %r11, %r11 + cmovnz %r11, %rax # take match in current set if any + + add $4, %rdi + sub $4, %rdx + jae 0b + +1: cmp $-3, %edx # a least one character left to process? + jb 2f + + cmp %sil, (%rdi) + cmove %rdi, %rax + + lea 1(%rdi), %rcx + cmp $-2, %edx # at least two characters left to process? + jb 2f + + cmp %sil, 1(%rdi) + cmove %rcx, %rax + + lea 2(%rdi), %rcx + cmp $-1, %edx # at least three character left to process? + jb 2f + + cmp %sil, 2(%rdi) + cmove %rcx, %rax + +2: ret +ARCHEND(memrchr, scalar) + +ARCHENTRY(memrchr, baseline) + movd %esi, %xmm4 + test %rdx, %rdx # empty buffer? + jz .L0 # if yes, return immediately + + punpcklbw %xmm4, %xmm4 # c -> cc + mov %edi, %ecx + punpcklwd %xmm4, %xmm4 # cc -> cccc + and $~0xf, %rdi # align source pointer + pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc + and $0xf, %ecx + movdqa %xmm4, %xmm0 + mov $-1, %r8d + pcmpeqb (%rdi), %xmm0 # compare aligned head + shl %cl, %r8d # mask of bytes in the head of the buffer + pmovmskb %xmm0, %eax + + sub $16, %rcx + and %r8d, %eax # match mask + add %rcx, %rdx # advance past head + cmc + jbe .Lrunt # did the string end in the buffer? + + mov %rdi, %rsi # pointer to matching chunk + add $16, %rdi + sub $16, %rdx # enough left for another round? + jbe 1f + + /* main loop unrolled twice */ + ALIGN_TEXT +0: movdqa %xmm4, %xmm0 + pcmpeqb (%rdi), %xmm0 + pmovmskb %xmm0, %r8d + + cmp $16, %rdx # enough left for second chunk? + jbe 2f + + movdqa %xmm4, %xmm0 + pcmpeqb 16(%rdi), %xmm0 + pmovmskb %xmm0, %ecx + + lea 16(%rdi), %r9 + test %ecx, %ecx # match found in second chunk? + cmovz %r8d, %ecx # if not, use match data from first chunk + cmovz %rdi, %r9 + + test %ecx, %ecx # any match found? + cmovnz %ecx, %eax # if yes, overwrite previously found match + cmovnz %r9, %rsi + + add $32, %rdi # advance to next iteration + sub $32, %rdx # advance to next chunks + ja 0b + + /* process remaining 1--16 bytes */ +1: pcmpeqb (%rdi), %xmm4 + mov $0xffff, %r8d + xor %ecx, %ecx + sub %edx, %ecx # number of bytes to be masked out + pmovmskb %xmm4, %r9d + shr %cl, %r8d # mask of bytes to be kept in the buffer + and %r9d, %r8d + cmovnz %r8d, %eax + cmovnz %rdi, %rsi + bsr %eax, %eax + lea (%rsi, %rax, 1), %rsi # pointer to match (or junk) + cmovnz %rsi, %rax # if any match was found, return it + ret + + /* end of chunk reached within first half iteration */ +2: test %r8d, %r8d # match in previous chunk? + cmovnz %r8d, %eax # if yes, overwrite previous chunks + cmovnz %rdi, %rsi + add $16, %rdi # point to tail + sub $16, %edx + jmp 1b # handle tail the same otherwise + + /* runt: string ends within head, edx has negated amount of invalid head bytes */ +.Lrunt: mov $0xffff, %r8d + xor %ecx, %ecx + sub %edx, %ecx + shr %cl, %r8d + and %r8d, %eax + bsr %eax, %eax + lea (%rdi, %rax, 1), %rdi + cmovnz %rdi, %rax + ret + + /* empty buffer: return a null pointer */ +.L0: xor %eax, %eax + ret +ARCHEND(memrchr, baseline) + + .section .note.GNU-stack, "", %progbits diff --git a/lib/libc/amd64/string/stpncpy.S b/lib/libc/amd64/string/stpncpy.S new file mode 100644 index 000000000000..5ce0dd093a9e --- /dev/null +++ b/lib/libc/amd64/string/stpncpy.S @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + + .weak stpncpy + .set stpncpy, __stpncpy +ARCHFUNCS(__stpncpy) + ARCHFUNC(__stpncpy, scalar) + ARCHFUNC(__stpncpy, baseline) +ENDARCHFUNCS(__stpncpy) + +ARCHENTRY(__stpncpy, scalar) + push %rbp # establish stack frame + mov %rsp, %rbp + + push %rdx + push %rdi + push %rsi + push %rax # dummy push for alignment + + mov %rsi, %rdi + xor %esi, %esi + call CNAME(__memchr) # memchr(src, '\0', len) + pop %rcx # dummy pop + pop %rsi + mov -16(%rbp), %rdi + + test %rax, %rax # NUL found? + jz .Lfullcopy + + mov %rax, %rdx + sub %rsi, %rdx # copy until the NUL byte + add %rdx, -16(%rbp) # advance destination by string length + sub %rdx, -8(%rbp) # and shorten buffer size by string length + call CNAME(memcpy) + + pop %rdi + pop %rdx + xor %esi, %esi + pop %rbp + jmp CNAME(memset) # clear remaining buffer + +.Lfullcopy: + mov -8(%rbp), %rdx + call CNAME(memcpy) # copy whole string + add -8(%rbp), %rax # point to dest[n] + leave + ret +ARCHEND(__stpncpy, scalar) + + /* + * this mask allows us to generate masks of 16-n 0xff bytes + * followed by n 0x00 bytes by loading from .Lmask+n. + */ + .section .rodata +.Lmask: .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x0000000000000000 + .quad 0x0000000000000000 + +/* stpncpy(char *restrict rdi, const char *rsi, size_t rdx) */ +ARCHENTRY(__stpncpy, baseline) +#define bounce (-3*16-8) /* location of on-stack bounce buffer */ + + test %rdx, %rdx # no bytes to copy? + jz .L0 + + mov %esi, %ecx + and $~0xf, %rsi # align source to 16 bytes + movdqa (%rsi), %xmm0 # load head + and $0xf, %ecx # offset from alignment + mov $-1, %r9d + lea -32(%rcx), %rax # set up overflow-proof comparison rdx+rcx<=32 + shl %cl, %r9d # mask of bytes belonging to the string + sub %rcx, %rdi # adjust RDI to correspond to RSI + pxor %xmm1, %xmm1 + movdqa %xmm0, bounce(%rsp) # stash copy of head on the stack + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %r8d + + lea (%rdx, %rcx, 1), %r10 # buffer length from alignment boundary + add %rdx, %rax # less than 2 chunks (32 bytes) to play with? + jnc .Lrunt # if yes, use special runt processing + + movdqu %xmm1, -16(%rdi, %r10, 1) # clear final bytes of destination + and %r9d, %r8d # end of string within head? + jnz .Lheadnul + + movdqu (%rsi, %rcx, 1), %xmm2 # load head from source buffer + movdqu %xmm2, (%rdi, %rcx, 1) # an deposit + + add $16, %rsi + add $16, %rdi + sub $32, %r10 + + /* main loop unrolled twice */ + ALIGN_TEXT +0: movdqa (%rsi), %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %r8d + test %r8d, %r8d + jnz 3f + + movdqu %xmm0, (%rdi) + cmp $16, %r10 # more than a full chunk left? + jbe 1f + + movdqa 16(%rsi), %xmm0 + add $32, %rdi # advance pointers to next chunk + add $32, %rsi + pxor %xmm1, %xmm1 + pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %r8d + test %r8d, %r8d + jnz 2f + + movdqu %xmm0, -16(%rdi) + sub $32, %r10 # more than another full chunk left? + ja 0b + + sub $16, %rdi # undo second advancement + sub $16, %rsi + add $16, %r10d # restore number of remaining bytes + + /* 1--16 bytes left but string has not ended yet */ +1: pxor %xmm1, %xmm1 + pcmpeqb 16(%rsi), %xmm1 # NUL byte in source tail? + pmovmskb %xmm1, %r8d + bts %r10d, %r8d # treat end of buffer as NUL + tzcnt %r8d, %r8d # where is the NUL byte? + movdqu (%rsi, %r8, 1), %xmm0 # load source tail before NUL + lea 16(%rdi, %r8, 1), %rax # point return value to NUL byte + # or end of buffer + movdqu %xmm0, (%rdi, %r8, 1) # store tail into the buffer + ret + +2: sub $16, %rdi # undo second advancement + sub $16, %rsi + sub $16, %r10 + + /* string has ended and buffer has not */ +3: tzcnt %r8d, %r8d # where did the string end? + lea .Lmask+16(%rip), %rcx + lea (%rdi, %r8, 1), %rax # where the NUL byte will be + neg %r8 + movdqu (%rcx, %r8, 1), %xmm1 # mask with FF where the string is, + # 00 where it is not + pand %xmm1, %xmm0 # mask out bytes after the string + movdqu %xmm0, (%rdi) # store masked current chunk + pxor %xmm1, %xmm1 + sub $16, %r10 # another full chunk left? + jbe 1f + + /* clear remaining destination buffer (tail has been cleared earlier) */ + ALIGN_TEXT +0: movdqu %xmm1, 16(%rdi) + cmp $16, %r10 + jbe 1f + + movdqu %xmm1, 32(%rdi) + add $32, %rdi + sub $32, %r10 + ja 0b + +1: ret + + /* at least two chunks to play with and NUL while processing head */ +.Lheadnul: + movdqu bounce(%rsp, %rcx, 1), %xmm0 # load start of source from stack + tzcnt %r8d, %r8d # find location of NUL byte + movdqu %xmm0, (%rdi, %rcx, 1) # deposit head in the destination + movdqu %xmm1, (%rdi, %r8, 1) # clear out following bytes + movdqu %xmm1, 16(%rdi) # clear out second chunk + lea (%rdi, %r8, 1), %rax # make RAX point to the NUL byte + + add $32, %rdi # advance past first two chunks + sub $32+16, %r10 # advance past first three chunks + jbe 1f # did we pass the end of the buffer? + + /* clear remaining destination buffer (tail has been cleared earlier) */ + ALIGN_TEXT +0: movdqu %xmm1, (%rdi) # clear out buffer chunk + cmp $16, %r10 + jbe 1f + + movdqu %xmm1, 16(%rdi) + add $32, %rdi + sub $32, %r10 + ja 0b + +1: ret + + /* 1--32 bytes to copy, bounce through the stack */ +.Lrunt: movdqa %xmm1, bounce+16(%rsp) # clear out rest of on-stack copy + bts %r10d, %r8d # treat end of buffer as end of string + and %r9w, %r8w # end of string within first buffer? + jnz 0f # if yes, do not inspect second buffer + + movdqa 16(%rsi), %xmm0 # load second chunk of input + movdqa %xmm0, bounce+16(%rsp) # stash copy on stack + pcmpeqb %xmm1, %xmm0 # NUL in second chunk? + pmovmskb %xmm0, %r9d + shl $16, %r9d + or %r9d, %r8d # merge found NUL bytes into NUL mask + + /* end of string after one buffer */ +0: tzcnt %r8d, %r8d # location of last char in string + movdqu %xmm1, bounce(%rsp, %r8, 1) # clear bytes behind string + lea bounce(%rsp, %rcx, 1), %rsi # start of string copy on stack + lea (%rdi, %r8, 1), %rax # return pointer to NUL byte + + cmp $16, %edx # at least 16 bytes to transfer? + jae .L1631 + + mov (%rsi), %r8 # load string head + cmp $8, %edx # at least 8 bytes to transfer? + jae .L0815 + + cmp $4, %edx # at least 4 bytes to transfer? + jae .L0407 + + movzwl -2(%rsi, %rdx, 1), %esi # load last two bytes of string + mov %r8b, (%rdi, %rcx, 1) # store first byte + + cmp $2, %edx # at least 2 bytes to transfer? + jb .L1 + + mov %si, -2(%rdi, %r10, 1) # store last two bytes of string +.L1: ret + +.L1631: movdqu (%rsi), %xmm0 # load first 16 bytes of string + movdqu -16(%rsi, %rdx, 1), %xmm1 # load last 16 bytes of string + movdqu %xmm0, (%rdi, %rcx, 1) + movdqu %xmm1, -16(%rdi, %r10, 1) + ret + +.L0815: mov -8(%rsi, %rdx, 1), %rdx # load last 8 bytes of string + mov %r8, (%rdi, %rcx, 1) + mov %rdx, -8(%rdi, %r10, 1) + ret + +.L0407: mov -4(%rsi, %rdx, 1), %edx # load last four bytes of string + mov %r8d, (%rdi, %rcx, 1) + mov %edx, -4(%rdi, %r10, 1) + ret + + /* length 0 buffer: just return dest */ +.L0: mov %rdi, %rax + ret +ARCHEND(__stpncpy, baseline) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S index 0834408acfb7..081e98840cee 100644 --- a/lib/libc/amd64/string/strcat.S +++ b/lib/libc/amd64/string/strcat.S @@ -1,6 +1,14 @@ -/* - * Written by J.T. Conklin - * Public domain. +/*- + * Copyright (c) 2023, The FreeBSD Foundation + * + * SPDX-License-Expression: BSD-2-Clause + * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S + * written by J.T. Conklin + * that was originally dedicated to the public domain */ #include @@ -8,7 +16,14 @@ RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $") #endif -ENTRY(strcat) +#include "amd64_archlevel.h" + +ARCHFUNCS(strcat) + ARCHFUNC(strcat, scalar) + ARCHFUNC(strcat, baseline) +ENDARCHFUNCS(strcat) + +ARCHENTRY(strcat, scalar) movq %rdi,%rax movabsq $0x0101010101010101,%r8 movabsq $0x8080808080808080,%r9 @@ -161,6 +176,28 @@ ENTRY(strcat) .Ldone: ret -END(strcat) +ARCHEND(strcat, scalar) + +/* + * Call into strlen + strcpy if we have any SIMD at all. + * The scalar implementation above is better for the scalar + * case as it avoids the function call overhead, but pessimal + * if we could call SIMD routines instead. + */ +ARCHENTRY(strcat, baseline) + push %rbp + mov %rsp, %rbp + push %rsi + push %rbx + mov %rdi, %rbx # remember destination for later + call CNAME(strlen) # strlen(dest) + mov -8(%rbp), %rsi + lea (%rbx, %rax, 1), %rdi # dest + strlen(dest) + call CNAME(__stpcpy) # stpcpy(dest + strlen(dest), src) + mov %rbx, %rax # return dest + pop %rbx + leave + ret +ARCHEND(strcat, baseline) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strcmp.S b/lib/libc/amd64/string/strcmp.S index 437db7eca43a..eb354bd2af82 100644 --- a/lib/libc/amd64/string/strcmp.S +++ b/lib/libc/amd64/string/strcmp.S @@ -1,14 +1,33 @@ -/* - * Written by J.T. Conklin - * Public domain. +/*- + * Copyright (c) 2023, The FreeBSD Foundation + * + * SPDX-License-Expression: BSD-2-Clause + * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcmp.S + * written by J.T. Conklin that was originally + * dedicated to the public domain. */ #include +#include + #if 0 RCSID("$NetBSD: strcmp.S,v 1.3 2004/07/19 20:04:41 drochner Exp $") #endif -ENTRY(strcmp) +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + +ARCHFUNCS(strcmp) + ARCHFUNC(strcmp, scalar) + ARCHFUNC(strcmp, baseline) +ENDARCHFUNCS(strcmp) + +ARCHENTRY(strcmp, scalar) /* * Align s1 to word boundary. * Consider unrolling loop? @@ -39,7 +58,7 @@ ENTRY(strcmp) movabsq $0x8080808080808080,%r9 subq $8,%rsi - .align 4 + ALIGN_TEXT .Lword_loop: movq 8(%rdi),%rax addq $8,%rdi @@ -53,7 +72,7 @@ ENTRY(strcmp) testq %r9,%rdx je .Lword_loop - .align 4 + ALIGN_TEXT .Lbyte_loop: movb (%rdi),%al incq %rdi @@ -69,6 +88,272 @@ ENTRY(strcmp) movzbq %dl,%rdx subq %rdx,%rax ret -END(strcmp) +ARCHEND(strcmp, scalar) + +ARCHENTRY(strcmp, baseline) + /* check if either string crosses a page in the head */ + lea 15(%rdi), %r8d # end of head + lea 15(%rsi), %r9d + mov %edi, %eax + mov %esi, %edx + xor %edi, %r8d # bits that changed between first and last byte + xor %esi, %r9d + and $~0xf, %rdi # align heads to 16 bytes + and $~0xf, %rsi + or %r8d, %r9d # in either RSI or RDI + and $0xf, %eax # offset from alignment + and $0xf, %edx + pxor %xmm1, %xmm1 + test $PAGE_SIZE, %r9d # did the page change? + jz 0f # if not, take fast path + + /* heads may cross page boundary, avoid unmapped loads */ + movdqa (%rdi), %xmm0 # load aligned heads + movdqa (%rsi), %xmm2 + mov $-1, %r8d + mov $-1, %r9d + mov %eax, %ecx + shl %cl, %r8d # string head in XMM0 + mov %edx, %ecx + shl %cl, %r9d # string head in XMM2 + movdqa %xmm0, -40(%rsp) # stash copies of the heads on the stack + movdqa %xmm2, -24(%rsp) + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm1, %xmm2 + pmovmskb %xmm0, %r10d + pmovmskb %xmm2, %r11d + test %r8d, %r10d # NUL byte present in first string? + lea -40(%rsp), %r8 + cmovz %rdi, %r8 + test %r9d, %r11d # NUL byte present in second string? + lea -24(%rsp), %r9 + cmovz %rsi, %r9 + movdqu (%r8, %rax, 1), %xmm0 # load true (or fake) heads + movdqu (%r9, %rdx, 1), %xmm4 + jmp 1f + +0: movdqu (%rdi, %rax, 1), %xmm0 # load true heads + movdqu (%rsi, %rdx, 1), %xmm4 +1: pxor %xmm2, %xmm2 + pcmpeqb %xmm0, %xmm2 # NUL byte present? + pcmpeqb %xmm0, %xmm4 # which bytes match? + pandn %xmm4, %xmm2 # match and not NUL byte? + pmovmskb %xmm2, %r9d + xor $0xffff, %r9d # mismatch or NUL byte? + jnz .Lhead_mismatch + + /* load head and second chunk */ + movdqa 16(%rdi), %xmm2 # load second chunks + movdqa 16(%rsi), %xmm3 + sub %rdx, %rax # is a&0xf >= b&0xf? + jb .Lswapped # if not, proceed with swapped operands + + neg %rax + movdqu 16(%rsi, %rax, 1), %xmm0 + sub %rdi, %rsi # express RSI as distance from RDI + lea (%rsi, %rax, 1), %rdx # point RDX to offset in second string + neg %rax + pcmpeqb %xmm3, %xmm1 # ... corresponding to RDI + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $16, %rdi + test %r8d, %r8d + jnz .Lnul_found + xor $0xffff, %r9d + jnz .Lmismatch + add $16, %rdi # advance aligned pointers + + /* + * During the main loop, the layout of the two strings is something like: + * + * v ------1------ v ------2------ v + * RDI: AAAAAAAAAAAAABBBBBBBBBBBBBBBB... + * RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC... + * + * where v indicates the alignment boundaries and corresponding chunks + * of the strings have the same letters. Chunk A has been checked in + * the previous iteration. This iteration, we first check that string + * RSI doesn't end within region 2, then we compare chunk B between the + * two strings. As RSI is known not to hold a NUL byte in regsions 1 + * and 2 at this point, this also ensures that RDI has not ended yet. + */ + ALIGN_TEXT +0: movdqu (%rdi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI? + pxor %xmm1, %xmm1 + pcmpeqb (%rdi, %rsi, 1), %xmm1 # end of string in RSI? + pcmpeqb (%rdi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + test %r8d, %r8d + jnz .Lnul_found + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatch + + /* main loop unrolled twice */ + movdqu 16(%rdi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI? + pxor %xmm1, %xmm1 + pcmpeqb 16(%rdi, %rsi, 1), %xmm1 # end of string in RSI? + pcmpeqb 16(%rdi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $32, %rdi + test %r8d, %r8d + jnz .Lnul_found2 + xor $0xffff, %r9d # any mismatches? + jz 0b + + sub $16, %rdi # roll back second increment + + /* a mismatch has been found between RDX and RSI */ +.Lmismatch: + tzcnt %r9d, %r9d # where is the mismatch? + add %rdi, %rdx # turn RDX from offset to pointer + movzbl (%rdx, %r9, 1), %ecx + movzbl (%rdi, %r9, 1), %eax + sub %ecx, %eax # difference of the mismatching chars + ret + + /* mismatch in true heads */ +.Lhead_mismatch: + tzcnt %r9d, %r9d # where is the mismatch? + add %rax, %rdi # return to true heads + add %rdx, %rsi + movzbl (%rdi, %r9, 1), %eax # mismatching characters + movzbl (%rsi, %r9, 1), %ecx + sub %ecx, %eax + ret + +.Lnul_found2: + sub $16, %rdi # roll back second increment + + /* a NUL has been found in RSI */ +.Lnul_found: + mov %eax, %ecx + mov %r8d, %r10d + shl %cl, %r8w # adjust NUL mask to positions in RDI/RDX + xor $0xffff, %r9d # mask of mismatches + or %r8d, %r9d # NUL bytes also count as mismatches + jnz .Lmismatch + + /* + * (RDI) == (RSI) and NUL is past the string. + * Compare (RSI) with the corresponding part + * of the other string until the NUL byte. + */ + movdqu (%rdi, %rax, 1), %xmm0 + pcmpeqb (%rdi, %rsi, 1), %xmm0 + add %rdi, %rsi # restore RSI pointer + add %rax, %rdi # point RDI to chunk corresponding to (RSI) + pmovmskb %xmm0, %ecx # mask of matches + not %ecx # mask of mismatches + or %r10d, %ecx # mask of mismatches or NUL bytes + tzcnt %ecx, %ecx # location of first mismatch + movzbl (%rdi, %rcx, 1), %eax + movzbl (%rsi, %rcx, 1), %ecx + sub %ecx, %eax + ret + + /* + * If (a&0xf) < (b&0xf), we do the same thing but with swapped + * operands. I found that this performs slightly better than + * using conditional moves to do the swap branchless. + */ +.Lswapped: + movdqu 16(%rdi, %rax, 1), %xmm0 + sub %rsi, %rdi # express RDI as distance from RSI + lea (%rdi, %rax, 1), %rdx # point RDX to offset in RDI corresponding to RSI + neg %rax # make difference positive + pcmpeqb %xmm2, %xmm1 + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $16, %rsi # advance aligned pointers + test %r8d, %r8d + jnz .Lnul_founds + xor $0xffff, %r9d + jnz .Lmismatchs + add $16, %rsi + + /* + * During the main loop, the layout of the two strings is something like: + * + * v ------1------ v ------2------ v + * RDI: AAAAAAAAAAAAABBBBBBBBBBBBBBBB... + * RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC... + * + * where v indicates the alignment boundaries and corresponding chunks + * of the strings have the same letters. Chunk A has been checked in + * the previous iteration. This iteration, we first check that string + * RSI doesn't end within region 2, then we compare chunk B between the + * two strings. As RSI is known not to hold a NUL byte in regsions 1 + * and 2 at this point, this also ensures that RDI has not ended yet. + */ + ALIGN_TEXT +0: movdqu (%rsi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI? + pxor %xmm1, %xmm1 + pcmpeqb (%rsi, %rdi, 1), %xmm1 # end of string in RSI? + pcmpeqb (%rsi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + test %r8d, %r8d + jnz .Lnul_founds + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatchs + + /* main loop unrolled twice */ + movdqu 16(%rsi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI? + pxor %xmm1, %xmm1 + pcmpeqb 16(%rsi, %rdi, 1), %xmm1 # end of string in RSI? + pcmpeqb 16(%rsi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $32, %rsi + test %r8d, %r8d + jnz .Lnul_found2s + xor $0xffff, %r9d # any mismatches? + jz 0b + + sub $16, %rsi # roll back second increment + + /* a mismatch has been found between RDX and RDI */ +.Lmismatchs: + tzcnt %r9d, %r9d # where is the mismatch? + add %rsi, %rdx # turn RDX from offset to pointer + movzbl (%rdx, %r9, 1), %eax + movzbl (%rsi, %r9, 1), %ecx + sub %ecx, %eax # difference of the mismatching chars + ret + +.Lnul_found2s: + sub $16, %rsi # roll back second increment + + /* a NUL has been found in RSI */ +.Lnul_founds: + mov %eax, %ecx + mov %r8d, %r10d + shl %cl, %r8w # adjust NUL mask to positions in RDI/RDX + xor $0xffff, %r9d # mask of mismatches + or %r8d, %r9d # NUL bytes also count as mismatches + jnz .Lmismatchs + + /* + * (RDI) == (RSI) and NUL is past the string. + * Compare (RSI) with the corresponding part + * of the other string until the NUL byte. + */ + movdqu (%rsi, %rax, 1), %xmm0 + pcmpeqb (%rsi, %rdi, 1), %xmm0 + add %rsi, %rdi # restore RDI pointer + add %rax, %rsi # point RSI to chunk corresponding to (RDI) + pmovmskb %xmm0, %ecx # mask of matches + not %ecx # mask of mismatches + or %r10d, %ecx # mask of mismatches or NUL bytes + tzcnt %ecx, %ecx # location of first mismatch + movzbl (%rdi, %rcx, 1), %eax + movzbl (%rsi, %rcx, 1), %ecx + sub %ecx, %eax + ret +ARCHEND(strcmp, baseline) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S index 53100eeea9a5..eab669edce72 100644 --- a/lib/libc/amd64/string/strcspn.S +++ b/lib/libc/amd64/string/strcspn.S @@ -33,13 +33,15 @@ #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ -ARCHFUNCS(strcspn) - ARCHFUNC(strcspn, scalar) + .weak strcspn + .set strcspn, __strcspn +ARCHFUNCS(__strcspn) + ARCHFUNC(__strcspn, scalar) NOARCHFUNC - ARCHFUNC(strcspn, x86_64_v2) -ENDARCHFUNCS(strcspn) + ARCHFUNC(__strcspn, x86_64_v2) +ENDARCHFUNCS(__strcspn) -ARCHENTRY(strcspn, scalar) +ARCHENTRY(__strcspn, scalar) push %rbp # align stack to enable function call mov %rsp, %rbp sub $256, %rsp # allocate space for lookup table @@ -122,7 +124,7 @@ ARCHENTRY(strcspn, scalar) sub (%rsp), %rax # length of prefix before match leave ret -ARCHEND(strcspn, scalar) +ARCHEND(__strcspn, scalar) /* * This kernel uses pcmpistri to do the heavy lifting. @@ -134,7 +136,7 @@ ARCHEND(strcspn, scalar) * 17--32: two pcmpistri per 16 bytes of input * >=33: fall back to look up table */ -ARCHENTRY(strcspn, x86_64_v2) +ARCHENTRY(__strcspn, x86_64_v2) push %rbp mov %rsp, %rbp sub $256, %rsp @@ -368,6 +370,6 @@ ARCHENTRY(strcspn, x86_64_v2) 2: sub %rdi, %rax # number of characters preceding match leave ret -ARCHEND(strcspn, x86_64_v2) +ARCHEND(__strcspn, x86_64_v2) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strlcat.c b/lib/libc/amd64/string/strlcat.c new file mode 100644 index 000000000000..0c1e1c5d05f7 --- /dev/null +++ b/lib/libc/amd64/string/strlcat.c @@ -0,0 +1,25 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Robert Clausecker + */ + +#include + +#include + +void *__memchr(const void *, int, size_t); +size_t __strlcpy(char *restrict, const char *restrict, size_t); + +size_t +strlcat(char *restrict dst, const char *restrict src, size_t dstsize) +{ + char *loc = __memchr(dst, '\0', dstsize); + + if (loc != NULL) { + size_t dstlen = (size_t)(loc - dst); + + return (dstlen + __strlcpy(loc, src, dstsize - dstlen)); + } else + return (dstsize + strlen(src)); +} diff --git a/lib/libc/amd64/string/strlcpy.S b/lib/libc/amd64/string/strlcpy.S new file mode 100644 index 000000000000..2b32c6c78047 --- /dev/null +++ b/lib/libc/amd64/string/strlcpy.S @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + + .weak strlcpy + .set strlcpy, __strlcpy +ARCHFUNCS(__strlcpy) + ARCHFUNC(__strlcpy, scalar) + ARCHFUNC(__strlcpy, baseline) +ENDARCHFUNCS(__strlcpy) + +ARCHENTRY(__strlcpy, scalar) + push %rbp # establish stack frame + mov %rsp, %rbp + push %rsi + push %rbx + push %rdi + push %rdx + mov %rsi, %rdi + call CNAME(strlen) # strlen(src) + pop %rdx + pop %rdi + mov -8(%rbp), %rsi + mov %rax, %rbx # remember string length for return value + sub $1, %rdx # do not copy into the final byte of the buffer + jc 0f # skip copying altogether if buffer was empty + cmp %rax, %rdx # is the buffer longer than the input? + cmova %rax, %rdx # if yes, only copy the part that fits + movb $0, (%rdi, %rdx, 1) # NUL-terminate output buffer + call CNAME(memcpy) # copy string to output +0: mov %rbx, %rax # restore return value + pop %rbx + leave + ret +ARCHEND(__strlcpy, scalar) + +ARCHENTRY(__strlcpy, baseline) + sub $1, %rdx # do not count NUL byte in buffer length + jb .L0 # go to special code path if len was 0 + + mov %esi, %ecx + pxor %xmm1, %xmm1 + mov %rsi, %r9 # stash a copy of the source pointer for later + and $~0xf, %rsi + pcmpeqb (%rsi), %xmm1 # NUL found in head? + mov $-1, %r8d + and $0xf, %ecx + shl %cl, %r8d # mask of bytes in the string + pmovmskb %xmm1, %eax + and %r8d, %eax + jnz .Lhead_nul + + movdqa 16(%rsi), %xmm3 # load second string chunk + movdqu (%r9), %xmm2 # load unaligned string head + mov $32, %r8d + sub %ecx, %r8d # head length + length of second chunk + pxor %xmm1, %xmm1 + pcmpeqb %xmm3, %xmm1 # NUL found in second chunk? + + sub %r8, %rdx # enough space left for the second chunk? + jbe .Lhead_buf_end + + /* process second chunk */ + pmovmskb %xmm1, %eax + test %eax, %eax + jnz .Lsecond_nul + + /* string didn't end in second chunk and neither did buffer -- not a runt! */ + movdqa 32(%rsi), %xmm0 # load next string chunk + pxor %xmm1, %xmm1 + movdqu %xmm2, (%rdi) # deposit head into buffer + sub %rcx, %rdi # adjust RDI to correspond to RSI + movdqu %xmm3, 16(%rdi) # deposit second chunk + sub %rsi, %rdi # express RDI as distance from RSI + add $32, %rsi # advance RSI past first two chunks + sub $16, %rdx # enough left for another round? + jbe 1f + + /* main loop unrolled twice */ + ALIGN_TEXT +0: pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 3f + + movdqu %xmm0, (%rsi, %rdi) + movdqa 16(%rsi), %xmm0 # load next string chunk + pxor %xmm1, %xmm1 + cmp $16, %rdx # more than a full chunk left? + jbe 2f + + add $32, %rsi # advance pointers to next chunk + pcmpeqb %xmm0, %xmm1 # NUL byte encountered? + pmovmskb %xmm1, %eax + test %eax, %eax + jnz 4f + + movdqu %xmm0, -16(%rsi, %rdi) + movdqa (%rsi), %xmm0 # load next string chunk + pxor %xmm1, %xmm1 + sub $32, %rdx + ja 0b + +1: sub $16, %rsi # undo second advancement + add $16, %edx + + /* 1--16 bytes left in the buffer but string has not ended yet */ +2: pcmpeqb %xmm1, %xmm0 # NUL byte encountered? + pmovmskb %xmm0, %r8d + mov %r8d, %eax + bts %edx, %r8d # treat end of buffer as end of string + tzcnt %r8d, %r8d # find tail length + add %rsi, %rdi # restore RDI + movdqu (%rsi, %r8, 1), %xmm0 # load string tail + movdqu %xmm0, (%rdi, %r8, 1) # store string tail + movb $0, 16(%rdi, %r8, 1) # NUL terminate + + /* continue to find the end of the string */ + test %eax, %eax # end of string already reached? + jnz 1f + + ALIGN_TEXT +0: pcmpeqb 32(%rsi), %xmm1 + pmovmskb %xmm1, %eax + pxor %xmm1, %xmm1 + test %eax, %eax + jnz 2f + + pcmpeqb 48(%rsi), %xmm1 + pmovmskb %xmm1, %eax + add $32, %rsi + pxor %xmm1, %xmm1 + test %eax, %eax + jz 0b + +1: sub $16, %rsi # undo second advancement +2: tzcnt %eax, %eax # where is the NUL byte? + sub %r9, %rsi + lea 32(%rsi, %rax, 1), %rax # return string length + ret + +4: sub $16, %rsi # undo second advancement + add $16, %rdx # restore number of remaining bytes + + /* string has ended but buffer has not */ +3: tzcnt %eax, %eax # find length of string tail + movdqu -15(%rsi, %rax, 1), %xmm0 # load string tail (incl. NUL) + add %rsi, %rdi # restore destination pointer + movdqu %xmm0, -15(%rdi, %rax, 1) # store string tail (incl. NUL) + sub %r9, %rsi # string length to current chunk + add %rsi, %rax # plus length of current chunk + ret + +.Lhead_buf_end: + pmovmskb %xmm1, %r8d + add $32, %edx # restore edx to (len-1) + ecx + mov %r8d, %eax + shl $16, %r8d # place 2nd chunk NUL mask into bits 16--31 + bts %rdx, %r8 # treat end of buffer as end of string + tzcnt %r8, %rdx # find string/bufer len from alignment boundary + sub %ecx, %edx # find actual string/buffer len + movb $0, (%rdi, %rdx, 1) # write NUL terminator + + /* continue to find the end of the string */ + test %eax, %eax # end of string already reached? + jnz 1f + + ALIGN_TEXT +0: pcmpeqb 32(%rsi), %xmm1 + pmovmskb %xmm1, %eax + pxor %xmm1, %xmm1 + test %eax, %eax + jnz 2f + + pcmpeqb 48(%rsi), %xmm1 + pmovmskb %xmm1, %eax + add $32, %rsi + pxor %xmm1, %xmm1 + test %eax, %eax + jz 0b + +1: sub $16, %rsi +2: tzcnt %eax, %eax + sub %r9, %rsi + lea 32(%rsi, %rax, 1), %rax # return string length + jmp .L0031 + +.Lsecond_nul: + add %r8, %rdx # restore buffer length + tzcnt %eax, %eax # where is the NUL byte? + lea -16(%rcx), %r8d + sub %r8d, %eax # string length + cmp %rax, %rdx # is the string shorter than the buffer? + cmova %rax, %rdx # copy only min(buflen, srclen) bytes + movb $0, (%rdi, %rdx, 1) # write NUL terminator +.L0031: cmp $16, %rdx # at least 16 bytes to copy (not incl NUL)? + jb .L0015 + + /* copy 16--31 bytes */ + movdqu (%r9), %xmm0 # load first 16 bytes + movdqu -16(%r9, %rdx, 1), %xmm1 # load last 16 bytes + movdqu %xmm0, (%rdi) + movdqu %xmm1, -16(%rdi, %rdx, 1) + ret + +.Lhead_nul: + tzcnt %eax, %eax # where is the NUL byte? + sub %ecx, %eax # ... from the beginning of the string? + cmp %rax, %rdx # is the string shorter than the buffer? + cmova %rax, %rdx # copy only min(buflen, srclen) bytes + movb $0, (%rdi, %rdx, 1) # write NUL terminator + + /* process strings of 0--15 bytes (rdx: min(buflen, srclen), rax: srclen) */ +.L0015: cmp $8, %rdx # at least 8 bytes to copy? + jae .L0815 + + cmp $4, %rdx # at least 4 bytes to copy? + jae .L0407 + + cmp $2, %rdx # at least 2 bytes to copy? + jae .L0203 + + movzbl (%r9), %ecx # load first byte from src + mov %cl, (%rdi) # deposit into destination + movb $0, (%rdi, %rdx, 1) # add NUL terminator (again) + ret + +.L0203: movzwl (%r9), %ecx + movzwl -2(%r9, %rdx, 1), %esi + mov %cx, (%rdi) + mov %si, -2(%rdi, %rdx, 1) + ret + +.L0407: mov (%r9), %ecx + mov -4(%r9, %rdx, 1), %esi + mov %ecx, (%rdi) + mov %esi, -4(%rdi, %rdx, 1) + ret + +.L0815: mov (%r9), %rcx + mov -8(%r9, %rdx, 1), %rsi + mov %rcx, (%rdi) + mov %rsi, -8(%rdi, %rdx, 1) + ret + + /* length zero destination: just return the string length */ +.L0: mov %rsi, %rdi + jmp CNAME(strlen) +ARCHEND(__strlcpy, baseline) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strncat.c b/lib/libc/amd64/string/strncat.c new file mode 100644 index 000000000000..33b278ac5e04 --- /dev/null +++ b/lib/libc/amd64/string/strncat.c @@ -0,0 +1,29 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Robert Clausecker + */ + +#include + +#include + +void *__memccpy(void *restrict, const void *restrict, int, size_t); + +char * +strncat(char *dest, const char *src, size_t n) +{ + size_t len; + char *endptr; + + len = strlen(dest); + endptr = __memccpy(dest + len, src, '\0', n); + + /* avoid an extra branch */ + if (endptr == NULL) + endptr = dest + len + n + 1; + + endptr[-1] = '\0'; + + return (dest); +} diff --git a/lib/libc/amd64/string/strncmp.S b/lib/libc/amd64/string/strncmp.S new file mode 100644 index 000000000000..932cf078bdfc --- /dev/null +++ b/lib/libc/amd64/string/strncmp.S @@ -0,0 +1,488 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4, 0x90 + +ARCHFUNCS(strncmp) + ARCHFUNC(strncmp, scalar) + ARCHFUNC(strncmp, baseline) +ENDARCHFUNCS(strncmp) + +/* + * This is just the scalar loop unrolled a bunch of times. + */ +ARCHENTRY(strncmp, scalar) + xor %eax, %eax + sub $4, %rdx # 4 chars left to compare? + jbe 1f + + ALIGN_TEXT +0: movzbl (%rdi), %ecx + test %ecx, %ecx # NUL char in first string? + jz .L0 + cmpb (%rsi), %cl # mismatch between strings? + jnz .L0 + + movzbl 1(%rdi), %ecx + test %ecx, %ecx + jz .L1 + cmpb 1(%rsi), %cl + jnz .L1 + + movzbl 2(%rdi), %ecx + test %ecx, %ecx + jz .L2 + cmpb 2(%rsi), %cl + jnz .L2 + + movzbl 3(%rdi), %ecx + test %ecx, %ecx + jz .L3 + cmpb 3(%rsi), %cl + jnz .L3 + + add $4, %rdi # advance to next iteration + add $4, %rsi + sub $4, %rdx + ja 0b + + /* end of string within the next 4 characters */ +1: cmp $-4, %edx # end of string reached immediately? + jz .Leq + movzbl (%rdi), %ecx + test %ecx, %ecx + jz .L0 + cmpb (%rsi), %cl + jnz .L0 + + cmp $-3, %edx # end of string reached after 1 char? + jz .Leq + movzbl 1(%rdi), %ecx + test %ecx, %ecx + jz .L1 + cmpb 1(%rsi), %cl + jnz .L1 + + cmp $-2, %edx + jz .Leq + movzbl 2(%rdi), %ecx + test %ecx, %ecx + jz .L2 + cmpb 2(%rsi), %cl + jnz .L2 + + cmp $-1, %edx # either end of string after 3 chars, + jz .Leq # or it boils down to the last char + +.L3: inc %eax +.L2: inc %eax +.L1: inc %eax +.L0: movzbl (%rsi, %rax, 1), %ecx + movzbl (%rdi, %rax, 1), %eax + sub %ecx, %eax +.Leq: ret +ARCHEND(strncmp, scalar) + +ARCHENTRY(strncmp, baseline) + push %rbx + sub $1, %rdx # RDX--, so RDX points to the last byte to compare + jb .Lempty # where there any bytes to compare at all? + + lea 15(%rdi), %r8d # end of head + lea 15(%rsi), %r9d + mov %edi, %eax + mov %esi, %ebx + xor %edi, %r8d # bits that changed between first and last byte + xor %esi, %r9d + and $~0xf, %rdi # align heads to 16 bytes + and $~0xf, %rsi + or %r8d, %r9d + and $0xf, %eax # offset from alignment + and $0xf, %ebx + movdqa (%rdi), %xmm0 # load aligned heads + movdqa (%rsi), %xmm2 + pxor %xmm1, %xmm1 + cmp $16, %rdx # end of buffer within the first 32 bytes? + jb .Llt16 + + test $PAGE_SIZE, %r9d # did the page change? + jz 0f # if not, take fast path + + + /* heads may cross page boundary, avoid unmapped loads */ + movdqa %xmm0, -32(%rsp) # stash copies of the heads on the stack + movdqa %xmm2, -16(%rsp) + mov $-1, %r8d + mov $-1, %r9d + mov %eax, %ecx + shl %cl, %r8d # string head in XMM0 + mov %ebx, %ecx + shl %cl, %r9d # string head in XMM2 + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm1, %xmm2 + pmovmskb %xmm0, %r10d + pmovmskb %xmm2, %r11d + test %r8d, %r10d # NUL byte present in first string? + lea -32(%rsp), %r8 + cmovz %rdi, %r8 + test %r9d, %r11d # NUL byte present in second string? + lea -16(%rsp), %r9 + cmovz %rsi, %r9 + movdqu (%r8, %rax, 1), %xmm0 # load true (or fake) heads + movdqu (%r9, %rbx, 1), %xmm4 + jmp 1f + + /* rdx == 0 */ +.Lempty: + xor %eax, %eax # zero-length buffers compare equal + pop %rbx + ret + +0: movdqu (%rdi, %rax, 1), %xmm0 # load true heads + movdqu (%rsi, %rbx, 1), %xmm4 +1: pxor %xmm2, %xmm2 + pcmpeqb %xmm0, %xmm2 # NUL byte present? + pcmpeqb %xmm0, %xmm4 # which bytes match? + pandn %xmm4, %xmm2 # match and not NUL byte? + pmovmskb %xmm2, %r9d + xor $0xffff, %r9d # mismatch or NUL byte? + jnz .Lhead_mismatch + + /* load head and second chunk */ + movdqa 16(%rdi), %xmm2 # load second chunks + movdqa 16(%rsi), %xmm3 + lea -16(%rdx, %rbx, 1), %rdx # account for length of RSI chunk + sub %rbx, %rax # is a&0xf >= b&0xf? + jb .Lswapped # if not, proceed with swapped operands + jmp .Lnormal + + /* buffer ends within the first 16 bytes */ +.Llt16: test $PAGE_SIZE, %r9d # did the page change? + jz 0f # if not, take fast path + + /* heads may cross page boundary */ + movdqa %xmm0, -32(%rsp) # stash copies of the heads on the stack + movdqa %xmm2, -16(%rsp) + mov $-1, %r8d + mov $-1, %r9d + mov %eax, %ecx + shl %cl, %r8d # string head in XMM0 + mov %ebx, %ecx + shl %cl, %r9d # string head in XMM2 + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm1, %xmm2 + pmovmskb %xmm0, %r10d + pmovmskb %xmm2, %r11d + lea (%rdx, %rax, 1), %ecx # location of last buffer byte in xmm0 + bts %ecx, %r10d # treat as if NUL byte present + lea (%rdx, %rbx, 1), %ecx + bts %ecx, %r11d + test %r8w, %r10w # NUL byte present in first string head? + lea -32(%rsp), %r8 + cmovz %rdi, %r8 + test %r9w, %r11w # NUL byte present in second string head? + lea -16(%rsp), %r9 + cmovz %rsi, %r9 + movdqu (%r8, %rax, 1), %xmm0 # load true (or fake) heads + movdqu (%r9, %rbx, 1), %xmm4 + jmp 1f + +0: movdqu (%rdi, %rax, 1), %xmm0 # load true heads + movdqu (%rsi, %rbx, 1), %xmm4 +1: pxor %xmm2, %xmm2 + pcmpeqb %xmm0, %xmm2 # NUL byte present? + pcmpeqb %xmm0, %xmm4 # which bytes match? + pandn %xmm4, %xmm2 # match and not NUL byte? + pmovmskb %xmm2, %r9d + btr %edx, %r9d # induce mismatch in last byte of buffer + not %r9d # mismatch or NUL byte? + + /* mismatch in true heads */ + ALIGN_TEXT +.Lhead_mismatch: + tzcnt %r9d, %r9d # where is the mismatch? + add %rax, %rdi # return to true heads + add %rbx, %rsi + movzbl (%rdi, %r9, 1), %eax # mismatching characters + movzbl (%rsi, %r9, 1), %ecx + sub %ecx, %eax + pop %rbx + ret + + /* rax >= 0 */ + ALIGN_TEXT +.Lnormal: + neg %rax + movdqu 16(%rsi, %rax, 1), %xmm0 + sub %rdi, %rsi # express RSI as distance from RDI + lea (%rsi, %rax, 1), %rbx # point RBX to offset in second string + neg %rax # ... corresponding to RDI + pcmpeqb %xmm3, %xmm1 # NUL present? + pcmpeqb %xmm2, %xmm0 # Mismatch between chunks? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + mov $16, %ecx + cmp %rcx, %rdx # does the buffer end within (RDI,RSI,1)? + cmovb %edx, %ecx # ECX = min(16, RDX) + add $32, %rdi # advance to next iteration + bts %ecx, %r8d # mark end-of-buffer as if there was a NUL byte + test %r8w, %r8w # NUL or end of buffer found? + jnz .Lnul_found2 + xor $0xffff, %r9d + jnz .Lmismatch2 + sub $48, %rdx # end of buffer within first main loop iteration? + jb .Ltail # if yes, process tail + + /* + * During the main loop, the layout of the two strings is something like: + * + * v ------1------ v ------2------ v + * RDI: AAAAAAAAAAAAABBBBBBBBBBBBBBBB... + * RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC... + * + * where v indicates the alignment boundaries and corresponding chunks + * of the strings have the same letters. Chunk A has been checked in + * the previous iteration. This iteration, we first check that string + * RSI doesn't end within region 2, then we compare chunk B between the + * two strings. As RSI is known not to hold a NUL byte in regsions 1 + * and 2 at this point, this also ensures that RDI has not ended yet. + */ + ALIGN_TEXT +0: movdqu (%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI + pxor %xmm1, %xmm1 + pcmpeqb (%rdi, %rsi, 1), %xmm1 # end of string in RSI? + pcmpeqb (%rdi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + test %r8d, %r8d + jnz .Lnul_found + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatch + + /* main loop unrolled twice */ + movdqu 16(%rdi, %rbx, 1), %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb 16(%rdi, %rsi, 1), %xmm1 + pcmpeqb 16(%rdi), %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $32, %rdi + test %r8d, %r8d + jnz .Lnul_found2 + xor $0xffff, %r9d + jnz .Lmismatch2 + sub $32, %rdx # end of buffer within next iteration? + jae 0b + + /* end of buffer will occur in next 32 bytes */ +.Ltail: movdqu (%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI + pxor %xmm1, %xmm1 + pcmpeqb (%rdi, %rsi, 1), %xmm1 # end of string in RSI? + pcmpeqb (%rdi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + bts %edx, %r8d # indicate NUL byte at last byte in buffer + test %r8w, %r8w # NUL byte in first chunk? + jnz .Lnul_found + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatch + + /* main loop unrolled twice */ + movdqu 16(%rdi, %rbx, 1), %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb 16(%rdi, %rsi, 1), %xmm1 + pcmpeqb 16(%rdi), %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + sub $16, %edx # take first half into account + bts %edx, %r8d # indicate NUL byte at last byte in buffer + add $32, %rdi + +.Lnul_found2: + sub $16, %rdi + +.Lnul_found: + mov %eax, %ecx + mov %r8d, %r10d + shl %cl, %r8d # adjust NUL mask to positions in RDI/RBX + not %r9d # mask of mismatches + or %r8w, %r9w # NUL bytes als count as mismatches + jnz .Lmismatch + + /* + * (RDI) == (RSI) and NUL is past the string. + * compare (RSI) with the corresponding part + * of the other string until the NUL byte. + */ + movdqu (%rdi, %rax, 1), %xmm0 + pcmpeqb (%rdi, %rsi, 1), %xmm0 + add %rdi, %rsi # restore RSI pointer + add %rax, %rdi # point RDI to chunk corresponding to (RSI) + pmovmskb %xmm0, %ecx # mask of matches + not %ecx # mask of mismatches + or %r10d, %ecx # mask of mismatches or NUL bytes + tzcnt %ecx, %ecx # location of first mismatch + movzbl (%rdi, %rcx, 1), %eax + movzbl (%rsi, %rcx, 1), %ecx + sub %ecx, %eax + pop %rbx + ret + +.Lmismatch2: + sub $16, %rdi + + /* a mismatch has been found between RBX and RSI */ +.Lmismatch: + tzcnt %r9d, %r9d # where is the mismatch? + add %rdi, %rbx # turn RBX from offset into pointer + movzbl (%rbx, %r9, 1), %ecx + movzbl (%rdi, %r9, 1), %eax + sub %ecx, %eax + pop %rbx + ret + + /* rax < 0 */ + ALIGN_TEXT +.Lswapped: + movdqu 16(%rdi, %rax, 1), %xmm0 + sub %rsi, %rdi # express RDI as distance from RDI + lea (%rdi, %rax, 1), %rbx # point RBX to offset in first string + pcmpeqb %xmm2, %xmm1 # NUL present? + pcmpeqb %xmm3, %xmm0 # mismatch between chunks? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add %rax, %rdx # RDX points to buffer end in RSI + neg %rax # ... corresponding to RSI + mov $16, %ecx + cmp %rcx, %rdx # does the buffer end within (RSI,RDI,1)? + cmovb %edx, %ecx # ECX = min(16, RDX) + add $32, %rsi + bts %ecx, %r8d # mark end-of-buffer as if there was a NUL byte + test %r8w, %r8w # NUL or end of buffer found? + jnz .Lnul_found2s + xor $0xffff, %r9d + jnz .Lmismatch2s + sub $48, %rdx # end of buffer within first main loop iteration? + jb .Ltails # if yes, process tail + + ALIGN_TEXT +0: movdqu (%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI + pxor %xmm1, %xmm1 + pcmpeqb (%rsi, %rdi, 1), %xmm1 # end of string in RDI? + pcmpeqb (%rsi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + test %r8d, %r8d + jnz .Lnul_founds + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatchs + + /* main loop unrolled twice */ + movdqu 16(%rsi, %rbx, 1), %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb 16(%rsi, %rdi, 1), %xmm1 + pcmpeqb 16(%rsi), %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + add $32, %rsi + test %r8d, %r8d + jnz .Lnul_found2s + xor $0xffff, %r9d + jnz .Lmismatch2s + sub $32, %rdx # end of buffer within next iteration? + jae 0b + + /* end of buffer will occur in next 32 bytes */ +.Ltails: + movdqu (%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI + pxor %xmm1, %xmm1 + pcmpeqb (%rsi, %rdi, 1), %xmm1 # end of string in RDI? + pcmpeqb (%rsi), %xmm0 # where do the chunks match? + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + bts %edx, %r8d # indicate NUL byte at laste byte in buffer + test %r8w, %r8w # NUL byte in first chunk? + jnz .Lnul_founds + xor $0xffff, %r9d # any mismatches? + jnz .Lmismatchs + + /* main loop unrolled twice */ + movdqu 16(%rsi, %rbx, 1), %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb 16(%rsi, %rdi, 1), %xmm1 + pcmpeqb 16(%rsi), %xmm0 + pmovmskb %xmm1, %r8d + pmovmskb %xmm0, %r9d + sub $16, %edx # take first half into account + bts %edx, %r8d # indicate NUL byte at laste byte in buffer + add $32, %rsi + +.Lnul_found2s: + sub $16, %rsi + +.Lnul_founds: + mov %eax, %ecx + mov %r8d, %r10d + shl %cl, %r8d # adjust NUL mask to positions in RSI/RBX + not %r9d # mask of mismatches + or %r8w, %r9w # NUL bytes also count as mismatches + jnz .Lmismatchs + + movdqu (%rsi, %rax, 1), %xmm0 + pcmpeqb (%rsi, %rdi, 1), %xmm0 + add %rsi, %rdi # restore RDI pointer + add %rax, %rsi # point RSI to chunk corresponding to (RDI) + pmovmskb %xmm0, %ecx # mask of matches + not %ecx # mask of mismatches + or %r10d, %ecx # mask of mismatches or NUL bytes + tzcnt %ecx, %ecx # location of first mismatch + movzbl (%rdi, %rcx, 1), %eax + movzbl (%rsi, %rcx, 1), %ecx + sub %ecx, %eax + pop %rbx + ret + +.Lmismatch2s: + sub $16, %rsi + +.Lmismatchs: + tzcnt %r9d, %r9d # where is the mismatch? + add %rsi, %rbx # turn RBX from offset into pointer + movzbl (%rbx, %r9, 1), %eax + movzbl (%rsi, %r9, 1), %ecx + sub %ecx, %eax + pop %rbx + ret +ARCHEND(strncmp, baseline) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strncpy.c b/lib/libc/amd64/string/strncpy.c new file mode 100644 index 000000000000..b3d868787fbe --- /dev/null +++ b/lib/libc/amd64/string/strncpy.c @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include +#include + +char *__stpncpy(char *restrict, const char *restrict, size_t); + +char * +strncpy(char *restrict dst, const char *restrict src, size_t len) +{ + + __stpncpy(dst, src, len); + + return (dst); +} diff --git a/lib/libc/amd64/string/strpbrk.c b/lib/libc/amd64/string/strpbrk.c new file mode 100644 index 000000000000..87f587789991 --- /dev/null +++ b/lib/libc/amd64/string/strpbrk.c @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include + +size_t __strcspn(const char *, const char *); + +char * +strpbrk(const char *s, const char *charset) +{ + size_t loc; + + loc = __strcspn(s, charset); + + return (s[loc] == '\0' ? NULL : (char *)&s[loc]); +} diff --git a/lib/libc/amd64/string/strrchr.S b/lib/libc/amd64/string/strrchr.S new file mode 100644 index 000000000000..e397bbcd3478 --- /dev/null +++ b/lib/libc/amd64/string/strrchr.S @@ -0,0 +1,209 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include "amd64_archlevel.h" + +#define ALIGN_TEXT .p2align 4,0x90 # 16-byte alignment, nop-filled + + .weak rindex + .set rindex, strrchr + +ARCHFUNCS(strrchr) + ARCHFUNC(strrchr, scalar) + ARCHFUNC(strrchr, baseline) +ENDARCHFUNCS(strrchr) + +ARCHENTRY(strrchr, scalar) + mov %edi, %ecx + and $~7, %rdi # align to 8 byte + movzbl %sil, %esi # clear stray high bits + movabs $0x0101010101010101, %r8 + mov (%rdi), %rax # load first word + imul %r8, %rsi # replicate char 8 times + + /* + * Unaligned input: align to 8 bytes. Then proceed the same + * way as with aligned input, but prevent matches before the + * beginning of the string. This is achieved by oring 0x01 + * into each byte of the buffer before the string + */ + shl $3, %ecx + mov %r8, %r10 + shl %cl, %r10 # 0x01 where the string is + xor %r8, %r10 # 0x01 where it is not + neg %r8 # negate 01..01 so we can use lea + movabs $0x8080808080808080, %r9 + + mov %rsi, %rcx + xor %rax, %rcx # str ^ c + or %r10, %rax # ensure str != 0 before string + or %r10, %rcx # ensure str^c != 0 before string + bswap %rcx # in reverse order, to find last match + mov %rdi, %r10 # location of initial mismatch (if any) + xor %r11, %r11 # initial mismatch (none) + add $8, %rdi # advance to next iteration + lea (%rax, %r8, 1), %rdx # str - 0x01..01 + not %rax # ~str + and %rdx, %rax # (str - 0x01..01) & ~str + and %r9, %rax # not including junk bits + jnz 1f # end of string? + + lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01 + not %rcx # ~(str ^ c) + and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) + and %r9, %rcx # not including junk bits + mov %rcx, %r11 # remember mismatch in head + jmp 0f + + /* main loop unrolled twice */ + ALIGN_TEXT +3: lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01 + not %rcx # ~(str ^ c) + and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) + and %r9, %rcx # not including junk bits + lea -8(%rdi), %rdx + cmovnz %rdx, %r10 # remember location of current mismatch + cmovnz %rcx, %r11 + +0: mov (%rdi), %rax # str + mov %rsi, %rcx + xor %rax, %rcx # str ^ c + bswap %rcx # in reverse order, to find last match + lea (%rax, %r8, 1), %rdx # str - 0x01..01 + not %rax # ~str + and %rdx, %rax # (str - 0x01..01) & ~str + and %r9, %rax # not including junk bits + jnz 2f # end of string? + + lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01 + not %rcx # ~(str ^ c) + and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) + and %r9, %rcx # not including junk bits + cmovnz %rdi, %r10 # remember location of current mismatch + cmovnz %rcx, %r11 + + mov 8(%rdi), %rax # str + add $16, %rdi + mov %rsi, %rcx + xor %rax, %rcx # str ^ c + bswap %rcx + lea (%rax, %r8, 1), %rdx # str - 0x01..01 + not %rax # ~str + and %rdx, %rax # (str - 0x01..01) & ~str + and %r9, %rax # not including junk bits + jz 3b # end of string? + + /* NUL found */ +1: sub $8, %rdi # undo advance past buffer +2: lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01 + not %rcx # ~(str ^ c) + and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) + and %r9, %rcx # not including junk bits + lea -1(%rax), %rdx + xor %rdx, %rax # mask of bytes in the string + bswap %rdx # in reverse order + and %rdx, %rcx # c found in the tail? + cmovnz %rdi, %r10 + cmovnz %rcx, %r11 + bswap %r11 # unreverse byte order + bsr %r11, %rcx # last location of c in (R10) + shr $3, %rcx # as byte offset + lea (%r10, %rcx, 1), %rax # pointer to match + test %r11, %r11 # was there actually a match? + cmovz %r11, %rax # if not, return null pointer + ret +ARCHEND(strrchr, scalar) + +ARCHENTRY(strrchr, baseline) + mov %edi, %ecx + and $~0xf, %rdi # align to 16 bytes + movdqa (%rdi), %xmm1 + movd %esi, %xmm0 + and $0xf, %ecx # offset from alignment + pxor %xmm2, %xmm2 + mov $-1, %edx + punpcklbw %xmm0, %xmm0 # c -> cc + shl %cl, %edx # bits corresponding to bytes in the string + punpcklwd %xmm0, %xmm0 # cc -> cccc + xor %r8, %r8 # address of latest match + mov $1, %esi # bit mask of latest match + mov %rdi, %r9 # candidate location for next match + add $16, %rdi # advance to next chunk + + /* check for match in head */ + pcmpeqb %xmm1, %xmm2 # NUL byte present? + pshufd $0, %xmm0, %xmm0 # cccc -> cccccccccccccccc + pcmpeqb %xmm0, %xmm1 # c present? + pmovmskb %xmm2, %eax + pmovmskb %xmm1, %ecx + and %edx, %ecx # c present in the string? + and %edx, %eax # NUL present in the string? + jnz .Lend2 + + /* main loop unrolled twice */ + ALIGN_TEXT +0: movdqa (%rdi), %xmm1 + test %ecx, %ecx # was there a match in the last iter.? + cmovnz %r9, %r8 # remember match if any + cmovnz %ecx, %esi + pxor %xmm2, %xmm2 + pcmpeqb %xmm1, %xmm2 # NUL byte present? + pcmpeqb %xmm0, %xmm1 # c present? + pmovmskb %xmm2, %eax + pmovmskb %xmm1, %ecx + test %eax, %eax # end of string in first half? + jnz .Lend + + movdqa 16(%rdi), %xmm1 + test %ecx, %ecx # was there a match in the last iter.? + cmovnz %rdi, %r8 # remember match if any + cmovnz %ecx, %esi + pxor %xmm2, %xmm2 + pcmpeqb %xmm1, %xmm2 # NUL byte present? + pcmpeqb %xmm0, %xmm1 # c present? + pmovmskb %xmm2, %eax + pmovmskb %xmm1, %ecx + lea 16(%rdi), %r9 + add $32, %rdi + test %eax, %eax # end of string in second half? + jz 0b + + ALIGN_TEXT +.Lend2: sub $16, %rdi +.Lend: lea -1(%rax), %edx + xor %edx, %eax # mask of bytes in the string + and %eax, %ecx # c found in the tail? + cmovnz %rdi, %r8 + cmovnz %ecx, %esi + bsr %esi, %esi # last location of c in (R8) + lea (%r8, %rsi, 1), %rax # pointer to match + ret +ARCHEND(strrchr, baseline) + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strsep.c b/lib/libc/amd64/string/strsep.c new file mode 100644 index 000000000000..9fda56d7e135 --- /dev/null +++ b/lib/libc/amd64/string/strsep.c @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include +#include + +size_t __strcspn(const char *, const char *); + +/* + * We have a fast strcspn() on amd64. Use it over a direct + * implementation of strsep for better performance. + */ +char * +strsep(char **stringp, const char *delim) +{ + size_t n; + char *s; + + s = *stringp; + if (s == NULL) + return (NULL); + + n = __strcspn(s, delim); + if (s[n] == '\0') + *stringp = NULL; + else { + s[n] = '\0'; + *stringp = s + n + 1; + } + + return (s); +} diff --git a/lib/libc/string/bstring.3 b/lib/libc/string/bstring.3 index fd976c7676b7..91603fe6dbac 100644 --- a/lib/libc/string/bstring.3 +++ b/lib/libc/string/bstring.3 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 4, 1993 +.Dd December 5, 2023 .Dt BSTRING 3 .Os .Sh NAME @@ -56,7 +56,12 @@ .Ft int .Fn memcmp "const void *b1" "const void *b2" "size_t len" .Ft void * -.Fn memccpy "void *dst" "const void *src" "int c" "size_t len" +.Fo memccpy +.Fa "void * restrict dst" +.Fa "const void * restrict src" +.Fa "int c" +.Fa "size_t len" +.Fc .Ft void * .Fn memcpy "void *dst" "const void *src" "size_t len" .Ft void * @@ -78,6 +83,7 @@ See the specific manual pages for more information. .Xr memccpy 3 , .Xr memchr 3 , .Xr memcmp 3 , +.Xr memccpy 3 , .Xr memcpy 3 , .Xr memmove 3 , .Xr memset 3 diff --git a/lib/libc/string/memccpy.3 b/lib/libc/string/memccpy.3 index ce8d5f65ac93..3bdae24354c1 100644 --- a/lib/libc/string/memccpy.3 +++ b/lib/libc/string/memccpy.3 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 9, 1993 +.Dd December 5, 2023 .Dt MEMCCPY 3 .Os .Sh NAME @@ -36,7 +36,12 @@ .Sh SYNOPSIS .In string.h .Ft void * -.Fn memccpy "void *dst" "const void *src" "int c" "size_t len" +.Fo memccpy +.Fa "void * restrict dst" +.Fa "const void * restrict src" +.Fa "int c" +.Fa "size_t len" +.Fc .Sh DESCRIPTION The .Fn memccpy @@ -59,13 +64,35 @@ is returned. Otherwise, .Fa len bytes are copied, and a NULL pointer is returned. +If +.Fa src +and +.Fa dst +overlap, behavior is undefined. .Sh SEE ALSO .Xr bcopy 3 , .Xr memcpy 3 , .Xr memmove 3 , .Xr strcpy 3 +.Sh STANDARDS +The +.Fn memccpy +function conforms to +.St -p1003.1-2004 +and +.\" St -isoC-2024 . +ISO/IEC 9899:2024 (\(lqISO\~C23\(rq). .Sh HISTORY The .Fn memccpy function first appeared in -.Bx 4.4 . +.Bx 4.4 +and was first specified in the +.\" St -svid1 . +System\~V Interface Definition, First Edition (\(lqSVID1\(rq). +The +.Ft restrict +keyword was added to the prototype in +.Fx 5.0.0 +in accordance with the updated specification of +.St -p1003.1-2004 . diff --git a/lib/libc/string/memccpy.c b/lib/libc/string/memccpy.c index 174824ba2393..d6a446503eb6 100644 --- a/lib/libc/string/memccpy.c +++ b/lib/libc/string/memccpy.c @@ -32,7 +32,7 @@ #include void * -memccpy(void *t, const void *f, int c, size_t n) +memccpy(void * restrict t, const void * restrict f, int c, size_t n) { if (n) { diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile index a090e1bd3463..4fce79685c0e 100644 --- a/lib/libc/tests/string/Makefile +++ b/lib/libc/tests/string/Makefile @@ -9,12 +9,16 @@ ATF_TESTS_C+= ffsll_test ATF_TESTS_C+= fls_test ATF_TESTS_C+= flsl_test ATF_TESTS_C+= flsll_test +ATF_TESTS_C+= memccpy_test ATF_TESTS_C+= memcmp_test +ATF_TESTS_C+= memrchr_test ATF_TESTS_C+= memset_s_test +ATF_TESTS_C+= strncmp_test ATF_TESTS_C+= stpncpy_test ATF_TESTS_C+= strcmp2_test ATF_TESTS_C+= strcspn_test ATF_TESTS_C+= strerror2_test +ATF_TESTS_C+= strlcpy_test ATF_TESTS_C+= strspn_test ATF_TESTS_C+= strverscmp_test ATF_TESTS_C+= strxfrm_test diff --git a/lib/libc/tests/string/memccpy_test.c b/lib/libc/tests/string/memccpy_test.c new file mode 100644 index 000000000000..82f4ef34af54 --- /dev/null +++ b/lib/libc/tests/string/memccpy_test.c @@ -0,0 +1,205 @@ +/*- + * Copyright (c) 2009 David Schultz + * Copyright (c) 2023 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +void *(*memccpy_fn)(void *restrict, const void *restrict, int, size_t); + +static char * +makebuf(size_t len, int guard_at_end) +{ + char *buf; + size_t alloc_size, page_size; + + page_size = getpagesize(); + alloc_size = roundup2(len, page_size) + page_size; + + buf = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0); + assert(buf); + if (guard_at_end) { + assert(munmap(buf + alloc_size - page_size, page_size) == 0); + return (buf + alloc_size - page_size - len); + } else { + assert(munmap(buf, page_size) == 0); + return (buf + page_size); + } +} + +static void +test_memccpy(const char *s) +{ + char *src, *dst, *expected; + size_t size, bufsize, x; + int i, j; + + size = strlen(s) + 1; + for (i = 0; i <= 1; i++) { + for (j = 0; j <= 1; j++) { + for (bufsize = 0; bufsize <= size + 10; bufsize++) { + src = makebuf(size, i); + memcpy(src, s, size); + dst = makebuf(bufsize, j); + memset(dst, 'X', bufsize); + expected = bufsize >= size ? dst + size : NULL; + assert(memccpy_fn(dst, src, src[size-1], bufsize) == expected); + assert(bufsize == 0 || strncmp(src, dst, bufsize - 1) == 0); + for (x = size; x < bufsize; x++) + assert(dst[x] == 'X'); + } + } + } +} + +static void +test_sentinel(char *dest, char *src, size_t destlen, size_t srclen) +{ + size_t i, effective_len; + void *res, *wantres; + const char *fail = NULL; + char terminator; + + for (i = 0; i < srclen; i++) + /* src will never include (){} */ + src[i] = '0' + i; + + /* source sentinels: not to be copied */ + src[-1] = '('; + src[srclen] = ')'; + + memset(dest, '\xee', destlen); + + /* destination sentinels: not to be touched */ + dest[-1] = '{'; + dest[destlen] = '}'; + + effective_len = srclen < destlen ? srclen : destlen; + wantres = srclen <= destlen ? dest + srclen : NULL; + terminator = src[srclen-1]; + res = memccpy_fn(dest, src, terminator, destlen); + + if (dest[-1] != '{') + fail = "start sentinel overwritten"; + else if (dest[destlen] != '}') + fail = "end sentinel overwritten"; + else if (res != wantres) + fail = "incorrect return value"; + else if (destlen > 0 && memcmp(src, dest, effective_len) != 0) + fail = "string not copied correctly"; + else for (i = srclen; i < destlen; i++) + if (dest[i] != '\xee') { + fail = "buffer mutilated behind string"; + break; + } + + if (fail) + atf_tc_fail_nonfatal("%s\n" + "memccpy(%p \"%s\", %p \"%s\", %u '%c', %zu) = %p (want %p)\n", + fail, dest, dest, src, src, terminator, terminator, destlen, res, wantres); +} + +ATF_TC_WITHOUT_HEAD(null); +ATF_TC_BODY(null, tc) +{ + ATF_CHECK_EQ(memccpy_fn(NULL, "foo", 42, 0), NULL); +} + +ATF_TC(zero_extension); +ATF_TC_HEAD(zero_extension, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Ensure the upper bits of the terminator are ignored"); +} +ATF_TC_BODY(zero_extension, tc) +{ + int mask = -1 & ~UCHAR_MAX; + char buf[16]; + + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ(memccpy(buf, "foobar", 'r', sizeof(buf)), buf + sizeof("foobar") - 1); + ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0); + + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ(memccpy(buf, "foobar", mask | 'r', sizeof(buf)), buf + sizeof("foobar") - 1); + ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0); +} + +ATF_TC_WITHOUT_HEAD(bounds); +ATF_TC_BODY(bounds, tc) +{ + size_t i; + char buf[64]; + + for (i = 0; i < sizeof(buf) - 1; i++) { + buf[i] = ' ' + i; + test_memccpy(buf); + } +} + +ATF_TC_WITHOUT_HEAD(alignments); +ATF_TC_BODY(alignments, tc) +{ + size_t srcalign, destalign, srclen, destlen; + char src[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + char dest[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + + for (srcalign = 0; srcalign < 16; srcalign++) + for (destalign = 0; destalign < 16; destalign++) + for (srclen = 1; srclen < 64; srclen++) + for (destlen = 0; destlen < 64; destlen++) + test_sentinel(dest+destalign+1, + src+srcalign+1, destlen, srclen); +} + +ATF_TP_ADD_TCS(tp) +{ + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + memccpy_fn = dlsym(dl_handle, "test_memccpy"); + if (memccpy_fn == NULL) + memccpy_fn = memccpy; + + ATF_TP_ADD_TC(tp, null); + ATF_TP_ADD_TC(tp, zero_extension); + ATF_TP_ADD_TC(tp, bounds); + ATF_TP_ADD_TC(tp, alignments); + + return (atf_no_error()); +} diff --git a/lib/libc/tests/string/memrchr_test.c b/lib/libc/tests/string/memrchr_test.c new file mode 100644 index 000000000000..12f696c9dc1e --- /dev/null +++ b/lib/libc/tests/string/memrchr_test.c @@ -0,0 +1,116 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Robert Clausecker + */ + +#include + +#include +#include +#include + +#include + +static void *(*memrchr_fn)(const void *, int, size_t); + +ATF_TC_WITHOUT_HEAD(null); +ATF_TC_BODY(null, tc) +{ + ATF_CHECK_EQ(memrchr_fn(NULL, 42, 0), NULL); +} + +ATF_TC_WITHOUT_HEAD(not_found); +ATF_TC_BODY(not_found, tc) +{ + size_t i, j; + char buf[1+15+64+1]; /* offset [0..15] + 64 buffer bytes + sentinels */ + + buf[0] = 'X'; + memset(buf + 1, '-', sizeof(buf) - 1); + + for (i = 0; i < 16; i++) + for (j = 0; j < 64; j++) { + buf[i + j + 1] = 'X'; + ATF_CHECK_EQ(memrchr_fn(buf + i + 1, 'X', j), NULL); + buf[i + j + 1] = '-'; + } +} + +static void +do_found_test(char buf[], size_t len, size_t first, size_t second) +{ + /* invariant: first <= second */ + + buf[first] = 'X'; + buf[second] = 'X'; + ATF_CHECK_EQ(memrchr_fn(buf, 'X', len), buf + second); + buf[first] = '-'; + buf[second] = '-'; +} + +ATF_TC_WITHOUT_HEAD(found); +ATF_TC_BODY(found, tc) +{ + size_t i, j, k, l; + char buf[1+15+64+1]; + + buf[0] = 'X'; + memset(buf + 1, '-', sizeof(buf) - 1); + + for (i = 0; i < 16; i++) + for (j = 0; j < 64; j++) + for (k = 0; k < j; k++) + for (l = 0; l <= k; l++) { + buf[i + j + 1] = 'X'; + do_found_test(buf + i + 1, j, l, k); + buf[i + j + 1] = '-'; + } +} + +/* check that the right character is found */ +static void +do_values_test(unsigned char buf[], size_t len, size_t i, int c) +{ + /* sentinels */ + buf[-1] = c; + buf[len] = c; + memset(buf, c + 1, len); + + if (i < len) { + buf[i] = c; + ATF_CHECK_EQ(memrchr_fn(buf, c, len), buf + i); + } else + ATF_CHECK_EQ(memrchr_fn(buf, c, len), NULL); +} + +ATF_TC_WITHOUT_HEAD(values); +ATF_TC_BODY(values, tc) +{ + size_t i, j, k; + int c; + unsigned char buf[1+15+64+1]; + + for (i = 0; i < 16; i++) + for (j = 0; j < 64; j++) + for (k = 0; k <= j; k++) + for (c = 0; c <= UCHAR_MAX; c++) + do_values_test(buf + i + 1, j, k, c); +} + +ATF_TP_ADD_TCS(tp) +{ + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + memrchr_fn = dlsym(dl_handle, "test_memrchr"); + if (memrchr_fn == NULL) + memrchr_fn = memrchr; + + ATF_TP_ADD_TC(tp, null); + ATF_TP_ADD_TC(tp, not_found); + ATF_TP_ADD_TC(tp, found); + ATF_TP_ADD_TC(tp, values); + + return (atf_no_error()); +} diff --git a/lib/libc/tests/string/stpncpy_test.c b/lib/libc/tests/string/stpncpy_test.c index 8154237eb8c2..8574b2d591be 100644 --- a/lib/libc/tests/string/stpncpy_test.c +++ b/lib/libc/tests/string/stpncpy_test.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2009 David Schultz + * Copyright (c) 2023 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,12 +31,15 @@ #include #include #include +#include #include #include #include #include +static char *(*stpncpy_fn)(char *restrict, const char *restrict, size_t); + static char * makebuf(size_t len, int guard_at_end) { @@ -69,7 +76,7 @@ test_stpncpy(const char *s) dst = makebuf(bufsize, j); memset(dst, 'X', bufsize); len = (bufsize < size) ? bufsize : size - 1; - assert(stpncpy(dst, src, bufsize) == dst+len); + assert(stpncpy_fn(dst, src, bufsize) == dst+len); assert(memcmp(src, dst, len) == 0); for (x = len; x < bufsize; x++) assert(dst[x] == '\0'); @@ -78,33 +85,97 @@ test_stpncpy(const char *s) } } -ATF_TC_WITHOUT_HEAD(nul); -ATF_TC_BODY(nul, tc) +static void +test_sentinel(char *dest, char *src, size_t destlen, size_t srclen) { + size_t i; + const char *res, *wantres; + const char *fail = NULL; + + for (i = 0; i < srclen; i++) + /* src will never include (){} */ + src[i] = '0' + i; + src[srclen] = '\0'; + + /* source sentinels: not to be copied */ + src[-1] = '('; + src[srclen+1] = ')'; + + memset(dest, 0xee, destlen); + + /* destination sentinels: not to be touched */ + dest[-1] = '{'; + dest[destlen] = '}'; + + wantres = dest + (srclen > destlen ? destlen : srclen); + res = stpncpy_fn(dest, src, destlen); + + if (dest[-1] != '{') + fail = "start sentinel overwritten"; + else if (dest[destlen] != '}') + fail = "end sentinel overwritten"; + else if (strncmp(src, dest, destlen) != 0) + fail = "string not copied correctly"; + else if (res != wantres) + fail = "incorrect return value"; + else for (i = srclen; i < destlen; i++) + if (dest[i] != '\0') { + fail = "incomplete NUL padding"; + break; + } - test_stpncpy(""); + if (fail) + atf_tc_fail_nonfatal("%s\n" + "stpncpy(%p \"%s\", %p \"%s\", %zu) = %p (want %p)\n", + fail, dest, dest, src, src, destlen, res, wantres); } -ATF_TC_WITHOUT_HEAD(foo); -ATF_TC_BODY(foo, tc) +ATF_TC_WITHOUT_HEAD(null); +ATF_TC_BODY(null, tc) { - - test_stpncpy("foo"); + ATF_CHECK_EQ(stpncpy_fn(NULL, NULL, 0), NULL); } -ATF_TC_WITHOUT_HEAD(glorp); -ATF_TC_BODY(glorp, tc) +ATF_TC_WITHOUT_HEAD(bounds); +ATF_TC_BODY(bounds, tc) { + size_t i; + char buf[64+1]; - test_stpncpy("glorp"); + for (i = 0; i < sizeof(buf) - 1; i++) { + buf[i] = ' ' + i; + buf[i+1] = '\0'; + test_stpncpy(buf); + } +} + +ATF_TC_WITHOUT_HEAD(alignments); +ATF_TC_BODY(alignments, tc) +{ + size_t srcalign, destalign, srclen, destlen; + char src[15+3+64]; /* 15 offsets + 64 max length + NUL + sentinels */ + char dest[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + + for (srcalign = 0; srcalign < 16; srcalign++) + for (destalign = 0; destalign < 16; destalign++) + for (srclen = 0; srclen < 64; srclen++) + for (destlen = 0; destlen < 64; destlen++) + test_sentinel(dest+destalign+1, + src+srcalign+1, destlen, srclen); } ATF_TP_ADD_TCS(tp) { + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + stpncpy_fn = dlsym(dl_handle, "test_stpncpy"); + if (stpncpy_fn == NULL) + stpncpy_fn = stpncpy; - ATF_TP_ADD_TC(tp, nul); - ATF_TP_ADD_TC(tp, foo); - ATF_TP_ADD_TC(tp, glorp); + ATF_TP_ADD_TC(tp, null); + ATF_TP_ADD_TC(tp, bounds); + ATF_TP_ADD_TC(tp, alignments); return (atf_no_error()); } diff --git a/lib/libc/tests/string/strlcpy_test.c b/lib/libc/tests/string/strlcpy_test.c new file mode 100644 index 000000000000..646bef42683e --- /dev/null +++ b/lib/libc/tests/string/strlcpy_test.c @@ -0,0 +1,183 @@ +/*- + * Copyright (c) 2009 David Schultz + * Copyright (c) 2023 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +size_t (*strlcpy_fn)(char *restrict, const char *restrict, size_t); + +static char * +makebuf(size_t len, int guard_at_end) +{ + char *buf; + size_t alloc_size, page_size; + + page_size = getpagesize(); + alloc_size = roundup2(len, page_size) + page_size; + + buf = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0); + assert(buf); + if (guard_at_end) { + assert(munmap(buf + alloc_size - page_size, page_size) == 0); + return (buf + alloc_size - page_size - len); + } else { + assert(munmap(buf, page_size) == 0); + return (buf + page_size); + } +} + +static void +test_strlcpy(const char *s) +{ + char *src, *dst; + size_t size, bufsize, x; + int i, j; + + size = strlen(s) + 1; + for (i = 0; i <= 1; i++) { + for (j = 0; j <= 1; j++) { + for (bufsize = 0; bufsize <= size + 10; bufsize++) { + src = makebuf(size, i); + memcpy(src, s, size); + dst = makebuf(bufsize, j); + memset(dst, 'X', bufsize); + assert(strlcpy_fn(dst, src, bufsize) == size-1); + assert(bufsize == 0 || strncmp(src, dst, bufsize - 1) == 0); + for (x = size; x < bufsize; x++) + assert(dst[x] == 'X'); + } + } + } +} + +static void +test_sentinel(char *dest, char *src, size_t destlen, size_t srclen) +{ + size_t i; + size_t res, wantres; + const char *fail = NULL; + + for (i = 0; i < srclen; i++) + /* src will never include (){} */ + src[i] = '0' + i; + src[srclen] = '\0'; + + /* source sentinels: not to be copied */ + src[-1] = '('; + src[srclen+1] = ')'; + + memset(dest, '\xee', destlen); + + /* destination sentinels: not to be touched */ + dest[-1] = '{'; + dest[destlen] = '}'; + + wantres = srclen; + res = strlcpy_fn(dest, src, destlen); + + if (dest[-1] != '{') + fail = "start sentinel overwritten"; + else if (dest[destlen] != '}') + fail = "end sentinel overwritten"; + else if (res != wantres) + fail = "incorrect return value"; + else if (destlen > 0 && strncmp(src, dest, destlen - 1) != 0) + fail = "string not copied correctly"; + else if (destlen > 0 && srclen >= destlen - 1 && dest[destlen-1] != '\0') + fail = "string not NUL terminated"; + else for (i = srclen + 1; i < destlen; i++) + if (dest[i] != '\xee') { + fail = "buffer mutilated behind string"; + break; + } + + if (fail) + atf_tc_fail_nonfatal("%s\n" + "strlcpy(%p \"%s\", %p \"%s\", %zu) = %zu (want %zu)\n", + fail, dest, dest, src, src, destlen, res, wantres); +} + +ATF_TC_WITHOUT_HEAD(null); +ATF_TC_BODY(null, tc) +{ + ATF_CHECK_EQ(strlcpy_fn(NULL, "foo", 0), 3); +} + +ATF_TC_WITHOUT_HEAD(bounds); +ATF_TC_BODY(bounds, tc) +{ + size_t i; + char buf[64+1]; + + for (i = 0; i < sizeof(buf) - 1; i++) { + buf[i] = ' ' + i; + buf[i+1] = '\0'; + test_strlcpy(buf); + } +} + +ATF_TC_WITHOUT_HEAD(alignments); +ATF_TC_BODY(alignments, tc) +{ + size_t srcalign, destalign, srclen, destlen; + char src[15+3+64]; /* 15 offsets + 64 max length + NUL + sentinels */ + char dest[15+2+64]; /* 15 offsets + 64 max length + sentinels */ + + for (srcalign = 0; srcalign < 16; srcalign++) + for (destalign = 0; destalign < 16; destalign++) + for (srclen = 0; srclen < 64; srclen++) + for (destlen = 0; destlen < 64; destlen++) + test_sentinel(dest+destalign+1, + src+srcalign+1, destlen, srclen); +} + +ATF_TP_ADD_TCS(tp) +{ + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + strlcpy_fn = dlsym(dl_handle, "test_strlcpy"); + if (strlcpy_fn == NULL) + strlcpy_fn = strlcpy; + + ATF_TP_ADD_TC(tp, null); + ATF_TP_ADD_TC(tp, bounds); + ATF_TP_ADD_TC(tp, alignments); + + return (atf_no_error()); +} diff --git a/lib/libc/tests/string/strncmp_test.c b/lib/libc/tests/string/strncmp_test.c new file mode 100644 index 000000000000..989c58bcfedf --- /dev/null +++ b/lib/libc/tests/string/strncmp_test.c @@ -0,0 +1,165 @@ +/*- + * Copyright (c) 2023 The FreeBSD Foundation + * + * This software was developed by Robert Clausecker + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE + */ + +#include + +#include +#include +#include + +int (*volatile strncmp_fn)(const char *, const char *, size_t); + +static void +alignment_testcase(char *a, char *b, int want, size_t len) +{ + int res; + + res = strncmp_fn(a, b, len); + ATF_CHECK_MSG(want == (res > 0) - (res < 0), + "strcmp(%p \"%s\", %p \"%s\", %zu) = %d != %d", + (void *)a, a, (void *)b, b, len, res, want); +} + +static void +check_strncmp_alignments(char a[], char b[], + size_t a_off, size_t b_off, size_t len, size_t pos) +{ + char *a_str, *b_str, a_orig, b_orig; + + a[a_off] = '\0'; + b[b_off] = '\0'; + + a_str = a + a_off + 1; + b_str = b + b_off + 1; + + a_str[len] = '\0'; + b_str[len] = '\0'; + a_str[len+1] = 'A'; + b_str[len+1] = 'B'; + + a_orig = a_str[pos]; + b_orig = b_str[pos]; + + alignment_testcase(a_str, b_str, 0, len + 16); + alignment_testcase(a_str, b_str, 0, len + 1); + alignment_testcase(a_str, b_str, 0, len); + + if (pos < len) { + a_str[pos] = '\0'; + alignment_testcase(a_str, b_str, -1, len + 16); + alignment_testcase(a_str, b_str, -1, len + 1); + alignment_testcase(a_str, b_str, -1, len); + alignment_testcase(a_str, b_str, -1, pos + 1); + alignment_testcase(a_str, b_str, 0, pos); + a_str[pos] = a_orig; + + b_str[pos] = '\0'; + alignment_testcase(a_str, b_str, 1, len + 16); + alignment_testcase(a_str, b_str, 1, len + 1); + alignment_testcase(a_str, b_str, 1, len); + alignment_testcase(a_str, b_str, 1, pos + 1); + alignment_testcase(a_str, b_str, 0, pos); + b_str[pos] = b_orig; + } + + a_str[pos] = 'X'; + alignment_testcase(a_str, b_str, 1, len + 16); + alignment_testcase(a_str, b_str, 0, pos); + alignment_testcase(a_str, b_str, 1, pos + 1); + if (pos < len) { + alignment_testcase(a_str, b_str, 1, len); + alignment_testcase(a_str, b_str, 1, len + 1); + } + a_str[pos] = a_orig; + + b_str[pos] = 'X'; + alignment_testcase(a_str, b_str, -1, len + 16); + alignment_testcase(a_str, b_str, 0, pos); + alignment_testcase(a_str, b_str, -1, pos + 1); + if (pos < len) { + alignment_testcase(a_str, b_str, -1, len); + alignment_testcase(a_str, b_str, -1, len + 1); + } + b_str[pos] = b_orig; + + a[a_off] = '-'; + b[b_off] = '-'; + a_str[len] = '-'; + b_str[len] = '-'; + a_str[len+1] = '-'; + b_str[len+1] = '-'; +} + +ATF_TC(strncmp_alignments); +ATF_TC_HEAD(strncmp_alignments, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test strncmp(3) with various alignments"); +} + +ATF_TC_BODY(strncmp_alignments, tc) +{ + size_t a_off, b_off, len, pos; + char a[64+16+16+3], b[64+16+16+3]; + + memset(a, '-', sizeof(a)); + memset(b, '-', sizeof(b)); + a[sizeof(a) - 1] = '\0'; + b[sizeof(b) - 1] = '\0'; + + for (a_off = 0; a_off < 16; a_off++) + for (b_off = 0; b_off < 16; b_off++) + for (len = 1; len <= 64; len++) + for (pos = 0; pos <= len; pos++) + check_strncmp_alignments(a, b, a_off, b_off, len, pos); +} + +ATF_TC(strncmp_null); +ATF_TC_HEAD(strncmp_null, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test strncmp(3) with null pointers"); +} + +ATF_TC_BODY(strncmp_null, tc) +{ + alignment_testcase(NULL, NULL, 0, 0); +} + +ATF_TP_ADD_TCS(tp) +{ + void *dl_handle; + + dl_handle = dlopen(NULL, RTLD_LAZY); + strncmp_fn = dlsym(dl_handle, "test_strncmp"); + if (strncmp_fn == NULL) + strncmp_fn = strncmp; + + ATF_TP_ADD_TC(tp, strncmp_alignments); + ATF_TP_ADD_TC(tp, strncmp_null); + + return atf_no_error(); +} diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7 index 3fd8890c4f53..fd9485524aef 100644 --- a/share/man/man7/simd.7 +++ b/share/man/man7/simd.7 @@ -24,7 +24,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE . -.Dd September 2, 2023 +.Dd December 6, 2023 .Dt SIMD 7 .Os .Sh NAME @@ -48,7 +48,7 @@ the environment variable .Ev ARCHLEVEL can be used to override this mechanism. .Pp -Enhanced functions are present in the following architectures: +Enhanced functions are present for the following architectures: .Bl -column FUNCTION_________ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent .It Em FUNCTION Ta Em AARCH64 Ta Em ARM Ta Em AMD64 Ta Em I386 Ta Em PPC64 .It bcmp Ta Ta Ta S1 Ta S @@ -58,24 +58,32 @@ Enhanced functions are present in the following architectures: .It index Ta S Ta Ta S1 .It ldiv Ta Ta Ta S Ta S .It lldiv Ta Ta Ta S -.It memchr Ta Ta Ta S1 -.It memcmp Ta Ta S Ta S1 Ta S +.It memchr Ta S Ta Ta S1 +.It memcmp Ta S Ta S Ta S1 Ta S +.It memccpy Ta Ta Ta S1 .It memcpy Ta S Ta S Ta S Ta S Ta SV .It memmove Ta S Ta S Ta S Ta S Ta SV -.It memset Ta Ta S Ta S Ta S -.It rindex Ta S -.It stpcpy Ta Ta Ta S1 -.It strcat Ta Ta Ta S Ta S +.It memrchr Ta Ta Ta S1 +.It memset Ta S Ta S Ta S Ta S +.It rindex Ta S Ta Ta S1 Ta S +.It stpcpy Ta S Ta Ta S1 +.It stpncpy Ta Ta Ta S1 +.It strcat Ta Ta Ta S1 Ta S .It strchr Ta S Ta Ta S1 Ta S -.It strchrnul Ta Ta Ta S1 -.It strcmp Ta Ta S Ta S Ta S -.It strcpy Ta Ta Ta S1 Ta S Ta S2 +.It strchrnul Ta S Ta Ta S1 +.It strcmp Ta S Ta S Ta S1 Ta S +.It strcpy Ta S Ta Ta S1 Ta S Ta S2 .It strcspn Ta Ta Ta S2 -.It strlen Ta Ta S Ta S1 -.It strncmp Ta Ta S Ta Ta S -.It strncpy Ta Ta Ta Ta Ta S2 -.It strnlen Ta Ta Ta S1 -.It strrchr Ta S Ta Ta Ta S +.It strlcat Ta Ta Ta S1 +.It strlcpy Ta Ta Ta S1 +.It strlen Ta S Ta S Ta S1 +.It strncat Ta Ta Ta S1 +.It strncmp Ta S Ta S Ta S1 Ta S +.It strncpy Ta Ta Ta S1 Ta Ta S2 +.It strnlen Ta S Ta Ta S1 +.It strrchr Ta S Ta Ta S1 Ta S +.It strpbrk Ta Ta Ta S2 +.It strsep Ta Ta Ta S2 .It strspn Ta Ta Ta S2 .It swab Ta Ta Ta Ta S .It timingsafe_bcmp Ta Ta Ta S1 @@ -207,14 +215,14 @@ SIMD-enhanced functions were first added with for .Cm powerpc64 and with -.Fx 14.0 +.Fx 14.1 for .Cm amd64 . .Pp A .Nm manual page appeared in -.Fx 14.0 . +.Fx 14.1 . . .Sh AUTHOR .An Robert Clausecker Aq Mt fuz@FreeBSD.org