diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
index 09bf7c8f251e..b1369841bc74 100644
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -3,17 +3,28 @@ MDSRCS+= \
 	bcmp.S \
 	memchr.S \
 	memcmp.S \
+	memccpy.S \
 	memcpy.S \
 	memmove.S \
+	memrchr.S \
 	memset.S \
 	stpcpy.S \
+	stpncpy.S \
 	strcat.S \
 	strchrnul.S \
 	strcmp.S \
 	strcpy.c \
 	strcspn.S \
+	strlcat.c \
+	strlcpy.S \
 	strlen.S \
+	strncat.c \
+	strncmp.S \
+	strncpy.c \
 	strnlen.c \
+	strpbrk.c \
+	strrchr.S \
+	strsep.c \
 	strspn.S \
 	timingsafe_bcmp.S \
 	timingsafe_memcmp.S
diff --git a/lib/libc/amd64/string/memccpy.S b/lib/libc/amd64/string/memccpy.S
new file mode 100644
index 000000000000..a2d9e33b3d36
--- /dev/null
+++ b/lib/libc/amd64/string/memccpy.S
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+	.weak memccpy			# public name is only a weak symbol ...
+	.set memccpy, __memccpy		# ... set to the value of __memccpy
+ARCHFUNCS(__memccpy)			/* list of variants; macros come from amd64_archlevel.h */
+	ARCHFUNC(__memccpy, scalar)	/* variant built on __memchr and memcpy */
+	ARCHFUNC(__memccpy, baseline)	/* variant working on 16 byte XMM chunks */
+ENDARCHFUNCS(__memccpy)
+
+ARCHENTRY(__memccpy, scalar)		/* in: rdi = dest, rsi = src, edx = c, rcx = len */
+	push	%rbp			# establish stack frame
+	mov	%rsp, %rbp
+	push	%rax			# dummy push for alignment
+	push	%rbx			# callee-saved, used for len and then res
+	push	%rdi			# keep dest across the __memchr call
+	push	%rsi			# keep src across the __memchr call
+
+	mov	%rsi, %rdi		# 1st argument: src
+	mov	%edx, %esi		# 2nd argument: c
+	mov	%rcx, %rdx		# 3rd argument: len
+	mov	%rcx, %rbx		# remember len for later
+	call	CNAME(__memchr)		# ptr = memchr(src, c, len)
+
+	pop	%rsi			# src
+	pop	%rdi			# dest
+	lea	1(%rax), %rdx
+	sub	%rsi, %rdx		# size = ptr - src + 1
+	mov	%rbx, %rcx		# len
+	lea	(%rdi, %rdx, 1), %rbx	# res = dest + size
+	test	%rax, %rax		# if (ptr == NULL)
+	cmovz	%rcx, %rdx		# size = len
+	cmovz	%rax, %rbx		# res = NULL
+	call	CNAME(memcpy)		# memcpy(dest, src, size)
+
+	mov	%rbx, %rax		# return (res)
+	pop	%rbx
+	leave				# also drops the dummy slot
+	ret
+ARCHEND(__memccpy, scalar)
+
+ARCHENTRY(__memccpy, baseline)		/* in: rdi = dest, rsi = src, edx = c, rcx = len; out: rax */
+	sub		$1, %rcx		# RCX refers to last character in buffer
+	jb		.L0			# go to special code path if len was 0
+
+	movd		%edx, %xmm4
+	mov		%rcx, %rdx		# RDX = len - 1 from here on
+	punpcklbw	%xmm4, %xmm4		# c -> cc
+	mov		%esi, %ecx
+	punpcklwd	%xmm4, %xmm4		# cc -> cccc
+	mov		%rsi, %r9		# stash a copy of the source pointer for later
+	pshufd		$0, %xmm4, %xmm4	# cccc -> cccccccccccccccc
+	and		$~0xf, %rsi		# align source pointer to 16 bytes
+	movdqa		%xmm4, %xmm1
+	pcmpeqb		(%rsi), %xmm1		# c found in head?
+	mov		$-1, %r8d
+	and		$0xf, %ecx		# offset of src from the alignment boundary
+	shl		%cl, %r8d		# mask of bytes in the string
+	pmovmskb	%xmm1, %eax
+	and		%r8d, %eax		# ignore matches in front of the string
+	jnz		.Lhead_nul
+
+	mov		$16, %r8d		# must not load past a buffer ending in the head
+	sub		%ecx, %r8d		# head length (1--16 bytes)
+	cmp		%r8, %rdx		# does the buffer end within the head?
+	jb		.L0116			# if yes, copy all of it, return NULL (RAX == 0)
+
+	movdqa		16(%rsi), %xmm3		# load second string chunk
+	movdqu		(%r9), %xmm2		# load unaligned string head
+	add		$16, %r8d		# head length + length of second chunk
+	movdqa		%xmm4, %xmm1
+	pcmpeqb		%xmm3, %xmm1		# c found in second chunk?
+	sub		%r8, %rdx		# enough space left for the second chunk?
+	jb		.Lhead_buf_end
+
+	/* process second chunk */
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		.Lsecond_nul
+
+	/* string didn't end in second chunk and neither did buffer -- not a runt! */
+	movdqa		32(%rsi), %xmm0		# load next string chunk
+	movdqa		%xmm4, %xmm1
+	movdqu		%xmm2, (%rdi)		# deposit head into buffer
+	sub		%rcx, %rdi		# adjust RDI to correspond to RSI
+	movdqu		%xmm3, 16(%rdi)		# deposit second chunk
+	sub		%rsi, %rdi		# express RDI as distance from RSI
+	add		$32, %rsi		# advance RSI past first two chunks
+	sub		$16, %rdx		# enough left for another round?
+	jb		1f
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+0:	pcmpeqb		%xmm0, %xmm1		# c encountered?
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		3f
+
+	movdqu		%xmm0, (%rsi, %rdi)
+	movdqa		16(%rsi), %xmm0		# load next string chunk
+	movdqa		%xmm4, %xmm1
+	cmp		$16, %rdx		# more than a full chunk left?
+	jb		2f
+
+	add		$32, %rsi		# advance pointers to next chunk
+	pcmpeqb		%xmm0, %xmm1		# c encountered?
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		4f
+
+	movdqu		%xmm0, -16(%rsi, %rdi)
+	movdqa		(%rsi), %xmm0		# load next string chunk
+	movdqa		%xmm4, %xmm1
+	sub		$32, %rdx
+	jae		0b
+
+1:	sub		$16, %rsi		# undo second advancement
+	add		$16, %edx		# EDX = index of last buffer byte in the chunk (0--15)
+
+	/* 1--16 bytes left in the buffer but string has not ended yet */
+2:	pcmpeqb		%xmm1, %xmm0		# c encountered?
+	pmovmskb	%xmm0, %r8d
+	mov		%r8d, %ecx
+	bts		%edx, %r8d		# treat end of buffer as end of string
+	tzcnt		%r8d, %r8d		# find tail length (BTS made sure a bit is set)
+	add		%rsi, %rdi		# restore RDI
+	movdqu		1(%rsi, %r8, 1), %xmm0	# load string tail
+	movdqu		%xmm0, 1(%rdi, %r8, 1)	# store string tail
+	lea		17(%rdi, %r8, 1), %rsi	# return value if terminator encountered
+	xor		%eax, %eax		# return value if no terminator encountered
+	bt		%r8d, %ecx		# terminator encountered inside buffer?
+	cmovc		%rsi, %rax		# if yes, return pointer, else NULL
+	ret
+
+4:	sub		$16, %rsi		# undo second advancement
+	add		$16, %rdx		# restore number of remaining bytes
+
+	/* string has ended but buffer has not */
+3:	tzcnt		%eax, %eax		# find length of string tail
+	movdqu		-15(%rsi, %rax, 1), %xmm0 # load string tail (incl. terminator)
+	add		%rsi, %rdi		# restore destination pointer
+	movdqu		%xmm0, -15(%rdi, %rax, 1) # store string tail (incl. terminator)
+	lea		1(%rdi, %rax, 1), %rax	# compute return value
+	ret
+
+.Lhead_buf_end:
+	pmovmskb	%xmm1, %r8d
+	add		$32, %edx		# restore edx to (len-1) + ecx
+	shl		$16, %r8d		# place 2nd chunk match mask into bits 16--31
+	mov		%r8d, %r10d
+	bts		%rdx, %r8		# treat end of buffer as if terminator present
+	xor		%eax, %eax		# return value if terminator not found
+	tzcnt		%r8, %rdx		# find string/buffer len from alignment boundary
+	lea		1(%rdi, %rdx, 1), %r8	# return value if terminator found + rcx
+	sub		%rcx, %r8		# subtract rcx
+	bt		%rdx, %r10		# was the terminator present?
+	cmovc		%r8, %rax		# if yes, return pointer, else NULL
+	sub		%ecx, %edx		# find actual string/buffer len
+	jmp		.L0132
+
+.Lsecond_nul:
+	add		%r8, %rdx		# restore buffer length
+	tzcnt		%eax, %r8d		# where is the terminator?
+	lea		-16(%rcx), %eax
+	sub		%eax, %r8d		# string length
+	lea		1(%rdi, %r8, 1), %rax	# return value if terminator before end of buffer
+	xor		%ecx, %ecx		# return value if not
+	cmp		%r8, %rdx		# is the string shorter than the buffer?
+	cmova		%r8, %rdx		# copy only min(buflen, srclen) bytes
+	cmovb		%rcx, %rax		# return NULL if buffer ended before string
+.L0132:	cmp		$16, %rdx		# at least 17 bytes to copy (not incl NUL)?
+	jb		.L0116
+
+	/* copy 17--32 bytes */
+	movdqu		(%r9), %xmm0		# load first 16 bytes
+	movdqu		-15(%r9, %rdx, 1), %xmm1 # load last 16 bytes
+	movdqu		%xmm0, (%rdi)
+	movdqu		%xmm1, -15(%rdi, %rdx, 1)
+	ret
+
+.Lhead_nul:
+	tzcnt		%eax, %r8d		# where is the terminator?
+	sub		%ecx, %r8d		# ... from the beginning of the string?
+	lea		1(%rdi, %r8, 1), %rax	# return value if terminator before end of buffer
+	xor		%ecx, %ecx		# return value if not
+	cmp		%r8, %rdx		# is the string shorter than the buffer?
+	cmova		%r8, %rdx		# copy only min(buflen, srclen) bytes
+	cmovb		%rcx, %rax		# return NULL if buffer ended before string
+
+	/* copy 1--16 bytes (r9: src, rdx: number of bytes to copy - 1, rax: return value) */
+.L0116:	cmp		$8, %rdx		# at least 9 bytes to copy?
+	jae		.L0916
+	cmp		$4, %rdx		# at least 5 bytes to copy?
+	jae		.L0508
+	cmp		$2, %rdx		# at least 3 bytes to copy?
+	jae		.L0304
+
+	/* copy one or two bytes */
+	movzbl		(%r9), %ecx		# load first byte from src
+	movzbl		(%r9, %rdx, 1), %esi	# load last byte from src
+	mov		%cl, (%rdi)		# deposit into destination
+	mov		%sil, (%rdi, %rdx, 1)
+	ret
+
+.L0304:	movzwl		(%r9), %ecx		# first and last two bytes (may overlap)
+	movzwl		-1(%r9, %rdx, 1), %esi
+	mov		%cx, (%rdi)
+	mov		%si, -1(%rdi, %rdx, 1)
+	ret
+
+.L0508:	mov		(%r9), %ecx		# first and last four bytes (may overlap)
+	mov		-3(%r9, %rdx, 1), %esi
+	mov		%ecx, (%rdi)
+	mov		%esi, -3(%rdi, %rdx, 1)
+	ret
+
+.L0916:	mov		(%r9), %rcx		# first and last eight bytes (may overlap)
+	mov		-7(%r9, %rdx, 1), %rsi
+	mov		%rcx, (%rdi)
+	mov		%rsi, -7(%rdi, %rdx, 1)
+	ret
+
+	/* length zero destination: return null pointer */
+.L0:	xor		%eax, %eax
+	ret
+ARCHEND(__memccpy, baseline)
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
new file mode 100644
index 000000000000..4f6c5a238daa
--- /dev/null
+++ b/lib/libc/amd64/string/memrchr.S
@@ -0,0 +1,166 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define	ALIGN_TEXT	.p2align 4, 0x90
+
+ARCHFUNCS(memrchr)			/* list of variants; macros come from amd64_archlevel.h */
+	ARCHFUNC(memrchr, scalar)	/* byte-wise variant, four bytes per round */
+	ARCHFUNC(memrchr, baseline)	/* variant working on 16 byte XMM chunks */
+ENDARCHFUNCS(memrchr)
+
+ARCHENTRY(memrchr, scalar)		/* in: rdi = buffer, sil = c, rdx = len; out: rax */
+	xor	%eax, %eax		# prospective return value
+	sub	$4, %rdx		# 4 bytes left to process?
+	jb	1f
+
+	ALIGN_TEXT
+0:	xor	%r8, %r8		# null pointer for "no match"
+	lea	2(%rdi), %r10
+	cmp	%sil, 2(%rdi)
+	cmovne	%r8, %r10		# point to null if no match
+
+	cmp	%sil, (%rdi)
+	cmove	%rdi, %r8		# point to first char if match
+
+	lea	1(%rdi), %r9
+	cmp	%sil, 1(%rdi)
+	cmovne	%r8, %r9		# point to first result if no match in second
+
+	lea	3(%rdi), %r11
+	cmp	%sil, 3(%rdi)
+	cmovne	%r10, %r11		# point to third result if no match in fourth
+
+	test	%r11, %r11
+	cmovz	%r9, %r11		# take first pair match if none in second
+
+	test	%r11, %r11
+	cmovnz	%r11, %rax		# take match in current set if any
+
+	add	$4, %rdi
+	sub	$4, %rdx		# another 4 bytes left to process?
+	jae	0b
+
+1:	cmp	$-3, %edx		# at least one character left to process?
+	jb	2f			# (EDX is the remaining count minus 4 here)
+
+	cmp	%sil, (%rdi)
+	cmove	%rdi, %rax
+
+	lea	1(%rdi), %rcx
+	cmp	$-2, %edx		# at least two characters left to process?
+	jb	2f
+
+	cmp	%sil, 1(%rdi)
+	cmove	%rcx, %rax
+
+	lea	2(%rdi), %rcx
+	cmp	$-1, %edx		# at least three characters left to process?
+	jb	2f
+
+	cmp	%sil, 2(%rdi)
+	cmove	%rcx, %rax
+
+2:	ret				# RAX = last match seen or NULL
+ARCHEND(memrchr, scalar)
+
+ARCHENTRY(memrchr, baseline)		/* in: rdi = buffer, esi = c, rdx = len; out: rax */
+	movd		%esi, %xmm4
+	test		%rdx, %rdx		# empty buffer?
+	jz		.L0			# if yes, return immediately
+
+	punpcklbw	%xmm4, %xmm4		# c -> cc
+	mov		%edi, %ecx
+	punpcklwd	%xmm4, %xmm4		# cc -> cccc
+	and		$~0xf, %rdi		# align source pointer
+	pshufd		$0, %xmm4, %xmm4	# cccc -> cccccccccccccccc
+	and		$0xf, %ecx		# offset of the buffer from the alignment boundary
+	movdqa		%xmm4, %xmm0
+	mov		$-1, %r8d
+	pcmpeqb		(%rdi), %xmm0		# compare aligned head
+	shl		%cl, %r8d		# mask of bytes in the head of the buffer
+	pmovmskb	%xmm0, %eax
+
+	sub		$16, %rcx		# negated number of buffer bytes the head can hold
+	and		%r8d, %eax		# match mask
+	add		%rcx, %rdx		# advance past head
+	cmc					# CF = 1 if the buffer ends before the head does
+	jbe		.Lrunt			# did the buffer end within the head?
+
+	mov		%rdi, %rsi		# pointer to matching chunk
+	add		$16, %rdi
+	sub		$16, %rdx		# enough left for another round?
+	jbe		1f
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+0:	movdqa		%xmm4, %xmm0
+	pcmpeqb		(%rdi), %xmm0
+	pmovmskb	%xmm0, %r8d		# matches in first chunk
+
+	cmp		$16, %rdx		# enough left for second chunk?
+	jbe		2f
+
+	movdqa		%xmm4, %xmm0
+	pcmpeqb		16(%rdi), %xmm0
+	pmovmskb	%xmm0, %ecx		# matches in second chunk
+
+	lea		16(%rdi), %r9
+	test		%ecx, %ecx		# match found in second chunk?
+	cmovz		%r8d, %ecx		# if not, use match data from first chunk
+	cmovz		%rdi, %r9
+
+	test		%ecx, %ecx		# any match found?
+	cmovnz		%ecx, %eax		# if yes, overwrite previously found match
+	cmovnz		%r9, %rsi
+
+	add		$32, %rdi		# advance to next iteration
+	sub		$32, %rdx		# advance to next chunks
+	ja		0b
+
+	/* process remaining 1--16 bytes */
+1:	pcmpeqb		(%rdi), %xmm4
+	mov		$0xffff, %r8d
+	xor		%ecx, %ecx
+	sub		%edx, %ecx		# number of bytes to be masked out
+	pmovmskb	%xmm4, %r9d
+	shr		%cl, %r8d		# mask of bytes to be kept in the buffer
+	and		%r9d, %r8d
+	cmovnz		%r8d, %eax		# a match in the tail supersedes earlier ones
+	cmovnz		%rdi, %rsi
+	bsr		%eax, %eax		# last match; ZF = 1 if EAX was 0 (NOTE(review): NULL result then relies on BSR leaving EAX unchanged)
+	lea		(%rsi, %rax, 1), %rsi	# pointer to match (or junk)
+	cmovnz		%rsi, %rax		# if any match was found, return it
+	ret
+
+	/* end of chunk reached within first half iteration */
+2:	test		%r8d, %r8d		# match in previous chunk?
+	cmovnz		%r8d, %eax		# if yes, overwrite previous chunks
+	cmovnz		%rdi, %rsi
+	add		$16, %rdi		# point to tail
+	sub		$16, %edx
+	jmp		1b			# handle tail the same otherwise
+
+	/* runt: buffer ends within head, edx has negated amount of invalid head bytes */
+.Lrunt:	mov		$0xffff, %r8d
+	xor		%ecx, %ecx
+	sub		%edx, %ecx		# number of bytes to be masked out
+	shr		%cl, %r8d		# mask of bytes to be kept in the buffer
+	and		%r8d, %eax
+	bsr		%eax, %eax		# last match; ZF = 1 if there is none (same BSR caveat as in the tail code)
+	lea		(%rdi, %rax, 1), %rdi	# pointer to match (or junk)
+	cmovnz		%rdi, %rax		# if any match was found, return it
+	ret
+
+	/* empty buffer: return a null pointer */
+.L0:	xor		%eax, %eax
+	ret
+ARCHEND(memrchr, baseline)
+
+	.section	.note.GNU-stack, "", %progbits
diff --git a/lib/libc/amd64/string/stpncpy.S b/lib/libc/amd64/string/stpncpy.S
new file mode 100644
index 000000000000..5ce0dd093a9e
--- /dev/null
+++ b/lib/libc/amd64/string/stpncpy.S
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+	.weak stpncpy			# public name is only a weak symbol ...
+	.set stpncpy, __stpncpy		# ... set to the value of __stpncpy
+ARCHFUNCS(__stpncpy)			/* list of variants; macros come from amd64_archlevel.h */
+	ARCHFUNC(__stpncpy, scalar)	/* variant built on __memchr, memcpy and memset */
+	ARCHFUNC(__stpncpy, baseline)	/* variant working on 16 byte XMM chunks */
+ENDARCHFUNCS(__stpncpy)
+
+ARCHENTRY(__stpncpy, scalar)		/* in: rdi = dest, rsi = src, rdx = len; out: rax */
+	push	%rbp		# establish stack frame
+	mov	%rsp, %rbp
+
+	push	%rdx		# len, kept at -8(%rbp)
+	push	%rdi		# dest, kept at -16(%rbp)
+	push	%rsi		# src
+	push	%rax		# dummy push for alignment
+
+	mov	%rsi, %rdi
+	xor	%esi, %esi
+	call	CNAME(__memchr)	# memchr(src, '\0', len)
+	pop	%rcx		# dummy pop
+	pop	%rsi		# src
+	mov	-16(%rbp), %rdi	# dest (left on the stack for later)
+
+	test	%rax, %rax	# NUL found?
+	jz	.Lfullcopy
+
+	mov	%rax, %rdx
+	sub	%rsi, %rdx	# copy until the NUL byte
+	add	%rdx, -16(%rbp)	# advance destination by string length
+	sub	%rdx, -8(%rbp)	# and shorten buffer size by string length
+	call	CNAME(memcpy)	# memcpy(dest, src, string length)
+
+	pop	%rdi		# dest + string length
+	pop	%rdx		# len - string length
+	xor	%esi, %esi
+	pop	%rbp		# tear down the frame for the tail call
+	jmp	CNAME(memset)	# clear remaining buffer
+
+.Lfullcopy:
+	mov	-8(%rbp), %rdx	# no NUL within len bytes
+	call	CNAME(memcpy)	# copy whole string
+	add	-8(%rbp), %rax	# point to dest[n]
+	leave
+	ret
+ARCHEND(__stpncpy, scalar)
+
+	/*
+	 * A 16 byte load from .Lmask+n (0 <= n <= 16) yields a mask of
+	 * 16-n 0xff bytes followed by n 0x00 bytes.
+	 */
+	.section	.rodata
+.Lmask:	.quad		0xffffffffffffffff
+	.quad		0xffffffffffffffff
+	.quad		0x0000000000000000
+	.quad		0x0000000000000000
+
+/* stpncpy(char *restrict rdi, const char *rsi, size_t rdx), returns rax */
+ARCHENTRY(__stpncpy, baseline)
+#define bounce		(-3*16-8)		/* location of on-stack bounce buffer */
+
+	test		%rdx, %rdx		# no bytes to copy?
+	jz		.L0
+
+	mov		%esi, %ecx
+	and		$~0xf, %rsi		# align source to 16 bytes
+	movdqa		(%rsi), %xmm0		# load head
+	and		$0xf, %ecx		# offset from alignment
+	mov		$-1, %r9d
+	lea		-33(%rcx), %rax		# set up overflow-proof comparison rdx+rcx<=32
+	shl		%cl, %r9d		# mask of bytes belonging to the string
+	sub		%rcx, %rdi		# adjust RDI to correspond to RSI
+	pxor		%xmm1, %xmm1
+	movdqa		%xmm0, bounce(%rsp)	# stash copy of head on the stack
+	pcmpeqb		%xmm1, %xmm0
+	pmovmskb	%xmm0, %r8d
+
+	lea		(%rdx, %rcx, 1), %r10	# buffer length from alignment boundary
+	add		%rdx, %rax		# at most 2 chunks (32 bytes) to play with?
+	jnc		.Lrunt			# if yes, use special runt processing
+
+	movdqu		%xmm1, -16(%rdi, %r10, 1) # clear final bytes of destination
+	and		%r9d, %r8d		# end of string within head?
+	jnz		.Lheadnul
+
+	movdqu		(%rsi, %rcx, 1), %xmm2	# load head from source buffer
+	movdqu		%xmm2, (%rdi, %rcx, 1)	# and deposit
+
+	add		$16, %rsi
+	add		$16, %rdi
+	sub		$32, %r10		# bytes left past the second chunk (at least 1)
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+0:	movdqa		(%rsi), %xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
+	pmovmskb	%xmm1, %r8d
+	test		%r8d, %r8d
+	jnz		3f
+
+	movdqu		%xmm0, (%rdi)
+	cmp		$16, %r10		# more than a full chunk left?
+	jbe		1f
+
+	movdqa		16(%rsi), %xmm0
+	add		$32, %rdi		# advance pointers to next chunk
+	add		$32, %rsi
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
+	pmovmskb	%xmm1, %r8d
+	test		%r8d, %r8d
+	jnz		2f
+
+	movdqu		%xmm0, -16(%rdi)
+	sub		$32, %r10		# more than another full chunk left?
+	ja		0b
+
+	sub		$16, %rdi		# undo second advancement
+	sub		$16, %rsi
+	add		$16, %r10d		# restore number of remaining bytes
+
+	/* 1--16 bytes left but string has not ended yet */
+1:	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rsi), %xmm1		# NUL byte in source tail?
+	pmovmskb	%xmm1, %r8d
+	bts		%r10d, %r8d		# treat end of buffer as NUL
+	tzcnt		%r8d, %r8d		# where is the NUL byte?
+	movdqu		(%rsi, %r8, 1), %xmm0	# load source tail before NUL
+	lea		16(%rdi, %r8, 1), %rax	# point return value to NUL byte
+						# or end of buffer
+	movdqu		%xmm0, (%rdi, %r8, 1)	# store tail into the buffer
+	ret
+
+2:	sub		$16, %rdi		# undo second advancement
+	sub		$16, %rsi
+	sub		$16, %r10
+
+	/* string has ended and buffer has not */
+3:	tzcnt		%r8d, %r8d		# where did the string end?
+	lea		.Lmask+16(%rip), %rcx
+	lea		(%rdi, %r8, 1), %rax 	# where the NUL byte will be
+	neg		%r8
+	movdqu		(%rcx, %r8, 1), %xmm1	# mask with FF where the string is,
+						# 00 where it is not
+	pand		%xmm1, %xmm0		# mask out bytes after the string
+	movdqu		%xmm0, (%rdi)	 	# store masked current chunk
+	pxor		%xmm1, %xmm1
+	sub		$16, %r10		# another full chunk left?
+	jbe		1f
+
+	/* clear remaining destination buffer (tail has been cleared earlier) */
+	ALIGN_TEXT
+0:	movdqu		%xmm1, 16(%rdi)
+	cmp		$16, %r10
+	jbe		1f
+
+	movdqu		%xmm1, 32(%rdi)
+	add		$32, %rdi
+	sub		$32, %r10
+	ja		0b
+
+1:	ret
+
+	/* at least two chunks to play with and NUL while processing head */
+.Lheadnul:
+	movdqu		bounce(%rsp, %rcx, 1), %xmm0 # load start of source from stack
+	tzcnt		%r8d, %r8d		# find location of NUL byte
+	movdqu		%xmm0, (%rdi, %rcx, 1)	# deposit head in the destination
+	movdqu		%xmm1, (%rdi, %r8, 1)	# clear out following bytes
+	movdqu		%xmm1, 16(%rdi)		# clear out second chunk
+	lea		(%rdi, %r8, 1), %rax	# make RAX point to the NUL byte
+
+	add		$32, %rdi		# advance past first two chunks
+	sub		$32+16, %r10		# advance past first three chunks
+	jbe		1f			# did we pass the end of the buffer?
+
+	/* clear remaining destination buffer (tail has been cleared earlier) */
+	ALIGN_TEXT
+0:	movdqu		%xmm1, (%rdi)		# clear out buffer chunk
+	cmp		$16, %r10
+	jbe		1f
+
+	movdqu		%xmm1, 16(%rdi)
+	add		$32, %rdi
+	sub		$32, %r10
+	ja		0b
+
+1:	ret
+
+	/* 1--32 bytes to copy, bounce through the stack */
+.Lrunt:	movdqa		%xmm1, bounce+16(%rsp)	# clear out rest of on-stack copy
+	bts		%r10, %r8		# treat end of buffer as end of string (bit 0--32)
+	and		%r9w, %r8w		# drop NUL bytes preceding the string, keep bits 16+
+	test		$0x1ffff, %r8d		# string or buffer ends with the first chunk?
+	jnz		0f			# if yes, do not inspect second buffer
+
+	movdqa		16(%rsi), %xmm0		# load second chunk of input
+	movdqa		%xmm0, bounce+16(%rsp)	# stash copy on stack
+	pcmpeqb		%xmm1, %xmm0		# NUL in second chunk?
+	pmovmskb	%xmm0, %r9d
+	shl		$16, %r9d
+	or		%r9, %r8		# merge found NUL bytes into NUL mask
+
+	/* end of string after one buffer */
+0:	tzcnt		%r8, %r8		# location of NUL byte or end of buffer (never 0 input)
+	movdqu		%xmm1, bounce(%rsp, %r8, 1) # clear bytes behind string
+	lea		bounce(%rsp, %rcx, 1), %rsi # start of string copy on stack
+	lea		(%rdi, %r8, 1), %rax	# return pointer to NUL byte
+
+	cmp		$16, %edx		# at least 16 bytes to transfer?
+	jae		.L1631
+
+	mov		(%rsi), %r8		# load string head
+	cmp		$8, %edx		# at least 8 bytes to transfer?
+	jae		.L0815
+
+	cmp		$4, %edx		# at least 4 bytes to transfer?
+	jae		.L0407
+
+	movzwl		-2(%rsi, %rdx, 1), %esi	# load last two bytes of string
+	mov		%r8b, (%rdi, %rcx, 1)	# store first byte
+	cmp		$2, %edx		# at least 2 bytes to transfer?
+	jb		.L1
+
+	mov		%si, -2(%rdi, %r10, 1)	# store last two bytes of string
+.L1:	ret
+
+.L1631:	movdqu		(%rsi), %xmm0		# load first 16 bytes of string
+	movdqu		-16(%rsi, %rdx, 1), %xmm1 # load last 16 bytes of string
+	movdqu		%xmm0, (%rdi, %rcx, 1)
+	movdqu		%xmm1, -16(%rdi, %r10, 1)
+	ret
+
+.L0815:	mov		-8(%rsi, %rdx, 1), %rdx	# load last 8 bytes of string
+	mov		%r8, (%rdi, %rcx, 1)
+	mov		%rdx, -8(%rdi, %r10, 1)
+	ret
+
+.L0407:	mov		-4(%rsi, %rdx, 1), %edx	# load last four bytes of string
+	mov		%r8d, (%rdi, %rcx, 1)
+	mov		%edx, -4(%rdi, %r10, 1)
+	ret
+
+	/* length 0 buffer: just return dest */
+.L0:	mov		%rdi, %rax
+	ret
+ARCHEND(__stpncpy, baseline)
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S
index 0834408acfb7..081e98840cee 100644
--- a/lib/libc/amd64/string/strcat.S
+++ b/lib/libc/amd64/string/strcat.S
@@ -1,6 +1,14 @@
-/*
- * Written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S
+ * written by J.T. Conklin <jtc@acorntoolworks.com>
+ * that was originally dedicated to the public domain
  */
 
 #include <machine/asm.h>
@@ -8,7 +16,14 @@
 	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
 #endif
 
-ENTRY(strcat)
+#include "amd64_archlevel.h"
+
+ARCHFUNCS(strcat)			/* list of variants; macros come from amd64_archlevel.h */
+	ARCHFUNC(strcat, scalar)	/* self-contained variant using general purpose registers */
+	ARCHFUNC(strcat, baseline)	/* variant calling strlen and __stpcpy */
+ENDARCHFUNCS(strcat)
+
+ARCHENTRY(strcat, scalar)
 	movq	%rdi,%rax
 	movabsq	$0x0101010101010101,%r8
 	movabsq	$0x8080808080808080,%r9
@@ -161,6 +176,28 @@ ENTRY(strcat)
 
 .Ldone:
 	ret
-END(strcat)
+ARCHEND(strcat, scalar)
+
+/*
+ * Call into strlen + stpcpy if we have any SIMD at all.
+ * The scalar implementation above is better for the scalar
+ * case as it avoids the function call overhead, but pessimal
+ * if we could call SIMD routines instead.
+ */
+ARCHENTRY(strcat, baseline)		/* in: rdi = dest, rsi = src; out: rax = dest */
+	push	%rbp			# establish stack frame
+	mov	%rsp, %rbp
+	push	%rsi			# keep src at -8(%rbp) across the strlen call
+	push	%rbx			# callee-saved, used to hold dest
+	mov	%rdi, %rbx		# remember destination for later
+	call	CNAME(strlen)		# strlen(dest)
+	mov	-8(%rbp), %rsi		# reload src
+	lea	(%rbx, %rax, 1), %rdi	# dest + strlen(dest)
+	call	CNAME(__stpcpy)		# stpcpy(dest + strlen(dest), src)
+	mov	%rbx, %rax		# return dest
+	pop	%rbx
+	leave				# also drops the saved src
+	ret
+ARCHEND(strcat, baseline)
 
 	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strcmp.S b/lib/libc/amd64/string/strcmp.S
index 437db7eca43a..eb354bd2af82 100644
--- a/lib/libc/amd64/string/strcmp.S
+++ b/lib/libc/amd64/string/strcmp.S
@@ -1,14 +1,33 @@
-/*
- * Written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcmp.S
+ * written by J.T. Conklin <jtc@acorntoolworks.com> that was originally
+ * dedicated to the public domain.
  */
 
 #include <machine/asm.h>
+#include <machine/param.h>
+
 #if 0
 	RCSID("$NetBSD: strcmp.S,v 1.3 2004/07/19 20:04:41 drochner Exp $")
 #endif
 
-ENTRY(strcmp)
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+ARCHFUNCS(strcmp)			/* list of variants; macros come from amd64_archlevel.h */
+	ARCHFUNC(strcmp, scalar)	/* variant using general purpose registers */
+	ARCHFUNC(strcmp, baseline)	/* variant working on 16 byte XMM chunks */
+ENDARCHFUNCS(strcmp)
+
+ARCHENTRY(strcmp, scalar)
 	/*
 	 * Align s1 to word boundary.
 	 * Consider unrolling loop?
@@ -39,7 +58,7 @@ ENTRY(strcmp)
 	movabsq	$0x8080808080808080,%r9
 	subq	$8,%rsi
 
-	.align	4
+	ALIGN_TEXT
 .Lword_loop:
 	movq	8(%rdi),%rax
 	addq	$8,%rdi
@@ -53,7 +72,7 @@ ENTRY(strcmp)
 	testq	%r9,%rdx
 	je	.Lword_loop
 
-	.align	4
+	ALIGN_TEXT
 .Lbyte_loop:
 	movb	(%rdi),%al
 	incq	%rdi
@@ -69,6 +88,272 @@ ENTRY(strcmp)
 	movzbq	%dl,%rdx
 	subq	%rdx,%rax
 	ret
-END(strcmp)
+ARCHEND(strcmp, scalar)
+
+ARCHENTRY(strcmp, baseline)		/* in: rdi = first string, rsi = second string; out: eax */
+	/* check if either string crosses a page in the head */
+	lea		15(%rdi), %r8d	# end of head
+	lea		15(%rsi), %r9d
+	mov		%edi, %eax
+	mov		%esi, %edx
+	xor		%edi, %r8d	# bits that changed between first and last byte
+	xor		%esi, %r9d
+	and		$~0xf, %rdi	# align heads to 16 bytes
+	and		$~0xf, %rsi
+	or		%r8d, %r9d	# in either RSI or RDI
+	and		$0xf, %eax	# offset from alignment
+	and		$0xf, %edx
+	pxor		%xmm1, %xmm1
+	test		$PAGE_SIZE, %r9d # did the page change?
+	jz		0f		# if not, take fast path
+
+	/* heads may cross page boundary, avoid unmapped loads */
+	movdqa		(%rdi), %xmm0	# load aligned heads
+	movdqa		(%rsi), %xmm2
+	mov		$-1, %r8d
+	mov		$-1, %r9d
+	mov		%eax, %ecx
+	shl		%cl, %r8d	# string head in XMM0
+	mov		%edx, %ecx
+	shl		%cl, %r9d	# string head in XMM2
+	movdqa		%xmm0, -40(%rsp) # stash copies of the heads on the stack
+	movdqa		%xmm2, -24(%rsp)
+	pcmpeqb		%xmm1, %xmm0
+	pcmpeqb		%xmm1, %xmm2
+	pmovmskb	%xmm0, %r10d
+	pmovmskb	%xmm2, %r11d
+	test		%r8d, %r10d	# NUL byte present in first string?
+	lea		-40(%rsp), %r8	# if yes, read the head from the stack copy
+	cmovz		%rdi, %r8	# else from the string itself
+	test		%r9d, %r11d	# NUL byte present in second string?
+	lea		-24(%rsp), %r9
+	cmovz		%rsi, %r9
+	movdqu		(%r8, %rax, 1), %xmm0 # load true (or fake) heads
+	movdqu		(%r9, %rdx, 1), %xmm4
+	jmp		1f
+
+0:	movdqu		(%rdi, %rax, 1), %xmm0 # load true heads
+	movdqu		(%rsi, %rdx, 1), %xmm4
+1:	pxor		%xmm2, %xmm2
+	pcmpeqb		%xmm0, %xmm2	# NUL byte present?
+	pcmpeqb		%xmm0, %xmm4	# which bytes match?
+	pandn		%xmm4, %xmm2	# match and not NUL byte?
+	pmovmskb	%xmm2, %r9d
+	xor		$0xffff, %r9d	# mismatch or NUL byte?
+	jnz		.Lhead_mismatch
+
+	/* load head and second chunk */
+	movdqa		16(%rdi), %xmm2	# load second chunks
+	movdqa		16(%rsi), %xmm3
+	sub		%rdx, %rax	# is a&0xf >= b&0xf?
+	jb		.Lswapped	# if not, proceed with swapped operands
+
+	neg		%rax
+	movdqu		16(%rsi, %rax, 1), %xmm0
+	sub		%rdi, %rsi	# express RSI as distance from RDI
+	lea		(%rsi, %rax, 1), %rdx # point RDX to offset in second string
+	neg		%rax
+	pcmpeqb		%xmm3, %xmm1	# ... corresponding to RDI
+	pcmpeqb		%xmm2, %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$16, %rdi
+	test		%r8d, %r8d
+	jnz		.Lnul_found
+	xor		$0xffff, %r9d
+	jnz		.Lmismatch
+	add		$16, %rdi	# advance aligned pointers
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *     RDI:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *     RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * RSI doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As RSI is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that RDI has not ended yet.
+	 */
+	ALIGN_TEXT
+0:	movdqu		(%rdi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI?
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rdi, %rsi, 1), %xmm1 # end of string in RSI?
+	pcmpeqb		(%rdi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	test		%r8d, %r8d
+	jnz		.Lnul_found
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatch
+
+	/* main loop unrolled twice */
+	movdqu		16(%rdi, %rdx, 1), %xmm0 # chunk of 2nd string corresponding to RDI?
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rdi, %rsi, 1), %xmm1 # end of string in RSI?
+	pcmpeqb		16(%rdi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$32, %rdi
+	test		%r8d, %r8d
+	jnz		.Lnul_found2
+	xor		$0xffff, %r9d	# any mismatches?
+	jz		0b
+
+	sub		$16, %rdi	# roll back second increment
+
+	/* a mismatch has been found between (RDI) and (RDI + RDX) */
+.Lmismatch:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rdi, %rdx	# turn RDX from offset to pointer
+	movzbl		(%rdx, %r9, 1), %ecx
+	movzbl		(%rdi, %r9, 1), %eax
+	sub		%ecx, %eax	# difference of the mismatching chars
+	ret
+
+	/* mismatch in true heads */
+.Lhead_mismatch:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rax, %rdi	# return to true heads
+	add		%rdx, %rsi
+	movzbl		(%rdi, %r9, 1), %eax # mismatching characters
+	movzbl		(%rsi, %r9, 1), %ecx
+	sub		%ecx, %eax
+	ret
+
+.Lnul_found2:
+	sub		$16, %rdi	# roll back second increment
+
+	/* a NUL has been found in RSI */
+.Lnul_found:
+	mov		%eax, %ecx	# shift count: difference of the alignment offsets
+	mov		%r8d, %r10d	# keep the unshifted NUL mask
+	shl		%cl, %r8w	# adjust NUL mask to positions in RDI/RDX
+	xor		$0xffff, %r9d	# mask of mismatches
+	or		%r8d, %r9d	# NUL bytes also count as mismatches
+	jnz		.Lmismatch
+
+	/*
+	 * (RDI) == (RSI) and NUL is past the string.
+	 * Compare (RSI) with the corresponding part
+	 * of the other string until the NUL byte.
+	 */
+	movdqu		(%rdi, %rax, 1), %xmm0
+	pcmpeqb		(%rdi, %rsi, 1), %xmm0
+	add		%rdi, %rsi	# restore RSI pointer
+	add		%rax, %rdi	# point RDI to chunk corresponding to (RSI)
+	pmovmskb	%xmm0, %ecx	# mask of matches
+	not		%ecx		# mask of mismatches
+	or		%r10d, %ecx	# mask of mismatches or NUL bytes
+	tzcnt		%ecx, %ecx	# location of first mismatch
+	movzbl		(%rdi, %rcx, 1), %eax
+	movzbl		(%rsi, %rcx, 1), %ecx
+	sub		%ecx, %eax
+	ret
+
+	/*
+	 * If (a&0xf) < (b&0xf), we do the same thing but with swapped
+	 * operands.  I found that this performs slightly better than
+	 * using conditional moves to do the swap branchless.
+	 */
+.Lswapped:
+	movdqu		16(%rdi, %rax, 1), %xmm0
+	sub		%rsi, %rdi	# express RDI as distance from RSI
+	lea		(%rdi, %rax, 1), %rdx # point RDX to offset in RDI corresponding to RSI
+	neg		%rax		# make difference positive
+	pcmpeqb		%xmm2, %xmm1
+	pcmpeqb		%xmm3, %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$16, %rsi	# advance aligned pointers
+	test		%r8d, %r8d
+	jnz		.Lnul_founds
+	xor		$0xffff, %r9d
+	jnz		.Lmismatchs
+	add		$16, %rsi
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *     RSI:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *     RDI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * RDI doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As RDI is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that RSI has not ended yet.
+	 */
+	ALIGN_TEXT
+0:	movdqu		(%rsi, %rdx, 1), %xmm0 # chunk of 1st string corresponding to RSI?
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rsi, %rdi, 1), %xmm1 # end of string in RDI?
+	pcmpeqb		(%rsi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	test		%r8d, %r8d
+	jnz		.Lnul_founds
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatchs
+
+	/* main loop unrolled twice */
+	movdqu		16(%rsi, %rdx, 1), %xmm0 # chunk of 1st string corresponding to RSI?
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rsi, %rdi, 1), %xmm1 # end of string in RDI?
+	pcmpeqb		16(%rsi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$32, %rsi
+	test		%r8d, %r8d
+	jnz		.Lnul_found2s
+	xor		$0xffff, %r9d	# any mismatches?
+	jz		0b
+
+	sub		$16, %rsi	# roll back second increment
+
+	/* a mismatch has been found between (RSI + RDX) and (RSI) */
+.Lmismatchs:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rsi, %rdx	# turn RDX from offset to pointer
+	movzbl		(%rdx, %r9, 1), %eax
+	movzbl		(%rsi, %r9, 1), %ecx
+	sub		%ecx, %eax	# difference of the mismatching chars
+	ret
+
+.Lnul_found2s:
+	sub		$16, %rsi	# roll back second increment
+
+	/* a NUL has been found in RDI */
+.Lnul_founds:
+	mov		%eax, %ecx	# shift count: difference of the alignment offsets
+	mov		%r8d, %r10d	# keep the unshifted NUL mask
+	shl		%cl, %r8w	# adjust NUL mask to positions in RSI/RDX
+	xor		$0xffff, %r9d	# mask of mismatches
+	or		%r8d, %r9d	# NUL bytes also count as mismatches
+	jnz		.Lmismatchs
+
+	/*
+	 * (RSI) == (RDI) and NUL is past the string.
+	 * Compare (RDI) with the corresponding part
+	 * of the other string until the NUL byte.
+	 */
+	movdqu		(%rsi, %rax, 1), %xmm0
+	pcmpeqb		(%rsi, %rdi, 1), %xmm0
+	add		%rsi, %rdi	# restore RDI pointer
+	add		%rax, %rsi	# point RSI to chunk corresponding to (RDI)
+	pmovmskb	%xmm0, %ecx	# mask of matches
+	not		%ecx		# mask of mismatches
+	or		%r10d, %ecx	# mask of mismatches or NUL bytes
+	tzcnt		%ecx, %ecx	# location of first mismatch
+	movzbl		(%rdi, %rcx, 1), %eax
+	movzbl		(%rsi, %rcx, 1), %ecx
+	sub		%ecx, %eax
+	ret
+ARCHEND(strcmp, baseline)
 
 	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S
index 53100eeea9a5..eab669edce72 100644
--- a/lib/libc/amd64/string/strcspn.S
+++ b/lib/libc/amd64/string/strcspn.S
@@ -33,13 +33,15 @@
 
 #define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
 
-ARCHFUNCS(strcspn)
-	ARCHFUNC(strcspn, scalar)
+	.weak strcspn
+	.set strcspn, __strcspn
+ARCHFUNCS(__strcspn)
+	ARCHFUNC(__strcspn, scalar)
 	NOARCHFUNC
-	ARCHFUNC(strcspn, x86_64_v2)
-ENDARCHFUNCS(strcspn)
+	ARCHFUNC(__strcspn, x86_64_v2)
+ENDARCHFUNCS(__strcspn)
 
-ARCHENTRY(strcspn, scalar)
+ARCHENTRY(__strcspn, scalar)
 	push	%rbp			# align stack to enable function call
 	mov	%rsp, %rbp
 	sub	$256, %rsp		# allocate space for lookup table
@@ -122,7 +124,7 @@ ARCHENTRY(strcspn, scalar)
 	sub	(%rsp), %rax		# length of prefix before match
 	leave
 	ret
-ARCHEND(strcspn, scalar)
+ARCHEND(__strcspn, scalar)
 
 	/*
 	 * This kernel uses pcmpistri to do the heavy lifting.
@@ -134,7 +136,7 @@ ARCHEND(strcspn, scalar)
 	 * 17--32: two pcmpistri per 16 bytes of input
 	 *   >=33: fall back to look up table
 	 */
-ARCHENTRY(strcspn, x86_64_v2)
+ARCHENTRY(__strcspn, x86_64_v2)
 	push		%rbp
 	mov		%rsp, %rbp
 	sub		$256, %rsp
@@ -368,6 +370,6 @@ ARCHENTRY(strcspn, x86_64_v2)
 2:	sub	%rdi, %rax		# number of characters preceding match
 	leave
 	ret
-ARCHEND(strcspn, x86_64_v2)
+ARCHEND(__strcspn, x86_64_v2)
 
 	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strlcat.c b/lib/libc/amd64/string/strlcat.c
new file mode 100644
index 000000000000..0c1e1c5d05f7
--- /dev/null
+++ b/lib/libc/amd64/string/strlcat.c
@@ -0,0 +1,25 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+void *__memchr(const void *, int, size_t);
+size_t __strlcpy(char *restrict, const char *restrict, size_t);
+
+size_t
+strlcat(char *restrict dst, const char *restrict src, size_t dstsize)
+{
+	char *end = __memchr(dst, '\0', dstsize);	/* terminator of dst, if any */
+	size_t used;
+
+	if (end == NULL)	/* no NUL within dstsize bytes: nothing is appended */
+		return (dstsize + strlen(src));
+
+	used = (size_t)(end - dst);	/* length of the string already in dst */
+	return (used + __strlcpy(end, src, dstsize - used));
+}
diff --git a/lib/libc/amd64/string/strlcpy.S b/lib/libc/amd64/string/strlcpy.S
new file mode 100644
index 000000000000..2b32c6c78047
--- /dev/null
+++ b/lib/libc/amd64/string/strlcpy.S
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+	.weak strlcpy			/* strlcpy is only a weak alias ... */
+	.set strlcpy, __strlcpy		/* ... of __strlcpy, the name strlcat.c calls */
+ARCHFUNCS(__strlcpy)			/* NOTE(review): macros come from amd64_archlevel.h; presumably a variant table -- confirm */
+	ARCHFUNC(__strlcpy, scalar)
+	ARCHFUNC(__strlcpy, baseline)
+ENDARCHFUNCS(__strlcpy)
+
+ARCHENTRY(__strlcpy, scalar)	/* strlen() + memcpy() variant; RDI = dst, RSI = src, RDX = buffer size */
+	push	%rbp		# establish stack frame
+	mov	%rsp, %rbp
+	push	%rsi		# keep src at -8(%rbp) for the memcpy call
+	push	%rbx		# callee-saved, will carry the return value
+	push	%rdi		# dst and buffer size must survive the strlen call
+	push	%rdx
+	mov	%rsi, %rdi
+	call	CNAME(strlen)	# strlen(src)
+	pop	%rdx
+	pop	%rdi
+	mov	-8(%rbp), %rsi	# reload src saved above
+	mov	%rax, %rbx	# remember string length for return value
+	sub	$1, %rdx	# do not copy into the final byte of the buffer
+	jc	0f		# skip copying altogether if buffer was empty
+	cmp	%rax, %rdx	# is the buffer longer than the input?
+	cmova	%rax, %rdx	# if yes, only copy the part that fits
+	movb	$0, (%rdi, %rdx, 1) # NUL-terminate output buffer
+	call	CNAME(memcpy)	# copy string to output
+0:	mov	%rbx, %rax	# restore return value
+	pop	%rbx
+	leave
+	ret
+ARCHEND(__strlcpy, scalar)
+
+ARCHENTRY(__strlcpy, baseline)	/* XMM variant, 16 bytes per step; RDI = dst, RSI = src, RDX = buffer size */
+	sub		$1, %rdx		# do not count NUL byte in buffer length
+	jb		.L0			# go to special code path if len was 0
+
+	mov		%esi, %ecx
+	pxor		%xmm1, %xmm1
+	mov		%rsi, %r9		# stash a copy of the source pointer for later
+	and		$~0xf, %rsi		# round RSI down to a 16 byte boundary
+	pcmpeqb		(%rsi), %xmm1		# NUL found in head?
+	mov		$-1, %r8d
+	and		$0xf, %ecx		# ECX = misalignment of the source
+	shl		%cl, %r8d		# mask of bytes in the string
+	pmovmskb	%xmm1, %eax
+	and		%r8d, %eax		# disregard bytes preceding the string
+	jnz		.Lhead_nul
+
+	movdqa		16(%rsi), %xmm3		# load second string chunk
+	movdqu		(%r9), %xmm2		# load unaligned string head
+	mov		$32, %r8d
+	sub		%ecx, %r8d		# head length + length of second chunk
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm3, %xmm1		# NUL found in second chunk?
+
+	sub		%r8, %rdx		# enough space left for the second chunk?
+	jbe		.Lhead_buf_end
+
+	/* process second chunk */
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		.Lsecond_nul
+
+	/* string didn't end in second chunk and neither did buffer -- not a runt! */
+	movdqa		32(%rsi), %xmm0		# load next string chunk
+	pxor		%xmm1, %xmm1
+	movdqu		%xmm2, (%rdi)		# deposit head into buffer
+	sub		%rcx, %rdi		# adjust RDI to correspond to RSI
+	movdqu		%xmm3, 16(%rdi)		# deposit second chunk
+	sub		%rsi, %rdi		# express RDI as distance from RSI
+	add		$32, %rsi		# advance RSI past first two chunks
+	sub		$16, %rdx		# enough left for another round?
+	jbe		1f
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+0:	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		3f
+
+	movdqu		%xmm0, (%rsi, %rdi)	# store chunk at its place in the buffer
+	movdqa		16(%rsi), %xmm0		# load next string chunk
+	pxor		%xmm1, %xmm1
+	cmp		$16, %rdx		# more than a full chunk left?
+	jbe		2f
+
+	add		$32, %rsi		# advance pointers to next chunk
+	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		4f
+
+	movdqu		%xmm0, -16(%rsi, %rdi)	# store second chunk of this round
+	movdqa		(%rsi), %xmm0		# load next string chunk
+	pxor		%xmm1, %xmm1
+	sub		$32, %rdx		# account for the two chunks just handled
+	ja		0b
+
+1:	sub		$16, %rsi		# undo second advancement
+	add		$16, %edx		# EDX = bytes left in the buffer
+
+	/* 1--16 bytes left in the buffer but string has not ended yet */
+2:	pcmpeqb		%xmm1, %xmm0		# NUL byte encountered?
+	pmovmskb	%xmm0, %r8d
+	mov		%r8d, %eax		# keep the unmodified NUL mask for the test below
+	bts		%edx, %r8d		# treat end of buffer as end of string
+	tzcnt		%r8d, %r8d		# find tail length
+	add		%rsi, %rdi		# restore RDI
+	movdqu		(%rsi, %r8, 1), %xmm0	# load string tail
+	movdqu		%xmm0, (%rdi, %r8, 1)	# store string tail
+	movb		$0, 16(%rdi, %r8, 1)	# NUL terminate
+
+	/* continue to find the end of the string */
+	test		%eax, %eax		# end of string already reached?
+	jnz		1f
+
+	ALIGN_TEXT
+0:	pcmpeqb		32(%rsi), %xmm1		# NUL in the next chunk?
+	pmovmskb	%xmm1, %eax
+	pxor		%xmm1, %xmm1
+	test		%eax, %eax
+	jnz		2f
+
+	pcmpeqb		48(%rsi), %xmm1		# NUL in the chunk after that?
+	pmovmskb	%xmm1, %eax
+	add		$32, %rsi
+	pxor		%xmm1, %xmm1
+	test		%eax, %eax
+	jz		0b
+
+1:	sub		$16, %rsi		# undo second advancement
+2:	tzcnt		%eax, %eax		# where is the NUL byte?
+	sub		%r9, %rsi
+	lea		32(%rsi, %rax, 1), %rax	# return string length
+	ret
+
+4:	sub		$16, %rsi		# undo second advancement
+	add		$16, %rdx		# restore number of remaining bytes
+
+	/* string has ended but buffer has not */
+3:	tzcnt		%eax, %eax		# find length of string tail
+	movdqu		-15(%rsi, %rax, 1), %xmm0 # load string tail (incl. NUL)
+	add		%rsi, %rdi		# restore destination pointer
+	movdqu		%xmm0, -15(%rdi, %rax, 1) # store string tail (incl. NUL)
+	sub		%r9, %rsi		# string length to current chunk
+	add		%rsi, %rax		# plus length of current chunk
+	ret
+
+.Lhead_buf_end:
+	pmovmskb	%xmm1, %r8d
+	add		$32, %edx		# restore edx to (len-1) + ecx
+	mov		%r8d, %eax
+	shl		$16, %r8d		# place 2nd chunk NUL mask into bits 16--31
+	bts		%rdx, %r8		# treat end of buffer as end of string
+	tzcnt		%r8, %rdx		# find string/buffer len from alignment boundary
+	sub		%ecx, %edx		# find actual string/buffer len
+	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator
+
+	/* continue to find the end of the string */
+	test		%eax, %eax		# end of string already reached?
+	jnz		1f
+
+	ALIGN_TEXT
+0:	pcmpeqb		32(%rsi), %xmm1		# NUL in the next chunk?
+	pmovmskb	%xmm1, %eax
+	pxor		%xmm1, %xmm1
+	test		%eax, %eax
+	jnz		2f
+
+	pcmpeqb		48(%rsi), %xmm1		# NUL in the chunk after that?
+	pmovmskb	%xmm1, %eax
+	add		$32, %rsi
+	pxor		%xmm1, %xmm1
+	test		%eax, %eax
+	jz		0b
+
+1:	sub		$16, %rsi		# undo second advancement
+2:	tzcnt		%eax, %eax		# where is the NUL byte?
+	sub		%r9, %rsi
+	lea		32(%rsi, %rax, 1), %rax	# return string length
+	jmp		.L0031			# now copy the RDX leading bytes
+
+.Lsecond_nul:
+	add		%r8, %rdx		# restore buffer length
+	tzcnt		%eax, %eax		# where is the NUL byte?
+	lea		-16(%rcx), %r8d
+	sub		%r8d, %eax		# string length
+	cmp		%rax, %rdx		# is the string shorter than the buffer?
+	cmova		%rax, %rdx		# copy only min(buflen, srclen) bytes
+	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator
+.L0031:	cmp		$16, %rdx		# at least 16 bytes to copy (not incl NUL)?
+	jb		.L0015
+
+	/* copy 16--31 bytes */
+	movdqu		(%r9), %xmm0		# load first 16 bytes
+	movdqu		-16(%r9, %rdx, 1), %xmm1 # load last 16 bytes
+	movdqu		%xmm0, (%rdi)		# the two stores may overlap
+	movdqu		%xmm1, -16(%rdi, %rdx, 1)
+	ret
+
+.Lhead_nul:
+	tzcnt		%eax, %eax		# where is the NUL byte?
+	sub		%ecx, %eax		# ... from the beginning of the string?
+	cmp		%rax, %rdx		# is the string shorter than the buffer?
+	cmova		%rax, %rdx		# copy only min(buflen, srclen) bytes
+	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator
+
+	/* process strings of 0--15 bytes (rdx: min(buflen, srclen), rax: srclen) */
+.L0015:	cmp		$8, %rdx		# at least 8 bytes to copy?
+	jae		.L0815
+
+	cmp		$4, %rdx		# at least 4 bytes to copy?
+	jae		.L0407
+
+	cmp		$2, %rdx		# at least 2 bytes to copy?
+	jae		.L0203
+
+	movzbl		(%r9), %ecx		# load first byte from src
+	mov		%cl, (%rdi)		# deposit into destination
+	movb		$0, (%rdi, %rdx, 1)	# add NUL terminator (again)
+	ret
+
+.L0203:	movzwl		(%r9), %ecx		# first and last 2 bytes, may overlap
+	movzwl		-2(%r9, %rdx, 1), %esi
+	mov		%cx, (%rdi)
+	mov		%si, -2(%rdi, %rdx, 1)
+	ret
+
+.L0407:	mov		(%r9), %ecx		# first and last 4 bytes, may overlap
+	mov		-4(%r9, %rdx, 1), %esi
+	mov		%ecx, (%rdi)
+	mov		%esi, -4(%rdi, %rdx, 1)
+	ret
+
+.L0815:	mov		(%r9), %rcx		# first and last 8 bytes, may overlap
+	mov		-8(%r9, %rdx, 1), %rsi
+	mov		%rcx, (%rdi)
+	mov		%rsi, -8(%rdi, %rdx, 1)
+	ret
+
+	/* length zero destination: just return the string length */
+.L0:	mov		%rsi, %rdi
+	jmp		CNAME(strlen)
+ARCHEND(__strlcpy, baseline)
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strncat.c b/lib/libc/amd64/string/strncat.c
new file mode 100644
index 000000000000..33b278ac5e04
--- /dev/null
+++ b/lib/libc/amd64/string/strncat.c
@@ -0,0 +1,29 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+void *__memccpy(void *restrict, const void *restrict, int, size_t);
+
+char *
+strncat(char *dest, const char *src, size_t n)
+{
+	char *tail, *past;
+
+	/* copy at most n bytes of src to the end of dest, stopping after a NUL */
+	tail = dest + strlen(dest);
+	past = __memccpy(tail, src, '\0', n);
+	/*
+	 * A NULL result is taken to mean that no NUL was copied; pretend the
+	 * copy ended just past tail[n] so the store below needs no second case.
+	 */
+	if (past == NULL)
+		past = tail + n + 1;
+	past[-1] = '\0';
+	return (dest);
+}
diff --git a/lib/libc/amd64/string/strncmp.S b/lib/libc/amd64/string/strncmp.S
new file mode 100644
index 000000000000..932cf078bdfc
--- /dev/null
+++ b/lib/libc/amd64/string/strncmp.S
@@ -0,0 +1,488 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4, 0x90
+
+ARCHFUNCS(strncmp)			/* NOTE(review): macros come from amd64_archlevel.h; presumably a variant table -- confirm */
+	ARCHFUNC(strncmp, scalar)	/* byte-at-a-time implementation below */
+	ARCHFUNC(strncmp, baseline)	/* XMM implementation below */
+ENDARCHFUNCS(strncmp)
+
+/*
+ * This is just the scalar loop unrolled a bunch of times.
+ */
+ARCHENTRY(strncmp, scalar)	/* RDI = first string, RSI = second string, RDX = max chars to compare */
+	xor	%eax, %eax	# EAX = 0: return value for equality, else index of the differing char
+	sub	$4, %rdx	# 4 chars left to compare?
+	jbe	1f
+
+	ALIGN_TEXT
+0:	movzbl	(%rdi), %ecx
+	test	%ecx, %ecx	# NUL char in first string?
+	jz	.L0
+	cmpb	(%rsi), %cl	# mismatch between strings?
+	jnz	.L0
+
+	movzbl	1(%rdi), %ecx	# same two checks for char 1
+	test	%ecx, %ecx
+	jz	.L1
+	cmpb	1(%rsi), %cl
+	jnz	.L1
+
+	movzbl	2(%rdi), %ecx	# ... for char 2
+	test	%ecx, %ecx
+	jz	.L2
+	cmpb	2(%rsi), %cl
+	jnz	.L2
+
+	movzbl	3(%rdi), %ecx	# ... and for char 3
+	test	%ecx, %ecx
+	jz	.L3
+	cmpb	3(%rsi), %cl
+	jnz	.L3
+
+	add	$4, %rdi	# advance to next iteration
+	add	$4, %rsi
+	sub	$4, %rdx	# more than 4 chars left to compare?
+	ja	0b
+
+	/* end of string within the next 4 characters */
+1:	cmp	$-4, %edx	# end of string reached immediately?
+	jz	.Leq
+	movzbl	(%rdi), %ecx
+	test	%ecx, %ecx
+	jz	.L0
+	cmpb	(%rsi), %cl
+	jnz	.L0
+
+	cmp	$-3, %edx	# end of string reached after 1 char?
+	jz	.Leq
+	movzbl	1(%rdi), %ecx
+	test	%ecx, %ecx
+	jz	.L1
+	cmpb	1(%rsi), %cl
+	jnz	.L1
+
+	cmp	$-2, %edx	# end of string reached after 2 chars?
+	jz	.Leq
+	movzbl	2(%rdi), %ecx
+	test	%ecx, %ecx
+	jz	.L2
+	cmpb	2(%rsi), %cl
+	jnz	.L2
+
+	cmp	$-1, %edx	# either end of string after 3 chars,
+	jz	.Leq		# or it boils down to the last char
+
+.L3:	inc	%eax		# entering at .Lk leaves EAX = k
+.L2:	inc	%eax
+.L1:	inc	%eax
+.L0:	movzbl	(%rsi, %rax, 1), %ecx	# char of second string at index EAX
+	movzbl	(%rdi, %rax, 1), %eax	# char of first string at index EAX
+	sub	%ecx, %eax	# return their difference
+.Leq:	ret			# EAX is still 0 when jumped to directly
+ARCHEND(strncmp, scalar)
+
+ARCHENTRY(strncmp, baseline)	/* XMM variant; RDI = first string, RSI = second string, RDX = max bytes to compare */
+	push		%rbx		# RBX is callee-saved but used as scratch below
+	sub		$1, %rdx	# RDX--, so RDX points to the last byte to compare
+	jb		.Lempty		# were there any bytes to compare at all?
+
+	lea		15(%rdi), %r8d	# end of head
+	lea		15(%rsi), %r9d
+	mov		%edi, %eax
+	mov		%esi, %ebx
+	xor		%edi, %r8d	# bits that changed between first and last byte
+	xor		%esi, %r9d
+	and		$~0xf, %rdi	# align heads to 16 bytes
+	and		$~0xf, %rsi
+	or		%r8d, %r9d	# changed bits of either string
+	and		$0xf, %eax	# offset from alignment
+	and		$0xf, %ebx
+	movdqa		(%rdi), %xmm0	# load aligned heads
+	movdqa		(%rsi), %xmm2
+	pxor		%xmm1, %xmm1
+	cmp		$16, %rdx	# end of buffer within the first 16 bytes?
+	jb		.Llt16
+
+	test		$PAGE_SIZE, %r9d # did the page change?
+	jz		0f		# if not, take fast path
+
+
+	/* heads may cross page boundary, avoid unmapped loads */
+	movdqa		%xmm0, -32(%rsp) # stash copies of the heads on the stack
+	movdqa		%xmm2, -16(%rsp)
+	mov		$-1, %r8d
+	mov		$-1, %r9d
+	mov		%eax, %ecx
+	shl		%cl, %r8d	# string head in XMM0
+	mov		%ebx, %ecx
+	shl		%cl, %r9d	# string head in XMM2
+	pcmpeqb		%xmm1, %xmm0
+	pcmpeqb		%xmm1, %xmm2
+	pmovmskb	%xmm0, %r10d
+	pmovmskb	%xmm2, %r11d
+	test		%r8d, %r10d	# NUL byte present in first string?
+	lea		-32(%rsp), %r8
+	cmovz		%rdi, %r8	# no NUL in head: use the real string
+	test		%r9d, %r11d	# NUL byte present in second string?
+	lea		-16(%rsp), %r9
+	cmovz		%rsi, %r9	# no NUL in head: use the real string
+	movdqu		(%r8, %rax, 1), %xmm0 # load true (or fake) heads
+	movdqu		(%r9, %rbx, 1), %xmm4
+	jmp		1f
+
+	/* rdx == 0 */
+.Lempty:
+	xor		%eax, %eax	# zero-length buffers compare equal
+	pop		%rbx
+	ret
+
+0:	movdqu		(%rdi, %rax, 1), %xmm0 # load true heads
+	movdqu		(%rsi, %rbx, 1), %xmm4
+1:	pxor		%xmm2, %xmm2
+	pcmpeqb		%xmm0, %xmm2	# NUL byte present?
+	pcmpeqb		%xmm0, %xmm4	# which bytes match?
+	pandn		%xmm4, %xmm2	# match and not NUL byte?
+	pmovmskb	%xmm2, %r9d
+	xor		$0xffff, %r9d	# mismatch or NUL byte?
+	jnz		.Lhead_mismatch
+
+	/* load head and second chunk */
+	movdqa		16(%rdi), %xmm2	# load second chunks
+	movdqa		16(%rsi), %xmm3
+	lea		-16(%rdx, %rbx, 1), %rdx # account for length of RSI chunk
+	sub		%rbx, %rax	# is a&0xf >= b&0xf?
+	jb		.Lswapped	# if not, proceed with swapped operands
+	jmp		.Lnormal
+
+	/* buffer ends within the first 16 bytes */
+.Llt16:	test		$PAGE_SIZE, %r9d # did the page change?
+	jz		0f		# if not, take fast path
+
+	/* heads may cross page boundary */
+	movdqa		%xmm0, -32(%rsp) # stash copies of the heads on the stack
+	movdqa		%xmm2, -16(%rsp)
+	mov		$-1, %r8d
+	mov		$-1, %r9d
+	mov		%eax, %ecx
+	shl		%cl, %r8d	# string head in XMM0
+	mov		%ebx, %ecx
+	shl		%cl, %r9d	# string head in XMM2
+	pcmpeqb		%xmm1, %xmm0
+	pcmpeqb		%xmm1, %xmm2
+	pmovmskb	%xmm0, %r10d
+	pmovmskb	%xmm2, %r11d
+	lea		(%rdx, %rax, 1), %ecx # location of last buffer byte in xmm0
+	bts		%ecx, %r10d	# treat as if NUL byte present
+	lea		(%rdx, %rbx, 1), %ecx # location of last buffer byte in xmm2
+	bts		%ecx, %r11d
+	test		%r8w, %r10w	# NUL byte present in first string head?
+	lea		-32(%rsp), %r8
+	cmovz		%rdi, %r8
+	test		%r9w, %r11w	# NUL byte present in second string head?
+	lea		-16(%rsp), %r9
+	cmovz		%rsi, %r9
+	movdqu		(%r8, %rax, 1), %xmm0 # load true (or fake) heads
+	movdqu		(%r9, %rbx, 1), %xmm4
+	jmp		1f
+
+0:	movdqu		(%rdi, %rax, 1), %xmm0 # load true heads
+	movdqu		(%rsi, %rbx, 1), %xmm4
+1:	pxor		%xmm2, %xmm2
+	pcmpeqb		%xmm0, %xmm2	# NUL byte present?
+	pcmpeqb		%xmm0, %xmm4	# which bytes match?
+	pandn		%xmm4, %xmm2	# match and not NUL byte?
+	pmovmskb	%xmm2, %r9d
+	btr		%edx, %r9d	# induce mismatch in last byte of buffer
+	not		%r9d		# mismatch or NUL byte?
+
+	/* mismatch in true heads */
+	ALIGN_TEXT
+.Lhead_mismatch:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rax, %rdi	# return to true heads
+	add		%rbx, %rsi
+	movzbl		(%rdi, %r9, 1), %eax # mismatching characters
+	movzbl		(%rsi, %r9, 1), %ecx
+	sub		%ecx, %eax	# return their difference
+	pop		%rbx
+	ret
+
+	/* rax >= 0 */
+	ALIGN_TEXT
+.Lnormal:
+	neg		%rax
+	movdqu		16(%rsi, %rax, 1), %xmm0
+	sub		%rdi, %rsi	# express RSI as distance from RDI
+	lea		(%rsi, %rax, 1), %rbx # point RBX to offset in second string
+	neg		%rax		# ... corresponding to RDI
+	pcmpeqb		%xmm3, %xmm1	# NUL present?
+	pcmpeqb		%xmm2, %xmm0	# Mismatch between chunks?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	mov		$16, %ecx
+	cmp		%rcx, %rdx	# does the buffer end within (RDI,RSI,1)?
+	cmovb		%edx, %ecx	# ECX = min(16, RDX)
+	add		$32, %rdi	# advance to next iteration
+	bts		%ecx, %r8d	# mark end-of-buffer as if there was a NUL byte
+	test		%r8w, %r8w	# NUL or end of buffer found?
+	jnz		.Lnul_found2
+	xor		$0xffff, %r9d
+	jnz		.Lmismatch2
+	sub		$48, %rdx	# end of buffer within first main loop iteration?
+	jb		.Ltail		# if yes, process tail
+
+	/*
+	 * During the main loop, the layout of the two strings is something like:
+	 *
+	 *          v ------1------ v ------2------ v
+	 *     RDI:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
+	 *     RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
+	 *
+	 * where v indicates the alignment boundaries and corresponding chunks
+	 * of the strings have the same letters.  Chunk A has been checked in
+	 * the previous iteration.  This iteration, we first check that string
+	 * RSI doesn't end within region 2, then we compare chunk B between the
+	 * two strings.  As RSI is known not to hold a NUL byte in regions 1
+	 * and 2 at this point, this also ensures that RDI has not ended yet.
+	 */
+	ALIGN_TEXT
+0:	movdqu		(%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rdi, %rsi, 1), %xmm1 # end of string in RSI?
+	pcmpeqb		(%rdi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	test		%r8d, %r8d
+	jnz		.Lnul_found
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatch
+
+	/* main loop unrolled twice */
+	movdqu		16(%rdi, %rbx, 1), %xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rdi, %rsi, 1), %xmm1
+	pcmpeqb		16(%rdi), %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$32, %rdi
+	test		%r8d, %r8d
+	jnz		.Lnul_found2
+	xor		$0xffff, %r9d
+	jnz		.Lmismatch2
+	sub		$32, %rdx	# end of buffer within next iteration?
+	jae		0b
+
+	/* end of buffer will occur in next 32 bytes */
+.Ltail:	movdqu		(%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rdi, %rsi, 1), %xmm1 # end of string in RSI?
+	pcmpeqb		(%rdi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	bts		%edx, %r8d	# indicate NUL byte at last byte in buffer
+	test		%r8w, %r8w	# NUL byte in first chunk?
+	jnz		.Lnul_found
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatch
+
+	/* main loop unrolled twice */
+	movdqu		16(%rdi, %rbx, 1), %xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rdi, %rsi, 1), %xmm1
+	pcmpeqb		16(%rdi), %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	sub		$16, %edx	# take first half into account
+	bts		%edx, %r8d	# indicate NUL byte at last byte in buffer
+	add		$32, %rdi
+
+.Lnul_found2:
+	sub		$16, %rdi	# roll back second increment
+
+.Lnul_found:
+	mov		%eax, %ecx
+	mov		%r8d, %r10d	# keep the unshifted NUL mask
+	shl		%cl, %r8d	# adjust NUL mask to positions in RDI/RBX
+	not		%r9d		# mask of mismatches
+	or		%r8w, %r9w	# NUL bytes also count as mismatches
+	jnz		.Lmismatch
+
+	/*
+	 * (RDI) == (RSI) and NUL is past the string.
+	 * compare (RSI) with the corresponding part
+	 * of the other string until the NUL byte.
+	 */
+	movdqu		(%rdi, %rax, 1), %xmm0
+	pcmpeqb		(%rdi, %rsi, 1), %xmm0
+	add		%rdi, %rsi	# restore RSI pointer
+	add		%rax, %rdi	# point RDI to chunk corresponding to (RSI)
+	pmovmskb	%xmm0, %ecx	# mask of matches
+	not		%ecx		# mask of mismatches
+	or		%r10d, %ecx	# mask of mismatches or NUL bytes
+	tzcnt		%ecx, %ecx	# location of first mismatch
+	movzbl		(%rdi, %rcx, 1), %eax
+	movzbl		(%rsi, %rcx, 1), %ecx
+	sub		%ecx, %eax
+	pop		%rbx
+	ret
+
+.Lmismatch2:
+	sub		$16, %rdi	# roll back second increment
+
+	/* a mismatch has been found between RBX and RSI */
+.Lmismatch:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rdi, %rbx	# turn RBX from offset into pointer
+	movzbl		(%rbx, %r9, 1), %ecx
+	movzbl		(%rdi, %r9, 1), %eax
+	sub		%ecx, %eax
+	pop		%rbx
+	ret
+
+	/* rax < 0 */
+	ALIGN_TEXT
+.Lswapped:
+	movdqu		16(%rdi, %rax, 1), %xmm0
+	sub		%rsi, %rdi	# express RDI as distance from RSI
+	lea		(%rdi, %rax, 1), %rbx # point RBX to offset in first string
+	pcmpeqb		%xmm2, %xmm1	# NUL present?
+	pcmpeqb		%xmm3, %xmm0	# mismatch between chunks?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		%rax, %rdx	# RDX points to buffer end in RSI
+	neg		%rax		# ... corresponding to RSI
+	mov		$16, %ecx
+	cmp		%rcx, %rdx	# does the buffer end within (RSI,RDI,1)?
+	cmovb		%edx, %ecx	# ECX = min(16, RDX)
+	add		$32, %rsi	# advance to next iteration
+	bts		%ecx, %r8d	# mark end-of-buffer as if there was a NUL byte
+	test		%r8w, %r8w	# NUL or end of buffer found?
+	jnz		.Lnul_found2s
+	xor		$0xffff, %r9d
+	jnz		.Lmismatch2s
+	sub		$48, %rdx	# end of buffer within first main loop iteration?
+	jb		.Ltails		# if yes, process tail
+
+	ALIGN_TEXT
+0:	movdqu		(%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rsi, %rdi, 1), %xmm1 # end of string in RDI?
+	pcmpeqb		(%rsi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	test		%r8d, %r8d
+	jnz		.Lnul_founds
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatchs
+
+	/* main loop unrolled twice */
+	movdqu		16(%rsi, %rbx, 1), %xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rsi, %rdi, 1), %xmm1
+	pcmpeqb		16(%rsi), %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	add		$32, %rsi
+	test		%r8d, %r8d
+	jnz		.Lnul_found2s
+	xor		$0xffff, %r9d
+	jnz		.Lmismatch2s
+	sub		$32, %rdx	# end of buffer within next iteration?
+	jae		0b
+
+	/* end of buffer will occur in next 32 bytes */
+.Ltails:
+	movdqu		(%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI
+	pxor		%xmm1, %xmm1
+	pcmpeqb		(%rsi, %rdi, 1), %xmm1 # end of string in RDI?
+	pcmpeqb		(%rsi), %xmm0	# where do the chunks match?
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	bts		%edx, %r8d	# indicate NUL byte at last byte in buffer
+	test		%r8w, %r8w	# NUL byte in first chunk?
+	jnz		.Lnul_founds
+	xor		$0xffff, %r9d	# any mismatches?
+	jnz		.Lmismatchs
+
+	/* main loop unrolled twice */
+	movdqu		16(%rsi, %rbx, 1), %xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		16(%rsi, %rdi, 1), %xmm1
+	pcmpeqb		16(%rsi), %xmm0
+	pmovmskb	%xmm1, %r8d
+	pmovmskb	%xmm0, %r9d
+	sub		$16, %edx	# take first half into account
+	bts		%edx, %r8d	# indicate NUL byte at last byte in buffer
+	add		$32, %rsi
+
+.Lnul_found2s:
+	sub		$16, %rsi	# roll back second increment
+
+.Lnul_founds:
+	mov		%eax, %ecx
+	mov		%r8d, %r10d	# keep the unshifted NUL mask
+	shl		%cl, %r8d	# adjust NUL mask to positions in RSI/RBX
+	not		%r9d		# mask of mismatches
+	or		%r8w, %r9w	# NUL bytes also count as mismatches
+	jnz		.Lmismatchs
+
+	movdqu		(%rsi, %rax, 1), %xmm0
+	pcmpeqb		(%rsi, %rdi, 1), %xmm0
+	add		%rsi, %rdi	# restore RDI pointer
+	add		%rax, %rsi	# point RSI to chunk corresponding to (RDI)
+	pmovmskb	%xmm0, %ecx	# mask of matches
+	not		%ecx		# mask of mismatches
+	or		%r10d, %ecx	# mask of mismatches or NUL bytes
+	tzcnt		%ecx, %ecx	# location of first mismatch
+	movzbl		(%rdi, %rcx, 1), %eax
+	movzbl		(%rsi, %rcx, 1), %ecx
+	sub		%ecx, %eax
+	pop		%rbx
+	ret
+
+.Lmismatch2s:
+	sub		$16, %rsi	# roll back second increment
+
+.Lmismatchs:
+	tzcnt		%r9d, %r9d	# where is the mismatch?
+	add		%rsi, %rbx	# turn RBX from offset into pointer
+	movzbl		(%rbx, %r9, 1), %eax
+	movzbl		(%rsi, %r9, 1), %ecx
+	sub		%ecx, %eax
+	pop		%rbx
+	ret
+ARCHEND(strncmp, baseline)
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strncpy.c b/lib/libc/amd64/string/strncpy.c
new file mode 100644
index 000000000000..b3d868787fbe
--- /dev/null
+++ b/lib/libc/amd64/string/strncpy.c
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+char *__stpncpy(char *restrict, const char *restrict, size_t);
+
+char *
+strncpy(char *restrict dst, const char *restrict src, size_t len)
+{
+	/* all of the work is delegated; the value __stpncpy returns is unused */
+	(void)__stpncpy(dst, src, len);
+	/* hand back the start of the destination buffer */
+	return (dst);
+}
diff --git a/lib/libc/amd64/string/strpbrk.c b/lib/libc/amd64/string/strpbrk.c
new file mode 100644
index 000000000000..87f587789991
--- /dev/null
+++ b/lib/libc/amd64/string/strpbrk.c
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+size_t __strcspn(const char *, const char *);
+
+char *
+strpbrk(const char *s, const char *charset)
+{
+	/* __strcspn yields the number of leading characters before a match */
+	const char *hit = s + __strcspn(s, charset);
+	if (*hit == '\0')	/* stopped at the terminator: nothing matched */
+		return (NULL);
+	return ((char *)hit);
+}
diff --git a/lib/libc/amd64/string/strrchr.S b/lib/libc/amd64/string/strrchr.S
new file mode 100644
index 000000000000..e397bbcd3478
--- /dev/null
+++ b/lib/libc/amd64/string/strrchr.S
@@ -0,0 +1,209 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT	.p2align 4,0x90	# 16-byte alignment, nop-filled
+
+	.weak	rindex
+	.set	rindex, strrchr
+
+ARCHFUNCS(strrchr)
+	ARCHFUNC(strrchr, scalar)
+	ARCHFUNC(strrchr, baseline)
+ENDARCHFUNCS(strrchr)
+
+ARCHENTRY(strrchr, scalar)
+	/* RDI: word pointer, RSI: c x 8, R10/R11: address/match bits of the last match */
+	mov	%edi, %ecx
+	and	$~7, %rdi		# align to 8 byte
+	movzbl	%sil, %esi		# clear stray high bits
+	movabs	$0x0101010101010101, %r8
+	mov	(%rdi), %rax		# load first word
+	imul	%r8, %rsi		# replicate char 8 times
+
+	/*
+	 * Unaligned input: read from the aligned address, but or 0x01 into
+	 * each byte before the string so that none of them is NUL or c.
+	 */
+	shl	$3, %ecx		# 8 * offset (shl uses CL mod 64)
+	mov	%r8, %r10
+	shl	%cl, %r10		# 0x01 where the string is
+	xor	%r8, %r10		# 0x01 where it is not
+	neg	%r8			# negate 01..01 so we can use lea
+	movabs	$0x8080808080808080, %r9
+
+	mov	%rsi, %rcx
+	xor	%rax, %rcx		# str ^ c
+	or	%r10, %rax		# ensure str != 0 before string
+	or	%r10, %rcx		# ensure str^c != 0 before string
+	bswap	%rcx			# in reverse order, to find last match
+	mov	%rdi, %r10		# location of initial match (if any)
+	xor	%r11, %r11		# initial match (none)
+	add	$8, %rdi		# advance to next iteration
+	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
+	not	%rax			# ~str
+	and	%rdx, %rax		# (str - 0x01..01) & ~str
+	and	%r9, %rax		# not including junk bits
+	jnz	1f			# end of string?
+
+	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
+	not	%rcx			# ~(str ^ c)
+	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
+	and	%r9, %rcx		# not including junk bits
+	mov	%rcx, %r11		# remember match in head
+	jmp	0f
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+3:	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
+	not	%rcx			# ~(str ^ c)
+	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
+	and	%r9, %rcx		# not including junk bits
+	lea	-8(%rdi), %rdx		# address of the word just checked
+	cmovnz	%rdx, %r10		# remember location of current match
+	cmovnz	%rcx, %r11
+
+0:	mov	(%rdi), %rax		# str
+	mov	%rsi, %rcx
+	xor	%rax, %rcx		# str ^ c
+	bswap	%rcx			# in reverse order, to find last match
+	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
+	not	%rax			# ~str
+	and	%rdx, %rax		# (str - 0x01..01) & ~str
+	and	%r9, %rax		# not including junk bits
+	jnz	2f			# end of string?
+
+	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
+	not	%rcx			# ~(str ^ c)
+	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
+	and	%r9, %rcx		# not including junk bits
+	cmovnz	%rdi, %r10		# remember location of current match
+	cmovnz	%rcx, %r11
+
+	mov	8(%rdi), %rax		# str
+	add	$16, %rdi
+	mov	%rsi, %rcx
+	xor	%rax, %rcx		# str ^ c
+	bswap	%rcx			# in reverse order, to find last match
+	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
+	not	%rax			# ~str
+	and	%rdx, %rax		# (str - 0x01..01) & ~str
+	and	%r9, %rax		# not including junk bits
+	jz	3b			# end of string?
+
+	/* NUL found: blank out the bytes behind it *before* testing for c */
+1:	sub	$8, %rdi		# undo advance past buffer
+2:	lea	-1(%rax), %rdx		# flip all bits up to the first NUL flag
+	xor	%rdx, %rax		# mask of bytes in the string (incl. NUL)
+	not	%rax			# mask of bytes behind the string
+	bswap	%rax			# in reverse order, same as RCX
+	or	%rax, %rcx		# str ^ c = 0xff behind the string
+	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
+	not	%rcx			# ~(str ^ c)
+	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
+	and	%r9, %rcx		# c found in the tail?
+	cmovnz	%rdi, %r10
+	cmovnz	%rcx, %r11
+	bswap	%r11			# unreverse byte order
+	bsr	%r11, %rcx		# last location of c in (R10)
+	shr	$3, %rcx		# as byte offset
+	lea	(%r10, %rcx, 1), %rax	# pointer to match
+	test	%r11, %r11		# was there actually a match?
+	cmovz	%r11, %rax		# if not, return null pointer
+	ret
+ARCHEND(strrchr, scalar)
+
+ARCHENTRY(strrchr, baseline)	/* in: RDI = string, ESI = c; out: RAX = last match or NULL */
+	mov		%edi, %ecx
+	and		$~0xf, %rdi		# align to 16 bytes
+	movdqa		(%rdi), %xmm1		# aligned load of the head chunk
+	movd		%esi, %xmm0
+	and		$0xf, %ecx		# offset from alignment
+	pxor		%xmm2, %xmm2		# zero, to compare against for NUL
+	mov		$-1, %edx
+	punpcklbw	%xmm0, %xmm0		# c -> cc
+	shl		%cl, %edx		# bits corresponding to bytes in the string
+	punpcklwd	%xmm0, %xmm0		# cc -> cccc
+	xor		%r8, %r8		# address of latest match (none yet: 0)
+	mov		$1, %esi		# bit mask of latest match (none yet: bit 0)
+	mov		%rdi, %r9		# candidate location for next match
+	add		$16, %rdi		# advance to next chunk
+
+	/* check for match in head */
+	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
+	pshufd		$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
+	pcmpeqb		%xmm0, %xmm1		# c present?
+	pmovmskb	%xmm2, %eax		# one bit per byte: NUL positions
+	pmovmskb	%xmm1, %ecx		# one bit per byte: positions of c
+	and		%edx, %ecx		# c present in the string?
+	and		%edx, %eax		# NUL present in the string?
+	jnz		.Lend2
+
+	/* main loop unrolled twice */
+	ALIGN_TEXT
+0:	movdqa		(%rdi), %xmm1
+	test		%ecx, %ecx		# was there a match in the last iter.?
+	cmovnz		%r9, %r8		# remember match if any
+	cmovnz		%ecx, %esi
+	pxor		%xmm2, %xmm2		# pcmpeqb overwrote the zero vector
+	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
+	pcmpeqb		%xmm0, %xmm1		# c present?
+	pmovmskb	%xmm2, %eax
+	pmovmskb	%xmm1, %ecx
+	test		%eax, %eax		# end of string in first half?
+	jnz		.Lend
+
+	movdqa		16(%rdi), %xmm1
+	test		%ecx, %ecx		# was there a match in the last iter.?
+	cmovnz		%rdi, %r8		# remember match if any
+	cmovnz		%ecx, %esi
+	pxor		%xmm2, %xmm2		# pcmpeqb overwrote the zero vector
+	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
+	pcmpeqb		%xmm0, %xmm1		# c present?
+	pmovmskb	%xmm2, %eax
+	pmovmskb	%xmm1, %ecx
+	lea		16(%rdi), %r9		# location of the chunk just checked
+	add		$32, %rdi
+	test		%eax, %eax		# end of string in second half?
+	jz		0b
+
+	ALIGN_TEXT
+.Lend2:	sub		$16, %rdi		# undo advance past the final chunk
+.Lend:	lea 		-1(%rax), %edx		# flip all bits up to the first NUL
+	xor		%edx, %eax		# mask of bytes in the string
+	and		%eax, %ecx		# c found in the tail?
+	cmovnz		%rdi, %r8
+	cmovnz		%ecx, %esi
+	bsr		%esi, %esi		# last location of c in (R8)
+	lea		(%r8, %rsi, 1), %rax	# pointer to match (0 + 0 if there was none)
+	ret
+ARCHEND(strrchr, baseline)
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strsep.c b/lib/libc/amd64/string/strsep.c
new file mode 100644
index 000000000000..9fda56d7e135
--- /dev/null
+++ b/lib/libc/amd64/string/strsep.c
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+size_t __strcspn(const char *, const char *);
+
+/*
+ * We have a fast strcspn() on amd64.  Use it over a direct
+ * implementation of strsep for better performance.
+ */
+char *
+strsep(char **stringp, const char *delim)
+{
+	char *token, *end;
+
+	/* *stringp is NULL once the final token has been handed out */
+	if ((token = *stringp) == NULL)
+		return (NULL);
+
+	/* end = the byte __strcspn() stops at (presumably strcspn() semantics) */
+	end = token + __strcspn(token, delim);
+	if (*end != '\0') {
+		*end = '\0';		/* cut the token off at the delimiter */
+		*stringp = end + 1;	/* resume right behind the cut */
+	} else {
+		*stringp = NULL;	/* final token: no further input */
+	}
+
+	return (token);
+}
diff --git a/lib/libc/string/bstring.3 b/lib/libc/string/bstring.3
index fd976c7676b7..91603fe6dbac 100644
--- a/lib/libc/string/bstring.3
+++ b/lib/libc/string/bstring.3
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd June 4, 1993
+.Dd December 5, 2023
 .Dt BSTRING 3
 .Os
 .Sh NAME
@@ -56,7 +56,12 @@
 .Ft int
 .Fn memcmp "const void *b1" "const void *b2" "size_t len"
 .Ft void *
-.Fn memccpy "void *dst" "const void *src" "int c" "size_t len"
+.Fo memccpy
+.Fa "void * restrict dst"
+.Fa "const void * restrict src"
+.Fa "int c"
+.Fa "size_t len"
+.Fc
 .Ft void *
 .Fn memcpy "void *dst" "const void *src" "size_t len"
 .Ft void *
@@ -78,6 +83,7 @@ See the specific manual pages for more information.
 .Xr memccpy 3 ,
 .Xr memchr 3 ,
 .Xr memcmp 3 ,
+.Xr memccpy 3 ,
 .Xr memcpy 3 ,
 .Xr memmove 3 ,
 .Xr memset 3
diff --git a/lib/libc/string/memccpy.3 b/lib/libc/string/memccpy.3
index ce8d5f65ac93..3bdae24354c1 100644
--- a/lib/libc/string/memccpy.3
+++ b/lib/libc/string/memccpy.3
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd June 9, 1993
+.Dd December 5, 2023
 .Dt MEMCCPY 3
 .Os
 .Sh NAME
@@ -36,7 +36,12 @@
 .Sh SYNOPSIS
 .In string.h
 .Ft void *
-.Fn memccpy "void *dst" "const void *src" "int c" "size_t len"
+.Fo memccpy
+.Fa "void * restrict dst"
+.Fa "const void * restrict src"
+.Fa "int c"
+.Fa "size_t len"
+.Fc
 .Sh DESCRIPTION
 The
 .Fn memccpy
@@ -59,13 +64,35 @@ is returned.
 Otherwise,
 .Fa len
 bytes are copied, and a NULL pointer is returned.
+If
+.Fa src
+and
+.Fa dst
+overlap, behavior is undefined.
 .Sh SEE ALSO
 .Xr bcopy 3 ,
 .Xr memcpy 3 ,
 .Xr memmove 3 ,
 .Xr strcpy 3
+.Sh STANDARDS
+The
+.Fn memccpy
+function conforms to
+.St -p1003.1-2004
+and
+.\" St -isoC-2024 .
+ISO/IEC 9899:2024 (\(lqISO\~C23\(rq).
 .Sh HISTORY
 The
 .Fn memccpy
 function first appeared in
-.Bx 4.4 .
+.Bx 4.4
+and was first specified in the
+.\" St -svid1 .
+System\~V Interface Definition, First Edition (\(lqSVID1\(rq).
+The
+.Ft restrict
+keyword was added to the prototype in
+.Fx 5.0.0
+in accordance with the updated specification of
+.St -p1003.1-2004 .
diff --git a/lib/libc/string/memccpy.c b/lib/libc/string/memccpy.c
index 174824ba2393..d6a446503eb6 100644
--- a/lib/libc/string/memccpy.c
+++ b/lib/libc/string/memccpy.c
@@ -32,7 +32,7 @@
 #include <string.h>
 
 void *
-memccpy(void *t, const void *f, int c, size_t n)
+memccpy(void * restrict t, const void * restrict f, int c, size_t n)
 {
 
 	if (n) {
diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile
index a090e1bd3463..4fce79685c0e 100644
--- a/lib/libc/tests/string/Makefile
+++ b/lib/libc/tests/string/Makefile
@@ -9,12 +9,16 @@ ATF_TESTS_C+=		ffsll_test
 ATF_TESTS_C+=		fls_test
 ATF_TESTS_C+=		flsl_test
 ATF_TESTS_C+=		flsll_test
+ATF_TESTS_C+=		memccpy_test
 ATF_TESTS_C+=		memcmp_test
+ATF_TESTS_C+=		memrchr_test
 ATF_TESTS_C+=		memset_s_test
+ATF_TESTS_C+=		strncmp_test
 ATF_TESTS_C+=		stpncpy_test
 ATF_TESTS_C+=		strcmp2_test
 ATF_TESTS_C+=		strcspn_test
 ATF_TESTS_C+=		strerror2_test
+ATF_TESTS_C+=		strlcpy_test
 ATF_TESTS_C+=		strspn_test
 ATF_TESTS_C+=		strverscmp_test
 ATF_TESTS_C+=		strxfrm_test
diff --git a/lib/libc/tests/string/memccpy_test.c b/lib/libc/tests/string/memccpy_test.c
new file mode 100644
index 000000000000..82f4ef34af54
--- /dev/null
+++ b/lib/libc/tests/string/memccpy_test.c
@@ -0,0 +1,205 @@
+/*-
+ * Copyright (c) 2009 David Schultz <das@FreeBSD.org>
+ * Copyright (c) 2023 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atf-c.h>
+
+void *(*memccpy_fn)(void *restrict, const void *restrict, int, size_t);
+
+/* Map len bytes (rounded up to pages) bordering an unmapped guard page. */
+static char *
+makebuf(size_t len, int guard_at_end)
+{
+	char *buf, *guard;
+	size_t alloc_size, page_size;
+	int rc;
+
+	page_size = getpagesize();
+	alloc_size = roundup2(len, page_size) + page_size;
+	buf = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
+	assert(buf != MAP_FAILED);	/* mmap() never reports failure as NULL */
+
+	/* unmap outside of assert() so that NDEBUG cannot compile it away */
+	guard = guard_at_end ? buf + alloc_size - page_size : buf;
+	rc = munmap(guard, page_size);
+	assert(rc == 0);
+	return (guard_at_end ? guard - len : buf + page_size);
+}
+
+/* Copy s (NUL included) between guard-page buffers for every dst size up to strlen(s) + 11. */
+static void
+test_memccpy(const char *s)
+{
+	char *src, *dst, *expected;
+	size_t size, bufsize, x;
+	int guards;	/* bit 1: src guard_at_end, bit 0: dst guard_at_end */
+
+	size = strlen(s) + 1;
+	for (guards = 0; guards < 4; guards++) {
+		for (bufsize = 0; bufsize <= size + 10; bufsize++) {
+			src = makebuf(size, guards >> 1);
+			memcpy(src, s, size);
+			dst = makebuf(bufsize, guards & 1);
+			memset(dst, 'X', bufsize);
+			expected = bufsize >= size ? dst + size : NULL;
+			assert(memccpy_fn(dst, src, src[size-1], bufsize) == expected);
+			assert(bufsize == 0 || strncmp(src, dst, bufsize - 1) == 0);
+			/* bytes behind the copied string must keep the fill */
+			for (x = size; x < bufsize; x++)
+				assert(dst[x] == 'X');
+		}
+	}
+}
+
+/* Run one memccpy_fn() case between sentinel-framed buffers; report failures via ATF. */
+static void
+test_sentinel(char *dest, char *src, size_t destlen, size_t srclen)
+{
+	size_t i, effective_len;
+	void *res, *wantres;
+	const char *fail = NULL;
+	char terminator;
+
+	for (i = 0; i < srclen; i++)
+		src[i] = '0' + i;	/* src will never include (){} */
+
+	/* source sentinels: not to be copied */
+	src[-1] = '(';
+	src[srclen] = ')';
+
+	/* destination: poison fill framed by sentinels that are not to be touched */
+	memset(dest, '\xee', destlen);
+	dest[-1] = '{';
+	dest[destlen] = '}';
+
+	effective_len = srclen < destlen ? srclen : destlen;
+	wantres = srclen <= destlen ? dest + srclen : NULL;
+	terminator = src[srclen-1];
+	res = memccpy_fn(dest, src, terminator, destlen);
+
+	if (dest[-1] != '{')
+		fail = "start sentinel overwritten";
+	else if (dest[destlen] != '}')
+		fail = "end sentinel overwritten";
+	else if (res != wantres)
+		fail = "incorrect return value";
+	else if (destlen > 0 && memcmp(src, dest, effective_len) != 0)
+		fail = "string not copied correctly";
+	else for (i = srclen; i < destlen; i++)
+		if (dest[i] != '\xee') {
+			fail = "buffer mutilated behind string";
+			break;
+		}
+
+	if (fail)	/* neither buffer holds a NUL: print both with explicit lengths */
+		atf_tc_fail_nonfatal("%s\n"
+		    "memccpy(%p \"%.*s\", %p \"%.*s\", %u '%c', %zu) = %p (want %p)\n",
+		    fail, dest, (int)destlen, dest, src, (int)srclen, src,
+		    terminator, terminator, destlen, res, wantres);
+}
+
+ATF_TC_WITHOUT_HEAD(null);
+ATF_TC_BODY(null, tc)
+{
+	ATF_CHECK_EQ(memccpy_fn(NULL, "foo", 42, 0), NULL);	/* len 0 with a NULL dst: NULL expected back */
+}
+
+ATF_TC(zero_extension);
+ATF_TC_HEAD(zero_extension, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Ensure the upper bits of the terminator are ignored");
+}
+ATF_TC_BODY(zero_extension, tc)	/* goes through memccpy_fn like every other case */
+{
+	int mask = -1 & ~UCHAR_MAX;	/* all bits above UCHAR_MAX */
+	char buf[16];
+
+	memset(buf, 0xcc, sizeof(buf));	/* reference run: plain 'r' */
+	ATF_CHECK_EQ(memccpy_fn(buf, "foobar", 'r', sizeof(buf)), buf + sizeof("foobar") - 1);
+	ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0);
+
+	memset(buf, 0xcc, sizeof(buf));	/* same outcome wanted with the high bits set */
+	ATF_CHECK_EQ(memccpy_fn(buf, "foobar", mask | 'r', sizeof(buf)), buf + sizeof("foobar") - 1);
+	ATF_CHECK_EQ(memcmp(buf, "foobar", sizeof("foobar") - 1), 0);
+}
+
+ATF_TC_WITHOUT_HEAD(bounds);
+ATF_TC_BODY(bounds, tc)
+{
+	size_t i;
+	char buf[64] = "";	/* all NUL: keeps every prefix terminated */
+
+	for (i = 0; i < sizeof(buf) - 1; i++) {
+		buf[i] = ' ' + i;	/* grow the string by one character */
+		test_memccpy(buf);	/* runs strlen() on buf: needs the NUL */
+	}
+}
+
+ATF_TC_WITHOUT_HEAD(alignments);
+ATF_TC_BODY(alignments, tc)
+{
+	size_t srcalign, destalign, srclen, destlen;
+	/* leading sentinel + 15 offsets + 64 max length + trailing sentinel */
+	char src[1+15+64+1], dest[1+15+64+1];
+
+	for (srcalign = 1; srcalign <= 16; srcalign++)
+		for (destalign = 1; destalign <= 16; destalign++)
+			for (srclen = 1; srclen < 64; srclen++)
+				for (destlen = 0; destlen < 64; destlen++)
+					test_sentinel(&dest[destalign],
+					    &src[srcalign], destlen, srclen);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	void *dl_handle;
+
+	/* use a "test_memccpy" symbol if the dlopen(NULL) handle resolves one */
+	dl_handle = dlopen(NULL, RTLD_LAZY);
+	if ((memccpy_fn = dlsym(dl_handle, "test_memccpy")) == NULL)
+		memccpy_fn = memccpy;	/* otherwise test memccpy() itself */
+
+	ATF_TP_ADD_TC(tp, null);
+	ATF_TP_ADD_TC(tp, zero_extension);
+	ATF_TP_ADD_TC(tp, bounds);
+	ATF_TP_ADD_TC(tp, alignments);
+
+	return (atf_no_error());
+}
diff --git a/lib/libc/tests/string/memrchr_test.c b/lib/libc/tests/string/memrchr_test.c
new file mode 100644
index 000000000000..12f696c9dc1e
--- /dev/null
+++ b/lib/libc/tests/string/memrchr_test.c
@@ -0,0 +1,116 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Robert Clausecker
+ */
+
+#include <sys/cdefs.h>
+
+#include <dlfcn.h>
+#include <limits.h>
+#include <string.h>
+
+#include <atf-c.h>
+
+static void *(*memrchr_fn)(const void *, int, size_t);
+
+ATF_TC_WITHOUT_HEAD(null);
+ATF_TC_BODY(null, tc)
+{
+	ATF_CHECK_EQ(memrchr_fn(NULL, 42, 0), NULL);	/* length 0 with a NULL buffer: NULL expected */
+}
+
+ATF_TC_WITHOUT_HEAD(not_found);
+ATF_TC_BODY(not_found, tc)
+{
+	size_t i, j;		/* i: offset of the range in buf, j: its length */
+	char buf[1+15+64+1]; /* offset [0..15] + 64 buffer bytes + sentinels */
+
+	buf[0] = 'X';		/* sentinel at the very start of buf */
+	memset(buf + 1, '-', sizeof(buf) - 1);
+
+	for (i = 0; i < 16; i++)
+		for (j = 0; j < 64; j++) {
+			buf[i + j + 1] = 'X';	/* sentinel right behind the range */
+			ATF_CHECK_EQ(memrchr_fn(buf + i + 1, 'X', j), NULL);
+			buf[i + j + 1] = '-';	/* restore the filler */
+		}
+}
+
+static void
+do_found_test(char buf[], size_t len, size_t first, size_t second)
+{
+	/* invariant: first <= second */
+	/* plant 'X' at both offsets; the search must report the later one */
+	buf[first] = 'X';
+	buf[second] = 'X';
+	ATF_CHECK_EQ(memrchr_fn(buf, 'X', len), buf + second);
+	buf[first] = '-';	/* put the '-' filler back for the next case */
+	buf[second] = '-';
+}
+
+ATF_TC_WITHOUT_HEAD(found);
+ATF_TC_BODY(found, tc)
+{
+	size_t off, len, last, first;
+	char buf[1+15+64+1]; /* sentinel + offset [0..15] + 64 bytes + sentinel */
+
+	/* buf[0] and the byte right behind each range hold an 'X' outside the range */
+	buf[0] = 'X';
+	memset(&buf[1], '-', sizeof(buf) - 1);
+	for (off = 1; off <= 16; off++)
+		for (len = 0; len < 64; len++)
+			for (last = 0; last < len; last++)
+				for (first = 0; first <= last; first++) {
+					buf[off + len] = 'X';
+					do_found_test(&buf[off], len, first, last);
+					buf[off + len] = '-';
+				}
+}
+
+/* check that the right character is found */
+static void
+do_values_test(unsigned char buf[], size_t len, size_t i, int c)
+{
+	/* sentinels */
+	buf[-1] = c;		/* c right before the range */
+	buf[len] = c;		/* and right behind it */
+	memset(buf, c + 1, len);	/* fill with a byte value other than c */
+
+	if (i < len) {		/* plant c at offset i and expect it back */
+		buf[i] = c;
+		ATF_CHECK_EQ(memrchr_fn(buf, c, len), buf + i);
+	} else			/* i >= len: c occurs nowhere in the range */
+		ATF_CHECK_EQ(memrchr_fn(buf, c, len), NULL);
+}
+
+ATF_TC_WITHOUT_HEAD(values);
+ATF_TC_BODY(values, tc)
+{
+	size_t off, len, pos;	/* pos == len: c is absent from the range */
+	int c;
+	unsigned char buf[1+15+64+1];	/* sentinel + offsets + bytes + sentinel */
+
+	for (off = 1; off <= 16; off++)
+		for (len = 0; len < 64; len++)
+			for (pos = 0; pos <= len; pos++)
+				for (c = 0; c <= UCHAR_MAX; c++)
+					do_values_test(&buf[off], len, pos, c);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	void *dl_handle;
+
+	/* use a "test_memrchr" symbol if the dlopen(NULL) handle resolves one */
+	dl_handle = dlopen(NULL, RTLD_LAZY);
+	if ((memrchr_fn = dlsym(dl_handle, "test_memrchr")) == NULL)
+		memrchr_fn = memrchr;	/* otherwise test memrchr() itself */
+
+	ATF_TP_ADD_TC(tp, null);
+	ATF_TP_ADD_TC(tp, not_found);
+	ATF_TP_ADD_TC(tp, found);
+	ATF_TP_ADD_TC(tp, values);
+
+	return (atf_no_error());
+}
diff --git a/lib/libc/tests/string/stpncpy_test.c b/lib/libc/tests/string/stpncpy_test.c
index 8154237eb8c2..8574b2d591be 100644
--- a/lib/libc/tests/string/stpncpy_test.c
+++ b/lib/libc/tests/string/stpncpy_test.c
@@ -1,7 +1,11 @@
 /*-
  * Copyright (c) 2009 David Schultz <das@FreeBSD.org>
+ * Copyright (c) 2023 The FreeBSD Foundation
  * All rights reserved.
  *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -27,12 +31,15 @@
 #include <sys/param.h>
 #include <sys/mman.h>
 #include <assert.h>
+#include <dlfcn.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include <atf-c.h>
 
+static char *(*stpncpy_fn)(char *restrict, const char *restrict, size_t);
+
 static char *
 makebuf(size_t len, int guard_at_end)
 {
@@ -69,7 +76,7 @@ test_stpncpy(const char *s)
 				dst = makebuf(bufsize, j);
 				memset(dst, 'X', bufsize);
 				len = (bufsize < size) ? bufsize : size - 1;
-				assert(stpncpy(dst, src, bufsize) == dst+len);
+				assert(stpncpy_fn(dst, src, bufsize) == dst+len);
 				assert(memcmp(src, dst, len) == 0);
 				for (x = len; x < bufsize; x++)
 					assert(dst[x] == '\0');
@@ -78,33 +85,97 @@ test_stpncpy(const char *s)
 	}
 }
 
-ATF_TC_WITHOUT_HEAD(nul);
-ATF_TC_BODY(nul, tc)
+static void
+test_sentinel(char *dest, char *src, size_t destlen, size_t srclen)
 {
+	size_t i;
+	const char *res, *wantres;
+	const char *fail = NULL;
+
+	for (i = 0; i < srclen; i++)
+		/* src will never include (){} */
+		src[i] = '0' + i;
+	src[srclen] = '\0';
+
+	/* source sentinels: not to be copied */
+	src[-1] = '(';
+	src[srclen+1] = ')';
+
+	memset(dest, 0xee, destlen);
+
+	/* destination sentinels: not to be touched */
+	dest[-1] = '{';
+	dest[destlen] = '}';
+
+	wantres = dest + (srclen > destlen ? destlen : srclen);
+	res = stpncpy_fn(dest, src, destlen);
+
+	if (dest[-1] != '{')
+		fail = "start sentinel overwritten";
+	else if (dest[destlen] != '}')
+		fail = "end sentinel overwritten";
+	else if (strncmp(src, dest, destlen) != 0)
+		fail = "string not copied correctly";
+	else if (res != wantres)
+		fail = "incorrect return value";
+	else for (i = srclen; i < destlen; i++)
+		if (dest[i] != '\0') {
+			fail = "incomplete NUL padding";
+			break;
+		}
 
-	test_stpncpy("");
+	if (fail)
+		atf_tc_fail_nonfatal("%s\n"
+		    "stpncpy(%p \"%s\", %p \"%s\", %zu) = %p (want %p)\n",
+		    fail, dest, dest, src, src, destlen, res, wantres);
 }
 
-ATF_TC_WITHOUT_HEAD(foo);
-ATF_TC_BODY(foo, tc)
+ATF_TC_WITHOUT_HEAD(null);
+ATF_TC_BODY(null, tc)
 {
-
-	test_stpncpy("foo");
+	ATF_CHECK_EQ(stpncpy_fn(NULL, NULL, 0), NULL);
 }
 
-ATF_TC_WITHOUT_HEAD(glorp);
-ATF_TC_BODY(glorp, tc)
+ATF_TC_WITHOUT_HEAD(bounds);
+ATF_TC_BODY(bounds, tc)
 {
+	size_t i;
+	char buf[64+1];
 
-	test_stpncpy("glorp");
+	for (i = 0; i < sizeof(buf) - 1; i++) {
+		buf[i] = ' ' + i;
+		buf[i+1] = '\0';
+		test_stpncpy(buf);
+	}
+}
+
+ATF_TC_WITHOUT_HEAD(alignments);
+ATF_TC_BODY(alignments, tc)
+{
+	size_t srcalign, destalign, srclen, destlen;
+	char src[15+3+64]; /* 15 offsets + 64 max length + NUL + sentinels */
+	char dest[15+2+64]; /* 15 offsets + 64 max length + sentinels */
+
+	for (srcalign = 0; srcalign < 16; srcalign++)
+		for (destalign = 0; destalign < 16; destalign++)
+			for (srclen = 0; srclen < 64; srclen++)
+				for (destlen = 0; destlen < 64; destlen++)
+					test_sentinel(dest+destalign+1,
+					    src+srcalign+1, destlen, srclen);
 }
 
 ATF_TP_ADD_TCS(tp)
 {
+	void *dl_handle;
+
+	dl_handle = dlopen(NULL, RTLD_LAZY);
+	stpncpy_fn = dlsym(dl_handle, "test_stpncpy");
+	if (stpncpy_fn == NULL)
+		stpncpy_fn = stpncpy;
 
-	ATF_TP_ADD_TC(tp, nul);
-	ATF_TP_ADD_TC(tp, foo);
-	ATF_TP_ADD_TC(tp, glorp);
+	ATF_TP_ADD_TC(tp, null);
+	ATF_TP_ADD_TC(tp, bounds);
+	ATF_TP_ADD_TC(tp, alignments);
 
 	return (atf_no_error());
 }
diff --git a/lib/libc/tests/string/strlcpy_test.c b/lib/libc/tests/string/strlcpy_test.c
new file mode 100644
index 000000000000..646bef42683e
--- /dev/null
+++ b/lib/libc/tests/string/strlcpy_test.c
@@ -0,0 +1,183 @@
+/*-
+ * Copyright (c) 2009 David Schultz <das@FreeBSD.org>
+ * Copyright (c) 2023 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atf-c.h>
+
+size_t (*strlcpy_fn)(char *restrict, const char *restrict, size_t);
+
+/* Map len bytes (rounded up to pages) bordering an unmapped guard page. */
+static char *
+makebuf(size_t len, int guard_at_end)
+{
+	char *buf, *guard;
+	size_t alloc_size, page_size;
+	int rc;
+
+	page_size = getpagesize();
+	alloc_size = roundup2(len, page_size) + page_size;
+	buf = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
+	assert(buf != MAP_FAILED);	/* mmap() never reports failure as NULL */
+
+	/* unmap outside of assert() so that NDEBUG cannot compile it away */
+	guard = guard_at_end ? buf + alloc_size - page_size : buf;
+	rc = munmap(guard, page_size);
+	assert(rc == 0);
+	return (guard_at_end ? guard - len : buf + page_size);
+}
+
+/* Run strlcpy_fn on guard-page buffers for every dst size up to strlen(s) + 11. */
+static void
+test_strlcpy(const char *s)
+{
+	char *src, *dst;
+	size_t size, bufsize, x;
+	int guards;	/* bit 1: src guard_at_end, bit 0: dst guard_at_end */
+
+	size = strlen(s) + 1;
+	for (guards = 0; guards < 4; guards++) {
+		for (bufsize = 0; bufsize <= size + 10; bufsize++) {
+			src = makebuf(size, guards >> 1);
+			memcpy(src, s, size);
+			dst = makebuf(bufsize, guards & 1);
+			memset(dst, 'X', bufsize);
+			assert(strlcpy_fn(dst, src, bufsize) == size-1);
+			assert(bufsize == 0 || strncmp(src, dst, bufsize - 1) == 0);
+			/* bytes behind the terminator must keep the fill */
+			for (x = size; x < bufsize; x++)
+				assert(dst[x] == 'X');
+		}
+	}
+}
+
+/* Run one strlcpy_fn() case between sentinel-framed buffers; report failures via ATF. */
+static void
+test_sentinel(char *dest, char *src, size_t destlen, size_t srclen)
+{
+	size_t i;
+	size_t res, wantres;
+	const char *fail = NULL;
+
+	for (i = 0; i < srclen; i++)
+		src[i] = '0' + i;	/* src will never include (){} */
+	src[srclen] = '\0';
+
+	/* source sentinels: not to be copied */
+	src[-1] = '(';
+	src[srclen+1] = ')';
+
+	/* destination: poison fill framed by sentinels that are not to be touched */
+	memset(dest, '\xee', destlen);
+	dest[-1] = '{';
+	dest[destlen] = '}';
+
+	wantres = srclen;
+	res = strlcpy_fn(dest, src, destlen);
+
+	if (dest[-1] != '{')
+		fail = "start sentinel overwritten";
+	else if (dest[destlen] != '}')
+		fail = "end sentinel overwritten";
+	else if (res != wantres)
+		fail = "incorrect return value";
+	else if (destlen > 0 && strncmp(src, dest, destlen - 1) != 0)
+		fail = "string not copied correctly";
+	else if (destlen > 0 && srclen >= destlen - 1 && dest[destlen-1] != '\0')
+		fail = "string not NUL terminated";
+	else for (i = srclen + 1; i < destlen; i++)
+		if (dest[i] != '\xee') {
+			fail = "buffer mutilated behind string";
+			break;
+		}
+
+	if (fail)	/* dest may lack its NUL on failure: bound the print to destlen */
+		atf_tc_fail_nonfatal("%s\n"
+		    "strlcpy(%p \"%.*s\", %p \"%s\", %zu) = %zu (want %zu)\n",
+		    fail, dest, (int)destlen, dest, src, src,
+		    destlen, res, wantres);
+}
+
+ATF_TC_WITHOUT_HEAD(null);
+ATF_TC_BODY(null, tc)
+{
+	ATF_CHECK_EQ(strlcpy_fn(NULL, "foo", 0), 3);	/* size 0 with a NULL dst: still expects the source length */
+}
+
+ATF_TC_WITHOUT_HEAD(bounds);
+ATF_TC_BODY(bounds, tc)
+{
+	char buf[64+1];	/* up to 64 characters + NUL */
+	size_t len;
+
+	for (len = 1; len < sizeof(buf); len++) {
+		buf[len-1] = ' ' + (len - 1);	/* append the next character... */
+		buf[len] = '\0';		/* ...and terminate behind it */
+		test_strlcpy(buf);
+	}
+}
+
+ATF_TC_WITHOUT_HEAD(alignments);
+ATF_TC_BODY(alignments, tc)
+{
+	size_t srcalign, destalign, srclen, destlen;
+	/* sentinel + 15 offsets + 64 max length (+ NUL for src) + sentinel */
+	char src[1+15+64+1+1], dest[1+15+64+1];
+
+	for (srcalign = 1; srcalign <= 16; srcalign++)
+		for (destalign = 1; destalign <= 16; destalign++)
+			for (srclen = 0; srclen < 64; srclen++)
+				for (destlen = 0; destlen < 64; destlen++)
+					test_sentinel(&dest[destalign],
+					    &src[srcalign], destlen, srclen);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	void *dl_handle;
+
+	/* use a "test_strlcpy" symbol if the dlopen(NULL) handle resolves one */
+	dl_handle = dlopen(NULL, RTLD_LAZY);
+	if ((strlcpy_fn = dlsym(dl_handle, "test_strlcpy")) == NULL)
+		strlcpy_fn = strlcpy;	/* otherwise test strlcpy() itself */
+
+	ATF_TP_ADD_TC(tp, null);
+	ATF_TP_ADD_TC(tp, bounds);
+	ATF_TP_ADD_TC(tp, alignments);
+
+	return (atf_no_error());
+}
diff --git a/lib/libc/tests/string/strncmp_test.c b/lib/libc/tests/string/strncmp_test.c
new file mode 100644
index 000000000000..989c58bcfedf
--- /dev/null
+++ b/lib/libc/tests/string/strncmp_test.c
@@ -0,0 +1,165 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <sys/cdefs.h>
+
+#include <atf-c.h>
+#include <dlfcn.h>
+#include <string.h>
+
+int (*volatile strncmp_fn)(const char *, const char *, size_t);
+
+static void
+alignment_testcase(char *a, char *b, int want, size_t len)
+{
+	/* want is compared with the sign (-1, 0 or 1) of the actual result */
+	int res = strncmp_fn(a, b, len);
+
+	ATF_CHECK_MSG(want == (res > 0) - (res < 0),
+	    "strncmp(%p \"%s\", %p \"%s\", %zu) = %d != %d",
+	    (void *)a, a, (void *)b, b, len, res, want);
+}
+
+static void
+check_strncmp_alignments(char a[], char b[],
+    size_t a_off, size_t b_off, size_t len, size_t pos)
+{
+	char *a_str, *b_str, a_orig, b_orig;
+	/* All checks for one combination of offsets, len and pos. */
+	a[a_off] = '\0';	/* NUL directly in front of each string */
+	b[b_off] = '\0';
+
+	a_str = a + a_off + 1;
+	b_str = b + b_off + 1;
+
+	a_str[len] = '\0';	/* both strings end after len bytes ... */
+	b_str[len] = '\0';
+	a_str[len+1] = 'A';	/* ... with differing bytes behind the NUL */
+	b_str[len+1] = 'B';
+
+	a_orig = a_str[pos];	/* saved to undo the changes made at pos */
+	b_orig = b_str[pos];
+	/* untouched, the strings are expected to compare equal (want 0) */
+	alignment_testcase(a_str, b_str, 0, len + 16);
+	alignment_testcase(a_str, b_str, 0, len + 1);
+	alignment_testcase(a_str, b_str, 0, len);
+	/* a NUL at pos makes that string the shorter, hence smaller, one */
+	if (pos < len) {
+		a_str[pos] = '\0';
+		alignment_testcase(a_str, b_str, -1, len + 16);
+		alignment_testcase(a_str, b_str, -1, len + 1);
+		alignment_testcase(a_str, b_str, -1, len);
+		alignment_testcase(a_str, b_str, -1, pos + 1);
+		alignment_testcase(a_str, b_str, 0, pos);	/* limit ends before pos */
+		a_str[pos] = a_orig;
+
+		b_str[pos] = '\0';
+		alignment_testcase(a_str, b_str, 1, len + 16);
+		alignment_testcase(a_str, b_str, 1, len + 1);
+		alignment_testcase(a_str, b_str, 1, len);
+		alignment_testcase(a_str, b_str, 1, pos + 1);
+		alignment_testcase(a_str, b_str, 0, pos);	/* limit ends before pos */
+		b_str[pos] = b_orig;
+	}
+	/* 'X' at pos in a only: want 1 as soon as the limit covers pos */
+	a_str[pos] = 'X';
+	alignment_testcase(a_str, b_str, 1, len + 16);
+	alignment_testcase(a_str, b_str, 0, pos);
+	alignment_testcase(a_str, b_str, 1, pos + 1);
+	if (pos < len) {	/* for pos == len these limits are covered above */
+		alignment_testcase(a_str, b_str, 1, len);
+		alignment_testcase(a_str, b_str, 1, len + 1);
+	}
+	a_str[pos] = a_orig;
+	/* 'X' at pos in b only: the mirrored case, want -1 */
+	b_str[pos] = 'X';
+	alignment_testcase(a_str, b_str, -1, len + 16);
+	alignment_testcase(a_str, b_str, 0, pos);
+	alignment_testcase(a_str, b_str, -1, pos + 1);
+	if (pos < len) {	/* for pos == len these limits are covered above */
+		alignment_testcase(a_str, b_str, -1, len);
+		alignment_testcase(a_str, b_str, -1, len + 1);
+	}
+	b_str[pos] = b_orig;
+	/* put '-' back into the bytes that were set up at the top */
+	a[a_off] = '-';
+	b[b_off] = '-';
+	a_str[len] = '-';
+	b_str[len] = '-';
+	a_str[len+1] = '-';
+	b_str[len+1] = '-';
+}
+
+ATF_TC(strncmp_alignments);
+ATF_TC_HEAD(strncmp_alignments, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strncmp(3) with various alignments");
+}
+
+ATF_TC_BODY(strncmp_alignments, tc)
+{
+	char a[64+16+16+3], b[64+16+16+3];
+	size_t off_a, off_b, n, at;
+
+	memset(a, '-', sizeof(a) - 1);	/* NUL-terminated runs of '-' */
+	a[sizeof(a) - 1] = '\0';
+	memset(b, '-', sizeof(b) - 1);
+	b[sizeof(b) - 1] = '\0';
+	/* offsets 0..15 for each string, lengths 1..64, positions 0..length */
+	for (off_a = 0; off_a < 16; off_a++)
+		for (off_b = 0; off_b < 16; off_b++)
+			for (n = 1; n <= 64; n++)
+				for (at = 0; at <= n; at++)
+					check_strncmp_alignments(a, b, off_a, off_b, n, at);
+}
+
+ATF_TC(strncmp_null);
+ATF_TC_HEAD(strncmp_null, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strncmp(3) with null pointers");
+}
+
+ATF_TC_BODY(strncmp_null, tc)
+{
+	alignment_testcase(NULL, NULL, 0, 0);	/* limit 0: expect a result of 0 */
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/*
+	 * Use a "test_strncmp" symbol if one can be found, else plain strncmp.
+	 */
+	strncmp_fn = dlsym(dlopen(NULL, RTLD_LAZY), "test_strncmp");
+	if (strncmp_fn == NULL)
+		strncmp_fn = strncmp;
+
+	ATF_TP_ADD_TC(tp, strncmp_alignments);
+	ATF_TP_ADD_TC(tp, strncmp_null);
+
+	return (atf_no_error());
+}
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
index 3fd8890c4f53..fd9485524aef 100644
--- a/share/man/man7/simd.7
+++ b/share/man/man7/simd.7
@@ -24,7 +24,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE
 .
-.Dd September 2, 2023
+.Dd December 6, 2023
 .Dt SIMD 7
 .Os
 .Sh NAME
@@ -48,7 +48,7 @@ the environment variable
 .Ev ARCHLEVEL
 can be used to override this mechanism.
 .Pp
-Enhanced functions are present in the following architectures:
+Enhanced functions are present for the following architectures:
 .Bl -column FUNCTION_________ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent
 .It Em FUNCTION          Ta Em AARCH64 Ta Em ARM Ta Em AMD64  Ta Em I386 Ta Em PPC64
 .It    bcmp              Ta            Ta        Ta    S1     Ta    S
@@ -58,24 +58,32 @@ Enhanced functions are present in the following architectures:
 .It    index             Ta    S       Ta        Ta    S1
 .It    ldiv              Ta            Ta        Ta    S      Ta    S
 .It    lldiv             Ta            Ta        Ta    S
-.It    memchr            Ta            Ta        Ta    S1
-.It    memcmp            Ta            Ta    S   Ta    S1     Ta    S
+.It    memchr            Ta    S       Ta        Ta    S1
+.It    memcmp            Ta    S       Ta    S   Ta    S1     Ta    S
+.It    memccpy           Ta            Ta        Ta    S1
 .It    memcpy            Ta    S       Ta    S   Ta    S      Ta    S    Ta    SV
 .It    memmove           Ta    S       Ta    S   Ta    S      Ta    S    Ta    SV
-.It    memset            Ta            Ta    S   Ta    S      Ta    S
-.It    rindex            Ta    S
-.It    stpcpy            Ta            Ta        Ta    S1
-.It    strcat            Ta            Ta        Ta    S      Ta    S
+.It    memrchr           Ta            Ta        Ta    S1
+.It    memset            Ta    S       Ta    S   Ta    S      Ta    S
+.It    rindex            Ta    S       Ta        Ta    S1     Ta    S
+.It    stpcpy            Ta    S       Ta        Ta    S1
+.It    stpncpy           Ta            Ta        Ta    S1
+.It    strcat            Ta            Ta        Ta    S1     Ta    S
 .It    strchr            Ta    S       Ta        Ta    S1     Ta    S
-.It    strchrnul         Ta            Ta        Ta    S1
-.It    strcmp            Ta            Ta    S   Ta    S      Ta    S
-.It    strcpy            Ta            Ta        Ta    S1     Ta    S    Ta    S2
+.It    strchrnul         Ta    S       Ta        Ta    S1
+.It    strcmp            Ta    S       Ta    S   Ta    S1     Ta    S
+.It    strcpy            Ta    S       Ta        Ta    S1     Ta    S    Ta    S2
 .It    strcspn           Ta            Ta        Ta    S2
-.It    strlen            Ta            Ta    S   Ta    S1
-.It    strncmp           Ta            Ta    S   Ta           Ta    S
-.It    strncpy           Ta            Ta        Ta           Ta         Ta    S2
-.It    strnlen           Ta            Ta        Ta    S1
-.It    strrchr           Ta    S       Ta        Ta           Ta    S
+.It    strlcat           Ta            Ta        Ta    S1
+.It    strlcpy           Ta            Ta        Ta    S1
+.It    strlen            Ta    S       Ta    S   Ta    S1
+.It    strncat           Ta            Ta        Ta    S1
+.It    strncmp           Ta    S       Ta    S   Ta    S1     Ta    S
+.It    strncpy           Ta            Ta        Ta    S1     Ta         Ta    S2
+.It    strnlen           Ta    S       Ta        Ta    S1
+.It    strrchr           Ta    S       Ta        Ta    S1     Ta    S
+.It    strpbrk           Ta            Ta        Ta    S2
+.It    strsep            Ta            Ta        Ta    S2
 .It    strspn            Ta            Ta        Ta    S2
 .It    swab              Ta            Ta        Ta           Ta    S
 .It    timingsafe_bcmp   Ta            Ta        Ta    S1
@@ -207,14 +215,14 @@ SIMD-enhanced functions were first added with
 for
 .Cm powerpc64
 and with
-.Fx 14.0
+.Fx 14.1
 for
 .Cm amd64 .
 .Pp
 A
 .Nm
 manual page appeared in
-.Fx 14.0 .
+.Fx 14.1 .
 .
 .Sh AUTHOR
 .An Robert Clausecker Aq Mt fuz@FreeBSD.org