diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7466f805c7414..9de785679de8f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,12 @@ jobs: runs_on: ubuntu-22.04 shell: bash build_options: "LKL_FUZZING=1 fuzzers" + - displayTargetName: zpoline + # maybe integrate with default Linux build once the function becomes stable + os: unix + runs_on: ubuntu-22.04 + shell: bash + build_options: "zpoline=./zpoline" timeout-minutes: 100 env: CCACHE_DIR: ${{ github.workspace }}/.ccache @@ -128,6 +134,20 @@ jobs: which gcc ccache -z + - name: install zpoline + if: matrix.displayTargetName == 'zpoline' + run: | + sudo apt install -y binutils-dev + git clone https://github.com/yasukata/zpoline + cd zpoline + git checkout 022a3b8c7a5c23bfd99162b478bf3eb5f70c07a2 + make + cd .. + # This is the whole point of zpoline + echo "==== setting mmap_min_addr ====" + sudo sh -c "echo 0 > /proc/sys/vm/mmap_min_addr" + echo "setting env variable (debug)" + echo "ZPOLINE_DEBUG=0" >> "$GITHUB_ENV" - name: Build run: | make -j4 -C tools/lkl ${{ matrix.build_options }} diff --git a/Documentation/lkl.txt b/Documentation/lkl.txt index e480d64dd5966a..42db98c51506c8 100644 --- a/Documentation/lkl.txt +++ b/Documentation/lkl.txt @@ -460,6 +460,71 @@ The following are the list of keys to describe a JSON file. "nameserver":"8.8.8.8" ``` +LKL hijack library with zpoline +------------------------------- + +[zpoline](https://github.com/yasukata/zpoline) is an alternative to +syscall hijack based on LD_PRELOAD, which is still the default in LKL. +The zpoline library rewrites the binary of the loaded program +upon instantiation, then loads a hook function for the original syscalls. +The LKL hijack library works together with zpoline by loading LKL. + +zpoline currently only works on x86_64 machines. + +To use the zpoline-enabled hijack library, please follow the +instructions below. 
+ +- Build +``` +make -C tools/lkl -j8 zpoline=../zpoline +``` + +This assumes `zpoline` has been downloaded to `../zpoline` and already built +before the LKL build. + +- Execution + +zpoline rewrites the memory address 0x0 to hook syscalls, but non-root +users don't have the privilege to map that address. The following +configuration allows us to use zpoline without root privilege. + +``` +sudo sh -c "echo 0 > /proc/sys/vm/mmap_min_addr" +``` + +Then, execute the command with the environment variable `LKL_HIJACK_ZPOLINE=1`. + +``` +LKL_HIJACK_ZPOLINE=1 LKL_HIJACK_CONFIG_FILE=lkl-tap.json \ + ./tools/lkl/bin/lkl-hijack.sh ping www.google.com +``` + +The file `lkl-tap.json` can be prepared like this. + +``` +{ + "gateway": "172.17.0.1", + "nameserver": "8.8.8.8", + "interfaces": [ + { + "ip": "172.17.0.39", + "masklen": "16", + "mac": "00:0d:0b:94:4e:97", + "param": "tap0", + "type": "tap" + } + ] +} +``` + +The preload hijack library, which is the default one, uses the +host's name resolver, so if the host uses a nameserver defined in +`/etc/resolv.conf`, such as 127.0.0.53, that nameserver does not accept DNS +requests from the point of view of the LKL instance. + +With zpoline, however, all syscalls used for name resolution are +hijacked, so `ping` works with a host name. 
+ FAQ === diff --git a/tools/lkl/.gitignore b/tools/lkl/.gitignore index 1a8ee8acbc4e3f..2a5f4273b0b079 100644 --- a/tools/lkl/.gitignore +++ b/tools/lkl/.gitignore @@ -10,6 +10,7 @@ tests/net-test tests/disk tests/disk-vfio-pci tests/config +tests/test-dlmopen Makefile.conf include/lkl_autoconf.h include/kernel_config.h diff --git a/tools/lkl/Makefile.autoconf b/tools/lkl/Makefile.autoconf index 3c0d07ac6835d6..a15753193a4f6e 100644 --- a/tools/lkl/Makefile.autoconf +++ b/tools/lkl/Makefile.autoconf @@ -62,6 +62,11 @@ define virtio_net_vde LDLIBS += $(shell pkg-config --libs vdeplug) endef +define zpoline_conf + $(eval zpoline_dir=$(abspath $(srctree)/$(1))) + $(if $(strip $(foreach f, $(zpoline_dir), $(wildcard $(f)/libzpoline.so))),$(call set_autoconf_var,ZPOLINE_DIR,$(zpoline_dir))) +endef + define posix_host $(call set_autoconf_var,POSIX,y) $(call set_autoconf_var,VIRTIO_NET,y) @@ -82,6 +87,7 @@ define posix_host $(if $(filter $(1),elf64-x86-64-freebsd),$(call set_autoconf_var,NEEDS_LARGP,y)) $(if $(filter $(1),elf32-i386),$(call set_autoconf_var,I386,y)) $(if $(strip $(call find_include,jsmn.h)),$(call set_autoconf_var,JSMN,y)) + $(if $(filter %,$(zpoline)),$(call zpoline_conf,$(zpoline))) endef define nt64_host diff --git a/tools/lkl/Targets b/tools/lkl/Targets index 7da425e774da91..8f1b66c5073906 100644 --- a/tools/lkl/Targets +++ b/tools/lkl/Targets @@ -2,13 +2,17 @@ libs-y += lib/liblkl ifneq ($(LKL_HOST_CONFIG_BSD),y) libs-$(LKL_HOST_CONFIG_POSIX) += lib/hijack/liblkl-hijack +libs-$(LKL_HOST_CONFIG_POSIX) += lib/hijack/liblkl-zpoline endif LDFLAGS_lib/hijack/liblkl-hijack-y += -shared -nodefaultlibs LDLIBS_lib/hijack/liblkl-hijack-y += -ldl LDLIBS_lib/hijack/liblkl-hijack-$(LKL_HOST_CONFIG_ARM) += -lgcc -lc -LDLIBS_lib/hijack/liblkl-hijack-$(LKL_HOST_CONFIG_AARCH64) += -lc +LDLIBS_lib/hijack/liblkl-hijack-$(LKL_HOST_CONFIG_AARCH64) += -lgcc -lc LDLIBS_lib/hijack/liblkl-hijack-$(LKL_HOST_CONFIG_I386) += -lc_nonshared 
+LDFLAGS_lib/hijack/liblkl-zpoline-$(LKL_HOST_CONFIG_POSIX) += -shared -nodefaultlibs +LDLIBS_lib/hijack/liblkl-zpoline-$(LKL_HOST_CONFIG_POSIX) += -ldl -lc + progs-$(LKL_HOST_CONFIG_FUSE) += lklfuse LDLIBS_lklfuse-y := -lfuse @@ -26,6 +30,10 @@ progs-y += tests/disk progs-y += tests/disk-vfio-pci progs-y += tests/net-test progs-y += tests/config +ifneq ($(LKL_HOST_CONFIG_BSD),y) +progs-y += tests/test-dlmopen +LDLIBS_tests/test-dlmopen-$(LKL_HOST_CONFIG_POSIX) += -ldl +endif # LKL fuzzers fuzzers-y += fuzzers/hid/hid-fuzzer diff --git a/tools/lkl/bin/lkl-hijack.sh b/tools/lkl/bin/lkl-hijack.sh index e4f1e0c47b6995..260251bcc5ecc4 100755 --- a/tools/lkl/bin/lkl-hijack.sh +++ b/tools/lkl/bin/lkl-hijack.sh @@ -13,10 +13,19 @@ ## script_dir=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd) +. ${script_dir}/../tests/autoconf.sh export LD_LIBRARY_PATH=${script_dir}/../lib/hijack if [ -n ${LKL_HIJACK_DEBUG+x} ] then trap '' TSTP fi -LD_PRELOAD=liblkl-hijack.so $* + + +if [ -n "${LKL_HIJACK_ZPOLINE}" ] +then + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${LKL_HOST_CONFIG_ZPOLINE_DIR} + LD_PRELOAD=libzpoline.so LIBZPHOOK=liblkl-zpoline.so $* +else + LD_PRELOAD=liblkl-hijack.so $* +fi diff --git a/tools/lkl/include/lkl_host.h b/tools/lkl/include/lkl_host.h index 62e3c48fbd4e9f..05cd459a841fe3 100644 --- a/tools/lkl/include/lkl_host.h +++ b/tools/lkl/include/lkl_host.h @@ -9,6 +9,7 @@ extern "C" { #include extern struct lkl_host_operations lkl_host_ops; +extern void lkl_change_tls_mode(void); /** * lkl_printf - print a message via the host print operation diff --git a/tools/lkl/lib/hijack/Build b/tools/lkl/lib/hijack/Build index e68e93a3328ac6..0c807ef2e2b5be 100644 --- a/tools/lkl/lib/hijack/Build +++ b/tools/lkl/lib/hijack/Build @@ -1,4 +1,9 @@ +liblkl-hijack-y += preload.o liblkl-hijack-y += hijack.o liblkl-hijack-y += init.o liblkl-hijack-y += xlate.o +liblkl-zpoline-y += zpoline.o +liblkl-zpoline-y += hijack.o +liblkl-zpoline-y += init.o +liblkl-zpoline-y += xlate.o diff --git 
a/tools/lkl/lib/hijack/hijack.c b/tools/lkl/lib/hijack/hijack.c index 2ba5c507b11ef8..0dfe08aacb08d6 100644 --- a/tools/lkl/lib/hijack/hijack.c +++ b/tools/lkl/lib/hijack/hijack.c @@ -31,8 +31,9 @@ #include "xlate.h" #include "init.h" +#include "hijack.h" -static int is_lklfd(int fd) +int is_lklfd(int fd) { if (fd < LKL_FD_OFFSET) return 0; @@ -167,8 +168,8 @@ HOST_CALL(write) HOST_CALL(pipe2) HOST_CALL(setsockopt); -int setsockopt(int fd, int level, int optname, const void *optval, - socklen_t optlen) +int hijack_setsockopt(int fd, int level, int optname, const void *optval, + socklen_t optlen) { CHECK_HOST_CALL(setsockopt); if (!is_lklfd(fd)) @@ -178,7 +179,7 @@ int setsockopt(int fd, int level, int optname, const void *optval, } HOST_CALL(getsockopt); -int getsockopt(int fd, int level, int optname, void *optval, socklen_t *optlen) +int hijack_getsockopt(int fd, int level, int optname, void *optval, socklen_t *optlen) { CHECK_HOST_CALL(getsockopt); if (!is_lklfd(fd)) @@ -240,7 +241,7 @@ int fcntl(int fd, int cmd, ...) 
} HOST_CALL(poll); -int poll(struct pollfd *fds, nfds_t nfds, int timeout) +int hijack_poll(struct pollfd *fds, nfds_t nfds, int timeout) { unsigned int i, lklfds = 0, hostfds = 0; @@ -264,10 +265,8 @@ int poll(struct pollfd *fds, nfds_t nfds, int timeout) return lkl_sys_poll((struct lkl_pollfd *)fds, nfds, timeout); } -int __poll(struct pollfd *, nfds_t, int) __attribute__((alias("poll"))); - HOST_CALL(select); -int select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t) +int hijack_select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t) { int fd, hostfds = 0, lklfds = 0; @@ -324,7 +323,7 @@ int close(int fd) } HOST_CALL(epoll_create); -int epoll_create(int size) +int hijack_epoll_create(int size) { int host_fd; @@ -346,14 +345,14 @@ int epoll_create(int size) } HOST_CALL(epoll_create1); -int epoll_create1(int flags) +int hijack_epoll_create1(int flags) { int host_fd; CHECK_HOST_CALL(epoll_create1); host_fd = host_epoll_create1(flags); - if (!host_fd) { + if (host_fd < 0) { fprintf(stderr, "%s fail (%d)\n", __func__, errno); return -1; } @@ -369,7 +368,7 @@ int epoll_create1(int flags) HOST_CALL(epoll_ctl); -int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) +int hijack_epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) { CHECK_HOST_CALL(epoll_ctl); @@ -404,7 +403,7 @@ static void *host_epollwait(void *arg) return (void *)(intptr_t)ret; } -int epoll_wait(int epfd, struct epoll_event *events, +int hijack_epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) { CHECK_HOST_CALL(epoll_wait); @@ -541,7 +540,7 @@ int epoll_wait(int epfd, struct epoll_event *events, return ret; } -int eventfd(unsigned int count, int flags) +int hijack_eventfd(unsigned int count, int flags) { if (!lkl_running) { int (*f)(unsigned int, int) = resolve_sym("eventfd"); @@ -553,7 +552,7 @@ int eventfd(unsigned int count, int flags) } HOST_CALL(eventfd_read); -int eventfd_read(int fd, uint64_t *value) +int 
hijack_eventfd_read(int fd, uint64_t *value) { CHECK_HOST_CALL(eventfd_read); @@ -565,7 +564,7 @@ int eventfd_read(int fd, uint64_t *value) } HOST_CALL(eventfd_write); -int eventfd_write(int fd, uint64_t value) +int hijack_eventfd_write(int fd, uint64_t value) { CHECK_HOST_CALL(eventfd_write); diff --git a/tools/lkl/lib/hijack/hijack.h b/tools/lkl/lib/hijack/hijack.h new file mode 100644 index 00000000000000..e9f55d8389c591 --- /dev/null +++ b/tools/lkl/lib/hijack/hijack.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + + +int is_lklfd(int fd); +int hijack_setsockopt(int fd, int level, int optname, const void *optval, + socklen_t optlen); +int hijack_getsockopt(int fd, int level, int optname, void *optval, socklen_t *optlen); +int hijack_poll(struct pollfd *fds, nfds_t nfds, int timeout); +int hijack_select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t); +int hijack_eventfd(unsigned int count, int flags); +int hijack_epoll_create(int size); +int hijack_epoll_create1(int flags); +int hijack_epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event); +int hijack_epoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout); +int hijack_eventfd_read(int fd, uint64_t *value); +int hijack_eventfd_write(int fd, uint64_t value); diff --git a/tools/lkl/lib/hijack/init.c b/tools/lkl/lib/hijack/init.c index 11ef12010aef45..8228fe6da4fb0a 100644 --- a/tools/lkl/lib/hijack/init.c +++ b/tools/lkl/lib/hijack/init.c @@ -112,8 +112,8 @@ static int config_load(void) return ret; } -void __attribute__((constructor)) -hijack_init(void) +void +__hijack_init(void) { int ret, i, dev_null; int single_cpu_mode = 0; @@ -225,8 +225,17 @@ hijack_init(void) lkl_load_config_post(cfg); } -void __attribute__((destructor)) -hijack_fini(void) +void __attribute__((constructor)) +hijack_init(void) +{ + if (getenv("LKL_HIJACK_ZPOLINE")) + return; + + return __hijack_init(); +} + +void +__hijack_fini(void) { int i; int 
err; @@ -257,3 +266,12 @@ hijack_fini(void) lkl_cleanup(); } + +void __attribute__((destructor)) +hijack_fini(void) +{ + if (getenv("LKL_HIJACK_ZPOLINE")) + return; + + return __hijack_fini(); +} diff --git a/tools/lkl/lib/hijack/init.h b/tools/lkl/lib/hijack/init.h index be4448f8fe6b9e..2f7a51d3c0a00a 100644 --- a/tools/lkl/lib/hijack/init.h +++ b/tools/lkl/lib/hijack/init.h @@ -4,4 +4,7 @@ extern int lkl_running; extern int dual_fds[]; +void __hijack_init(void); +void __hijack_fini(void); + #endif /*_LKL_HIJACK_INIT_H */ diff --git a/tools/lkl/lib/hijack/preload.c b/tools/lkl/lib/hijack/preload.c new file mode 100644 index 00000000000000..ec0c3d3fd09bfe --- /dev/null +++ b/tools/lkl/lib/hijack/preload.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * system calls hook by LD_PRELOAD + * Copyright (c) 2023 Hajime Tazaki + * + * Author: Hajime Tazaki + * + */ + +#include "hijack.h" + +int setsockopt(int fd, int level, int optname, const void *optval, + socklen_t optlen) +{ + return hijack_setsockopt(fd, level, optname, optval, optlen); +} + +int getsockopt(int fd, int level, int optname, void *optval, socklen_t *optlen) +{ + return hijack_getsockopt(fd, level, optname, optval, optlen); +} + +int poll(struct pollfd *fds, nfds_t nfds, int timeout) +{ + return hijack_poll(fds, nfds, timeout); +} +int __poll(struct pollfd *, nfds_t, int) __attribute__((alias("poll"))); + +int select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t) +{ + return hijack_select(nfds, r, w, e, t); +} + +int epoll_create(int size) +{ + return hijack_epoll_create(size); +} + +int epoll_create1(int flags) +{ + return hijack_epoll_create1(flags); +} + +int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) +{ + return hijack_epoll_ctl(epollfd, op, fd, event); +} + +int epoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + return hijack_epoll_wait(epfd, events, maxevents, timeout); +} + +int eventfd(unsigned int count, int 
flags) +{ + return hijack_eventfd(count, flags); +} + +int eventfd_read(int fd, uint64_t *value) +{ + return hijack_eventfd_read(fd, value); +} + +int eventfd_write(int fd, uint64_t value) +{ + return hijack_eventfd_write(fd, value); +} diff --git a/tools/lkl/lib/hijack/zpoline.c b/tools/lkl/lib/hijack/zpoline.c new file mode 100644 index 00000000000000..12d47c3c2f7fd4 --- /dev/null +++ b/tools/lkl/lib/hijack/zpoline.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * zpoline related code for hijack + * Copyright (c) 2023 Hajime Tazaki + * + * Author: Hajime Tazaki + * + * Note: https://github.com/yasukata/zpoline + */ + +/* zpoline only works on x86_64 architecture */ +#ifdef __x86_64__ +#include +#include + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include "init.h" +#include "xlate.h" +#include "hijack.h" + +#include + +/* XXX: include doesn't do the job.. */ +extern __pid_t gettid(void) __THROW; + +#define CALL_LKL_FD_SYSCALL(x) \ +{ \ + case __NR_##x: \ + if (!is_lklfd(a2)) \ + ret = syscall(a1, a2, a3, a4, a5, a6, a7); \ + else { \ + long p[6] = {a2, a3, a4, a5, a6, a7}; \ + ret = lkl_syscall(__lkl__NR_##x, p); \ + } \ + break; \ +} + + +#define ZPOLINE_DEBUG 0 +#define dprintf(fmt, ...) 
\ + do { \ + if (ZPOLINE_DEBUG) { \ + printf(fmt, ##__VA_ARGS__); \ + } \ + } while (0) + +long zpoline_lkl_hook(int64_t a1, int64_t a2, int64_t a3, + int64_t a4, int64_t a5, int64_t a6, + int64_t a7) +{ + int ret; + + dprintf("syscall %ld: tid: %d\n", a1, gettid()); + if (!lkl_running) { + if (a1 == __NR_clone) { + if (a2 & CLONE_VM) { // pthread creation + /* push return address to the stack */ + a3 -= sizeof(uint64_t); + *((uint64_t *) a3) = a7; + } + } + return syscall(a1, a2, a3, a4, a5, a6); + } + + switch (a1) { + CALL_LKL_FD_SYSCALL(sendmsg); + CALL_LKL_FD_SYSCALL(recvmsg); + CALL_LKL_FD_SYSCALL(sendmmsg); + CALL_LKL_FD_SYSCALL(recvmmsg); + CALL_LKL_FD_SYSCALL(bind); + CALL_LKL_FD_SYSCALL(connect); + CALL_LKL_FD_SYSCALL(getsockopt); + CALL_LKL_FD_SYSCALL(setsockopt); + CALL_LKL_FD_SYSCALL(getsockname); + CALL_LKL_FD_SYSCALL(sendto); + CALL_LKL_FD_SYSCALL(recvfrom); + CALL_LKL_FD_SYSCALL(listen); + CALL_LKL_FD_SYSCALL(accept); + CALL_LKL_FD_SYSCALL(close); + CALL_LKL_FD_SYSCALL(ioctl); + CALL_LKL_FD_SYSCALL(fcntl); + CALL_LKL_FD_SYSCALL(read); + CALL_LKL_FD_SYSCALL(write); + CALL_LKL_FD_SYSCALL(pread64); + case __NR_socket: + ret = lkl_sys_socket(a2, a3, a4); + if (ret < 0) + syscall(a1, a2, a3, a4, a5, a6, a7); + break; + case __NR_openat: + if (!lkl_running) + ret = syscall(a1, a2, a3, a4, a5, a6, a7); + else { + ret = lkl_sys_open((char *)a3, a4, a5); + /* open to host libraries should not hijack */ + if (ret < 0 && (strncmp((char *)a3, "/lib", 4) == 0)) + ret = syscall(a1, a2, a3, a4, a5, a6, a7); + } + break; + case __NR_newfstatat: + if (!lkl_running) + ret = syscall(a1, a2, a3, a4, a5, a6, a7); + else + ret = lkl_sys_newfstatat(a2, (char *)a3, (void *)a4, a5); + break; + case __NR_epoll_create1: + return hijack_epoll_create1(a2); + case __NR_epoll_ctl: + return hijack_epoll_ctl(a2, a3, a4, (void *)a5); + case __NR_epoll_wait: + return hijack_epoll_wait(a2, (void *)a3, a4, a5); + case __NR_poll: + return hijack_poll((void *)a2, a3, a4); + case 
__NR_select: + return hijack_select(a2, (void *)a3, (void *)a4, (void *)a5, (void *)a6); + case __NR_eventfd2: + return hijack_eventfd(a2, a3); + case __NR_futex: + ret = syscall(a1, a2, a3, a4, a5, a6, a7); + if (ret < 0) + return -errno; + break; + default: + return syscall(a1, a2, a3, a4, a5, a6, a7); + } + + if (ret == LKL_ENOSYS) + fprintf(stderr, "no syscall defined in LKL (syscall=%ld)\n", a1); + + return ret; +} + +void __attribute__((destructor)) +hook_exit(void) +{ + __hijack_fini(); +} + +typedef long (*syscall_fn_t)(long, long, long, long, long, long, long); +int __hook_init(long placeholder __attribute__ ((__unused__)), + void *default_hook) +{ + *((syscall_fn_t *) default_hook) = zpoline_lkl_hook; + + /** + * XXX: this library is expected to be load via dlmopen of zpoline, thus + * we need to patch a workaorund to handle thread specific data. + */ + lkl_change_tls_mode(); + + __hijack_init(); + return 0; +} +#endif /* __x86_64__ */ diff --git a/tools/lkl/lib/posix-host.c b/tools/lkl/lib/posix-host.c index b5f71e2b512739..a667de02efa123 100644 --- a/tools/lkl/lib/posix-host.c +++ b/tools/lkl/lib/posix-host.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "iomem.h" #include "jmp_buf.h" @@ -247,7 +248,7 @@ void *thread_stack(unsigned long *size) return thread_stack; } -static struct lkl_tls_key *tls_alloc(void (*destructor)(void *)) +static struct lkl_tls_key *tsd_alloc(void (*destructor)(void *)) { struct lkl_tls_key *ret = malloc(sizeof(struct lkl_tls_key)); @@ -258,24 +259,107 @@ static struct lkl_tls_key *tls_alloc(void (*destructor)(void *)) return ret; } -static void tls_free(struct lkl_tls_key *key) +static void tsd_free(struct lkl_tls_key *key) { WARN_PTHREAD(pthread_key_delete(key->key)); free(key); } -static int tls_set(struct lkl_tls_key *key, void *data) +static int tsd_set(struct lkl_tls_key *key, void *data) { if (WARN_PTHREAD(pthread_setspecific(key->key, data))) return -1; return 0; } -static void *tls_get(struct 
lkl_tls_key *key) +static void *tsd_get(struct lkl_tls_key *key) { return pthread_getspecific(key->key); } +/** + * When LKL is loaded via dl*m*open(3), pthread_getspecific() + * doesn't work correctly, as a global symbol, __pthread_keys, is + * duplicated across multiple namespaces, so the same keys conflict + * among multiple users of TSD in a single process, which makes our case + * impossible to work (e.g., host_task from each thread). + * + * To work around this issue, we use TLS via __thread, which doesn't + * require any conflicting global symbols. But the default __thread uses + * glibc's __tls_get_addr(), which calls futex and causes a + * deadlock in our thread, so explicit initialization with + * initial-exec is needed. + * + * We still use the previous *TSD* (thread specific data) + * implementation based on pthread_key_create by default, as most + * cases don't hit this situation: dlmopen is not a common + * practice and only a few implementations (e.g., glibc) provide this + * function. 
+ * + */ +#define LKL_MAX_TLS_KEYS (PTHREAD_KEYS_MAX/8) /* 1024/8 = 128 */ +struct __lkl_tls_keys { + int used; + void *data; +}; +static __thread struct __lkl_tls_keys __tls_keys[LKL_MAX_TLS_KEYS]; + +static struct lkl_tls_key *tls_alloc(void (*destructor)(void *)) +{ + int idx; + struct lkl_tls_key *ret = malloc(sizeof(struct lkl_tls_key)); + + for (idx = 0; idx < LKL_MAX_TLS_KEYS; idx++) { + /* data = NULL means the key unused */ + if (__tls_keys[idx].used == 0) { + ret->key = (pthread_key_t)idx; + __tls_keys[idx].used = 1; + return ret; + } + } + + /* if there are no unused keys, return NULL */ + free(ret); + return NULL; +} + +static void tls_free(struct lkl_tls_key *key) +{ + int idx = (int)key->key; + + if (idx < 0 || idx >= LKL_MAX_TLS_KEYS) { + lkl_printf("%s; key not found\n", __func__); + return; + } + __tls_keys[idx].used = 0; + free(key); +} + +static int tls_set(struct lkl_tls_key *key, void *data) +{ + int idx = (int)key->key; + + if (idx < 0 || idx >= LKL_MAX_TLS_KEYS) { + lkl_printf("%s; key not found\n", __func__); + return -1; + } + __tls_keys[idx].data = data; + return 0; +} + +static void *tls_get(struct lkl_tls_key *key) +{ + int idx = (int)key->key; + + if (idx < 0 || idx >= LKL_MAX_TLS_KEYS) { + lkl_printf("%s; key not found\n", __func__); + return NULL; + } + + return __tls_keys[idx].data; +} + + static unsigned long long time_ns(void) { struct timespec ts; @@ -423,10 +507,10 @@ struct lkl_host_operations lkl_host_ops = { .mutex_free = mutex_free, .mutex_lock = mutex_lock, .mutex_unlock = mutex_unlock, - .tls_alloc = tls_alloc, - .tls_free = tls_free, - .tls_set = tls_set, - .tls_get = tls_get, + .tls_alloc = tsd_alloc, + .tls_free = tsd_free, + .tls_set = tsd_set, + .tls_get = tsd_get, .time = time_ns, .timer_alloc = timer_alloc, .timer_set_oneshot = timer_set_oneshot, @@ -451,6 +535,14 @@ struct lkl_host_operations lkl_host_ops = { #endif }; +void lkl_change_tls_mode(void) +{ + lkl_host_ops.tls_alloc = tls_alloc; + lkl_host_ops.tls_free = 
tls_free; + lkl_host_ops.tls_set = tls_set; + lkl_host_ops.tls_get = tls_get; +} + static int fd_get_capacity(struct lkl_disk disk, unsigned long long *res) { off_t off; diff --git a/tools/lkl/scripts/checkpatch.sh b/tools/lkl/scripts/checkpatch.sh index e4d8316eb36097..b1c86123db99d4 100755 --- a/tools/lkl/scripts/checkpatch.sh +++ b/tools/lkl/scripts/checkpatch.sh @@ -52,7 +52,8 @@ if [ -z "$c" ]; then exit 0 fi -./scripts/checkpatch.pl $Q --summary-file --ignore FILE_PATH_CHANGES $tmp/*.patch +./scripts/checkpatch.pl $Q --summary-file --ignore FILE_PATH_CHANGES \ + --ignore PREFER_DEFINED_ATTRIBUTE_MACRO $tmp/*.patch rm $tmp/*.patch # checkpatch.pl does not know how to deal with 3 way diffs which would diff --git a/tools/lkl/tests/Build b/tools/lkl/tests/Build index ed943821d72e4d..b156f780ae402e 100644 --- a/tools/lkl/tests/Build +++ b/tools/lkl/tests/Build @@ -3,3 +3,4 @@ disk-y += disk.o cla.o test.o disk-vfio-pci-y += disk-vfio-pci.o cla.o test.o net-test-y += net-test.o cla.o test.o config-y += config.o test.o +test-dlmopen-y += test-dlmopen.o test.o diff --git a/tools/lkl/tests/hijack-test.sh b/tools/lkl/tests/hijack-test.sh index 164cf53977f0e7..597b6a834bed3e 100755 --- a/tools/lkl/tests/hijack-test.sh +++ b/tools/lkl/tests/hijack-test.sh @@ -686,10 +686,21 @@ if [[ ! 
-e ${basedir}/lib/hijack/liblkl-hijack.so ]]; then exit 0 fi +if [ -n "${LKL_HIJACK_ZPOLINE}" ] +then + if [ -z "$LKL_HOST_CONFIG_ZPOLINE_DIR" ]; then + lkl_test_plan 0 "zpoline tests" + echo "missing zpoline configuration" + exit $TEST_SKIP + fi + test_header=" (zpoline)" +fi + + if [ -n "$LKL_HOST_CONFIG_ANDROID" ]; then wdir=$ANDROID_WDIR adb_push lib/hijack/liblkl-hijack.so bin/lkl-hijack.sh tests/net-setup.sh \ - tests/run_netperf.sh tests/hijack-test.sh + tests/run_netperf.sh tests/hijack-test.sh tests/autoconf.sh ping="ping" ping6="ping6" hijack="$wdir/bin/lkl-hijack.sh" @@ -713,7 +724,7 @@ VDESWITCH=${wdir}/vde_switch # And make sure we clean up when we're done trap "clear_wdir &>/dev/null" EXIT -lkl_test_plan 5 "hijack basic tests" +lkl_test_plan 5 "hijack basic tests${test_header}" lkl_test_run 1 run_hijack ip addr lkl_test_run 2 run_hijack ip route lkl_test_run 3 test_ping diff --git a/tools/lkl/tests/run.py b/tools/lkl/tests/run.py index 5de0e1f5088b63..d013362cf97648 100755 --- a/tools/lkl/tests/run.py +++ b/tools/lkl/tests/run.py @@ -64,6 +64,7 @@ def end(self, obj): 'lklfuse.sh -t xfs', 'config', 'hijack-test.sh', + 'LKL_HIJACK_ZPOLINE=1 hijack-test.sh', 'disk-vfio-pci.sh -t ext4 run', 'disk-vfio-pci.sh -t btrfs run', 'disk-vfio-pci.sh -t vfat run', diff --git a/tools/lkl/tests/test-dlmopen.c b/tools/lkl/tests/test-dlmopen.c new file mode 100644 index 00000000000000..292eec7094a832 --- /dev/null +++ b/tools/lkl/tests/test-dlmopen.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* if dlemopen is not implemented, skip test */ +#if defined(__x86_64__) && defined(__linux__) +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include "test.h" + +#define CMD_LINE "mem=16M loglevel=8" + +/* glibc (may) only supports dlmopen(3) */ +#ifndef LM_ID_NEWLM +#define NO_DLMOPEN_LINUX 1 +#else +#define NO_DLMOPEN_LINUX 0 +#endif + +static int lkl_test_dlmopen(void) +{ + void *handle; + long ret; + char *filename = 
"liblkl.so"; + long params[6] = {0}; + int (*fn_init)(struct lkl_host_operations *ops); + int (*fn_start)(char *fmt, ...); + long (*fn_syscall)(long no, long *params); + long (*fn_sys_halt)(void); + void (*fn_tls_mode)(void); + struct lkl_host_operations *lkl_host_ops; + + handle = dlmopen(LM_ID_NEWLM, filename, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + lkl_test_logf("%s: dlmopen failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + fn_init = dlsym(handle, "lkl_init"); + if (!fn_init) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + lkl_host_ops = dlsym(handle, "lkl_host_ops"); + if (!lkl_host_ops) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + fn_start = dlsym(handle, "lkl_start_kernel"); + if (!fn_start) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + fn_syscall = dlsym(handle, "lkl_syscall"); + if (!fn_syscall) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + fn_sys_halt = dlsym(handle, "lkl_sys_halt"); + if (!fn_sys_halt) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + fn_tls_mode = dlsym(handle, "lkl_change_tls_mode"); + if (!fn_tls_mode) { + lkl_test_logf("%s: dlsym failed, %s\n", __func__, dlerror()); + return TEST_FAILURE; + } + + /* start calling resolved symbols */ + fn_tls_mode(); + fn_init(lkl_host_ops); + ret = fn_start(CMD_LINE); + if (ret != 0) { + lkl_test_logf("lkl_start_kernel() = %ld %s\n", + ret, ret < 0 ? lkl_strerror(ret) : ""); + return TEST_FAILURE; + } + + ret = fn_syscall(__lkl__NR_getpid, params); + lkl_test_logf("getpid() = %ld\n", ret); + if (ret != 1) { + lkl_test_logf("getpid() = %ld %s\n", ret, ret < 0 ? lkl_strerror(ret) : ""); + return TEST_FAILURE; + } + + ret = fn_sys_halt(); + if (ret != 0) { + lkl_test_logf("halt() = %ld %s\n", ret, ret < 0 ? 
lkl_strerror(ret) : ""); + return TEST_FAILURE; + } + + return ret == 0 ? TEST_SUCCESS : TEST_FAILURE; +} + +struct lkl_test tests[] = { + LKL_TEST(dlmopen), +}; + +int main(int argc, const char **argv) +{ + int ret; + + if (NO_DLMOPEN_LINUX) { + lkl_test_logf("no dlmopen support\n"); + return TEST_SKIP; + } + + ret = lkl_test_run(tests, sizeof(tests)/sizeof(struct lkl_test), + "dlmopen"); + + lkl_cleanup(); + + return ret; +} + +#else +#include "test.h" + +int main(int argc, const char **argv) +{ + lkl_test_logf("no x86_64 arch supported\n"); + return TEST_SKIP; +} +#endif /* defined (__x86_64__) && defined (__linux__) */