Diffstat (limited to 'vendor/rustix/src/backend/linux_raw/thread/syscalls.rs')
| -rw-r--r-- | vendor/rustix/src/backend/linux_raw/thread/syscalls.rs | 549 |
1 file changed, 549 insertions, 0 deletions
diff --git a/vendor/rustix/src/backend/linux_raw/thread/syscalls.rs b/vendor/rustix/src/backend/linux_raw/thread/syscalls.rs
new file mode 100644
index 00000000..352e0615
--- /dev/null
+++ b/vendor/rustix/src/backend/linux_raw/thread/syscalls.rs
@@ -0,0 +1,549 @@
+//! linux_raw syscalls supporting `rustix::thread`.
+//!
+//! # Safety
+//!
+//! See the `rustix::backend` module documentation for details.
+#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
+
+use super::types::RawCpuSet;
+use crate::backend::c;
+use crate::backend::conv::{
+    by_mut, by_ref, c_int, c_uint, opt_ref, ret, ret_c_int, ret_c_int_infallible, ret_c_uint,
+    ret_usize, size_of, slice, slice_just_addr, slice_just_addr_mut, zero,
+};
+use crate::fd::BorrowedFd;
+use crate::io;
+use crate::pid::Pid;
+use crate::thread::{
+    futex, ClockId, Cpuid, MembarrierCommand, MembarrierQuery, NanosleepRelativeResult, Timespec,
+};
+use crate::utils::as_mut_ptr;
+use core::mem::MaybeUninit;
+use core::sync::atomic::AtomicU32;
+#[cfg(target_pointer_width = "32")]
+use linux_raw_sys::general::timespec as __kernel_old_timespec;
+use linux_raw_sys::general::{membarrier_cmd, membarrier_cmd_flag, TIMER_ABSTIME};
+
+#[inline]
+pub(crate) fn clock_nanosleep_relative(id: ClockId, req: &Timespec) -> NanosleepRelativeResult {
+    #[cfg(target_pointer_width = "32")]
+    unsafe {
+        let mut rem = MaybeUninit::<Timespec>::uninit();
+        match ret(syscall!(
+            __NR_clock_nanosleep_time64,
+            id,
+            c_int(0),
+            by_ref(req),
+            &mut rem
+        ))
+        .or_else(|err| {
+            // See the comments in `clock_gettime_via_syscall` about emulation.
+            if err == io::Errno::NOSYS {
+                clock_nanosleep_relative_old(id, req, &mut rem)
+            } else {
+                Err(err)
+            }
+        }) {
+            Ok(()) => NanosleepRelativeResult::Ok,
+            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
+            Err(err) => NanosleepRelativeResult::Err(err),
+        }
+    }
+    #[cfg(target_pointer_width = "64")]
+    unsafe {
+        let mut rem = MaybeUninit::<Timespec>::uninit();
+        match ret(syscall!(
+            __NR_clock_nanosleep,
+            id,
+            c_int(0),
+            by_ref(req),
+            &mut rem
+        )) {
+            Ok(()) => NanosleepRelativeResult::Ok,
+            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
+            Err(err) => NanosleepRelativeResult::Err(err),
+        }
+    }
+}
+
+#[cfg(target_pointer_width = "32")]
+unsafe fn clock_nanosleep_relative_old(
+    id: ClockId,
+    req: &Timespec,
+    rem: &mut MaybeUninit<Timespec>,
+) -> io::Result<()> {
+    let old_req = __kernel_old_timespec {
+        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
+        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
+    };
+    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
+    ret(syscall!(
+        __NR_clock_nanosleep,
+        id,
+        c_int(0),
+        by_ref(&old_req),
+        &mut old_rem
+    ))?;
+    let old_rem = old_rem.assume_init();
+    rem.write(Timespec {
+        tv_sec: old_rem.tv_sec.into(),
+        tv_nsec: old_rem.tv_nsec.into(),
+    });
+    Ok(())
+}
+
+#[inline]
+pub(crate) fn clock_nanosleep_absolute(id: ClockId, req: &Timespec) -> io::Result<()> {
+    #[cfg(target_pointer_width = "32")]
+    unsafe {
+        ret(syscall_readonly!(
+            __NR_clock_nanosleep_time64,
+            id,
+            c_uint(TIMER_ABSTIME),
+            by_ref(req),
+            zero()
+        ))
+        .or_else(|err| {
+            // See the comments in `clock_gettime_via_syscall` about emulation.
+            if err == io::Errno::NOSYS {
+                clock_nanosleep_absolute_old(id, req)
+            } else {
+                Err(err)
+            }
+        })
+    }
+    #[cfg(target_pointer_width = "64")]
+    unsafe {
+        ret(syscall_readonly!(
+            __NR_clock_nanosleep,
+            id,
+            c_uint(TIMER_ABSTIME),
+            by_ref(req),
+            zero()
+        ))
+    }
+}
+
+#[cfg(target_pointer_width = "32")]
+unsafe fn clock_nanosleep_absolute_old(id: ClockId, req: &Timespec) -> io::Result<()> {
+    let old_req = __kernel_old_timespec {
+        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
+        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
+    };
+    // This is the absolute-time path, so pass `TIMER_ABSTIME` here too.
+    ret(syscall_readonly!(
+        __NR_clock_nanosleep,
+        id,
+        c_uint(TIMER_ABSTIME),
+        by_ref(&old_req),
+        zero()
+    ))
+}
+
+#[inline]
+pub(crate) fn nanosleep(req: &Timespec) -> NanosleepRelativeResult {
+    #[cfg(target_pointer_width = "32")]
+    unsafe {
+        let mut rem = MaybeUninit::<Timespec>::uninit();
+        match ret(syscall!(
+            __NR_clock_nanosleep_time64,
+            ClockId::Realtime,
+            c_int(0),
+            by_ref(req),
+            &mut rem
+        ))
+        .or_else(|err| {
+            // See the comments in `clock_gettime_via_syscall` about emulation.
+            if err == io::Errno::NOSYS {
+                nanosleep_old(req, &mut rem)
+            } else {
+                Err(err)
+            }
+        }) {
+            Ok(()) => NanosleepRelativeResult::Ok,
+            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
+            Err(err) => NanosleepRelativeResult::Err(err),
+        }
+    }
+    #[cfg(target_pointer_width = "64")]
+    unsafe {
+        let mut rem = MaybeUninit::<Timespec>::uninit();
+        match ret(syscall!(__NR_nanosleep, by_ref(req), &mut rem)) {
+            Ok(()) => NanosleepRelativeResult::Ok,
+            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
+            Err(err) => NanosleepRelativeResult::Err(err),
+        }
+    }
+}
+
+#[cfg(target_pointer_width = "32")]
+unsafe fn nanosleep_old(req: &Timespec, rem: &mut MaybeUninit<Timespec>) -> io::Result<()> {
+    let old_req = __kernel_old_timespec {
+        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
+        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
+    };
+    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
+    ret(syscall!(__NR_nanosleep, by_ref(&old_req), &mut old_rem))?;
+    let old_rem = old_rem.assume_init();
+    rem.write(Timespec {
+        tv_sec: old_rem.tv_sec.into(),
+        tv_nsec: old_rem.tv_nsec.into(),
+    });
+    Ok(())
+}
+
+#[inline]
+#[must_use]
+pub(crate) fn gettid() -> Pid {
+    unsafe {
+        let tid = ret_c_int_infallible(syscall_readonly!(__NR_gettid));
+        Pid::from_raw_unchecked(tid)
+    }
+}
+
+/// # Safety
+///
+/// The raw pointers must point to valid aligned memory.
+#[inline]
+pub(crate) unsafe fn futex_val2(
+    uaddr: *const AtomicU32,
+    op: super::futex::Operation,
+    flags: futex::Flags,
+    val: u32,
+    val2: u32,
+    uaddr2: *const AtomicU32,
+    val3: u32,
+) -> io::Result<usize> {
+    // Pass `val2` in the least-significant bytes of the `timeout` argument.
+    // [“the kernel casts the timeout value first to unsigned long, then to
+    // uint32_t”], so we perform that exact conversion in reverse to create
+    // the pointer.
+    //
+    // [“the kernel casts the timeout value first to unsigned long, then to uint32_t”]: https://man7.org/linux/man-pages/man2/futex.2.html
+    let timeout = val2 as usize as *const Timespec;
+
+    #[cfg(target_pointer_width = "32")]
+    {
+        // Linux 5.1 added `futex_time64`; if we have that, use it. We don't
+        // need it here, because `timeout` is just passing `val2` and not a
+        // real timeout, but it's nice to use `futex_time64` for consistency
+        // with the other futex calls that do.
+        #[cfg(feature = "linux_5_1")]
+        {
+            ret_usize(syscall!(
+                __NR_futex_time64,
+                uaddr,
+                (op, flags),
+                c_uint(val),
+                timeout,
+                uaddr2,
+                c_uint(val3)
+            ))
+        }
+
+        // If we don't have Linux 5.1, use plain `futex`.
+        #[cfg(not(feature = "linux_5_1"))]
+        {
+            ret_usize(syscall!(
+                __NR_futex,
+                uaddr,
+                (op, flags),
+                c_uint(val),
+                timeout,
+                uaddr2,
+                c_uint(val3)
+            ))
+        }
+    }
+    #[cfg(target_pointer_width = "64")]
+    ret_usize(syscall!(
+        __NR_futex,
+        uaddr,
+        (op, flags),
+        c_uint(val),
+        timeout,
+        uaddr2,
+        c_uint(val3)
+    ))
+}
+
+/// # Safety
+///
+/// The raw pointers must point to valid aligned memory.
+#[inline]
+pub(crate) unsafe fn futex_timeout(
+    uaddr: *const AtomicU32,
+    op: super::futex::Operation,
+    flags: futex::Flags,
+    val: u32,
+    timeout: Option<&Timespec>,
+    uaddr2: *const AtomicU32,
+    val3: u32,
+) -> io::Result<usize> {
+    #[cfg(target_pointer_width = "32")]
+    {
+        // If we don't have Linux 5.1, and the timeout fits in a
+        // `__kernel_old_timespec`, use plain `futex`.
+        //
+        // We do this unconditionally, rather than trying `futex_time64` and
+        // falling back on `Errno::NOSYS`, because seccomp configurations will
+        // sometimes abort the process on syscalls they don't recognize.
+        #[cfg(not(feature = "linux_5_1"))]
+        {
+            // If we don't have a timeout, or if we can convert the timeout to
+            // a `__kernel_old_timespec`, then use `__NR_futex`.
+            fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
+                Some(__kernel_old_timespec {
+                    tv_sec: timeout.tv_sec.try_into().ok()?,
+                    tv_nsec: timeout.tv_nsec.try_into().ok()?,
+                })
+            }
+            let old_timeout = if let Some(timeout) = timeout {
+                match convert(timeout) {
+                    // Could not convert timeout.
+                    None => None,
+                    // Could convert timeout. Ok!
+                    Some(old_timeout) => Some(Some(old_timeout)),
+                }
+            } else {
+                // No timeout. Ok!
+                Some(None)
+            };
+            if let Some(old_timeout) = old_timeout {
+                return ret_usize(syscall!(
+                    __NR_futex,
+                    uaddr,
+                    (op, flags),
+                    c_uint(val),
+                    opt_ref(old_timeout.as_ref()),
+                    uaddr2,
+                    c_uint(val3)
+                ));
+            }
+        }
+
+        // We either have Linux 5.1 or the timeout didn't fit in
+        // `__kernel_old_timespec`, so `__NR_futex_time64` will either
+        // succeed or fail; we have no other options.
+        ret_usize(syscall!(
+            __NR_futex_time64,
+            uaddr,
+            (op, flags),
+            c_uint(val),
+            opt_ref(timeout),
+            uaddr2,
+            c_uint(val3)
+        ))
+    }
+    #[cfg(target_pointer_width = "64")]
+    ret_usize(syscall!(
+        __NR_futex,
+        uaddr,
+        (op, flags),
+        c_uint(val),
+        opt_ref(timeout),
+        uaddr2,
+        c_uint(val3)
+    ))
+}
+
+#[inline]
+pub(crate) fn futex_waitv(
+    waiters: &[futex::Wait],
+    flags: futex::WaitvFlags,
+    timeout: Option<&Timespec>,
+    clockid: ClockId,
+) -> io::Result<usize> {
+    let (waiters_addr, waiters_len) = slice(waiters);
+    unsafe {
+        ret_usize(syscall!(
+            __NR_futex_waitv,
+            waiters_addr,
+            waiters_len,
+            c_uint(flags.bits()),
+            opt_ref(timeout),
+            clockid
+        ))
+    }
+}
+
+#[inline]
+pub(crate) fn setns(fd: BorrowedFd<'_>, nstype: c::c_int) -> io::Result<c::c_int> {
+    unsafe { ret_c_int(syscall_readonly!(__NR_setns, fd, c_int(nstype))) }
+}
+
+#[inline]
+pub(crate) fn unshare(flags: crate::thread::UnshareFlags) -> io::Result<()> {
+    unsafe { ret(syscall_readonly!(__NR_unshare, flags)) }
+}
+
+#[inline]
+pub(crate) fn capget(
+    header: &mut linux_raw_sys::general::__user_cap_header_struct,
+    data: &mut [MaybeUninit<linux_raw_sys::general::__user_cap_data_struct>],
+) -> io::Result<()> {
+    unsafe {
+        ret(syscall!(
+            __NR_capget,
+            by_mut(header),
+            slice_just_addr_mut(data)
+        ))
+    }
+}
+
+#[inline]
+pub(crate) fn capset(
+    header: &mut linux_raw_sys::general::__user_cap_header_struct,
+    data: &[linux_raw_sys::general::__user_cap_data_struct],
+) -> io::Result<()> {
+    unsafe { ret(syscall!(__NR_capset, by_mut(header), slice_just_addr(data))) }
+}
+
+#[inline]
+pub(crate) fn setuid_thread(uid: crate::ugid::Uid) -> io::Result<()> {
+    unsafe { ret(syscall_readonly!(__NR_setuid, uid)) }
+}
+
+#[inline]
+pub(crate) fn setresuid_thread(
+    ruid: crate::ugid::Uid,
+    euid: crate::ugid::Uid,
+    suid: crate::ugid::Uid,
+) -> io::Result<()> {
+    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
+    unsafe {
+        ret(syscall_readonly!(__NR_setresuid32, ruid, euid, suid))
+    }
+    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
+    unsafe {
+        ret(syscall_readonly!(__NR_setresuid, ruid, euid, suid))
+    }
+}
+
+#[inline]
+pub(crate) fn setgid_thread(gid: crate::ugid::Gid) -> io::Result<()> {
+    unsafe { ret(syscall_readonly!(__NR_setgid, gid)) }
+}
+
+#[inline]
+pub(crate) fn setresgid_thread(
+    rgid: crate::ugid::Gid,
+    egid: crate::ugid::Gid,
+    sgid: crate::ugid::Gid,
+) -> io::Result<()> {
+    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
+    unsafe {
+        ret(syscall_readonly!(__NR_setresgid32, rgid, egid, sgid))
+    }
+    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
+    unsafe {
+        ret(syscall_readonly!(__NR_setresgid, rgid, egid, sgid))
+    }
+}
+
+#[inline]
+pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
+    let (addr, len) = slice(gids);
+    unsafe { ret(syscall_readonly!(__NR_setgroups, len, addr)) }
+}
+
+// `sched_getcpu` has special optimizations via the vDSO on some architectures.
+#[cfg(any(
+    target_arch = "x86_64",
+    target_arch = "x86",
+    target_arch = "riscv64",
+    target_arch = "powerpc",
+    target_arch = "powerpc64",
+    target_arch = "s390x"
+))]
+pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
+
+// `sched_getcpu` on platforms without a vDSO entry for it.
+#[cfg(not(any(
+    target_arch = "x86_64",
+    target_arch = "x86",
+    target_arch = "riscv64",
+    target_arch = "powerpc",
+    target_arch = "powerpc64",
+    target_arch = "s390x"
+)))]
+#[inline]
+pub(crate) fn sched_getcpu() -> usize {
+    let mut cpu = MaybeUninit::<u32>::uninit();
+    unsafe {
+        let r = ret(syscall!(__NR_getcpu, &mut cpu, zero(), zero()));
+        debug_assert!(r.is_ok());
+        cpu.assume_init() as usize
+    }
+}
+
+#[inline]
+pub(crate) fn sched_getaffinity(pid: Option<Pid>, cpuset: &mut RawCpuSet) -> io::Result<()> {
+    unsafe {
+        // The raw Linux syscall returns the size (in bytes) of the `cpumask_t`
+        // data type that is used internally by the kernel to represent the CPU
+        // set bit mask.
+        let size = ret_usize(syscall!(
+            __NR_sched_getaffinity,
+            c_int(Pid::as_raw(pid)),
+            size_of::<RawCpuSet, _>(),
+            by_mut(&mut cpuset.bits)
+        ))?;
+        let bytes = as_mut_ptr(cpuset).cast::<u8>();
+        let rest = bytes.wrapping_add(size);
+        // Zero every byte in the cpuset not set by the kernel.
+        rest.write_bytes(0, core::mem::size_of::<RawCpuSet>() - size);
+        Ok(())
+    }
+}
+
+#[inline]
+pub(crate) fn sched_setaffinity(pid: Option<Pid>, cpuset: &RawCpuSet) -> io::Result<()> {
+    unsafe {
+        ret(syscall_readonly!(
+            __NR_sched_setaffinity,
+            c_int(Pid::as_raw(pid)),
+            size_of::<RawCpuSet, _>(),
+            slice_just_addr(&cpuset.bits)
+        ))
+    }
+}
+
+#[inline]
+pub(crate) fn sched_yield() {
+    unsafe {
+        // See the documentation for [`crate::thread::sched_yield`] for why
+        // errors are ignored.
+        syscall_readonly!(__NR_sched_yield).decode_void();
+    }
+}
+
+#[inline]
+pub(crate) fn membarrier_query() -> MembarrierQuery {
+    unsafe {
+        match ret_c_uint(syscall!(
+            __NR_membarrier,
+            c_int(membarrier_cmd::MEMBARRIER_CMD_QUERY as _),
+            c_uint(0)
+        )) {
+            Ok(query) => MembarrierQuery::from_bits_retain(query),
+            Err(_) => MembarrierQuery::empty(),
+        }
+    }
+}
+
+#[inline]
+pub(crate) fn membarrier(cmd: MembarrierCommand) -> io::Result<()> {
+    unsafe { ret(syscall!(__NR_membarrier, cmd, c_uint(0))) }
+}
+
+#[inline]
+pub(crate) fn membarrier_cpu(cmd: MembarrierCommand, cpu: Cpuid) -> io::Result<()> {
+    unsafe {
+        ret(syscall!(
+            __NR_membarrier,
+            cmd,
+            c_uint(membarrier_cmd_flag::MEMBARRIER_CMD_FLAG_CPU as _),
+            cpu
+        ))
+    }
+}
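
A note for readers of this vendored file: the comment in `futex_val2` above cites the futex(2) man page's observation that the kernel casts the timeout argument "first to unsigned long, then to uint32_t". The following standalone sketch (not rustix API; the helper names are made up) shows that packing `val2` into a pointer-sized value and unpacking it with those same casts round-trips exactly:

fn encode_val2(val2: u32) -> *const u8 {
    // Same shape as `val2 as usize as *const Timespec` above.
    val2 as usize as *const u8
}

fn decode_val2(timeout: *const u8) -> u32 {
    // The kernel's view: pointer -> unsigned long -> uint32_t.
    timeout as usize as u32
}

fn main() {
    let v = 0xDEAD_BEEF_u32;
    assert_eq!(decode_val2(encode_val2(v)), v);
}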
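
Likewise, the tail-zeroing step in `sched_getaffinity` exists because the kernel writes only as many bytes as its internal `cpumask_t` occupies, which can be smaller than `RawCpuSet`. A minimal standalone sketch of that invariant, with made-up buffer sizes:

fn zero_tail(mask: &mut [u8], written: usize) {
    // Clear every byte the kernel did not write, so stale data can't be
    // misread as set CPU bits.
    for b in &mut mask[written..] {
        *b = 0;
    }
}

fn main() {
    let mut mask = [0xFF_u8; 16]; // pretend the buffer held stale data
    let kernel_wrote = 8;         // e.g. the kernel's cpumask_t is 8 bytes
    zero_tail(&mut mask, kernel_wrote);
    assert!(mask[..8].iter().all(|&b| b == 0xFF));
    assert!(mask[8..].iter().all(|&b| b == 0));
}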
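
Finally, these backend functions are not called directly; they back the public `rustix::thread` API. A minimal sketch of how the relative-sleep path surfaces to callers, assuming the crate's `thread` feature is enabled and that `Timespec` is re-exported at `rustix::thread` as the imports above suggest (the duration and retry loop are illustrative only):

use rustix::thread::{nanosleep, NanosleepRelativeResult, Timespec};

fn main() {
    // Request a 10ms relative sleep; on 32-bit targets this goes through
    // `__NR_clock_nanosleep_time64` with the `__kernel_old_timespec`
    // fallback shown above.
    let mut req = Timespec { tv_sec: 0, tv_nsec: 10_000_000 };
    loop {
        match nanosleep(&req) {
            NanosleepRelativeResult::Ok => break,
            // A signal interrupted the sleep; `rem` is the unslept
            // remainder, so retry with it.
            NanosleepRelativeResult::Interrupted(rem) => req = rem,
            NanosleepRelativeResult::Err(err) => panic!("nanosleep: {err:?}"),
        }
    }
}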
