From 5058010e568d7ffd8d5b6d2a3c9dd5232f743f12 Mon Sep 17 00:00:00 2001 From: Matheus Sampaio Queiroga Date: Tue, 8 Jul 2025 17:53:02 -0300 Subject: [PATCH 1/5] proot binding to golang Signed-off-by: Matheus Sampaio Queiroga --- .gitignore | 2 + proot/proot.go | 91 +- proot/proot_linux.go | 31 + proot/proot_linux/arch.h | 179 +++ proot/proot_linux/attribute.h | 32 + proot/proot_linux/build.h | 8 + proot/proot_linux/cli/cli.c | 588 +++++++++ proot/proot_linux/cli/cli.h | 65 + proot/proot_linux/cli/note.c | 97 ++ proot/proot_linux/cli/note.h | 54 + proot/proot_linux/cli/proot.c | 408 +++++++ proot/proot_linux/cli/proot.h | 357 ++++++ proot/proot_linux/compat.h | 262 ++++ proot/proot_linux/execve/aoxp.c | 439 +++++++ proot/proot_linux/execve/aoxp.h | 80 ++ proot/proot_linux/execve/auxv.c | 184 +++ proot/proot_linux/execve/auxv.h | 39 + proot/proot_linux/execve/elf.c | 178 +++ proot/proot_linux/execve/elf.h | 179 +++ proot/proot_linux/execve/enter.c | 685 +++++++++++ proot/proot_linux/execve/execve.h | 64 + proot/proot_linux/execve/exit.c | 479 ++++++++ proot/proot_linux/execve/ldso.c | 571 +++++++++ proot/proot_linux/execve/ldso.h | 42 + proot/proot_linux/execve/shebang.c | 307 +++++ proot/proot_linux/execve/shebang.h | 32 + proot/proot_linux/extension/care/archive.c | 568 +++++++++ proot/proot_linux/extension/care/archive.h | 47 + proot/proot_linux/extension/care/care.c | 604 +++++++++ proot/proot_linux/extension/care/care.h | 80 ++ proot/proot_linux/extension/care/extract.c | 351 ++++++ proot/proot_linux/extension/care/extract.h | 38 + proot/proot_linux/extension/care/final.c | 476 ++++++++ proot/proot_linux/extension/care/final.h | 30 + proot/proot_linux/extension/extension.c | 170 +++ proot/proot_linux/extension/extension.h | 206 ++++ .../extension/extension/care/archive.h | 47 + .../extension/extension/care/care.h | 80 ++ .../extension/extension/care/extract.h | 38 + .../extension/extension/care/final.h | 30 + .../extension/extension/extension.h | 206 ++++ 
.../extension/extension/portmap/portmap.h | 32 + .../extension/extension/python/proot.i | 106 ++ .../extension/python/python_extension.py | 19 + .../proot_linux/extension/fake_id0/fake_id0.c | 907 ++++++++++++++ proot/proot_linux/extension/kompat/kompat.c | 1074 +++++++++++++++++ .../extension/link2symlink/link2symlink.c | 557 +++++++++ proot/proot_linux/extension/portmap/map.c | 117 ++ proot/proot_linux/extension/portmap/portmap.c | 546 +++++++++ proot/proot_linux/extension/portmap/portmap.h | 32 + proot/proot_linux/extension/python/proot.i | 106 ++ proot/proot_linux/extension/python/python.c | 190 +++ .../extension/python/python_extension.py | 19 + proot/proot_linux/loader/assembly-arm.h | 93 ++ proot/proot_linux/loader/assembly-arm64.h | 98 ++ proot/proot_linux/loader/assembly-x86.h | 68 ++ proot/proot_linux/loader/assembly-x86_64.h | 96 ++ proot/proot_linux/loader/assembly.S | 62 + proot/proot_linux/loader/loader | Bin 0 -> 15880 bytes proot/proot_linux/loader/loader-m32 | Bin 0 -> 18544 bytes proot/proot_linux/loader/loader.c | 114 ++ proot/proot_linux/loader/script.h | 78 ++ proot/proot_linux/path/binding.c | 735 +++++++++++ proot/proot_linux/path/binding.h | 58 + proot/proot_linux/path/canon.c | 372 ++++++ proot/proot_linux/path/canon.h | 34 + proot/proot_linux/path/glue.c | 192 +++ proot/proot_linux/path/glue.h | 34 + proot/proot_linux/path/path.c | 739 ++++++++++++ proot/proot_linux/path/path.h | 99 ++ proot/proot_linux/path/proc.c | 195 +++ proot/proot_linux/path/proc.h | 44 + proot/proot_linux/path/temp.c | 393 ++++++ proot/proot_linux/path/temp.h | 34 + proot/proot_linux/proot.go | 128 ++ proot/proot_linux/ptrace/ptrace.c | 670 ++++++++++ proot/proot_linux/ptrace/ptrace.h | 36 + proot/proot_linux/ptrace/user.c | 166 +++ proot/proot_linux/ptrace/user.h | 56 + proot/proot_linux/ptrace/wait.c | 361 ++++++ proot/proot_linux/ptrace/wait.h | 47 + proot/proot_linux/syscall/chain.c | 161 +++ proot/proot_linux/syscall/chain.h | 41 + 
proot/proot_linux/syscall/enter.c | 592 +++++++++ proot/proot_linux/syscall/exit.c | 473 ++++++++ proot/proot_linux/syscall/heap.c | 213 ++++ proot/proot_linux/syscall/heap.h | 31 + proot/proot_linux/syscall/rlimit.c | 117 ++ proot/proot_linux/syscall/rlimit.h | 31 + proot/proot_linux/syscall/seccomp.c | 518 ++++++++ proot/proot_linux/syscall/seccomp.h | 48 + proot/proot_linux/syscall/socket.c | 217 ++++ proot/proot_linux/syscall/socket.h | 32 + proot/proot_linux/syscall/syscall.c | 181 +++ proot/proot_linux/syscall/syscall.h | 38 + proot/proot_linux/syscall/sysnum.c | 161 +++ proot/proot_linux/syscall/sysnum.h | 45 + proot/proot_linux/syscall/sysnums-arm.h | 344 ++++++ proot/proot_linux/syscall/sysnums-arm64.h | 267 ++++ proot/proot_linux/syscall/sysnums-i386.h | 356 ++++++ proot/proot_linux/syscall/sysnums-sh4.h | 347 ++++++ proot/proot_linux/syscall/sysnums-x32.h | 312 +++++ proot/proot_linux/syscall/sysnums-x86_64.h | 323 +++++ proot/proot_linux/syscall/sysnums.list | 433 +++++++ proot/proot_linux/tracee/abi.h | 131 ++ proot/proot_linux/tracee/event.c | 872 +++++++++++++ proot/proot_linux/tracee/event.h | 35 + proot/proot_linux/tracee/mem.c | 548 +++++++++ proot/proot_linux/tracee/mem.h | 112 ++ proot/proot_linux/tracee/reg.c | 342 ++++++ proot/proot_linux/tracee/reg.h | 54 + proot/proot_linux/tracee/tracee.c | 631 ++++++++++ proot/proot_linux/tracee/tracee.h | 291 +++++ proot/proot_test.go | 7 + 114 files changed, 25350 insertions(+), 85 deletions(-) create mode 100644 .gitignore create mode 100644 proot/proot_linux.go create mode 100644 proot/proot_linux/arch.h create mode 100644 proot/proot_linux/attribute.h create mode 100644 proot/proot_linux/build.h create mode 100644 proot/proot_linux/cli/cli.c create mode 100644 proot/proot_linux/cli/cli.h create mode 100644 proot/proot_linux/cli/note.c create mode 100644 proot/proot_linux/cli/note.h create mode 100644 proot/proot_linux/cli/proot.c create mode 100644 proot/proot_linux/cli/proot.h create mode 100644 
proot/proot_linux/compat.h create mode 100644 proot/proot_linux/execve/aoxp.c create mode 100644 proot/proot_linux/execve/aoxp.h create mode 100644 proot/proot_linux/execve/auxv.c create mode 100644 proot/proot_linux/execve/auxv.h create mode 100644 proot/proot_linux/execve/elf.c create mode 100644 proot/proot_linux/execve/elf.h create mode 100644 proot/proot_linux/execve/enter.c create mode 100644 proot/proot_linux/execve/execve.h create mode 100644 proot/proot_linux/execve/exit.c create mode 100644 proot/proot_linux/execve/ldso.c create mode 100644 proot/proot_linux/execve/ldso.h create mode 100644 proot/proot_linux/execve/shebang.c create mode 100644 proot/proot_linux/execve/shebang.h create mode 100644 proot/proot_linux/extension/care/archive.c create mode 100644 proot/proot_linux/extension/care/archive.h create mode 100644 proot/proot_linux/extension/care/care.c create mode 100644 proot/proot_linux/extension/care/care.h create mode 100644 proot/proot_linux/extension/care/extract.c create mode 100644 proot/proot_linux/extension/care/extract.h create mode 100644 proot/proot_linux/extension/care/final.c create mode 100644 proot/proot_linux/extension/care/final.h create mode 100644 proot/proot_linux/extension/extension.c create mode 100644 proot/proot_linux/extension/extension.h create mode 100644 proot/proot_linux/extension/extension/care/archive.h create mode 100644 proot/proot_linux/extension/extension/care/care.h create mode 100644 proot/proot_linux/extension/extension/care/extract.h create mode 100644 proot/proot_linux/extension/extension/care/final.h create mode 100644 proot/proot_linux/extension/extension/extension.h create mode 100644 proot/proot_linux/extension/extension/portmap/portmap.h create mode 100644 proot/proot_linux/extension/extension/python/proot.i create mode 100644 proot/proot_linux/extension/extension/python/python_extension.py create mode 100644 proot/proot_linux/extension/fake_id0/fake_id0.c create mode 100644 
proot/proot_linux/extension/kompat/kompat.c create mode 100644 proot/proot_linux/extension/link2symlink/link2symlink.c create mode 100644 proot/proot_linux/extension/portmap/map.c create mode 100644 proot/proot_linux/extension/portmap/portmap.c create mode 100644 proot/proot_linux/extension/portmap/portmap.h create mode 100644 proot/proot_linux/extension/python/proot.i create mode 100644 proot/proot_linux/extension/python/python.c create mode 100644 proot/proot_linux/extension/python/python_extension.py create mode 100644 proot/proot_linux/loader/assembly-arm.h create mode 100644 proot/proot_linux/loader/assembly-arm64.h create mode 100644 proot/proot_linux/loader/assembly-x86.h create mode 100644 proot/proot_linux/loader/assembly-x86_64.h create mode 100644 proot/proot_linux/loader/assembly.S create mode 100755 proot/proot_linux/loader/loader create mode 100755 proot/proot_linux/loader/loader-m32 create mode 100644 proot/proot_linux/loader/loader.c create mode 100644 proot/proot_linux/loader/script.h create mode 100644 proot/proot_linux/path/binding.c create mode 100644 proot/proot_linux/path/binding.h create mode 100644 proot/proot_linux/path/canon.c create mode 100644 proot/proot_linux/path/canon.h create mode 100644 proot/proot_linux/path/glue.c create mode 100644 proot/proot_linux/path/glue.h create mode 100644 proot/proot_linux/path/path.c create mode 100644 proot/proot_linux/path/path.h create mode 100644 proot/proot_linux/path/proc.c create mode 100644 proot/proot_linux/path/proc.h create mode 100644 proot/proot_linux/path/temp.c create mode 100644 proot/proot_linux/path/temp.h create mode 100644 proot/proot_linux/proot.go create mode 100644 proot/proot_linux/ptrace/ptrace.c create mode 100644 proot/proot_linux/ptrace/ptrace.h create mode 100644 proot/proot_linux/ptrace/user.c create mode 100644 proot/proot_linux/ptrace/user.h create mode 100644 proot/proot_linux/ptrace/wait.c create mode 100644 proot/proot_linux/ptrace/wait.h create mode 100644 
proot/proot_linux/syscall/chain.c create mode 100644 proot/proot_linux/syscall/chain.h create mode 100644 proot/proot_linux/syscall/enter.c create mode 100644 proot/proot_linux/syscall/exit.c create mode 100644 proot/proot_linux/syscall/heap.c create mode 100644 proot/proot_linux/syscall/heap.h create mode 100644 proot/proot_linux/syscall/rlimit.c create mode 100644 proot/proot_linux/syscall/rlimit.h create mode 100644 proot/proot_linux/syscall/seccomp.c create mode 100644 proot/proot_linux/syscall/seccomp.h create mode 100644 proot/proot_linux/syscall/socket.c create mode 100644 proot/proot_linux/syscall/socket.h create mode 100644 proot/proot_linux/syscall/syscall.c create mode 100644 proot/proot_linux/syscall/syscall.h create mode 100644 proot/proot_linux/syscall/sysnum.c create mode 100644 proot/proot_linux/syscall/sysnum.h create mode 100644 proot/proot_linux/syscall/sysnums-arm.h create mode 100644 proot/proot_linux/syscall/sysnums-arm64.h create mode 100644 proot/proot_linux/syscall/sysnums-i386.h create mode 100644 proot/proot_linux/syscall/sysnums-sh4.h create mode 100644 proot/proot_linux/syscall/sysnums-x32.h create mode 100644 proot/proot_linux/syscall/sysnums-x86_64.h create mode 100644 proot/proot_linux/syscall/sysnums.list create mode 100644 proot/proot_linux/tracee/abi.h create mode 100644 proot/proot_linux/tracee/event.c create mode 100644 proot/proot_linux/tracee/event.h create mode 100644 proot/proot_linux/tracee/mem.c create mode 100644 proot/proot_linux/tracee/mem.h create mode 100644 proot/proot_linux/tracee/reg.c create mode 100644 proot/proot_linux/tracee/reg.h create mode 100644 proot/proot_linux/tracee/tracee.c create mode 100644 proot/proot_linux/tracee/tracee.h create mode 100644 proot/proot_test.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2edb906 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +*.d \ No newline at end of file diff --git a/proot/proot.go b/proot/proot.go index 17038c0..195912a 100644 
--- a/proot/proot.go +++ b/proot/proot.go @@ -1,90 +1,11 @@ +// Exec proot with rootfs without root account package proot -import ( - "errors" - "fmt" - "net/netip" - "os" - "path/filepath" - "runtime" +import "os" - "sirherobrine23.com.br/go-bds/exec/exec" - "sirherobrine23.com.br/go-bds/exec/host" -) - -var ( - _ exec.Proc = &Proot{} - - ErrNoExtractUbuntu error = errors.New("cannot extract Ubuntu base image") -) - -// Mount rootfs and run command insider in proot -// -// if network not resolve names add nameserver to /etc/resolv.conf (`(echo 'nameserver 1.1.1.1'; echo 'nameserver 8.8.8.8') > /etc/resolv.conf`) type Proot struct { - Rootfs string // Rootfs to mount to run proot - Qemu string // Execute guest programs through QEMU, exp: "qemu-x86_64" or "qemu-x86_64-static" - GID, UID uint // User and Group ID, default is root - Binds map[string][]string // Bind mount directories, example: "/dev": {"/dev", "/root/dev"} => /dev -> /root/dev and /dev - *host.Os // Extends from Os struct -} + Rootfs string + Qemu string -// Append dns server to /etc/resolv.conf -// -// Example: Proot.Proot(netip.MustParseAddr("8.8.8.8"), netip.MustParseAddr("1.1.1.1")) -func (pr Proot) AddNameservers(aadrs ...netip.Addr) error { - file, err := os.OpenFile(filepath.Join(pr.Rootfs, "etc/resolv.conf"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return err - } - defer file.Close() - fmt.Fprint(file, "\n") - for _, addr := range aadrs { - if _, err := fmt.Fprintf(file, "nameserver %s\n", addr.String()); err != nil { - return err - } - } - return nil -} - -// Mount proot and Execute process -func (pr *Proot) Start(options *exec.Exec) error { - prootExec := &exec.Exec{ - Environment: options.Environment, - Context: options.Context, - - // proot -r ./rootfs -q qemu-x86_64 -0 -w / -b /dev -b /proc -b /sys - Arguments: []string{ - "proot", - "-r", pr.Rootfs, - "-b", "/dev", - "-b", "/proc", - "-b", "/sys", - }, - } - for src, dsts := range pr.Binds { - for _, dst := range 
dsts { - prootExec.Arguments = append(prootExec.Arguments, "-b", fmt.Sprintf("%s:%s", src, dst)) - } - } - if pr.GID != 0 || pr.UID != 0 { - prootExec.Arguments = append(prootExec.Arguments, "-i", fmt.Sprintf("%d:%d", pr.UID, pr.GID)) - } else { - prootExec.Arguments = append(prootExec.Arguments, "-0") - } - if pr.Qemu != "" { - prootExec.Arguments = append(prootExec.Arguments, "-q", pr.Qemu) - } - if options.Cwd != "" { - prootExec.Arguments = append(prootExec.Arguments, "-w", options.Cwd) - } - prootExec.Arguments = append(prootExec.Arguments, options.Arguments...) - if runtime.GOOS == "android" { - if prootExec.Environment == nil { - prootExec.Environment = exec.Env{} - } - prootExec.Environment["LD_PRELOAD"] = "" // Remove to termux - } - - return pr.Os.Start(prootExec) -} + Stdin, Stdout, Stderr *os.File +} \ No newline at end of file diff --git a/proot/proot_linux.go b/proot/proot_linux.go new file mode 100644 index 0000000..0171dd9 --- /dev/null +++ b/proot/proot_linux.go @@ -0,0 +1,31 @@ +//go:build cgo && (linux || android) + +package proot + +import ( + "os/exec" + "syscall" + + prootsrc "sirherobrine23.com.br/go-bds/exec/proot/proot_linux" +) + +func NewProc() (any, error) { + ptr, err := prootsrc.NewCli() + if err != nil { + return nil, err + } + + cmd := exec.Command("go", "version") + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Ptrace = true + cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER + + cmd.Start() + ptr.SetPID(cmd.Process.Pid) + + ptr.LoopEvent() + + return nil, nil +} diff --git a/proot/proot_linux/arch.h b/proot/proot_linux/arch.h new file mode 100644 index 0000000..9218f69 --- /dev/null +++ b/proot/proot_linux/arch.h @@ -0,0 +1,179 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef ARCH_H +#define ARCH_H + +#ifndef NO_LIBC_HEADER +#include /* linux.git:c0a3a20b */ +#include /* AUDIT_ARCH_*, */ +#endif + +typedef unsigned long word_t; +typedef unsigned char byte_t; + +#define SYSCALL_AVOIDER ((word_t) -1) +#define SYSTRAP_NUM SYSARG_NUM +#define STACK_ALIGNMENT 16 + +#define OFFSETOF_STATX_UID 20 +#define OFFSETOF_STATX_GID 24 + +#if !defined(ARCH_X86_64) && !defined(ARCH_ARM_EABI) && !defined(ARCH_X86) && !defined(ARCH_SH4) +# if defined(__x86_64__) +# define ARCH_X86_64 1 +# elif defined(__ARM_EABI__) +# define ARCH_ARM_EABI 1 +# elif defined(__aarch64__) +# define ARCH_ARM64 1 +# elif defined(__arm__) +# error "Only EABI is currently supported for ARM" +# elif defined(__i386__) +# define ARCH_X86 1 +# elif defined(__SH4__) +# define ARCH_SH4 1 +# else +# error "Unsupported architecture" +# endif +#endif + +/* Architecture specific definitions. 
*/ +#if defined(ARCH_X86_64) + + #define SYSNUMS_HEADER1 "syscall/sysnums-x86_64.h" + #define SYSNUMS_HEADER2 "syscall/sysnums-i386.h" + #define SYSNUMS_HEADER3 "syscall/sysnums-x32.h" + + #define SYSNUMS_ABI1 sysnums_x86_64 + #define SYSNUMS_ABI2 sysnums_i386 + #define SYSNUMS_ABI3 sysnums_x32 + + #undef SYSTRAP_NUM + #define SYSTRAP_NUM SYSARG_RESULT + #define SYSTRAP_SIZE 2 + + #define SECCOMP_ARCHS { \ + { .value = AUDIT_ARCH_X86_64, .nb_abis = 2, .abis = { ABI_DEFAULT, ABI_3 } }, \ + { .value = AUDIT_ARCH_I386, .nb_abis = 1, .abis = { ABI_2 } }, \ + } + + #define HOST_ELF_MACHINE {62, 3, 6, 0} + #define RED_ZONE_SIZE 128 + #define OFFSETOF_STAT_UID_32 24 + #define OFFSETOF_STAT_GID_32 28 + + #define LOADER_ADDRESS 0x600000000000 + #define HAS_LOADER_32BIT true + + #define EXEC_PIC_ADDRESS 0x500000000000 + #define INTERP_PIC_ADDRESS 0x6f0000000000 + #define EXEC_PIC_ADDRESS_32 0x0f000000 + #define INTERP_PIC_ADDRESS_32 0xaf000000 + +#elif defined(ARCH_ARM_EABI) + + #define SYSNUMS_HEADER1 "syscall/sysnums-arm.h" + #define SYSNUMS_ABI1 sysnums_arm + + #define SYSTRAP_SIZE 4 + + #define SECCOMP_ARCHS { { .value = AUDIT_ARCH_ARM, .nb_abis = 1, .abis = { ABI_DEFAULT } } } + + #define user_regs_struct user_regs + #define HOST_ELF_MACHINE {40, 0}; + #define RED_ZONE_SIZE 0 + #define OFFSETOF_STAT_UID_32 0 + #define OFFSETOF_STAT_GID_32 0 + #define EM_ARM 40 + + #define LOADER_ADDRESS 0x10000000 + + #define EXEC_PIC_ADDRESS 0x0f000000 + #define INTERP_PIC_ADDRESS 0x1f000000 + +#elif defined(ARCH_ARM64) + + #define SYSNUMS_HEADER1 "syscall/sysnums-arm64.h" + #define SYSNUMS_ABI1 sysnums_arm64 + + #define SYSTRAP_SIZE 4 + + #ifndef AUDIT_ARCH_AARCH64 + #define AUDIT_ARCH_AARCH64 (EM_AARCH64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE) + #endif + + #define SECCOMP_ARCHS { { .value = AUDIT_ARCH_AARCH64, .nb_abis = 1, .abis = { ABI_DEFAULT } } } + + #define HOST_ELF_MACHINE {183, 0}; + #define RED_ZONE_SIZE 0 + #define OFFSETOF_STAT_UID_32 0 + #define OFFSETOF_STAT_GID_32 0 + + 
#define LOADER_ADDRESS 0x2000000000 + #define EXEC_PIC_ADDRESS 0x3000000000 + #define INTERP_PIC_ADDRESS 0x3f00000000 + +#elif defined(ARCH_X86) + + #define SYSNUMS_HEADER1 "syscall/sysnums-i386.h" + #define SYSNUMS_ABI1 sysnums_i386 + + #undef SYSTRAP_NUM + #define SYSTRAP_NUM SYSARG_RESULT + #define SYSTRAP_SIZE 2 + + #define SECCOMP_ARCHS { { .value = AUDIT_ARCH_I386, .nb_abis = 1, .abis = { ABI_DEFAULT } } } + + #define HOST_ELF_MACHINE {3, 6, 0}; + #define RED_ZONE_SIZE 0 + #define OFFSETOF_STAT_UID_32 0 + #define OFFSETOF_STAT_GID_32 0 + + #define LOADER_ADDRESS 0xa0000000 + #define LOADER_ARCH_CFLAGS -mregparm=3 + + #define EXEC_PIC_ADDRESS 0x0f000000 + #define INTERP_PIC_ADDRESS 0xaf000000 + +#elif defined(ARCH_SH4) + + #define SYSNUMS_HEADER1 "syscall/sysnums-sh4.h" + #define SYSNUMS_ABI1 sysnums_sh4 + + #define SYSTRAP_SIZE 2 + + #define SECCOMP_ARCHS { } + + #define user_regs_struct pt_regs + #define HOST_ELF_MACHINE {42, 0}; + #define RED_ZONE_SIZE 0 + #define OFFSETOF_STAT_UID_32 0 + #define OFFSETOF_STAT_GID_32 0 + #define NO_MISALIGNED_ACCESS 1 + +#else + + #error "Unsupported architecture" + +#endif + +#endif /* ARCH_H */ diff --git a/proot/proot_linux/attribute.h b/proot/proot_linux/attribute.h new file mode 100644 index 0000000..f772954 --- /dev/null +++ b/proot/proot_linux/attribute.h @@ -0,0 +1,32 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef ATTRIBUTE_H +#define ATTRIBUTE_H + +#define UNUSED __attribute__((unused)) +#define FORMAT(a, b, c) __attribute__ ((format (a, b, c))) +#define DONT_INSTRUMENT __attribute__((no_instrument_function)) +#define PACKED __attribute__((packed)) +#define WEAK __attribute__((weak)) + +#endif /* ATTRIBUTE_H */ diff --git a/proot/proot_linux/build.h b/proot/proot_linux/build.h new file mode 100644 index 0000000..5cf8056 --- /dev/null +++ b/proot/proot_linux/build.h @@ -0,0 +1,8 @@ +/* This file is auto-generated, edit at your own risk. */ +#ifndef BUILD_H +#define BUILD_H +#undef VERSION +#define VERSION "v5.4.0-60485d26" +// #define HAVE_PROCESS_VM +// #define HAVE_SECCOMP_FILTER +#endif /* BUILD_H */ diff --git a/proot/proot_linux/cli/cli.c b/proot/proot_linux/cli/cli.c new file mode 100644 index 0000000..9385047 --- /dev/null +++ b/proot/proot_linux/cli/cli.c @@ -0,0 +1,588 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* printf(3), */ +#include /* bool, true, false, */ +#include /* ARG_MAX, PATH_MAX, */ +#include /* str*(3), basename(3), */ +#include /* talloc*, */ +#include /* exit(3), EXIT_*, strtol(3), {g,s}etenv(3), */ +#include /* assert(3), */ +#include /* getpid(2), */ +#include /* getpid(2), */ +#include /* errno(3), */ +#include /* INT_MAX, */ + +/* execinfo.h is GNU extension, disable it not using glibc */ +#if defined(__GLIBC__) +#include /* backtrace_symbols(3), */ +#endif + +#include "cli/cli.h" +#include "cli/note.h" +#include "extension/care/extract.h" +#include "extension/extension.h" +#include "tracee/tracee.h" +#include "tracee/event.h" +#include "path/binding.h" +#include "path/canon.h" +#include "path/path.h" + +#include "build.h" + +/** + * Print a (@detailed) usage of PRoot. + */ +void print_usage(Tracee *tracee, const Cli *cli, bool detailed) +{ + const char *current_class = "none"; + const Option *options; + size_t i, j; + +#define DETAIL(a) if (detailed) a + + DETAIL(printf("%s %s: %s.\n\n", cli->name, cli->version, cli->subtitle)); + printf("Usage:\n %s\n", cli->synopsis); + DETAIL(printf("\n")); + + options = cli->options; + for (i = 0; options[i].class != NULL; i++) { + for (j = 0; ; j++) { + const Argument *argument = &(options[i].arguments[j]); + + if (!argument->name || (!detailed && j != 0)) { + DETAIL(printf("\n")); + printf("\t%s\n", options[i].description); + if (detailed) { + if (options[i].detail[0] != '\0') + printf("\n%s\n\n", options[i].detail); + else + printf("\n"); + } + break; + } + + if (strcmp(options[i].class, current_class) != 0) { + current_class = options[i].class; + printf("\n%s:\n", current_class); + } + + if (j == 0) + printf(" %s", argument->name); + else + printf(", %s", argument->name); + + if (argument->separator != '\0') + printf("%c*%s*", argument->separator, argument->value); + else if (!detailed) + printf("\t"); + } + } + + notify_extensions(tracee, PRINT_USAGE, detailed, 0); + + if (detailed) + 
printf("%s\n", cli->colophon); +} + +/** + * Print the version of PRoot. + */ +void print_version(const Cli *cli) +{ + printf("%s %s\n\n", cli->logo, cli->version); + printf("built-in accelerators: process_vm = %s, seccomp_filter = %s\n", +#if defined(HAVE_PROCESS_VM) + "yes", +#else + "no", +#endif +#if defined(HAVE_SECCOMP_FILTER) + "yes" +#else + "no" +#endif + ); +} + +static void print_execve_help(const Tracee *tracee, const char *argv0, int status) +{ + note(tracee, ERROR, SYSTEM, "execve(\"%s\")", argv0); + + /* Ubuntu kernel bug? */ + if (status == -EPERM && getenv("PROOT_NO_SECCOMP") == NULL) { + note(tracee, INFO, USER, +"It seems your kernel contains this bug: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1202161\n" +"To workaround it, set the env. variable PROOT_NO_SECCOMP to 1."); + return; + } + + note(tracee, INFO, USER, "possible causes:\n" +" * the program is a script but its interpreter (eg. /bin/sh) was not found;\n" +" * the program is an ELF but its interpreter (eg. 
ld-linux.so) was not found;\n" +" * the program is a foreign binary but qemu was not specified;\n" +" * qemu does not work correctly (if specified);\n" +" * the loader was not found or doesn't work."); +} + +static void print_error_separator(const Tracee *tracee, const Argument *argument) +{ + if (argument->separator == '\0') + note(tracee, ERROR, USER, "option '%s' expects no value.", argument->name); + else + note(tracee, ERROR, USER, "option '%s' and its value must be separated by '%c'.", + argument->name, argument->separator); +} + +static void print_argv(const Tracee *tracee, const char *prompt, char *const argv[]) +{ + char string[ARG_MAX] = ""; + size_t i; + + if (!argv) + return; + +#define APPEND(post) \ + do { \ + ssize_t length = sizeof(string) - (strlen(string) + strlen(post)); \ + if (length <= 0) \ + return; \ + strncat(string, post, length); \ + } while (0) + + APPEND(prompt); + APPEND(" ="); + for (i = 0; argv[i] != NULL; i++) { + APPEND(" "); + APPEND(argv[i]); + } + string[sizeof(string) - 1] = '\0'; + +#undef APPEND + + note(tracee, INFO, USER, "%s", string); +} + +static void print_config(Tracee *tracee, char *const argv[]) +{ + assert(tracee != NULL); + + if (tracee->verbose <= 0) + return; + + if (tracee->qemu) + note(tracee, INFO, USER, "host rootfs = %s", HOST_ROOTFS); + + if (tracee->glue) + note(tracee, INFO, USER, "glue rootfs = %s", tracee->glue); + + note(tracee, INFO, USER, "exe = %s", tracee->exe); + print_argv(tracee, "argv", argv); + print_argv(tracee, "qemu", tracee->qemu); + note(tracee, INFO, USER, "initial cwd = %s", tracee->fs->cwd); + note(tracee, INFO, USER, "verbose level = %d", tracee->verbose); + + notify_extensions(tracee, PRINT_CONFIG, 0, 0); +} + +/** + * Initialize @tracee's current working directory. This function + * returns -1 if an error occurred, otherwise 0. + */ +static int initialize_cwd(Tracee *tracee) +{ + char path2[PATH_MAX]; + char path[PATH_MAX]; + int status; + + /* Compute the base directory. 
*/ + if (tracee->fs->cwd[0] != '/') { + status = getcwd2(tracee->reconf.tracee, path); + if (status < 0) { + note(tracee, ERROR, INTERNAL, "getcwd: %s", strerror(-status)); + return -1; + } + } + else + strcpy(path, "/"); + + /* The ending "." ensures canonicalize() will report an error + * if tracee->fs->cwd does not exist or if it is not a + * directory. */ + status = join_paths(3, path2, path, tracee->fs->cwd, "."); + if (status < 0) { + note(tracee, ERROR, INTERNAL, "getcwd: %s", strerror(-status)); + return -1; + } + + /* Initiale state for canonicalization. */ + strcpy(path, "/"); + + status = canonicalize(tracee, path2, true, path, 0); + if (status < 0) { + note(tracee, WARNING, USER, "can't chdir(\"%s\") in the guest rootfs: %s", + path2, strerror(-status)); + note(tracee, INFO, USER, "default working directory is now \"/\""); + strcpy(path, "/"); + } + chop_finality(path); + + /* Replace with the canonicalized working directory. */ + TALLOC_FREE(tracee->fs->cwd); + tracee->fs->cwd = talloc_strdup(tracee->fs, path); + if (tracee->fs->cwd == NULL) + return -1; + talloc_set_name_const(tracee->fs->cwd, "$cwd"); + + /* Keep this special environment variable consistent. */ + setenv("PWD", path, 1); + + return 0; +} + +/** + * Initialize @tracee->exe from @exe, i.e. canonicalize it from a + * guest point-of-view. + */ +static int initialize_exe(Tracee *tracee, const char *exe) +{ + char path[PATH_MAX]; + int status; + + status = which(tracee, tracee->reconf.paths, path, exe ?: "/bin/sh"); + if (status < 0) + return -1; + + status = detranslate_path(tracee, path, NULL); + if (status < 0) + return -1; + + tracee->exe = talloc_strdup(tracee, path); + if (tracee->exe == NULL) + return -1; + talloc_set_name_const(tracee->exe, "$exe"); + + return 0; +} + +/** + * Configure @tracee according to the command-line arguments stored in + * @argv[]. This function returns the index in @argv[] of the command + * to launch, otherwise -1 if an error occured. 
+ *
+ * The tool (CARE or PRoot) is selected first, then every leading
+ * argument that starts with '-' is matched against the aliases of
+ * each option of the selected CLI and dispatched to its handler.
+ * Parsing stops at the first argument that does not start with '-'.
+ * Once the options are consumed, the bindings, the working directory
+ * and the executable are initialized in that order, each step being
+ * surrounded by the optional pre/post hooks of the selected CLI.
+ */
+static int parse_config(Tracee *tracee, size_t argc, char *const argv[])
+{
+	/* Handler waiting for its value in the next argv[] entry, NULL
+	 * when no value is pending.  */
+	option_handler_t handler = NULL;
+	const Option *options;
+	const Cli *cli = NULL;
+	size_t argc_offset;
+	size_t i, j, k;
+	int status;
+
+	/* get_care_cli is declared WEAK in cli/cli.h: presumably it is
+	 * NULL when the CARE front-end is not linked in -- TODO
+	 * confirm against attribute.h.  */
+	if (get_care_cli != NULL) {
+		/* Check if it's a self-extracting CARE archive.  */
+		status = extract_archive_from_file("/proc/self/exe");
+		if (status == 0) {
+			/* Yes it is, nothing more to do.  Clearing
+			 * exit_failure makes NO_main() exit with
+			 * EXIT_SUCCESS despite the -1 below.  */
+			exit_failure = 0;
+			return -1;
+		}
+
+		/* Check if it's a valid CARE tool name.  */
+		if (strncasecmp(basename(argv[0]), "care", strlen("care")) == 0)
+			cli = get_care_cli(tracee->ctx);
+	}
+
+	/* Unknown tool name?  Default to PRoot.  */
+	if (cli == NULL)
+		cli = get_proot_cli(tracee->ctx);
+	tracee->tool_name = cli->name;
+
+	if (argc == 1) {
+		print_usage(tracee, cli, false);
+		return -1;
+	}
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+
+		/* The current argument is the value of a short option.  */
+		if (handler != NULL) {
+			status = handler(tracee, cli, arg);
+			if (status < 0)
+				return -1;
+			handler = NULL;
+			continue;
+		}
+
+		if (arg[0] != '-')
+			break; /* End of PRoot options. */
+
+		/* The option table ends with an entry whose class is
+		 * NULL.  */
+		options = cli->options;
+		for (j = 0; options[j].class != NULL; j++) {
+			const Option *option = &options[j];
+
+			/* A given option has several aliases.  */
+			for (k = 0; ; k++) {
+				const Argument *argument;
+				size_t length;
+
+				argument = &option->arguments[k];
+
+				/* End of aliases for this option.  */
+				if (!argument->name)
+					break;
+
+				/* Prefix match only: what follows the
+				 * alias is validated below.  */
+				length = strlen(argument->name);
+				if (strncmp(arg, argument->name, length) != 0)
+					continue;
+
+				/* Avoid ambiguities.  */
+				if (strlen(arg) > length
+				    && arg[length] != argument->separator) {
+					print_error_separator(tracee, argument);
+					return -1;
+				}
+
+				/* No option value.  */
+				if (!argument->value) {
+					status = option->handler(tracee, cli, NULL);
+					if (status < 0)
+						return -1;
+					goto known_option;
+				}
+
+				/* Value coalesced with its option.  */
+				if (argument->separator == arg[length]) {
+					assert(strlen(arg) >= length);
+					status = option->handler(tracee, cli, &arg[length + 1]);
+					if (status < 0)
+						return -1;
+					goto known_option;
+				}
+
+				/* Avoid ambiguities.  */
+				if (argument->separator != ' ') {
+					print_error_separator(tracee, argument);
+					return -1;
+				}
+
+				/* Short option with a separated value.  */
+				handler = option->handler;
+				goto known_option;
+			}
+		}
+
+		note(tracee, ERROR, USER, "unknown option '%s'.", arg);
+		return -1;
+
+	known_option:
+		/* A pending handler needs one more argument.  */
+		if (handler != NULL && i == argc - 1) {
+			note(tracee, ERROR, USER, "missing value for option '%s'.", arg);
+			return -1;
+		}
+	}
+	argc_offset = i;
+
+/* Run the optional @callback hook of the selected CLI.  A hook returns
+ * a negative value on error, otherwise the cursor that is stored back
+ * into i and handed to the next hook.  argc_offset was sampled before
+ * the hooks, hence it is not affected by them.  */
+#define HOOK_CONFIG(callback)						\
+do {									\
+	if (cli->callback != NULL) {					\
+		status = cli->callback(tracee, cli, argc, argv, i);	\
+		if (status < 0)						\
+			return -1;					\
+		i = status;						\
+	}								\
+} while (0)
+
+	HOOK_CONFIG(pre_initialize_bindings);
+
+	/* The guest rootfs is now known: bindings specified by the
+	 * user (tracee->bindings.user) can be canonicalized.  */
+	status = initialize_bindings(tracee);
+	if (status < 0)
+		return -1;
+
+	HOOK_CONFIG(post_initialize_bindings);
+	HOOK_CONFIG(pre_initialize_cwd);
+
+	/* Bindings are now installed (tracee->bindings.guest &
+	 * tracee->bindings.host): the current working directory can
+	 * be canonicalized.  */
+	status = initialize_cwd(tracee);
+	if (status < 0)
+		return -1;
+
+	HOOK_CONFIG(post_initialize_cwd);
+	HOOK_CONFIG(pre_initialize_exe);
+
+	/* Bindings are now installed and the current working
+	 * directory is canonicalized: resolve path to @tracee->exe
+	 * and configure @tracee->cmdline.  */
+	/* NOTE(review): when no command follows the options,
+	 * argc_offset == argc and argv[argc] is read here; this
+	 * relies on @argv[] being NULL-terminated (initialize_exe()
+	 * then falls back to "/bin/sh") -- confirm that every caller
+	 * of NO_main() guarantees it.  */
+	status = initialize_exe(tracee, argv[argc_offset]);
+	if (status < 0)
+		return -1;
+
+	HOOK_CONFIG(post_initialize_exe);
+#undef HOOK_CONFIG
+
+	print_config(tracee, &argv[argc_offset]);
+
+	return argc_offset;
+}
+
+/* Exit status selector used by NO_main() when it bails out: true
+ * means EXIT_FAILURE (with a "fatal error" message), false means
+ * EXIT_SUCCESS.  */
+bool exit_failure = true;
+
+/* Command-line entry point: pre-create and configure the first
+ * tracee from @argv[], launch it, then exit the process with the
+ * status of event_loop().  This function does not return.  */
+int NO_main(int argc, char *const argv[])
+{
+	Tracee *tracee;
+	int status;
+
+	/* Configure the memory allocator.
*/ + talloc_enable_leak_report(); + +#if defined(TALLOC_VERSION_MAJOR) && TALLOC_VERSION_MAJOR >= 2 + talloc_set_log_stderr(); +#endif + + /* Pre-create the first tracee (pid == 0). */ + tracee = get_tracee(NULL, 0, true); + if (tracee == NULL) + goto error; + tracee->pid = getpid(); + + /* Pre-configure the first tracee. */ + status = parse_config(tracee, argc, argv); + if (status < 0) + goto error; + + /* Start the first tracee. */ + status = launch_process(tracee, &argv[status]); + if (status < 0) { + print_execve_help(tracee, tracee->exe, status); + goto error; + } + + /* Start tracing the first tracee and all its children. */ + exit(event_loop()); + +error: + TALLOC_FREE(tracee); + + if (exit_failure) { + fprintf(stderr, "fatal error: see `%s --help`.\n", basename(argv[0])); + exit(EXIT_FAILURE); + } + else + exit(EXIT_SUCCESS); +} + +/** + * Convert @value into an integer, then put the result into + * *@variable. This function prints a warning and returns -1 if a + * conversion error occured, otherwise it returns 0. + */ +int parse_integer_option(const Tracee *tracee, int *variable, const char *value, const char *option) +{ + char *end_ptr = NULL; + + errno = 0; + *variable = strtol(value, &end_ptr, 10); + if (errno != 0 || end_ptr == value) { + note(tracee, ERROR, USER, "option `%s` expects an integer value.", option); + return -1; + } + + return 0; +} + +/** + * Expand the environment variable in front of @string, if any. For + * example, this function can expand "$HOME" or "$HOME/.ICEauthority". 
+ */ +const char *expand_front_variable(TALLOC_CTX *context, const char *string) +{ + const char *suffix; + char *expanded; + ptrdiff_t size; + + if (string[0] != '$') + return string; + + suffix = strchr(string, '/'); + if (suffix == NULL) + return (getenv(&string[1]) ?: string); + + size = suffix - string; + if (size <= 1) + return string; + + expanded = talloc_strndup(context, &string[1], size - 1); + if (expanded == NULL) + return string; + + expanded = getenv(expanded); + if (expanded == NULL) + return string; + + expanded = talloc_asprintf(context, "%s%s", expanded, suffix); + if (expanded == NULL) + return string; + + return expanded; +} + +/* Here follows the support for GCC function instrumentation. Build + * with CFLAGS='-finstrument-functions -O0 -g' and LDFLAGS='-rdynamic' + * to enable this mechanism. */ + +/* since we rely on GLIBC extensions, disable all of this code if + * __GLIBC__ is not defined */ +#if defined(__GLIBC__) +static int indent_level = 0; + +void __cyg_profile_func_enter(void *this_function, void *call_site) DONT_INSTRUMENT; +void __cyg_profile_func_enter(void *this_function, void *call_site) +{ + void *const pointers[] = { this_function, call_site }; + char **symbols = NULL; + + symbols = backtrace_symbols(pointers, 2); + if (symbols == NULL) + goto end; + + fprintf(stderr, "%*s from %s\n", (int) strlen(symbols[0]) + indent_level, symbols[0], symbols[1]); + +end: + if (symbols != NULL) + free(symbols); + + if (indent_level < INT_MAX) + indent_level++; +} + +void __cyg_profile_func_exit(void *this_function UNUSED, void *call_site UNUSED) DONT_INSTRUMENT; +void __cyg_profile_func_exit(void *this_function UNUSED, void *call_site UNUSED) +{ + if (indent_level > 0) + indent_level--; +} +#endif diff --git a/proot/proot_linux/cli/cli.h b/proot/proot_linux/cli/cli.h new file mode 100644 index 0000000..67bd92b --- /dev/null +++ b/proot/proot_linux/cli/cli.h @@ -0,0 +1,65 @@ +/* This file is automatically generated from the documentation. 
EDIT AT YOUR OWN RISK. */ + +#ifndef CLI_H +#define CLI_H + +#include +#include "tracee/tracee.h" +#include "attribute.h" + +typedef struct { + const char *name; + char separator; + const char *value; +} Argument; + +struct Cli; +typedef int (*option_handler_t)(Tracee *tracee, const struct Cli *cli, const char *value); + +typedef struct { + const char *class; + option_handler_t handler; + const char *description; + const char *detail; + Argument arguments[5]; +} Option; + +#define END_OF_OPTIONS { .class = NULL, \ + .arguments = {{ .name = NULL, .separator = '\0', .value = NULL }}, \ + .handler = NULL, \ + .description = NULL, \ + .detail = NULL \ + } + +typedef int (*initialization_hook_t)(Tracee *tracee, const struct Cli *cli, + size_t argc, char *const argv[], size_t cursor); +typedef struct Cli { + const char *name; + const char *version; + const char *subtitle; + const char *synopsis; + const char *colophon; + const char *logo; + + initialization_hook_t pre_initialize_bindings; + initialization_hook_t post_initialize_bindings; + initialization_hook_t pre_initialize_cwd; + initialization_hook_t post_initialize_cwd; + initialization_hook_t pre_initialize_exe; + initialization_hook_t post_initialize_exe; + void *private; + + const Option options[]; +} Cli; + +extern const Cli *get_proot_cli(TALLOC_CTX *context); +extern const Cli * WEAK get_care_cli(TALLOC_CTX *context); + +extern void print_usage(Tracee *tracee, const Cli *cli, bool detailed); +extern void print_version(const Cli *cli); +extern int parse_integer_option(const Tracee *tracee, int *variable, const char *value, const char *option); +extern const char *expand_front_variable(TALLOC_CTX *context, const char *string); + +extern bool exit_failure; + +#endif /* CLI_H */ diff --git a/proot/proot_linux/cli/note.c b/proot/proot_linux/cli/note.c new file mode 100644 index 0000000..a48636d --- /dev/null +++ b/proot/proot_linux/cli/note.c @@ -0,0 +1,97 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + 
* This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* errno, */ +#include /* strerror(3), */ +#include /* va_*, */ +#include /* vfprintf(3), */ +#include /* INT_MAX, */ + +#include "cli/note.h" +#include "tracee/tracee.h" + +int global_verbose_level; +const char *global_tool_name; + +/** + * Print @message to the standard error stream according to its + * @severity and @origin. + */ +void note(const Tracee *tracee, Severity severity, Origin origin, const char *message, ...) 
+{ + const char *tool_name; + va_list extra_params; + int verbose_level; + + if (tracee == NULL) { + verbose_level = global_verbose_level; + tool_name = global_tool_name ?: ""; + } + else { + verbose_level = tracee->verbose; + tool_name = tracee->tool_name; + } + + if (verbose_level < 0 && severity != ERROR) + return; + + switch (severity) { + case WARNING: + fprintf(stderr, "%s warning: ", tool_name); + break; + + case ERROR: + fprintf(stderr, "%s error: ", tool_name); + break; + + case INFO: + default: + fprintf(stderr, "%s info: ", tool_name); + break; + } + + if (origin == TALLOC) + fprintf(stderr, "talloc: "); + + va_start(extra_params, message); + vfprintf(stderr, message, extra_params); + va_end(extra_params); + + switch (origin) { + case SYSTEM: + fprintf(stderr, ": "); + perror(NULL); + break; + + case TALLOC: + break; + + case INTERNAL: + case USER: + default: + fprintf(stderr, "\n"); + break; + } + + return; +} + diff --git a/proot/proot_linux/cli/note.h b/proot/proot_linux/cli/note.h new file mode 100644 index 0000000..8c73f84 --- /dev/null +++ b/proot/proot_linux/cli/note.h @@ -0,0 +1,54 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#ifndef NOTE_H
+#define NOTE_H
+
+#include "tracee/tracee.h"
+#include "attribute.h"
+
+/* Specify where a notice is coming from.  */
+typedef enum {
+	SYSTEM,		/* note() appends ": " and the perror(3) text.  */
+	INTERNAL,	/* note() appends an end-of-line.  */
+	USER,		/* note() appends an end-of-line.  */
+	TALLOC,		/* note() inserts "talloc: " and appends nothing.  */
+} Origin;
+
+/* Specify the severity of a notice.  */
+typedef enum {
+	ERROR,		/* Always printed, prefixed with "error: ".  */
+	WARNING,	/* Dropped when the verbose level is negative.  */
+	INFO,		/* Dropped when the verbose level is negative.  */
+} Severity;
+
+/* Print an INFO/INTERNAL notice if @tracee is NULL or if its verbose
+ * level is at least @level.  Note that @tracee is evaluated several
+ * times and is not parenthesized in the expansion.  */
+#define VERBOSE(tracee, level, message, args...) do {			\
+		if (tracee == NULL || tracee->verbose >= (level))	\
+			note(tracee, INFO, INTERNAL, (message), ## args); \
+	} while (0)
+
+extern void note(const Tracee *tracee, Severity severity, Origin origin, const char *message, ...) FORMAT(printf, 4, 5);
+
+/* Used by note() when it is called with a NULL tracee.  */
+extern int global_verbose_level;
+extern const char *global_tool_name;
+
+#endif /* NOTE_H */
diff --git a/proot/proot_linux/cli/proot.c b/proot/proot_linux/cli/proot.c
new file mode 100644
index 0000000..429be4c
--- /dev/null
+++ b/proot/proot_linux/cli/proot.c
@@ -0,0 +1,408 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <string.h>    /* str*(3), */
+#include <assert.h>    /* assert(3), */
+#include <stdio.h>     /* printf(3), fflush(3), */
+#include <unistd.h>    /* write(2), */
+
+#include "cli/cli.h"
+#include "cli/note.h"
+#include "extension/extension.h"
+#include "path/binding.h"
+#include "attribute.h"
+
+/* These should be included last.  */
+#include "build.h"
+#include "cli/proot.h"
+
+/* -r: use @value as the guest rootfs.  Returns -1 if the binding
+ * can't be created, otherwise 0.  */
+static int handle_option_r(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	Binding *binding;
+
+	/* ``chroot $PATH`` is semantically equivalent to ``mount
+	 * --bind $PATH /``.  */
+	binding = new_binding(tracee, value, "/", true);
+	if (binding == NULL)
+		return -1;
+
+	return 0;
+}
+
+/* -b: bind the host path to the guest location, @value being either
+ * "host" or "host:guest".  Returns -1 only if the copy of @value
+ * can't be allocated; the result of new_binding() is deliberately not
+ * checked here.  */
+static int handle_option_b(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	char *host;
+	char *guest;
+
+	/* Work on a copy: the string is split in place below.  */
+	host = talloc_strdup(tracee->ctx, value);
+	if (host == NULL) {
+		note(tracee, ERROR, INTERNAL, "can't allocate memory");
+		return -1;
+	}
+
+	/* guest stays NULL when no location was specified.  */
+	guest = strchr(host, ':');
+	if (guest != NULL) {
+		*guest = '\0';
+		guest++;
+	}
+
+	new_binding(tracee, host, guest, true);
+	return 0;
+}
+
+/* -q: split @value on spaces into the NULL-terminated array
+ * @tracee->qemu, then bind the host rootfs to HOST_ROOTFS and mask
+ * "/etc/ld.so.preload" with "/dev/null".  Returns -1 if a memory
+ * allocation failed, otherwise 0.  */
+static int handle_option_q(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	const char *ptr;
+	size_t nb_args;
+	bool last;
+	size_t i;
+
+	/* First pass: count the arguments.  */
+	nb_args = 0;
+	ptr = value;
+	while (1) {
+		nb_args++;
+
+		/* Keep consecutive non-space characters.  */
+		while (*ptr != ' ' && *ptr != '\0')
+			ptr++;
+
+		/* End-of-string ?  */
+		if (*ptr == '\0')
+			break;
+
+		/* Skip consecutive space separators.  */
+		while (*ptr == ' ')
+			ptr++;
+
+		/* End-of-string ?  */
+		if (*ptr == '\0')
+			break;
+	}
+
+	/* The extra zeroed entry terminates the array.  */
+	tracee->qemu = talloc_zero_array(tracee, char *, nb_args + 1);
+	if (tracee->qemu == NULL)
+		return -1;
+	talloc_set_name_const(tracee->qemu, "@qemu");
+
+	/* Second pass: copy each argument.  The bounds are typed
+	 * pointers so that their difference is standard C.  */
+	i = 0;
+	ptr = value;
+	do {
+		const char *start;
+		const char *end;
+		last = true;
+
+		/* Keep consecutive non-space characters.  */
+		start = ptr;
+		while (*ptr != ' ' && *ptr != '\0')
+			ptr++;
+		end = ptr;
+
+		/* End-of-string ?  */
+		if (*ptr == '\0')
+			goto next;
+
+		/* Remove consecutive space separators.  */
+		while (*ptr == ' ')
+			ptr++;
+
+		/* End-of-string ?  */
+		if (*ptr == '\0')
+			goto next;
+
+		last = false;
+	next:
+		tracee->qemu[i] = talloc_strndup(tracee->qemu, start, end - start);
+		if (tracee->qemu[i] == NULL)
+			return -1;
+		i++;
+	} while (!last);
+	assert(i == nb_args);
+
+	new_binding(tracee, "/", HOST_ROOTFS, true);
+	new_binding(tracee, "/dev/null", "/etc/ld.so.preload", false);
+
+	return 0;
+}
+
+/* --mixed-mode: store @value into @tracee->mixed_mode.
+ * NOTE(review): @value is flagged UNUSED although it is used, and the
+ * type of mixed_mode is not visible from here -- confirm that
+ * assigning the pointer is what is intended.  */
+static int handle_option_mixed_mode(Tracee *tracee, const Cli *cli UNUSED, const char *value UNUSED)
+{
+	tracee->mixed_mode = value;
+	return 0;
+}
+
+/* -w: remember @value as the initial working directory.  Returns -1
+ * if the memory allocation failed, otherwise 0.  */
+static int handle_option_w(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	tracee->fs->cwd = talloc_strdup(tracee->fs, value);
+	if (tracee->fs->cwd == NULL)
+		return -1;
+	talloc_set_name_const(tracee->fs->cwd, "$cwd");
+	return 0;
+}
+
+/* -k: (re)initialize the kompat extension with @value.  A failure is
+ * only reported as a warning; this handler always returns 0.  */
+static int handle_option_k(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	void *extension;
+	int status;
+
+	extension = get_extension(tracee, kompat_callback);
+	if (extension != NULL) {
+		note(tracee, WARNING, USER, "option -k was already specified");
+		note(tracee, INFO, USER, "only the last -k option is enabled");
+		TALLOC_FREE(extension);
+	}
+
+	status = initialize_extension(tracee, kompat_callback, value);
+	if (status < 0)
+		note(tracee, WARNING, INTERNAL, "option \"-k %s\" discarded", value);
+
+	return 0;
+}
+
+/* -i: (re)initialize the fake_id0 extension with @value.  The result
+ * of the initialization is ignored; this handler always returns 0.  */
+static int handle_option_i(Tracee *tracee, const Cli *cli UNUSED, const char *value)
+{
+	void *extension;
+
+	extension = get_extension(tracee, fake_id0_callback);
+	if (extension != NULL) {
+		note(tracee, WARNING, USER, "option -i/-0/-S was already specified");
+		note(tracee, INFO, USER, "only the last -i/-0/-S option is enabled");
+		TALLOC_FREE(extension);
+	}
+
+	(void) initialize_extension(tracee, fake_id0_callback, value);
+	return 0;
+}
+
+/* -0: same as "-i 0:0".  */
+static int handle_option_0(Tracee *tracee, const Cli *cli, const char *value UNUSED)
+{ + return handle_option_i(tracee, cli, "0:0"); +} + +static int handle_option_kill_on_exit(Tracee *tracee, const Cli *cli UNUSED, const char *value UNUSED) +{ + tracee->killall_on_exit = true; + return 0; +} + +static int handle_option_v(Tracee *tracee, const Cli *cli UNUSED, const char *value) +{ + int status; + + status = parse_integer_option(tracee, &tracee->verbose, value, "-v"); + if (status < 0) + return status; + + global_verbose_level = tracee->verbose; + return 0; +} + +extern unsigned char WEAK _binary_licenses_start; +extern unsigned char WEAK _binary_licenses_end; + +static int handle_option_V(Tracee *tracee UNUSED, const Cli *cli, const char *value UNUSED) +{ + size_t size; + + print_version(cli); + printf("\n%s\n", cli->colophon); + fflush(stdout); + + size = &_binary_licenses_end - &_binary_licenses_start; + if (size > 0) + write(1, &_binary_licenses_start, size); + + exit_failure = false; + return -1; +} + +static int handle_option_h(Tracee *tracee, const Cli *cli, const char *value UNUSED) +{ + print_usage(tracee, cli, true); + exit_failure = false; + return -1; +} + +static void new_bindings(Tracee *tracee, const char *bindings[], const char *value) +{ + int i; + + for (i = 0; bindings[i] != NULL; i++) { + const char *path; + + path = (strcmp(bindings[i], "*path*") != 0 + ? 
expand_front_variable(tracee->ctx, bindings[i]) + : value); + + new_binding(tracee, path, NULL, false); + } +} + +static int handle_option_R(Tracee *tracee, const Cli *cli, const char *value) +{ + int status; + + status = handle_option_r(tracee, cli, value); + if (status < 0) + return status; + + new_bindings(tracee, recommended_bindings, value); + + return 0; +} + +static int handle_option_S(Tracee *tracee, const Cli *cli, const char *value) +{ + int status; + + status = handle_option_0(tracee, cli, value); + if (status < 0) + return status; + + status = handle_option_r(tracee, cli, value); + if (status < 0) + return status; + + new_bindings(tracee, recommended_su_bindings, value); + + return 0; +} + +static int handle_option_p(Tracee *tracee, const Cli *cli UNUSED, const char *value) +{ + int status = 0; + char *port_in; + char *port_out; + + port_in = talloc_strdup(tracee->ctx, value); + if (port_in == NULL) { + note(tracee, ERROR, INTERNAL, "can't allocate memory"); + return -1; + } + + port_out = strchr(port_in, ':'); + if (port_out != NULL) { + *port_out = '\0'; + port_out++; + } + + if(global_portmap_extension == NULL) + status = initialize_extension(tracee, portmap_callback, value); + if(status < 0) + return status; + + status = add_portmap_entry(atoi(port_in), atoi(port_out)); + + return status; +} + +static int handle_option_n(Tracee *tracee, const Cli *cli UNUSED, const char *value) +{ + int status = 0; + + if(global_portmap_extension == NULL) + status = initialize_extension(tracee, portmap_callback, value); + if(status < 0) + return status; + + status = activate_netcoop_mode(); + + return status; +} + +#ifdef HAVE_PYTHON_EXTENSION +static int handle_option_P(Tracee *tracee, const Cli *cli UNUSED, const char *value) +{ + (void) initialize_extension(tracee, python_callback, value); + return 0; +} +#endif + +static int handle_option_l(Tracee *tracee, const Cli *cli UNUSED, const char *value UNUSED) +{ + return initialize_extension(tracee, 
link2symlink_callback, NULL); +} + +/** + * Initialize @tracee->qemu. + */ +static int post_initialize_exe(Tracee *tracee, const Cli *cli UNUSED, + size_t argc UNUSED, char *const argv[] UNUSED, size_t cursor UNUSED) +{ + char path[PATH_MAX]; + int status; + + /* Nothing else to do ? */ + if (tracee->qemu == NULL) + return 0; + + /* Resolve the full guest path to tracee->qemu[0]. */ + status = which(tracee->reconf.tracee, tracee->reconf.paths, path, tracee->qemu[0]); + if (status < 0) + return -1; + + /* Actually tracee->qemu[0] has to be a host path from the tracee's + * point-of-view, not from the PRoot's point-of-view. See + * translate_execve() for details. */ + if (tracee->reconf.tracee != NULL) { + status = detranslate_path(tracee->reconf.tracee, path, NULL); + if (status < 0) + return -1; + } + + tracee->qemu[0] = talloc_strdup(tracee->qemu, path); + if (tracee->qemu[0] == NULL) + return -1; + + return 0; +} + +/** + * Initialize @tracee's fields that are mandatory for PRoot but that + * are not required on the command line, i.e. "-w" and "-r". + */ +static int pre_initialize_bindings(Tracee *tracee, const Cli *cli, + size_t argc UNUSED, char *const argv[] UNUSED, size_t cursor) +{ + int status; + + /* Default to "." if no CWD were specified. */ + if (tracee->fs->cwd == NULL) { + status = handle_option_w(tracee, cli, "."); + if (status < 0) + return -1; + } + + /* The default guest rootfs is "/" if none was specified. */ + if (get_root(tracee) == NULL) { + status = handle_option_r(tracee, cli, "/"); + if (status < 0) + return -1; + } + + return cursor; +} + +const Cli *get_proot_cli(TALLOC_CTX *context UNUSED) +{ + global_tool_name = proot_cli.name; + return &proot_cli; +} diff --git a/proot/proot_linux/cli/proot.h b/proot/proot_linux/cli/proot.h new file mode 100644 index 0000000..85366b6 --- /dev/null +++ b/proot/proot_linux/cli/proot.h @@ -0,0 +1,357 @@ +/* This file is automatically generated from the documentation. EDIT AT YOUR OWN RISK. 
*/
+
+#ifndef PROOT_CLI_H
+#define PROOT_CLI_H
+
+#include "cli/cli.h"
+
+/* Can be overridden at build time.  */
+#ifndef VERSION
+#define VERSION "5.4.0"
+#endif
+
+/* Host paths bound by the -R option, see new_bindings() in
+ * cli/proot.c: the "*path*" entry stands for the value of the option,
+ * any other entry goes through expand_front_variable() (hence
+ * "$HOME").  The list is NULL-terminated.  */
+static const char *recommended_bindings[] = {
+	"/etc/host.conf",
+	"/etc/hosts",
+	"/etc/hosts.equiv",
+	"/etc/mtab",
+	"/etc/netgroup",
+	"/etc/networks",
+	"/etc/passwd",
+	"/etc/group",
+	"/etc/nsswitch.conf",
+	"/etc/resolv.conf",
+	"/etc/localtime",
+	"/dev/",
+	"/sys/",
+	"/proc/",
+	"/tmp/",
+	"/run/",
+	"/var/run/dbus/system_bus_socket",
+/*	"/var/tmp/kdecache-$LOGNAME", */
+	"$HOME",
+	"*path*",
+	NULL,
+};
+
+/* Host paths bound by the -S option; same conventions as
+ * recommended_bindings[].  */
+static const char *recommended_su_bindings[] = {
+	"/etc/host.conf",
+	"/etc/hosts",
+	"/etc/nsswitch.conf",
+	"/etc/resolv.conf",
+	"/dev/",
+	"/sys/",
+	"/proc/",
+	"/tmp/",
+	"/run/shm",
+	"$HOME",
+	"*path*",
+	NULL,
+};
+
+/* Option handlers, defined in cli/proot.c.  */
+static int handle_option_r(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_b(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_q(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_mixed_mode(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_w(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_v(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_V(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_h(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_k(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_0(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_i(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_p(Tracee *tracee, const Cli *cli, const char *value);
+static int handle_option_n(Tracee *tracee, const Cli *cli, const char *value);
+#ifdef HAVE_PYTHON_EXTENSION
+static int handle_option_P(Tracee *tracee, const Cli *cli, const char *value);
+#endif
+static int handle_option_l(Tracee *tracee, const Cli
*cli, const char *value); +static int handle_option_R(Tracee *tracee, const Cli *cli, const char *value); +static int handle_option_S(Tracee *tracee, const Cli *cli, const char *value); +static int handle_option_kill_on_exit(Tracee *tracee, const Cli *cli, const char *value); + +static int pre_initialize_bindings(Tracee *, const Cli *, size_t, char *const *, size_t); +static int post_initialize_exe(Tracee *, const Cli *, size_t, char *const *, size_t); + +static Cli proot_cli = { + .version = VERSION, + .name = "proot", + .subtitle = "chroot, mount --bind, and binfmt_misc without privilege/setup", + .synopsis = "proot [option] ... [command]", + .colophon = "Visit https://proot-me.github.io for help, bug reports, suggestions, patches, ...\n\ +Copyright (C) 2023 PRoot Developers, licensed under GPL v2 or later.", + .logo = "\ + _____ _____ ___\n\ +| __ \\ __ \\_____ _____| |_\n\ +| __/ / _ \\/ _ \\ _|\n\ +|__| |__|__\\_____/\\_____/\\____|", + + .pre_initialize_bindings = pre_initialize_bindings, + .post_initialize_exe = post_initialize_exe, + + .options = { + { .class = "Regular options", + .arguments = { + { .name = "-r", .separator = ' ', .value = "path" }, + { .name = "--rootfs", .separator = '=', .value = "path" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_r, + .description = "Use *path* as the new guest root file-system, default is /.", + .detail = "\tThe specified path typically contains a Linux distribution where\n\ +\tall new programs will be confined. 
The default rootfs is /\n\ +\twhen none is specified, this makes sense when the bind mechanism\n\ +\tis used to relocate host files and directories, see the -b\n\ +\toption and the Examples section for details.\n\ +\t\n\ +\tIt is recommended to use the -R or -S options instead.", + }, + { .class = "Regular options", + .arguments = { + { .name = "-b", .separator = ' ', .value = "path" }, + { .name = "--bind", .separator = '=', .value = "path" }, + { .name = "-m", .separator = ' ', .value = "path" }, + { .name = "--mount", .separator = '=', .value = "path" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_b, + .description = "Make the content of *path* accessible in the guest rootfs.", + .detail = "\tThis option makes any file or directory of the host rootfs\n\ +\taccessible in the confined environment just as if it were part of\n\ +\tthe guest rootfs. By default the host path is bound to the same\n\ +\tpath in the guest rootfs but users can specify any other location\n\ +\twith the syntax: -b *host_path*:*guest_location*. If the\n\ +\tguest location is a symbolic link, it is dereferenced to ensure\n\ +\tthe new content is accessible through all the symbolic links that\n\ +\tpoint to the overlaid content. In most cases this default\n\ +\tbehavior shouldn't be a problem, although it is possible to\n\ +\texplicitly not dereference the guest location by appending it the\n\ +\t! 
character: -b *host_path*:*guest_location!*.", + }, + { .class = "Regular options", + .arguments = { + { .name = "-q", .separator = ' ', .value = "command" }, + { .name = "--qemu", .separator = '=', .value = "command" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_q, + .description = "Execute guest programs through QEMU as specified by *command*.", + .detail = "\tEach time a guest program is going to be executed, PRoot inserts\n\ +\tthe QEMU user-mode command in front of the initial request.\n\ +\tThat way, guest programs actually run on a virtual guest CPU\n\ +\temulated by QEMU user-mode. The native execution of host programs\n\ +\tis still effective and the whole host rootfs is bound to\n\ +\t/host-rootfs in the guest environment.", + }, + { .class = "Regular options", + .arguments = { + { .name = "--mixed-mode", .separator = ' ', .value = "value" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_mixed_mode, + .description = "Disable the mixed-execution feature.", + .detail = "\tDo not treat ELF executables specially when they appear to be\n\ +\tnative executables of the host system.", + }, + { .class = "Regular options", + .arguments = { + { .name = "-w", .separator = ' ', .value = "path" }, + { .name = "--pwd", .separator = '=', .value = "path" }, + { .name = "--cwd", .separator = '=', .value = "path" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_w, + .description = "Set the initial working directory to *path*.", + .detail = "\tSome programs expect to be launched from a given directory but do\n\ +\tnot perform any chdir by themselves. 
This option avoids the\n\ +\tneed for running a shell and then entering the directory manually.", + }, + { .class = "Regular options", + .arguments = { + { .name = "--kill-on-exit", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_kill_on_exit, + .description = "Kill all processes on command exit.", + .detail = "\tWhen the executed command leaves orphean or detached processes\n\ +\taround, proot waits until all processes possibly terminate. This option forces\n\ +\tthe immediate termination of all tracee processes when the main command exits.", + }, + { .class = "Regular options", + .arguments = { + { .name = "-v", .separator = ' ', .value = "value" }, + { .name = "--verbose", .separator = '=', .value = "value" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_v, + .description = "Set the level of debug information to *value*.", + .detail = "\tThe higher the integer value is, the more detailed debug\n\ +\tinformation is printed to the standard error stream. 
A negative\n\ +\tvalue makes PRoot quiet except on fatal errors.", + }, + { .class = "Regular options", + .arguments = { + { .name = "-V", .separator = '\0', .value = NULL }, + { .name = "--version", .separator = '\0', .value = NULL }, + { .name = "--about", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_V, + .description = "Print version, copyright, license and contact, then exit.", + .detail = "", + }, + { .class = "Regular options", + .arguments = { + { .name = "-h", .separator = '\0', .value = NULL }, + { .name = "--help", .separator = '\0', .value = NULL }, + { .name = "--usage", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_h, + .description = "Print the version and the command-line usage, then exit.", + .detail = "", + }, + { .class = "Extension options", + .arguments = { + { .name = "-k", .separator = ' ', .value = "string" }, + { .name = "--kernel-release", .separator = '=', .value = "string" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_k, + .description = "Make current kernel appear as kernel release *string*.", + .detail = "\tIf a program is run on a kernel older than the one expected by its\n\ +\tGNU C library, the following error is reported: \"FATAL: kernel too\n\ +\told\". 
To be able to run such programs, PRoot can emulate some of\n\ +\tthe features that are available in the kernel release specified by\n\ +\t*string* but that are missing in the current kernel.", + }, + { .class = "Extension options", + .arguments = { + { .name = "-0", .separator = '\0', .value = NULL }, + { .name = "--root-id", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_0, + .description = "Make current user appear as \"root\" and fake its privileges.", + .detail = "\tSome programs will refuse to work if they are not run with \"root\"\n\ +\tprivileges, even if there is no technical reason for that. This\n\ +\tis typically the case with package managers. This option allows\n\ +\tusers to bypass this kind of limitation by faking the user/group\n\ +\tidentity, and by faking the success of some operations like\n\ +\tchanging the ownership of files, changing the root directory to\n\ +\t/, ... Note that this option is quite limited compared to\n\ +\tfakeroot.", + }, + { .class = "Extension options", + .arguments = { + { .name = "-i", .separator = ' ', .value = "string" }, + { .name = "--change-id", .separator = '=', .value = "string" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_i, + .description = "Make current user and group appear as *string* \"uid:gid\".", + .detail = "\tThis option makes the current user and group appear as uid and\n\ +\tgid. 
Likewise, files actually owned by the current user and\n\ +\tgroup appear as if they were owned by uid and gid instead.\n\ +\tNote that the -0 option is the same as -i 0:0.", + }, + { .class = "Extension options", + .arguments = { + { .name = "-p", .separator = ' ', .value = "string" }, + { .name = "--port", .separator = '=', .value = "string" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_p, + .description = "Map ports to others with the syntax as *string* \"port_in:port_out\".", + .detail = "\tThis option makes PRoot intercept bind and connect system calls,\n\ +\tand change the port they use. The port map is specified\n\ +\twith the syntax: -b *port_in*:*port_out*. For example,\n\ +\tan application that runs a MySQL server binding to 5432 wants\n\ +\tto cohabit with other similar application, but doesn't have an\n\ +\toption to change its port. PRoot can be used here to modify\n\ +\tthis port: proot -p 5432:5433 myapplication. With this command,\n\ +\tthe MySQL server will be bound to the port 5433.\n\ +\tThis command can be repeated multiple times to map multiple ports.", + }, + { .class = "Extension options", + .arguments = { + { .name = "-n", .separator = '\0', .value = NULL }, + { .name = "--netcoop", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_n, + .description = "Enable the network cooperation mode.", + .detail = "\tThis option makes PRoot intercept bind() system calls and\n\ +\tchange the port they are binding to to 0. With this, the system will\n\ +\tallocate an available port. Each time this is done, a new entry is added\n\ +\tto the port mapping entries, so that corresponding connect() system calls\n\ +\tuse the same resulting port. 
This network \"cooperation\" makes it possible\n\ +\tto run multiple instances of a same program without worrying about the same ports\n\ +\tbeing used twice.", + }, +#ifdef HAVE_PYTHON_EXTENSION + { .class = "Extension options", + .arguments = { + { .name = "-P", .separator = ' ', .value = "string" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_P, + .description = "Allow to access tracee information from python (experimental).", + .detail = "\tThis option allow to launch a python script as an extension (experimental).", + }, +#endif + { .class = "Extension options", + .arguments = { + { .name = "-l", .separator = '\0', .value = NULL }, + { .name = "--link2symlink", .separator = '\0', .value = NULL }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_l, + .description = "Enable the link2symlink extension.", + .detail = "\tThis extension causes proot to create a symlink when a hardlink\n\ +\tshould be created. Some environments don't let the user create a hardlink, this\n\ +\toption should be used to fix it.", + }, + { .class = "Alias options", + .arguments = { + { .name = "-R", .separator = ' ', .value = "path" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_R, + .description = "Alias: -r *path* + a couple of recommended -b.", + .detail = "\tPrograms isolated in *path*, a guest rootfs, might still need to\n\ +\taccess information about the host system, as it is illustrated in\n\ +\tthe Examples section of the manual. These host information\n\ +\tare typically: user/group definition, network setup, run-time\n\ +\tinformation, users' files, ... 
On all Linux distributions, they\n\ +\tall lie in a couple of host files and directories that are\n\ +\tautomatically bound by this option:\n\ +\t\n\ +\t * /etc/host.conf\n\ +\t * /etc/hosts\n\ +\t * /etc/hosts.equiv\n\ +\t * /etc/mtab\n\ +\t * /etc/netgroup\n\ +\t * /etc/networks\n\ +\t * /etc/passwd\n\ +\t * /etc/group\n\ +\t * /etc/nsswitch.conf\n\ +\t * /etc/resolv.conf\n\ +\t * /etc/localtime\n\ +\t * /dev/\n\ +\t * /sys/\n\ +\t * /proc/\n\ +\t * /tmp/\n\ +\t * /run/\n\ +\t * /var/run/dbus/system_bus_socket\n\ +\t * $HOME", + }, + { .class = "Alias options", + .arguments = { + { .name = "-S", .separator = ' ', .value = "path" }, + { .name = NULL, .separator = '\0', .value = NULL } }, + .handler = handle_option_S, + .description = "Alias: -0 -r *path* + a couple of recommended -b.", + .detail = "\tThis option is useful to safely create and install packages into\n\ +\tthe guest rootfs. It is similar to the -R option except it\n\ +\tenables the -0 option and binds only the following minimal set\n\ +\tof paths to avoid unexpected changes on host files:\n\ +\t\n\ +\t * /etc/host.conf\n\ +\t * /etc/hosts\n\ +\t * /etc/nsswitch.conf\n\ +\t * /etc/resolv.conf\n\ +\t * /dev/\n\ +\t * /sys/\n\ +\t * /proc/\n\ +\t * /tmp/\n\ +\t * /run/shm\n\ +\t * $HOME", + }, + END_OF_OPTIONS, + }, +}; + +#endif /* PROOT_CLI_H */ diff --git a/proot/proot_linux/compat.h b/proot/proot_linux/compat.h new file mode 100644 index 0000000..c601991 --- /dev/null +++ b/proot/proot_linux/compat.h @@ -0,0 +1,262 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef COMPAT_H +#define COMPAT_H + +/* Local definitions for compatibility with old and/or broken distros... */ +# ifndef AT_NULL +# define AT_NULL 0 +# endif +# ifndef AT_PHDR +# define AT_PHDR 3 +# endif +# ifndef AT_PHENT +# define AT_PHENT 4 +# endif +# ifndef AT_PHNUM +# define AT_PHNUM 5 +# endif +# ifndef AT_BASE +# define AT_BASE 7 +# endif +# ifndef AT_ENTRY +# define AT_ENTRY 9 +# endif +# ifndef AT_RANDOM +# define AT_RANDOM 25 +# endif +# ifndef AT_EXECFN +# define AT_EXECFN 31 +# endif +# ifndef AT_SYSINFO +# define AT_SYSINFO 32 +# endif +# ifndef AT_SYSINFO_EHDR +# define AT_SYSINFO_EHDR 33 +# endif +# ifndef AT_FDCWD +# define AT_FDCWD -100 +# endif +# ifndef AT_SYMLINK_FOLLOW +# define AT_SYMLINK_FOLLOW 0x400 +# endif +# ifndef AT_REMOVEDIR +# define AT_REMOVEDIR 0x200 +# endif +# ifndef AT_SYMLINK_NOFOLLOW +# define AT_SYMLINK_NOFOLLOW 0x100 +# endif +# ifndef IN_DONT_FOLLOW +# define IN_DONT_FOLLOW 0x02000000 +# endif +# ifndef WIFCONTINUED +# define WIFCONTINUED(status) ((status) == 0xffff) +# endif +# ifndef PTRACE_GETREGS +# define PTRACE_GETREGS 12 +# endif +# ifndef PTRACE_SETREGS +# define PTRACE_SETREGS 13 +# endif +# ifndef PTRACE_GETFPREGS +# define PTRACE_GETFPREGS 14 +# endif +# ifndef PTRACE_SETFPREGS +# define PTRACE_SETFPREGS 15 +# endif +# ifndef PTRACE_GETFPXREGS +# define PTRACE_GETFPXREGS 18 +# endif +# ifndef PTRACE_SETFPXREGS +# define PTRACE_SETFPXREGS 19 +# endif +# ifndef PTRACE_SETOPTIONS +# define PTRACE_SETOPTIONS 0x4200 +# endif +# ifndef 
PTRACE_GETEVENTMSG +# define PTRACE_GETEVENTMSG 0x4201 +# endif +# ifndef PTRACE_GETREGSET +# define PTRACE_GETREGSET 0x4204 +# endif +# ifndef PTRACE_SETREGSET +# define PTRACE_SETREGSET 0x4205 +# endif +# ifndef PTRACE_SEIZE +# define PTRACE_SEIZE 0x4206 +# endif +# ifndef PTRACE_INTERRUPT +# define PTRACE_INTERRUPT 0x4207 +# endif +# ifndef PTRACE_LISTEN +# define PTRACE_LISTEN 0x4208 +# endif +# ifndef PTRACE_O_TRACESYSGOOD +# define PTRACE_O_TRACESYSGOOD 0x00000001 +# endif +# ifndef PTRACE_O_TRACEFORK +# define PTRACE_O_TRACEFORK 0x00000002 +# endif +# ifndef PTRACE_O_TRACEVFORK +# define PTRACE_O_TRACEVFORK 0x00000004 +# endif +# ifndef PTRACE_O_TRACECLONE +# define PTRACE_O_TRACECLONE 0x00000008 +# endif +# ifndef PTRACE_O_TRACEEXEC +# define PTRACE_O_TRACEEXEC 0x00000010 +# endif +# ifndef PTRACE_O_TRACEVFORKDONE +# define PTRACE_O_TRACEVFORKDONE 0x00000020 +# endif +# ifndef PTRACE_O_TRACEEXIT +# define PTRACE_O_TRACEEXIT 0x00000040 +# endif +# ifndef PTRACE_O_TRACESECCOMP +# define PTRACE_O_TRACESECCOMP 0x00000080 +# endif +# ifndef PTRACE_EVENT_FORK +# define PTRACE_EVENT_FORK 1 +# endif +# ifndef PTRACE_EVENT_VFORK +# define PTRACE_EVENT_VFORK 2 +# endif +# ifndef PTRACE_EVENT_CLONE +# define PTRACE_EVENT_CLONE 3 +# endif +# ifndef PTRACE_EVENT_EXEC +# define PTRACE_EVENT_EXEC 4 +# endif +# ifndef PTRACE_EVENT_VFORK_DONE +# define PTRACE_EVENT_VFORK_DONE 5 +# endif +# ifndef PTRACE_EVENT_EXIT +# define PTRACE_EVENT_EXIT 6 +# endif +# ifndef PTRACE_EVENT_SECCOMP +# define PTRACE_EVENT_SECCOMP 7 +# endif +# ifndef PTRACE_EVENT_SECCOMP2 +# if PTRACE_EVENT_SECCOMP == 7 +# define PTRACE_EVENT_SECCOMP2 8 +# elif PTRACE_EVENT_SECCOMP == 8 +# define PTRACE_EVENT_SECCOMP2 7 +# else +# error "unknown PTRACE_EVENT_SECCOMP value" +# endif +# endif +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# ifndef PTRACE_GET_THREAD_AREA +# define PTRACE_GET_THREAD_AREA 25 +# endif +# ifndef PTRACE_SET_THREAD_AREA +# define PTRACE_SET_THREAD_AREA 26 +# 
endif +# ifndef PTRACE_GETVFPREGS +# define PTRACE_GETVFPREGS 27 +# endif +# ifndef PTRACE_ARCH_PRCTL +# define PTRACE_ARCH_PRCTL 30 +# endif +# ifndef ARCH_SET_GS /* arch_prctl(2) codes: each guard tests the very macro it defines */ +# define ARCH_SET_GS 0x1001 +# endif +# ifndef ARCH_SET_FS +# define ARCH_SET_FS 0x1002 +# endif +# ifndef ARCH_GET_FS +# define ARCH_GET_FS 0x1003 +# endif +# ifndef ARCH_GET_GS +# define ARCH_GET_GS 0x1004 +# endif +# ifndef PTRACE_SINGLEBLOCK +# define PTRACE_SINGLEBLOCK 33 +# endif +# ifndef ADDR_NO_RANDOMIZE +# define ADDR_NO_RANDOMIZE 0x0040000 +# endif +# ifndef SYS_ACCEPT4 +# define SYS_ACCEPT4 18 +# endif +# ifndef TALLOC_FREE +# define TALLOC_FREE(ctx) do { talloc_free(ctx); ctx = NULL; } while(0) +# endif +# ifndef PR_SET_NAME +# define PR_SET_NAME 15 +# endif +# ifndef PR_SET_NO_NEW_PRIVS +# define PR_SET_NO_NEW_PRIVS 38 +# endif +# ifndef PR_SET_SECCOMP +# define PR_SET_SECCOMP 22 +# endif +# ifndef SECCOMP_MODE_FILTER +# define SECCOMP_MODE_FILTER 2 +# endif +# ifndef talloc_get_type_abort +# define talloc_get_type_abort talloc_get_type +# endif +# ifndef FUTEX_PRIVATE_FLAG +# define FUTEX_PRIVATE_FLAG 128 +# endif +# ifndef EFD_SEMAPHORE +# define EFD_SEMAPHORE 1 +# endif +# ifndef F_DUPFD_CLOEXEC +# define F_DUPFD_CLOEXEC 1030 +# endif +# ifndef O_RDONLY +# define O_RDONLY 00000000 +# endif +# ifndef O_CLOEXEC +# define O_CLOEXEC 02000000 +# endif +# ifndef MAP_PRIVATE +# define MAP_PRIVATE 0x02 +# endif +# ifndef MAP_FIXED +# define MAP_FIXED 0x10 +# endif +# ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS 0x20 +# endif +# ifndef PROT_READ +# define PROT_READ 0x1 +# endif +# ifndef PROT_WRITE +# define PROT_WRITE 0x2 +# endif +# ifndef PROT_EXEC +# define PROT_EXEC 0x4 +# endif +# ifndef PROT_GROWSDOWN +# define PROT_GROWSDOWN 0x01000000 +# endif +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif + +#endif /* COMPAT_H */ diff --git a/proot/proot_linux/execve/aoxp.c b/proot/proot_linux/execve/aoxp.c new file mode 100644 index 0000000..b1103a6 --- /dev/null +++ 
b/proot/proot_linux/execve/aoxp.c @@ -0,0 +1,439 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* ARG_MAX, */ +#include /* assert(3), */ +#include /* strlen(3), memcmp(3), memcpy(3), */ +#include /* bzero(3), */ +#include /* bool, true, false, */ +#include /* E*, */ +#include /* va_*, */ +#include /* uint32_t, */ +#include /* talloc_*, */ + +#include "arch.h" +#include "tracee/tracee.h" +#include "tracee/mem.h" +#include "tracee/abi.h" +#include "build.h" + +struct mixed_pointer { + /* Pointer -- in tracee's address space -- to the current + * object, if local == NULL. */ + word_t remote; + + /* Pointer -- in tracer's address space -- to the current + * object, if local != NULL. */ + void *local; +}; + +#include "execve/aoxp.h" + +/** + * Read object pointed to by @array[@index] from tracee's memory, then + * make @local_pointer points to the locally *cached* version. This + * function returns -errno when an error occured, otherwise 0. + */ +int read_xpointee_as_object(ArrayOfXPointers *array, size_t index, void **local_pointer) +{ + int status; + int size; + + assert(index < array->length); + + /* Already cached locally? 
*/ + if (array->_xpointers[index].local != NULL) + goto end; + + /* Remote NULL is mapped to local NULL. */ + if (array->_xpointers[index].remote == 0) { + array->_xpointers[index].local = NULL; + goto end; + } + + size = sizeof_xpointee(array, index); + if (size < 0) + return size; + + array->_xpointers[index].local = talloc_size(array, size); + if (array->_xpointers[index].local == NULL) + return -ENOMEM; + + /* Copy locally the remote object. */ + status = read_data(TRACEE(array), array->_xpointers[index].local, + array->_xpointers[index].remote, size); + if (status < 0) { + array->_xpointers[index].local = NULL; + return status; + } + +end: + *local_pointer = array->_xpointers[index].local; + return 0; +} + +/** + * Read string pointed to by @array[@index] from tracee's memory, then + * make @local_pointer points to the locally *cached* version. This + * function returns -errno when an error occured, otherwise 0. + */ +int read_xpointee_as_string(ArrayOfXPointers *array, size_t index, char **local_pointer) +{ + char tmp[ARG_MAX]; + int status; + + assert(index < array->length); + + /* Already cached locally? */ + if (array->_xpointers[index].local != NULL) + goto end; + + /* Remote NULL is mapped to local NULL. */ + if (array->_xpointers[index].remote == 0) { + array->_xpointers[index].local = NULL; + goto end; + } + + /* Copy locally the remote string into a temporary buffer. */ + status = read_string(TRACEE(array), tmp, array->_xpointers[index].remote, ARG_MAX); + if (status < 0) + return status; + if (status >= ARG_MAX) + return -ENOMEM; + + /* Save the local string in a "persistent" buffer. */ + array->_xpointers[index].local = talloc_strdup(array, tmp); + if (array->_xpointers[index].local == NULL) + return -ENOMEM; + +end: + *local_pointer = array->_xpointers[index].local; + return 0; +} + +/** + * This function returns the number of bytes of the string pointed to + * by @array[@index], otherwise -errno if an error occured. 
+ */ +int sizeof_xpointee_as_string(ArrayOfXPointers *array, size_t index) +{ + char *string; + int status; + + assert(index < array->length); + + status = read_xpointee_as_string(array, index, &string); + if (status < 0) + return status; + + if (string == NULL) + return 0; /* a NULL string occupies no byte */ + + return strlen(string) + 1; /* count the terminating NUL too */ +} + +/** + * Compare object pointed to by @array[@index] with object pointed to + * by @local_reference. This function returns 1 if they are + * equivalent, 0 otherwise. On error, -errno is returned. + */ +int compare_xpointee_generic(ArrayOfXPointers *array, size_t index, const void *local_reference) +{ + void *object; + int status; + + assert(index < array->length); + + status = read_xpointee(array, index, &object); + if (status < 0) + return status; + + if (object == NULL && local_reference == NULL) + return 1; + + if (object == NULL && local_reference != NULL) + return 0; + + if (object != NULL && local_reference == NULL) + return 0; + + status = sizeof_xpointee(array, index); /* from here on, @status holds the size of the pointee */ + if (status < 0) + return status; + + return (int) (memcmp(object, local_reference, status) == 0); /* byte-wise comparison over that size */ +} + +/** + * This function returns the index in @array of the first pointee + * equivalent to the @local_reference pointee (@array->length if + * there is none), or -errno if an error occurred. + */ +int find_xpointee(ArrayOfXPointers *array, const void *local_reference) +{ + size_t i; + + for (i = 0; i < array->length; i++) { + int status; + + status = compare_xpointee(array, i, local_reference); + if (status < 0) + return status; + if (status != 0) + break; /* match found at index @i */ + } + + return i; /* equals array->length when the loop ran to completion */ +} + +/** + * Make @array[@index] point to a copy of the string pointed to by + * @string. This function returns -errno when an error occurred, + * otherwise 0. 
+ */ +int write_xpointee_as_string(ArrayOfXPointers *array, size_t index, const char *string) +{ + assert(index < array->length); + + array->_xpointers[index].local = talloc_strdup(array, string); /* the copy is a talloc child of @array; the previous local pointer is overwritten */ + if (array->_xpointers[index].local == NULL) + return -ENOMEM; + + return 0; +} + +/** + * Make @array[@index ... @index + @nb_xpointees] point to a copy of + * the variadic arguments. This function returns -errno when an error + * occurred, otherwise 0. + */ +int write_xpointees(ArrayOfXPointers *array, size_t index, size_t nb_xpointees, ...) +{ + va_list va_xpointees; + int status; + size_t i; + + va_start(va_xpointees, nb_xpointees); + + for (i = 0; i < nb_xpointees; i++) { + void *object = va_arg(va_xpointees, void *); /* every variadic argument is read as a pointer */ + + status = write_xpointee(array, index + i, object); + if (status < 0) + goto end; /* stop at the first failure, va_end() still runs */ + } + status = 0; + +end: + va_end(va_xpointees); + return status; +} + + +/** + * Resize the @array at the given @index by the @delta_nb_entries. + * This function returns -errno when an error occurred, otherwise 0. 
+ */ +int resize_array_of_xpointers(ArrayOfXPointers *array, size_t index, ssize_t delta_nb_entries) +{ + size_t nb_moved_entries; + size_t new_length; + void *tmp; + + assert(index < array->length); + + if (delta_nb_entries == 0) + return 0; + + new_length = array->length + delta_nb_entries; + nb_moved_entries = array->length - index; + + if (delta_nb_entries > 0) { + tmp = talloc_realloc(array, array->_xpointers, XPointer, new_length); + if (tmp == NULL) + return -ENOMEM; + array->_xpointers = tmp; + + memmove(array->_xpointers + index + delta_nb_entries, array->_xpointers + index, + nb_moved_entries * sizeof(XPointer)); + + bzero(array->_xpointers + index, delta_nb_entries * sizeof(XPointer)); + } + else { + assert(delta_nb_entries <= 0); + assert(index >= (size_t) -delta_nb_entries); + + memmove(array->_xpointers + index + delta_nb_entries, array->_xpointers + index, + nb_moved_entries * sizeof(XPointer)); + + tmp = talloc_realloc(array, array->_xpointers, XPointer, new_length); + if (tmp == NULL) + return -ENOMEM; + array->_xpointers = tmp; + } + + array->length = new_length; + return 0; +} + +/** + * Copy into *@array_ the pointer array pointed to by @reg from + * @tracee's memory space. Only the first @nb_entries are copied, + * unless it is 0 then all the entries up to the NULL pointer are + * copied. This function returns -errno when an error occured, + * otherwise 0. + */ +int fetch_array_of_xpointers(Tracee *tracee, ArrayOfXPointers **array_, Reg reg, size_t nb_entries) +{ + word_t pointer = 1; /* ie. != 0 */ + word_t address; + ArrayOfXPointers *array; + size_t i; + + assert(array_ != NULL); + + *array_ = talloc_zero(tracee->ctx, ArrayOfXPointers); + if (*array_ == NULL) + return -ENOMEM; + array = *array_; + + address = peek_reg(tracee, CURRENT, reg); + + for (i = 0; nb_entries != 0 ? 
i < nb_entries : pointer != 0; i++) { + void *tmp = talloc_realloc(array, array->_xpointers, XPointer, i + 1); + if (tmp == NULL) + return -ENOMEM; + array->_xpointers = tmp; + + pointer = peek_word(tracee, address + i * sizeof_word(tracee)); + if (errno != 0) + return -errno; + + array->_xpointers[i].remote = pointer; + array->_xpointers[i].local = NULL; + } + array->length = i; + + /* By default, assume it is an array of string pointers. */ + array->read_xpointee = (read_xpointee_t) read_xpointee_as_string; + array->sizeof_xpointee = sizeof_xpointee_as_string; + array->write_xpointee = (write_xpointee_t) write_xpointee_as_string; + + /* By default, use generic callbacks: they rely on + * array->read_xpointee() and array->sizeof_xpointee(). */ + array->compare_xpointee = compare_xpointee_generic; + + return 0; +} + +/** + * Copy @array into tracee's memory space, then put in @reg the + * address where it was copied. This function returns -errno if an + * error occured, otherwise 0. + */ +int push_array_of_xpointers(ArrayOfXPointers *array, Reg reg) +{ + Tracee *tracee; + struct iovec *local; + size_t local_count; + size_t total_size; + word_t *pod_array; + word_t tracee_ptr; + int status; + size_t i; + + /* Nothing to do, for sure. */ + if (array == NULL) + return 0; + + tracee = TRACEE(array); + + /* The pointer table is a POD array in the tracee's memory. */ + pod_array = talloc_zero_size(tracee->ctx, array->length * sizeof_word(tracee)); + if (pod_array == NULL) + return -ENOMEM; + + /* There's one vector per modified pointee + one vector for the + * pod array. */ + local = talloc_zero_array(tracee->ctx, struct iovec, array->length + 1); + if (local == NULL) + return -ENOMEM; + + /* The pod array is expected to be at the beginning of the + * allocated memory by the caller. */ + total_size = array->length * sizeof_word(tracee); + local[0].iov_base = pod_array; + local[0].iov_len = total_size; + local_count = 1; + + /* Create one vector for each modified pointee. 
*/ + for (i = 0; i < array->length; i++) { + ssize_t size; + + if (array->_xpointers[i].local == NULL) + continue; + + /* At this moment, we only know the offsets in the + * tracee's memory block. */ + array->_xpointers[i].remote = total_size; + + size = sizeof_xpointee(array, i); + if (size < 0) + return size; + total_size += size; + + local[local_count].iov_base = array->_xpointers[i].local; + local[local_count].iov_len = size; + local_count++; + } + + /* Nothing has changed, don't update anything. */ + if (local_count == 1) + return 0; + assert(local_count < array->length + 1); + + /* Modified pointees and the pod array are stored in a tracee's + * memory block. */ + tracee_ptr = alloc_mem(tracee, total_size); + if (tracee_ptr == 0) + return -E2BIG; + + /* Now, we know the absolute addresses in the tracee's + * memory. */ + for (i = 0; i < array->length; i++) { + if (array->_xpointers[i].local != NULL) + array->_xpointers[i].remote += tracee_ptr; + + if (is_32on64_mode(tracee)) + ((uint32_t *) pod_array)[i] = array->_xpointers[i].remote; + else + pod_array[i] = array->_xpointers[i].remote; + } + + /* Write all the modified pointees and the pod array at once. */ + status = writev_data(tracee, tracee_ptr, local, local_count); + if (status < 0) + return status; + + poke_reg(tracee, reg, tracee_ptr); + return 0; +} diff --git a/proot/proot_linux/execve/aoxp.h b/proot/proot_linux/execve/aoxp.h new file mode 100644 index 0000000..7a852aa --- /dev/null +++ b/proot/proot_linux/execve/aoxp.h @@ -0,0 +1,80 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef AOXP_H +#define AOXP_H + +#include /* NOTE(review): the header name is missing from this directive -- restore it from the upstream file */ + +#include "tracee/reg.h" +#include "arch.h" + +typedef struct array_of_xpointers ArrayOfXPointers; +typedef int (*read_xpointee_t)(ArrayOfXPointers *array, size_t index, void **object); +typedef int (*write_xpointee_t)(ArrayOfXPointers *array, size_t index, const void *object); +typedef int (*compare_xpointee_t)(ArrayOfXPointers *array, size_t index, const void *reference); +typedef int (*sizeof_xpointee_t)(ArrayOfXPointers *array, size_t index); + +typedef struct mixed_pointer XPointer; +struct array_of_xpointers { + XPointer *_xpointers; /* the @length entries of the array */ + size_t length; /* number of entries in @_xpointers */ + + read_xpointee_t read_xpointee; /* callbacks the inline helpers below dispatch to */ + write_xpointee_t write_xpointee; + compare_xpointee_t compare_xpointee; + sizeof_xpointee_t sizeof_xpointee; +}; + +static inline int read_xpointee(ArrayOfXPointers *array, size_t index, void **object) +{ + return array->read_xpointee(array, index, object); /* forwards to the callback stored in @array */ +} + +static inline int write_xpointee(ArrayOfXPointers *array, size_t index, const void *object) +{ + return array->write_xpointee(array, index, object); /* forwards to the callback stored in @array */ +} + +static inline int compare_xpointee(ArrayOfXPointers *array, size_t index, const void *reference) +{ + return array->compare_xpointee(array, index, reference); /* forwards to the callback stored in @array */ +} + +static inline int sizeof_xpointee(ArrayOfXPointers *array, size_t index) +{ + return array->sizeof_xpointee(array, index); /* forwards to the callback stored in @array */ +} + +extern int find_xpointee(ArrayOfXPointers *array, const void *reference); +extern int resize_array_of_xpointers(ArrayOfXPointers *array, size_t index, 
ssize_t nb_delta_entries); +extern int fetch_array_of_xpointers(Tracee *tracee, ArrayOfXPointers **array, Reg reg, size_t nb_entries); +extern int push_array_of_xpointers(ArrayOfXPointers *array, Reg reg); + +extern int read_xpointee_as_object(ArrayOfXPointers *array, size_t index, void **object); +extern int read_xpointee_as_string(ArrayOfXPointers *array, size_t index, char **string); +extern int write_xpointee_as_string(ArrayOfXPointers *array, size_t index, const char *string); +extern int write_xpointees(ArrayOfXPointers *array, size_t index, size_t nb_xpointees, ...); +extern int compare_xpointee_generic(ArrayOfXPointers *array, size_t index, const void *reference); +extern int sizeof_xpointee_as_string(ArrayOfXPointers *array, size_t index); + +#endif /* AOXP_H */ diff --git a/proot/proot_linux/execve/auxv.c b/proot/proot_linux/execve/auxv.c new file mode 100644 index 0000000..45f9d84 --- /dev/null +++ b/proot/proot_linux/execve/auxv.c @@ -0,0 +1,184 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* AT_*, */ +#include /* assert(3), */ +#include /* E*, */ +#include /* write(3), close(3), */ +#include /* open(2), */ +#include /* open(2), */ +#include /* open(2), */ + +#include "execve/auxv.h" +#include "syscall/sysnum.h" +#include "tracee/tracee.h" +#include "tracee/mem.h" +#include "tracee/reg.h" +#include "tracee/abi.h" +#include "arch.h" + + +/** + * Add the given vector [@type, @value] to @vectors. This function + * returns -errno if an error occurred, otherwise 0. + */ +int add_elf_aux_vector(ElfAuxVector **vectors, word_t type, word_t value) +{ + ElfAuxVector *tmp; + size_t nb_vectors; + + assert(*vectors != NULL); + + nb_vectors = talloc_array_length(*vectors); + + /* Sanity checks. */ + assert(nb_vectors > 0); + assert((*vectors)[nb_vectors - 1].type == AT_NULL); + + tmp = talloc_realloc(talloc_parent(*vectors), *vectors, ElfAuxVector, nb_vectors + 1); + if (tmp == NULL) + return -ENOMEM; + *vectors = tmp; + + /* Replace the sentinel with the new vector. */ + (*vectors)[nb_vectors - 1].type = type; + (*vectors)[nb_vectors - 1].value = value; + + /* Restore the sentinel. */ + (*vectors)[nb_vectors].type = AT_NULL; + (*vectors)[nb_vectors].value = 0; + + return 0; +} + +/** + * Get the address of the the ELF auxiliary vectors table for the + * given @tracee. This function returns 0 if an error occurred. + */ +word_t get_elf_aux_vectors_address(const Tracee *tracee) +{ + word_t address; + word_t data; + + /* Sanity check: this works only in execve sysexit. 
*/ + assert(IS_IN_SYSEXIT2(tracee, PR_execve)); + + /* Right after execve, the stack layout is: + * + * argc, argv[0], ..., 0, envp[0], ..., 0, auxv[0].type, auxv[0].value, ..., 0, 0 + */ + address = peek_reg(tracee, CURRENT, STACK_POINTER); + + /* Read: argc */ + data = peek_word(tracee, address); + if (errno != 0) + return 0; + + /* Skip: argc, argv, 0 */ + address += (1 + data + 1) * sizeof_word(tracee); + + /* Skip: envp, 0 */ + do { + data = peek_word(tracee, address); + if (errno != 0) + return 0; + address += sizeof_word(tracee); + } while (data != 0); + + return address; +} + +/** + * Fetch ELF auxiliary vectors stored at the given @address in + * @tracee's memory. This function returns NULL if an error occurred, + * otherwise it returns a pointer to the new vectors, in an ABI + * independent form (the Talloc parent of this pointer is + * @tracee->ctx). + */ +ElfAuxVector *fetch_elf_aux_vectors(const Tracee *tracee, word_t address) +{ + ElfAuxVector *vectors = NULL; + ElfAuxVector vector; + int status; + + /* It is assumed the sentinel always exists. */ + vectors = talloc_array(tracee->ctx, ElfAuxVector, 1); + if (vectors == NULL) + return NULL; + vectors[0].type = AT_NULL; + vectors[0].value = 0; + + while (1) { + vector.type = peek_word(tracee, address); + if (errno != 0) + return NULL; + address += sizeof_word(tracee); + + if (vector.type == AT_NULL) + break; /* Already added. */ + + vector.value = peek_word(tracee, address); + if (errno != 0) + return NULL; + address += sizeof_word(tracee); + + status = add_elf_aux_vector(&vectors, vector.type, vector.value); + if (status < 0) + return NULL; + } + + return vectors; +} + +/** + * Push ELF auxiliary @vectors to the given @address in @tracee's + * memory. This function returns -errno if an error occurred, + * otherwise 0. 
+ */ +int push_elf_aux_vectors(const Tracee* tracee, ElfAuxVector *vectors, word_t address) +{ + size_t i; + + for (i = 0; vectors[i].type != AT_NULL; i++) { /* @vectors must end with an AT_NULL entry */ + poke_word(tracee, address, vectors[i].type); + if (errno != 0) + return -errno; + address += sizeof_word(tracee); + + poke_word(tracee, address, vectors[i].value); + if (errno != 0) + return -errno; + address += sizeof_word(tracee); + } + + poke_word(tracee, address, AT_NULL); /* terminating [AT_NULL, 0] pair */ + if (errno != 0) + return -errno; + address += sizeof_word(tracee); + + poke_word(tracee, address, 0); + if (errno != 0) + return -errno; + address += sizeof_word(tracee); /* no effect: @address is not read afterwards */ + + return 0; +} diff --git a/proot/proot_linux/execve/auxv.h b/proot/proot_linux/execve/auxv.h new file mode 100644 index 0000000..cd5871b --- /dev/null +++ b/proot/proot_linux/execve/auxv.h @@ -0,0 +1,39 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef AUXV +#define AUXV + +#include "tracee/tracee.h" +#include "arch.h" + +typedef struct elf_aux_vector { + word_t type; /* Kind of entry; AT_NULL terminates a list of vectors. */ + word_t value; /* Payload associated with @type. */ +} ElfAuxVector; + +extern word_t get_elf_aux_vectors_address(const Tracee *tracee); +extern ElfAuxVector *fetch_elf_aux_vectors(const Tracee *tracee, word_t address); +extern int add_elf_aux_vector(ElfAuxVector **vectors, word_t type, word_t value); +extern int push_elf_aux_vectors(const Tracee* tracee, ElfAuxVector *vectors, word_t address); + +#endif /* AUXV */ diff --git a/proot/proot_linux/execve/elf.c b/proot/proot_linux/execve/elf.c new file mode 100644 index 0000000..b1f78e6 --- /dev/null +++ b/proot/proot_linux/execve/elf.c @@ -0,0 +1,178 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA.
+ */ + +#include /* open(2), */ +#include /* read(2), close(2), */ +#include /* EACCES, ENOTSUP, */ +#include /* UINT64_MAX, */ +#include /* PATH_MAX, */ +#include /* str*(3), memcpy(3), */ +#include /* assert(3), */ +#include /* talloc_*, */ +#include /* bool, true, false, */ + +#include "execve/elf.h" +#include "tracee/tracee.h" +#include "cli/note.h" +#include "arch.h" + +#include "compat.h" + +/** + * Open the ELF file @t_path and extract its header into @elf_header. + * This function returns -errno if an error occurred (-ENOEXEC if the file is too short, lacks the ELF magic number or has an unknown class), otherwise the + * file descriptor for @t_path, still open. + */ +int open_elf(const char *t_path, ElfHeader *elf_header) +{ + int fd; + int status; + + /* + * Read the ELF header. + */ + + fd = open(t_path, O_RDONLY); + if (fd < 0) + return -errno; + + /* Check if it is an ELF file: enough bytes for the header union, then the "\x7fELF" magic number. */ + status = read(fd, elf_header, sizeof(ElfHeader)); + if (status < 0) { + status = -errno; + goto end; + } + if ((size_t) status < sizeof(ElfHeader) + || ELF_IDENT(*elf_header, 0) != 0x7f + || ELF_IDENT(*elf_header, 1) != 'E' + || ELF_IDENT(*elf_header, 2) != 'L' + || ELF_IDENT(*elf_header, 3) != 'F') { + status = -ENOEXEC; + goto end; + } + + /* Check if it is a known class (32-bit or 64-bit). */ + if ( !IS_CLASS32(*elf_header) + && !IS_CLASS64(*elf_header)) { + status = -ENOEXEC; + goto end; + } + + status = 0; +end: + /* Delayed error handling: the descriptor is closed on every failure path. */ + if (status < 0) { + close(fd); + return status; + } + + return fd; +} + +/** + * Invoke @callback(..., @data) for each program header from the + * specified ELF file (referenced by @fd, with the given @elf_header). + * This function returns -errno if an error occurred, or it returns + * immediately the value != 0 returned by @callback, otherwise 0.
+ */ +int iterate_program_headers(const Tracee *tracee, int fd, const ElfHeader *elf_header, + program_headers_iterator_t callback, void *data) +{ + ProgramHeader program_header; + + uint64_t elf_phoff; + uint16_t elf_phentsize; + uint16_t elf_phnum; + + int status; + int i; + + /* Get class-specific fields. */ + elf_phnum = ELF_FIELD(*elf_header, phnum); + elf_phentsize = ELF_FIELD(*elf_header, phentsize); + elf_phoff = ELF_FIELD(*elf_header, phoff); + + /* + * Some sanity checks regarding the current + * support of the ELF specification in PRoot. + */ + + if (elf_phnum >= 0xffff) { + note(tracee, WARNING, INTERNAL, "%d: big PH tables are not yet supported.", fd); + return -ENOTSUP; + } + + if (!KNOWN_PHENTSIZE(*elf_header, elf_phentsize)) { + note(tracee, WARNING, INTERNAL, "%d: unsupported size of program header.", fd); + return -ENOTSUP; + } + + /* Test lseek(2)'s off_t result itself: once narrowed to int, a table offset >= 2 GiB could look negative and return -errno with a stale errno (possibly 0, i.e. a false success). */ + if (lseek(fd, elf_phoff, SEEK_SET) == (off_t) -1) + return -errno; + + for (i = 0; i < elf_phnum; i++) { + status = read(fd, &program_header, elf_phentsize); + if (status != elf_phentsize) + return (status < 0 ? -errno : -ENOTSUP); /* A short read is reported as -ENOTSUP. */ + + status = callback(elf_header, &program_header, data); + if (status != 0) + return status; /* Any non-zero value from @callback stops the iteration. */ + } + + return 0; +} + +/** + * Check if @host_path is an ELF file for the host architecture.
+ */ +bool is_host_elf(const Tracee *tracee, const char *host_path) +{ + int host_elf_machine[] = HOST_ELF_MACHINE; /* List of e_machine values, terminated by 0 (see the loop below). */ + static int force_foreign = -1; /* -1: PROOT_FORCE_FOREIGN_BINARY not looked up yet. */ + ElfHeader elf_header; + uint16_t elf_machine; + int fd; + int i; + + if (force_foreign < 0) + force_foreign = (getenv("PROOT_FORCE_FOREIGN_BINARY") != NULL); + + if (force_foreign > 0 || !tracee->qemu) + return false; /* Always "not host" when forced, or when no QEMU runner is set. */ + + fd = open_elf(host_path, &elf_header); + if (fd < 0) + return false; + close(fd); /* Only the header filled by open_elf() is needed. */ + + elf_machine = ELF_FIELD(elf_header, machine); + for (i = 0; host_elf_machine[i] != 0; i++) { + if (host_elf_machine[i] == elf_machine) { + VERBOSE(tracee, 1, "'%s' is a host ELF", host_path); + return true; + } + } + + return false; +} diff --git a/proot/proot_linux/execve/elf.h b/proot/proot_linux/execve/elf.h new file mode 100644 index 0000000..a5b367b --- /dev/null +++ b/proot/proot_linux/execve/elf.h @@ -0,0 +1,179 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA.
+ */ + +#ifndef ELF_H +#define ELF_H + +#define EI_NIDENT 16 + +#include +#include + +typedef struct { + unsigned char e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} ElfHeader32; + +typedef struct { + unsigned char e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} ElfHeader64; + +typedef union { + ElfHeader32 class32; + ElfHeader64 class64; +} ElfHeader; /* Read fields through ELF_FIELD(), which picks the member from e_ident[4]. */ + +typedef struct { + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} ProgramHeader32; + +typedef struct { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} ProgramHeader64; + +typedef union { + ProgramHeader32 class32; + ProgramHeader64 class64; +} ProgramHeader; /* Read fields through PROGRAM_FIELD(). */ + +/* Object type: */ +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 + +/* Segment flags: */ +#define PF_X 1 +#define PF_W 2 +#define PF_R 4 + +typedef enum { + PT_LOAD = 1, + PT_DYNAMIC = 2, + PT_INTERP = 3, + PT_GNU_STACK = 0x6474e551, +} SegmentType; + +typedef struct { + int32_t d_tag; + uint32_t d_val; +} DynamicEntry32; + +typedef struct { + int64_t d_tag; + uint64_t d_val; +} DynamicEntry64; + +typedef union { + DynamicEntry32 class32; + DynamicEntry64 class64; +} DynamicEntry; /* Read fields through DYNAMIC_FIELD(). */ + +typedef enum { + DT_STRTAB = 5, + DT_RPATH = 15, + DT_RUNPATH = 29 +} DynamicType; + +/* The following macros are also compatible
with ELF 64-bit: e_ident[] is the first member of both header layouts. */ +#define ELF_IDENT(header, index) (header).class32.e_ident[(index)] +#define ELF_CLASS(header) ELF_IDENT(header, 4) +#define IS_CLASS32(header) (ELF_CLASS(header) == 1) +#define IS_CLASS64(header) (ELF_CLASS(header) == 2) + +/* Helper to access a @field of the structure ElfHeaderXX; @field is given without its "e_" prefix and @header is evaluated more than once. */ +#define ELF_FIELD(header, field) \ + (IS_CLASS64(header) \ + ? (header).class64. e_ ## field \ + : (header).class32. e_ ## field) + +/* Helper to access a @field (without its "p_" prefix) of the structure ProgramHeaderXX, XX being the class of @ehdr. */ +#define PROGRAM_FIELD(ehdr, phdr, field) \ + (IS_CLASS64(ehdr) \ + ? (phdr).class64. p_ ## field \ + : (phdr).class32. p_ ## field) + +/* Helper to access a @field (without its "d_" prefix) of the structure DynamicEntryXX, XX being the class of @ehdr. */ +#define DYNAMIC_FIELD(ehdr, dynent, field) \ + (IS_CLASS64(ehdr) \ + ? (dynent).class64. d_ ## field \ + : (dynent).class32. d_ ## field) + +#define KNOWN_PHENTSIZE(header, size) \ + ( (IS_CLASS32(header) && (size) == sizeof(ProgramHeader32)) \ + || (IS_CLASS64(header) && (size) == sizeof(ProgramHeader64))) + +#define IS_POSITION_INDENPENDANT(elf_header) \ + (ELF_FIELD((elf_header), type) == ET_DYN) + +#include "tracee/tracee.h" + +extern int open_elf(const char *t_path, ElfHeader *elf_header); + +extern bool is_host_elf(const Tracee *tracee, const char *t_path); + +typedef int (* program_headers_iterator_t)(const ElfHeader *elf_header, + const ProgramHeader *program_header, void *data); + +extern int iterate_program_headers(const Tracee *tracee, int fd, const ElfHeader *elf_header, + program_headers_iterator_t callback, void *data); + +#endif /* ELF_H */ diff --git a/proot/proot_linux/execve/enter.c b/proot/proot_linux/execve/enter.c new file mode 100644 index 0000000..2ad765d --- /dev/null +++ b/proot/proot_linux/execve/enter.c @@ -0,0 +1,685 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot.
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* lstat(2), lseek(2), */ +#include /* lstat(2), lseek(2), fchmod(2), */ +#include /* access(2), lstat(2), close(2), read(2), */ +#include /* E*, */ +#include /* assert(3), */ +#include /* talloc*, */ +#include /* PROT_*, */ +#include /* strlen(3), strcpy(3), */ +#include /* getenv(3), */ +#include /* fwrite(3), */ +#include /* assert(3), */ + +#include "execve/execve.h" +#include "execve/shebang.h" +#include "execve/aoxp.h" +#include "execve/ldso.h" +#include "execve/elf.h" +#include "path/path.h" +#include "path/temp.h" +#include "path/binding.h" +#include "tracee/tracee.h" +#include "syscall/syscall.h" +#include "syscall/sysnum.h" +#include "arch.h" +#include "cli/note.h" + +#define P(a) PROGRAM_FIELD(load_info->elf_header, *program_header, a) /* Shorthand: expects "load_info" and "program_header" in the scope of its user. */ + +/** + * Add @program_header (type PT_LOAD) to @load_info->mappings. This + * function returns -errno if an error occurred, otherwise it returns + * 0.
+ */ +static int add_mapping(const Tracee *tracee UNUSED, LoadInfo *load_info, + const ProgramHeader *program_header) +{ + size_t index; + word_t start_address; + word_t end_address; + static word_t page_size = 0; + static word_t page_mask = 0; + + if (page_size == 0) { + page_size = sysconf(_SC_PAGE_SIZE); + if ((int) page_size <= 0) + page_size = 0x1000; /* Fall back to 4 KiB when sysconf(3) gives no usable value. */ + page_mask = ~(page_size - 1); + } + + if (load_info->mappings == NULL) + index = 0; + else + index = talloc_array_length(load_info->mappings); + + load_info->mappings = talloc_realloc(load_info, load_info->mappings, Mapping, index + 1); + if (load_info->mappings == NULL) + return -ENOMEM; + + start_address = P(vaddr) & page_mask; /* Rounded down to a page boundary. */ + end_address = (P(vaddr) + P(filesz) + page_size) & page_mask; /* First page boundary strictly above the file-backed bytes. */ + + load_info->mappings[index].fd = -1; /* Unknown yet. */ + load_info->mappings[index].offset = P(offset) & page_mask; + load_info->mappings[index].addr = start_address; + load_info->mappings[index].length = end_address - start_address; + load_info->mappings[index].flags = MAP_PRIVATE | MAP_FIXED; + load_info->mappings[index].prot = ( (P(flags) & PF_R ? PROT_READ : 0) + | (P(flags) & PF_W ? PROT_WRITE : 0) + | (P(flags) & PF_X ? PROT_EXEC : 0)); + + /* "If the segment's memory size p_memsz is larger than the + * file size p_filesz, the "extra" bytes are defined to hold + * the value 0 and to follow the segment's initialized area." + * -- man 7 elf. */ + if (P(memsz) > P(filesz)) { + /* How many extra bytes in the current page? */ + load_info->mappings[index].clear_length = end_address - P(vaddr) - P(filesz); + + /* Create new pages for the remaining extra bytes. */ + start_address = end_address; + end_address = (P(vaddr) + P(memsz) + page_size) & page_mask; + if (end_address > start_address) { + index++; + load_info->mappings = talloc_realloc(load_info, load_info->mappings, + Mapping, index + 1); + if (load_info->mappings == NULL) + return -ENOMEM; + + load_info->mappings[index].fd = -1; /* Anonymous.
*/ + load_info->mappings[index].offset = 0; + load_info->mappings[index].addr = start_address; + load_info->mappings[index].length = end_address - start_address; + load_info->mappings[index].clear_length = 0; + load_info->mappings[index].flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + load_info->mappings[index].prot = load_info->mappings[index - 1].prot; /* Same protection as the file-backed part. */ + } + } + else + load_info->mappings[index].clear_length = 0; + + return 0; +} + +/** + * Translate @user_path into @host_path and check if this latter exists, is + * executable and can be lstat(2)'ed (NOTE(review): the file type itself is not tested here). This function returns -errno if + * an error occurred, 0 otherwise. + */ +int translate_and_check_exec(Tracee *tracee, char host_path[PATH_MAX], const char *user_path) +{ + struct stat statl; + int status; + + if (user_path[0] == '\0') + return -ENOEXEC; + + status = translate_path(tracee, host_path, AT_FDCWD, user_path, true); + if (status < 0) + return status; + + status = access(host_path, F_OK); + if (status < 0) + return -ENOENT; + + status = access(host_path, X_OK); + if (status < 0) + return -EACCES; + + status = lstat(host_path, &statl); + if (status < 0) + return -EPERM; + + return 0; +} + +/** + * Add @program_header (type PT_INTERP) to @load_info->interp. This + * function returns -errno if an error occurred, otherwise it returns + * 0. + */ +static int add_interp(Tracee *tracee, int fd, LoadInfo *load_info, + const ProgramHeader *program_header) +{ + char host_path[PATH_MAX]; + char *user_path; + int status; + + /* Only one PT_INTERP segment is allowed. */ + if (load_info->interp != NULL) + return -EINVAL; + + load_info->interp = talloc_zero(load_info, LoadInfo); + if (load_info->interp == NULL) + return -ENOMEM; + + user_path = talloc_size(tracee->ctx, P(filesz) + 1); /* One extra byte for the terminating '\0' set below. */ + if (user_path == NULL) + return -ENOMEM; + + /* Remember pread(2) doesn't change the + * current position in the file. */ + status = pread(fd, user_path, P(filesz), P(offset)); + if ((size_t) status != P(filesz)) /* Unexpected size.
*/ + status = -EACCES; + if (status < 0) + return status; + + user_path[P(filesz)] = '\0'; + + /* When a QEMU command was specified: + * + * - if it's a foreign binary we are reading the ELF + * interpreter of QEMU instead. + * + * - if it's a host binary, we are reading its ELF + * interpreter. + * + * In both cases, it lies in "/host-rootfs" from a guest + * point-of-view. */ + if (tracee->qemu != NULL && user_path[0] == '/') { + user_path = talloc_asprintf(tracee->ctx, "%s%s", HOST_ROOTFS, user_path); + if (user_path == NULL) + return -ENOMEM; + } + + status = translate_and_check_exec(tracee, host_path, user_path); + if (status < 0) + return status; + + load_info->interp->host_path = talloc_strdup(load_info->interp, host_path); + if (load_info->interp->host_path == NULL) + return -ENOMEM; + + load_info->interp->user_path = talloc_strdup(load_info->interp, user_path); + if (load_info->interp->user_path == NULL) + return -ENOMEM; + + return 0; +} + +#undef P + +struct add_load_info_data { + LoadInfo *load_info; + Tracee *tracee; + int fd; +}; + +/** + * This function is a program header iterator. It invokes + * add_mapping() or add_interp(), according to the type of + * @program_header (PT_GNU_STACK only updates needs_executable_stack). This function returns -errno if an error + * occurred, otherwise 0.
+ */ +static int add_load_info(const ElfHeader *elf_header, + const ProgramHeader *program_header, void *data_) +{ + struct add_load_info_data *data = data_; + int status; + + switch (PROGRAM_FIELD(*elf_header, *program_header, type)) { + case PT_LOAD: + status = add_mapping(data->tracee, data->load_info, program_header); + if (status < 0) + return status; + break; + + case PT_INTERP: + status = add_interp(data->tracee, data->fd, data->load_info, program_header); + if (status < 0) + return status; + break; + + case PT_GNU_STACK: + data->load_info->needs_executable_stack |= + ((PROGRAM_FIELD(*elf_header, *program_header, flags) & PF_X) != 0); + break; + + default: + break; /* Other segment types are ignored. */ + } + + return 0; +} + +/** + * Extract the load info from @load_info->host_path. This function returns + * -errno if an error occurred, otherwise it returns 0. + */ +static int extract_load_info(Tracee *tracee, LoadInfo *load_info) +{ + struct add_load_info_data data; + int fd = -1; + int status; + + assert(load_info != NULL); + assert(load_info->host_path != NULL); + + fd = open_elf(load_info->host_path, &load_info->elf_header); + if (fd < 0) + return fd; + + /* Sanity check: only ET_EXEC and ET_DYN objects are accepted. */ + switch (ELF_FIELD(load_info->elf_header, type)) { + case ET_EXEC: + case ET_DYN: + break; + + default: + status = -EINVAL; + goto end; + } + + data.load_info = load_info; + data.tracee = tracee; + data.fd = fd; + + status = iterate_program_headers(tracee, fd, &load_info->elf_header, add_load_info, &data); +end: + if (fd >= 0) + close(fd); + + return status; +} + +/** + * Add @load_base to each address of @load_info.
+ */ +static void add_load_base(LoadInfo *load_info, word_t load_base) +{ + size_t nb_mappings; + size_t i; + + nb_mappings = talloc_array_length(load_info->mappings); + for (i = 0; i < nb_mappings; i++) + load_info->mappings[i].addr += load_base; + + if (IS_CLASS64(load_info->elf_header)) + load_info->elf_header.class64.e_entry += load_base; + else + load_info->elf_header.class32.e_entry += load_base; +} + +/** + * Compute the final load address for each position independant + * objects of @tracee. + * + * TODO: support for ASLR. + */ +static void compute_load_addresses(Tracee *tracee) +{ + if (IS_POSITION_INDENPENDANT(tracee->load_info->elf_header) + && tracee->load_info->mappings[0].addr == 0) { +#if defined(HAS_LOADER_32BIT) + if (IS_CLASS32(tracee->load_info->elf_header)) + add_load_base(tracee->load_info, EXEC_PIC_ADDRESS_32); + else +#endif + add_load_base(tracee->load_info, EXEC_PIC_ADDRESS); + } + + /* Nothing more to do? */ + if (tracee->load_info->interp == NULL) + return; + + if (IS_POSITION_INDENPENDANT(tracee->load_info->interp->elf_header) + && tracee->load_info->interp->mappings[0].addr == 0) { +#if defined(HAS_LOADER_32BIT) + if (IS_CLASS32(tracee->load_info->elf_header)) + add_load_base(tracee->load_info->interp, INTERP_PIC_ADDRESS_32); + else +#endif + add_load_base(tracee->load_info->interp, INTERP_PIC_ADDRESS); + } +} + +/** + * Expand in argv[] and envp[] the runner for @user_path, if needed. + * This function returns -errno if an error occurred, otherwise 0. On + * success, both @host_path and @user_path point to the program to + * execute (respectively from host and guest point-of-views), and both + * @tracee's argv[] (pointed to by SYSARG_2) @tracee's envp[] (pointed + * to by SYSARG_3) are correctly updated. 
+ */ +static int expand_runner(Tracee* tracee, char host_path[PATH_MAX], char user_path[PATH_MAX]) +{ + ArrayOfXPointers *envp; + char *argv0; + int status; + + /* Execution of host programs when QEMU is in use relies on + * LD_ environment variables. */ + status = fetch_array_of_xpointers(tracee, &envp, SYSARG_3, 0); + if (status < 0) + return status; + + /* Environment variables should be compared with the "name" + * part of the "name=value" string format. */ + envp->compare_xpointee = (compare_xpointee_t) compare_xpointee_env; + + /* No need to adjust argv[] if it's a host binary (a.k.a + * mixed-mode). */ + if (tracee->mixed_mode || !is_host_elf(tracee, host_path)) { + ArrayOfXPointers *argv; + size_t nb_qemu_args; + size_t i; + + status = fetch_array_of_xpointers(tracee, &argv, SYSARG_2, 0); + if (status < 0) + return status; + + status = read_xpointee_as_string(argv, 0, &argv0); + if (status < 0) + return status; + + /* Assuming PRoot was invoked this way: + * + * proot -q 'qemu-arm -cpu cortex-a9' ... + * + * a call to: + * + * execve("/bin/true", { "true", NULL }, ...) + * + * becomes: + * + * execve("/usr/bin/qemu", + * { "qemu", "-cpu", "cortex-a9", "-0", "true", "/bin/true", NULL }, ...) + */ + + nb_qemu_args = talloc_array_length(tracee->qemu) - 1; + status = resize_array_of_xpointers(argv, 1, nb_qemu_args + 2); + if (status < 0) + return status; + + for (i = 0; i < nb_qemu_args; i++) { + status = write_xpointee(argv, i, tracee->qemu[i]); + if (status < 0) + return status; + } + + status = write_xpointees(argv, i, 3, "-0", argv0, user_path); + if (status < 0) + return status; + + /* Ensure LD_ features are not applied to QEMU + * itself. */ + status = ldso_env_passthru(tracee, envp, argv, "-E", "-U", i); + if (status < 0) + return status; + + status = push_array_of_xpointers(argv, SYSARG_2); + if (status < 0) + return status; + + /* Launch the runner in lieu of the initial + * program.
*/ + assert(strlen(tracee->qemu[0]) + strlen(HOST_ROOTFS) < PATH_MAX); + assert(tracee->qemu[0][0] == '/'); + + strcpy(host_path, tracee->qemu[0]); + + strcpy(user_path, HOST_ROOTFS); + strcat(user_path, host_path); + } + + /* Provide information to the host dynamic linker to find host + * libraries (remember the guest root file-system contains + * libraries for the guest architecture only). */ + status = rebuild_host_ldso_paths(tracee, host_path, envp); + if (status < 0) + return status; + + status = push_array_of_xpointers(envp, SYSARG_3); + if (status < 0) + return status; + + return 0; +} + +extern unsigned char _binary_loader_elf_start[] = ""; /* NOTE(review): the initializer makes each of these four "extern" lines a one-byte definition, so end - start would not measure an embedded loader image unless the link step supplies the real symbols -- confirm against the build. */ +extern unsigned char _binary_loader_elf_end[] = ""; + +extern unsigned char WEAK _binary_loader_m32_elf_start[] = ""; +extern unsigned char WEAK _binary_loader_m32_elf_end[] = ""; + +/** + * Extract the built-in loader. This function returns NULL if an + * error occurred, otherwise it returns the path to the extracted + * loader. Note: @tracee is only used for notification purpose.
+ */ +static char *extract_loader(const Tracee *tracee, bool wants_32bit_version) +{ + char path[PATH_MAX]; + size_t status2; + void *start; + size_t size; + int status; + int fd; + + char *loader_path = NULL; + FILE *file = NULL; + + file = open_temp_file(NULL, "prooted"); + if (file == NULL) + goto end; + fd = fileno(file); + + if (wants_32bit_version) { + start = (void *) _binary_loader_m32_elf_start; + size = (size_t)(_binary_loader_m32_elf_end-_binary_loader_m32_elf_start); + } + else { + start = (void *) _binary_loader_elf_start; + size = (size_t) (_binary_loader_elf_end-_binary_loader_elf_start); + } + + status2 = write(fd, start, size); /* A partial write is treated as a failure. */ + if (status2 != size) { + note(tracee, ERROR, SYSTEM, "can't write the loader"); + goto end; + } + + status = fchmod(fd, S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); /* r-x for user, group and others. */ + if (status < 0) { + note(tracee, ERROR, SYSTEM, "can't change loader permissions (u+rx)"); + goto end; + } + + status = readlink_proc_pid_fd(getpid(), fd, path); + if (status < 0) { + note(tracee, ERROR, INTERNAL, "can't retrieve loader path (/proc/self/fd/)"); + goto end; + } + + status = access(path, X_OK); /* Detects a temporary directory that does not allow execution. */ + if (status < 0) { + note(tracee, ERROR, INTERNAL, + "it seems the current temporary directory (%s) " + "is mounted with no execution permission.", + get_temp_directory()); + note(tracee, INFO, USER, + "Please set PROOT_TMP_DIR env. variable to an alternate " + "location ('%s/tmp' for example).", get_root(tracee)); + goto end; + } + + loader_path = talloc_strdup(talloc_autofree_context(), path); /* Parented to talloc's autofree context, not to @tracee. */ + if (loader_path == NULL) { + note(tracee, ERROR, INTERNAL, "can't allocate memory"); + goto end; + } + + if (tracee->verbose >= 2) + note(tracee, INFO, INTERNAL, "loader: %s", loader_path); + +end: + if (file != NULL) { + status = fclose(file); + if (status < 0) + note(tracee, WARNING, SYSTEM, "can't close loader file"); + } + + return loader_path; /* Still NULL if any step above failed. */ +} + +/** + * Get the path to the loader for the given @tracee.
This function + * returns NULL if an error occurred. A non-NULL result is cached; the PROOT_LOADER (or PROOT_LOADER_32) environment variable takes precedence over the built-in loader. + */ +static inline const char *get_loader_path(const Tracee *tracee) +{ + static char *loader_path = NULL; + +#if defined(HAS_LOADER_32BIT) + static char *loader32_path = NULL; + + if (IS_CLASS32(tracee->load_info->elf_header)) { + loader32_path = loader32_path ?: getenv("PROOT_LOADER_32") ?: extract_loader(tracee, true); + return loader32_path; + } + else +#endif + { + loader_path = loader_path ?: getenv("PROOT_LOADER") ?: extract_loader(tracee, false); + return loader_path; + } +} + +/** + * Extract all the information that will be required by + * translate_load_*(). This function returns -errno if an error + * occurred, otherwise 0. + */ +int translate_execve_enter(Tracee *tracee) +{ + char user_path[PATH_MAX]; + char host_path[PATH_MAX]; + char new_exe[PATH_MAX]; + char *raw_path; + const char *loader_path; + int status; + + if (IS_NOTIFICATION_PTRACED_LOAD_DONE(tracee)) { + /* Syscalls can now be reported to its ptracer. */ + tracee->as_ptracee.ignore_loader_syscalls = false; + + /* Cancel this spurious execve, it was only used as a + * notification. */ + set_sysnum(tracee, PR_void); + return 0; + } + + status = get_sysarg_path(tracee, user_path, SYSARG_1); + if (status < 0) + return status; + + /* Remember the user path before it is overwritten by + * expand_shebang(). This "raw" path is useful to fix the + * value of AT_EXECFN and /proc/{@tracee->pid}/comm. */ + raw_path = talloc_strdup(tracee->ctx, user_path); + if (raw_path == NULL) + return -ENOMEM; + + status = expand_shebang(tracee, host_path, user_path); + if (status < 0) + /* The Linux kernel actually returns -EACCES when + * trying to execute a directory. */ + return status == -EISDIR ? -EACCES : status; + + /* user_path is modified only if there's an interpreter + * (ie. for a script or with qemu). */ + if (status == 0 && tracee->qemu == NULL) + TALLOC_FREE(raw_path); + + /* Remember the new value for "/proc/self/exe".
It points to + * a canonicalized guest path, hence detranslate_path() + * instead of using user_path directly. */ + strcpy(new_exe, host_path); + status = detranslate_path(tracee, new_exe, NULL); + if (status >= 0) { + talloc_unlink(tracee, tracee->new_exe); + tracee->new_exe = talloc_strdup(tracee, new_exe); + } + else + tracee->new_exe = NULL; /* NOTE(review): unlike the branch above, the previous value is not unlinked here -- confirm this is intended. */ + + if (tracee->qemu != NULL) { + status = expand_runner(tracee, host_path, user_path); + if (status < 0) + return status; + } + + TALLOC_FREE(tracee->load_info); /* Drop the load info of the previous execve, if any. */ + + tracee->load_info = talloc_zero(tracee, LoadInfo); + if (tracee->load_info == NULL) + return -ENOMEM; + + tracee->load_info->host_path = talloc_strdup(tracee->load_info, host_path); + if (tracee->load_info->host_path == NULL) + return -ENOMEM; + + tracee->load_info->user_path = talloc_strdup(tracee->load_info, user_path); + if (tracee->load_info->user_path == NULL) + return -ENOMEM; + + tracee->load_info->raw_path = (raw_path != NULL + ? talloc_reparent(tracee->ctx, tracee->load_info, raw_path) + : talloc_reference(tracee->load_info, tracee->load_info->user_path)); + if (tracee->load_info->raw_path == NULL) + return -ENOMEM; + + status = extract_load_info(tracee, tracee->load_info); + if (status < 0) + return status; + + if (tracee->load_info->interp != NULL) { + status = extract_load_info(tracee, tracee->load_info->interp); + if (status < 0) + return status; + + /* An ELF interpreter is supposed to be + * standalone. */ + if (tracee->load_info->interp->interp != NULL) + return -EINVAL; + } + + compute_load_addresses(tracee); + + /* Execute the loader instead of the program. */ + loader_path = get_loader_path(tracee); + if (loader_path == NULL) + return -ENOENT; + + status = set_sysarg_path(tracee, loader_path, SYSARG_1); + if (status < 0) + return status; + + /* Mask to its ptracer syscalls performed by the loader.
*/ + tracee->as_ptracee.ignore_loader_syscalls = true; + + return 0; +} diff --git a/proot/proot_linux/execve/execve.h b/proot/proot_linux/execve/execve.h new file mode 100644 index 0000000..4a1a409 --- /dev/null +++ b/proot/proot_linux/execve/execve.h @@ -0,0 +1,64 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA.
+ */ + +#ifndef EXECVE_H +#define EXECVE_H + +#include /* PATH_MAX, */ + +#include "tracee/tracee.h" +#include "execve/elf.h" +#include "arch.h" + +extern int translate_execve_enter(Tracee *tracee); +extern void translate_execve_exit(Tracee *tracee); +extern int translate_and_check_exec(Tracee *tracee, char host_path[PATH_MAX], const char *user_path); + +typedef struct mapping { + word_t addr; /* Start address, aligned on a page boundary. */ + word_t length; /* Size in bytes, a whole number of pages. */ + word_t clear_length; /* Count of bytes following the file-backed data that must hold 0; 0 if none. */ + word_t prot; /* PROT_* bits derived from the segment's PF_* flags. */ + word_t flags; /* MAP_* bits. */ + word_t fd; /* Set to -1 by add_mapping(): not known yet, or anonymous mapping. */ + word_t offset; /* Page-aligned offset in the file; 0 for anonymous mappings. */ +} Mapping; + +typedef struct load_info { + char *host_path; /* Program path, from the host point-of-view. */ + char *user_path; /* Program path, from the guest point-of-view. */ + char *raw_path; /* User path as first read from the execve(2) argument. */ + Mapping *mappings; /* Talloc array filled from PT_LOAD segments; NULL if there is none. */ + ElfHeader elf_header; + bool needs_executable_stack; /* Set when a PT_GNU_STACK segment carries PF_X. */ + + struct load_info *interp; /* Load info of the PT_INTERP program; NULL if there is none. */ +} LoadInfo; + +#define IS_NOTIFICATION_PTRACED_LOAD_DONE(tracee) ( \ + (tracee)->as_ptracee.ptracer != NULL \ + && peek_reg((tracee), ORIGINAL, SYSARG_1) == (word_t) 1 \ + && peek_reg((tracee), ORIGINAL, SYSARG_4) == (word_t) 2 \ + && peek_reg((tracee), ORIGINAL, SYSARG_5) == (word_t) 3 \ + && peek_reg((tracee), ORIGINAL, SYSARG_6) == (word_t) 4) + +#endif /* EXECVE_H */ diff --git a/proot/proot_linux/execve/exit.c b/proot/proot_linux/execve/exit.c new file mode 100644 index 0000000..415d7df --- /dev/null +++ b/proot/proot_linux/execve/exit.c @@ -0,0 +1,479 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/auxvec.h>  /* AT_*,  */
+#include <talloc.h>     /* talloc*, */
+#include <sys/mman.h>   /* MAP_*, */
+#include <assert.h>     /* assert(3), */
+#include <string.h>     /* strlen(3), strerror(3), */
+#include <strings.h>    /* bzero(3), */
+#include <signal.h>     /* kill(2), SIG*, */
+#include <unistd.h>     /* write(2), */
+#include <errno.h>      /* E*, */
+#include <fcntl.h>      /* open(2), O_WRONLY, */
+
+#include "execve/execve.h"
+#include "execve/elf.h"
+#include "loader/script.h"
+#include "tracee/reg.h"
+#include "tracee/abi.h"
+#include "tracee/mem.h"
+#include "syscall/sysnum.h"
+#include "execve/auxv.h"
+#include "path/binding.h"
+#include "path/temp.h"
+#include "cli/note.h"
+
+
+/**
+ * Fill @path with the content of @vectors, formatted according to
+ * @ptracee's current ABI: the type and the value of each entry are
+ * written using sizeof_word(@ptracee) bytes each.  @path has to exist
+ * already since it is opened without O_CREAT.  This function returns
+ * -1 if an error occurred, otherwise 0.
+ */
+static int fill_file_with_auxv(const Tracee *ptracee, const char *path,
+			const ElfAuxVector *vectors)
+{
+	const ssize_t current_sizeof_word = sizeof_word(ptracee);
+	ssize_t status;
+	int fd;
+	int i;
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	/* The entry that ends the list (type AT_NULL) is written too,
+	 * hence the test at the bottom of the loop.  */
+	i = 0;
+	do {
+		/* A short write is handled as a failure.  */
+		status = write(fd, &vectors[i].type, current_sizeof_word);
+		if (status < current_sizeof_word) {
+			status = -1;
+			goto end;
+		}
+
+		status = write(fd, &vectors[i].value, current_sizeof_word);
+		if (status < current_sizeof_word) {
+			status = -1;
+			goto end;
+		}
+	} while (vectors[i++].type != AT_NULL);
+
+	status = 0;
+end:
+	/* fd is always valid here: a failed open(2) returned above.  */
+	(void) close(fd);
+
+	return status;
+}
+
+/**
+ * Bind content of @vectors over /proc/{@ptracee->pid}/auxv.  This
+ * function returns -1 if an error occurred, otherwise 0.
+ */
+static int bind_proc_pid_auxv(const Tracee *ptracee)
+{
+	ElfAuxVector *auxv;
+	word_t auxv_address;
+
+	const char *proc_path;
+	const char *temp_path;
+	Binding *stale;
+	Binding *fresh;
+
+	/* Fetch the ELF auxiliary vectors from the ptracee's memory.  */
+	auxv_address = get_elf_aux_vectors_address(ptracee);
+	if (auxv_address == 0)
+		return -1;
+
+	auxv = fetch_elf_aux_vectors(ptracee, auxv_address);
+	if (auxv == NULL)
+		return -1;
+
+	/* Guest location these vectors have to be visible at.  */
+	proc_path = talloc_asprintf(ptracee->ctx, "/proc/%d/auxv", ptracee->pid);
+	if (proc_path == NULL)
+		return -1;
+
+	/* A binding that sits exactly on this path was set up for the
+	 * previous execve(2): its content is outdated, drop it.  */
+	stale = get_binding(ptracee, GUEST, proc_path);
+	if (stale != NULL && compare_paths(stale->guest.path, proc_path) == PATHS_ARE_EQUAL) {
+		remove_binding_from_all_lists(ptracee, stale);
+		TALLOC_FREE(stale);
+	}
+
+	/* Dump the vectors into a fresh temporary file on the host.  */
+	temp_path = create_temp_file(ptracee->ctx, "auxv");
+	if (temp_path == NULL)
+		return -1;
+
+	if (fill_file_with_auxv(ptracee, temp_path, auxv) < 0)
+		return -1;
+
+	/* The binding is allocated under ptracee->life_context, so it
+	 * goes away along with the ptracee.  */
+	fresh = insort_binding3(ptracee, ptracee->life_context, temp_path, proc_path);
+	if (fresh == NULL)
+		return -1;
+
+	/* Hand the temporary file over to the binding: it will be
+	 * removed once the binding is freed.  */
+	talloc_reparent(ptracee->ctx, fresh, temp_path);
+
+	return 0;
+}
+
+/**
+ * Convert @mappings into load @script statements at the given @cursor
+ * position.  This function returns the new cursor position.
+ */ +static void *transcript_mappings(void *cursor, const Mapping *mappings) +{ + size_t nb_mappings; + size_t i; + + nb_mappings = talloc_array_length(mappings); + for (i = 0; i < nb_mappings; i++) { + LoadStatement *statement = cursor; + + if ((mappings[i].flags & MAP_ANONYMOUS) != 0) + statement->action = LOAD_ACTION_MMAP_ANON; + else + statement->action = LOAD_ACTION_MMAP_FILE; + + statement->mmap.addr = mappings[i].addr; + statement->mmap.length = mappings[i].length; + statement->mmap.prot = mappings[i].prot; + statement->mmap.offset = mappings[i].offset; + statement->mmap.clear_length = mappings[i].clear_length; + + cursor += LOAD_STATEMENT_SIZE(*statement, mmap); + } + + return cursor; +} + +/** + * Convert @tracee->load_info into a load script, then transfer this + * latter into @tracee's memory. + */ +static int transfer_load_script(Tracee *tracee) +{ + const word_t stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); + static word_t page_size = 0; + static word_t page_mask = 0; + + word_t entry_point; + + size_t script_size; + size_t strings_size; + size_t string1_size; + size_t string2_size; + size_t string3_size; + size_t padding_size; + + word_t string1_address; + word_t string2_address; + word_t string3_address; + + void *buffer; + size_t buffer_size; + + bool needs_executable_stack; + LoadStatement *statement; + void *cursor; + int status; + + if (page_size == 0) { + page_size = sysconf(_SC_PAGE_SIZE); + if ((int) page_size <= 0) + page_size = 0x1000; + page_mask = ~(page_size - 1); + } + + needs_executable_stack = (tracee->load_info->needs_executable_stack + || ( tracee->load_info->interp != NULL + && tracee->load_info->interp->needs_executable_stack)); + + /* Strings addresses are required to generate the load script, + * for "open" actions. Since I want to generate it in one + * pass, these strings will be put right below the current + * stack pointer -- the only known adresses so far -- in the + * "strings area". 
*/ + string1_size = strlen(tracee->load_info->user_path) + 1; + + string2_size = (tracee->load_info->interp == NULL ? 0 + : strlen(tracee->load_info->interp->user_path) + 1); + + string3_size = (tracee->load_info->raw_path == tracee->load_info->user_path ? 0 + : strlen(tracee->load_info->raw_path) + 1); + + /* A padding will be appended at the end of the load script + * (a.k.a "strings area") to ensure this latter is aligned properly. */ + padding_size = (stack_pointer - string1_size - string2_size - string3_size) + % STACK_ALIGNMENT; + + strings_size = string1_size + string2_size + string3_size + padding_size; + string1_address = stack_pointer - strings_size; + string2_address = stack_pointer - strings_size + string1_size; + string3_address = (string3_size == 0 + ? string1_address + : stack_pointer - strings_size + string1_size + string2_size); + + /* Compute the size of the load script. */ + script_size = + LOAD_STATEMENT_SIZE(*statement, open) + + (LOAD_STATEMENT_SIZE(*statement, mmap) + * talloc_array_length(tracee->load_info->mappings)) + + (tracee->load_info->interp == NULL ? 0 + : LOAD_STATEMENT_SIZE(*statement, open) + + (LOAD_STATEMENT_SIZE(*statement, mmap) + * talloc_array_length(tracee->load_info->interp->mappings))) + + (needs_executable_stack ? LOAD_STATEMENT_SIZE(*statement, make_stack_exec) : 0) + + LOAD_STATEMENT_SIZE(*statement, start); + + /* Allocate enough room for both the load script and the + * strings area. */ + buffer_size = script_size + strings_size; + buffer = talloc_zero_size(tracee->ctx, buffer_size); + if (buffer == NULL) + return -ENOMEM; + + cursor = buffer; + + /* Load script statement: open. */ + statement = cursor; + statement->action = LOAD_ACTION_OPEN; + statement->open.string_address = string1_address; + + cursor += LOAD_STATEMENT_SIZE(*statement, open); + + /* Load script statements: mmap. 
*/ + cursor = transcript_mappings(cursor, tracee->load_info->mappings); + + if (tracee->load_info->interp != NULL) { + /* Load script statement: open. */ + statement = cursor; + statement->action = LOAD_ACTION_OPEN_NEXT; + statement->open.string_address = string2_address; + + cursor += LOAD_STATEMENT_SIZE(*statement, open); + + /* Load script statements: mmap. */ + cursor = transcript_mappings(cursor, tracee->load_info->interp->mappings); + + entry_point = ELF_FIELD(tracee->load_info->interp->elf_header, entry); + } + else + entry_point = ELF_FIELD(tracee->load_info->elf_header, entry); + + if (needs_executable_stack) { + /* Load script statement: stack_exec. */ + statement = cursor; + + statement->action = LOAD_ACTION_MAKE_STACK_EXEC; + statement->make_stack_exec.start = stack_pointer & page_mask; + + cursor += LOAD_STATEMENT_SIZE(*statement, make_stack_exec); + } + + /* Load script statement: start. */ + statement = cursor; + + /* Start of the program slightly differs when ptraced. */ + if (tracee->as_ptracee.ptracer != NULL) + statement->action = LOAD_ACTION_START_TRACED; + else + statement->action = LOAD_ACTION_START; + + statement->start.stack_pointer = stack_pointer; + statement->start.entry_point = entry_point; + statement->start.at_phent = ELF_FIELD(tracee->load_info->elf_header, phentsize); + statement->start.at_phnum = ELF_FIELD(tracee->load_info->elf_header, phnum); + statement->start.at_entry = ELF_FIELD(tracee->load_info->elf_header, entry); + statement->start.at_phdr = ELF_FIELD(tracee->load_info->elf_header, phoff) + + tracee->load_info->mappings[0].addr; + statement->start.at_execfn = string3_address; + + cursor += LOAD_STATEMENT_SIZE(*statement, start); + + /* Sanity check. */ + assert((uintptr_t) cursor - (uintptr_t) buffer == script_size); + + /* Convert the load script to the expected format. 
*/ + if (is_32on64_mode(tracee)) { + int i; + for (i = 0; buffer + i * sizeof(uint64_t) < cursor; i++) + ((uint32_t *) buffer)[i] = ((uint64_t *) buffer)[i]; + } + + /* Concatenate the load script and the strings. */ + memcpy(cursor, tracee->load_info->user_path, string1_size); + cursor += string1_size; + + if (string2_size != 0) { + memcpy(cursor, tracee->load_info->interp->user_path, string2_size); + cursor += string2_size; + } + + if (string3_size != 0) { + memcpy(cursor, tracee->load_info->raw_path, string3_size); + cursor += string3_size; + } + + /* Sanity check. */ + cursor += padding_size; + assert((uintptr_t) cursor - (uintptr_t) buffer == buffer_size); + + /* Allocate enough room in tracee's memory for the load + * script, and make the first user argument points to this + * location. Note that it is safe to update the stack pointer + * manually since we are in execve sysexit. However it should + * be done before transfering data since the kernel might not + * allow page faults below the stack pointer. */ + poke_reg(tracee, STACK_POINTER, stack_pointer - buffer_size); + poke_reg(tracee, USERARG_1, stack_pointer - buffer_size); + + /* Copy everything in the tracee's memory at once. */ + status = write_data(tracee, stack_pointer - buffer_size, buffer, buffer_size); + if (status < 0) + return status; + + /* Tracee's stack content is now as follow: + * + * +------------+ <- initial stack pointer (higher address) + * | padding | + * +------------+ + * | string3 | + * +------------+ + * | string2 | + * +------------+ + * | string1 | + * +------------+ + * | start | + * +------------+ + * | mmap anon | + * +------------+ + * | mmap file | + * +------------+ + * | open next | + * +------------+ + * | mmap anon. | + * +------------+ + * | mmap file | + * +------------+ + * | open | + * +------------+ <- stack pointer, userarg1 (word aligned) + */ + + /* Remember we are in the sysexit stage, so be sure the + * current register values will be used as-is at the end. 
*/ + save_current_regs(tracee, ORIGINAL); + tracee->_regs_were_changed = true; + + return 0; +} + +/** + * Start the loading of @tracee. This function returns no error since + * it's either too late to do anything useful (the calling process is + * already replaced) or the error reported by the kernel + * (syscall_result < 0) will be propagated as-is. + */ +void translate_execve_exit(Tracee *tracee) +{ + word_t syscall_result; + int status; + + if (IS_NOTIFICATION_PTRACED_LOAD_DONE(tracee)) { + /* Be sure not to confuse the ptracer with an + * unexpected syscall/returned value. */ + poke_reg(tracee, SYSARG_RESULT, 0); + set_sysnum(tracee, PR_execve); + + /* According to most ABIs, all registers have + * undefined values at program startup except: + * + * - the stack pointer + * - the instruction pointer + * - the rtld_fini pointer + * - the state flags + */ + poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_2)); + poke_reg(tracee, INSTR_POINTER, peek_reg(tracee, ORIGINAL, SYSARG_3)); + poke_reg(tracee, RTLD_FINI, 0); + poke_reg(tracee, STATE_FLAGS, 0); + + /* Restore registers to their current values. */ + save_current_regs(tracee, ORIGINAL); + tracee->_regs_were_changed = true; + + /* This is is required to make GDB work correctly + * under PRoot, however it deserves to be used + * unconditionally. */ + (void) bind_proc_pid_auxv(tracee); + + /* If the PTRACE_O_TRACEEXEC option is *not* in effect + * for the execing tracee, the kernel delivers an + * extra SIGTRAP to the tracee after execve(2) + * *returns*. This is an ordinary signal (similar to + * one which can be generated by "kill -TRAP"), not a + * special kind of ptrace-stop. Employing + * PTRACE_GETSIGINFO for this signal returns si_code + * set to 0 (SI_USER). This signal may be blocked by + * signal mask, and thus may be delivered (much) + * later. 
-- man 2 ptrace + * + * This signal is delayed so far since the program was + * not fully loaded yet; GDB would get "invalid + * adress" errors otherwise. */ + if ((tracee->as_ptracee.options & PTRACE_O_TRACEEXEC) == 0) + kill(tracee->pid, SIGTRAP); + + return; + } + + syscall_result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + if ((int) syscall_result < 0) + return; + + /* Execve happened; commit the new "/proc/self/exe". */ + if (tracee->new_exe != NULL) { + (void) talloc_unlink(tracee, tracee->exe); + tracee->exe = talloc_reference(tracee, tracee->new_exe); + talloc_set_name_const(tracee->exe, "$exe"); + } + + /* New processes have no heap. The process could've been cloned with + * CLONE_VM so it has been sharing the heap with its parent. execve() + * discards the VM so make sure to reallocate new heap. */ + if (talloc_reference_count(tracee->heap) > 0) { + talloc_unlink(tracee, tracee->heap); + tracee->heap = talloc_zero(tracee, Heap); + if (!tracee->heap) + note(tracee, ERROR, INTERNAL, "can't allocate heap"); + } else { + bzero(tracee->heap, sizeof(Heap)); + } + + /* Transfer the load script to the loader. */ + status = transfer_load_script(tracee); + if (status < 0) + note(tracee, ERROR, INTERNAL, "can't transfer load script: %s", strerror(-status)); + + return; +} diff --git a/proot/proot_linux/execve/ldso.c b/proot/proot_linux/execve/ldso.c new file mode 100644 index 0000000..c19881a --- /dev/null +++ b/proot/proot_linux/execve/ldso.c @@ -0,0 +1,571 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <stdbool.h>  /* bool, true, false,  */
+#include <string.h>   /* strlen(3), strcpy(3), strcat(3), strcmp(3), */
+#include <stdlib.h>   /* getenv(3), */
+#include <assert.h>   /* assert(3), */
+#include <errno.h>    /* ENOMEM, */
+#include <unistd.h>   /* close(2), */
+#include <linux/limits.h>  /* PATH_MAX, ARG_MAX, */
+
+#include "execve/ldso.h"
+#include "execve/elf.h"
+#include "execve/aoxp.h"
+#include "tracee/tracee.h"
+#include "cli/note.h"
+
+/**
+ * Check if the environment @variable (a "NAME=value" string) has the
+ * given @name, that is, if it starts with @name immediately followed
+ * by '='.  Both strings have to be NUL-terminated.  Note that an
+ * empty @name never matches because of the first character test.
+ */
+bool is_env_name(const char *variable, const char *name)
+{
+	size_t length = strlen(name);
+
+	/* The first test is a cheap filter; the second one ensures
+	 * that variable[length] is within the string.  */
+	return (variable[0] == name[0]
+		&& length < strlen(variable)
+		&& variable[length] == '='
+		&& strncmp(variable, name, length) == 0);
+}
+
+/**
+ * This function returns 1 if the entry in @envp at the given @index
+ * is the environment variable named @reference, 0 if it is not or if
+ * this entry is NULL, otherwise it returns -errno when an error
+ * occurred.  @index has to be lower than @envp->length.
+ */
+int compare_xpointee_env(ArrayOfXPointers *envp, size_t index, const char *reference)
+{
+	char *value;
+	int status;
+
+	assert(index < envp->length);
+
+	status = read_xpointee_as_string(envp, index, &value);
+	if (status < 0)
+		return status;
+
+	if (value == NULL)
+		return 0;
+
+	return (int)is_env_name(value, reference);
+}
+
+/**
+ * This function ensures that environment variables related to the
+ * dynamic linker are applied to the emulated program, not to QEMU
+ * itself.
For instance, let's say the user has entered the + * command-line below: + * + * env LD_TRACE_LOADED_OBJECTS=1 /bin/ls + * + * It should be converted to: + * + * qemu -E LD_TRACE_LOADED_OBJECTS=1 /bin/ls + * + * instead of: + * + * env LD_TRACE_LOADED_OBJECTS=1 qemu /bin/ls + * + * Note that the LD_LIBRARY_PATH variable is always required to run + * QEMU (a host binary): + * + * env LD_LIBRARY_PATH=... qemu -U LD_LIBRARY_PATH /bin/ls + * + * or when LD_LIBRARY_PATH was also specified by the user: + * + * env LD_LIBRARY_PATH=... qemu -E LD_LIBRARY_PATH=... /bin/ls + * + * This funtion returns -errno if an error occured, otherwise 0. + */ +int ldso_env_passthru(const Tracee *tracee, ArrayOfXPointers *envp, ArrayOfXPointers *argv, + const char *define, const char *undefine, size_t offset) +{ + bool has_seen_library_path = false; + int status; + size_t i; + + for (i = 0; i < envp->length; i++) { + bool is_known = false; + char *env; + + status = read_xpointee_as_string(envp, i, &env); + if (status < 0) + return status; + + /* Skip variables that do not start with "LD_". */ + if (env == NULL || strncmp(env, "LD_", sizeof("LD_") - 1) != 0) + continue; + + /* When a host program executes a guest program, use + * the value of LD_LIBRARY_PATH as it was before being + * swapped by the mixed-mode support. */ + if ( tracee->host_ldso_paths != NULL + && tracee->guest_ldso_paths != NULL + && is_env_name(env, "LD_LIBRARY_PATH") + && strcmp(env, tracee->host_ldso_paths) == 0) + env = (char *) tracee->guest_ldso_paths; + +#define PASSTHRU(check, name) \ + if (is_env_name(env, name)) { \ + check |= true; \ + /* Errors are not fatal here. 
*/ \ + status = resize_array_of_xpointers(argv, offset, 2); \ + if (status >= 0) { \ + status = write_xpointees(argv, offset, 2, define, env); \ + if (status < 0) \ + return status; \ + } \ + write_xpointee(envp, i, ""); \ + continue; \ + } \ + + PASSTHRU(has_seen_library_path, "LD_LIBRARY_PATH"); + PASSTHRU(is_known, "LD_PRELOAD"); + PASSTHRU(is_known, "LD_BIND_NOW"); + PASSTHRU(is_known, "LD_TRACE_LOADED_OBJECTS"); + PASSTHRU(is_known, "LD_AOUT_LIBRARY_PATH"); + PASSTHRU(is_known, "LD_AOUT_PRELOAD"); + PASSTHRU(is_known, "LD_AUDIT"); + PASSTHRU(is_known, "LD_BIND_NOT"); + PASSTHRU(is_known, "LD_DEBUG"); + PASSTHRU(is_known, "LD_DEBUG_OUTPUT"); + PASSTHRU(is_known, "LD_DYNAMIC_WEAK"); + PASSTHRU(is_known, "LD_HWCAP_MASK"); + PASSTHRU(is_known, "LD_KEEPDIR"); + PASSTHRU(is_known, "LD_NOWARN"); + PASSTHRU(is_known, "LD_ORIGIN_PATH"); + PASSTHRU(is_known, "LD_POINTER_GUARD"); + PASSTHRU(is_known, "LD_PROFILE"); + PASSTHRU(is_known, "LD_PROFILE_OUTPUT"); + PASSTHRU(is_known, "LD_SHOW_AUXV"); + PASSTHRU(is_known, "LD_USE_LOAD_BIAS"); + PASSTHRU(is_known, "LD_VERBOSE"); + PASSTHRU(is_known, "LD_WARN"); + } + + if (!has_seen_library_path) { + /* Errors are not fatal here. */ + status = resize_array_of_xpointers(argv, offset, 2); + if (status >= 0) { + status = write_xpointees(argv, offset, 2, undefine, "LD_LIBRARY_PATH"); + if (status < 0) + return status; + } + } + + return 0; +} + +/** + * Add to @host_ldso_paths the list of @paths prefixed with the path + * to the host rootfs. + */ +static int add_host_ldso_paths(char host_ldso_paths[ARG_MAX], const char *paths) +{ + char *cursor1; + const char *cursor2; + + cursor1 = host_ldso_paths + strlen(host_ldso_paths); + cursor2 = paths; + + do { + bool is_absolute; + size_t length1; + size_t length2 = strcspn(cursor2, ":"); + + is_absolute = (*cursor2 == '/'); + + length1 = 1 + length2; + if (is_absolute) + length1 += strlen(HOST_ROOTFS); + + /* Check there's enough room. 
*/ + if (cursor1 + length1 >= host_ldso_paths + ARG_MAX) + return -ENOEXEC; + + if (cursor1 != host_ldso_paths) { + strcpy(cursor1, ":"); + cursor1++; + } + + /* Since we are executing a host binary under a + * QEMUlated environment, we have to access its + * library paths through the "host-rootfs" binding. + * Technically it means a path like "/lib" is accessed + * as "${HOST_ROOTFS}/lib" to avoid conflict with the + * guest "/lib". */ + if (is_absolute) { + strcpy(cursor1, HOST_ROOTFS); + cursor1 += strlen(HOST_ROOTFS); + } + + strncpy(cursor1, cursor2, length2); + cursor1 += length2; + + cursor2 += length2 + 1; + } while (*(cursor2 - 1) != '\0'); + + *cursor1 = '\0'; + + return 0; +} + +struct find_program_header_data { + ProgramHeader *program_header; + SegmentType type; + uint64_t address; +}; + +/** + * This function is a program header iterator. It stops the iteration + * (by returning 1) once it has found a program header that matches + * @data. This function returns -errno if an error occurred, + * otherwise 0 or 1. + */ +static int find_program_header(const ElfHeader *elf_header, + const ProgramHeader *program_header, void *data_) +{ + struct find_program_header_data *data = data_; + + if (PROGRAM_FIELD(*elf_header, *program_header, type) == data->type) { + uint64_t start; + uint64_t end; + + memcpy(data->program_header, program_header, sizeof(ProgramHeader)); + + if (data->address == (uint64_t) -1) + return 1; + + start = PROGRAM_FIELD(*elf_header, *program_header, vaddr); + end = start + PROGRAM_FIELD(*elf_header, *program_header, memsz); + + if (start < end + && data->address >= start + && data->address <= end) + return 1; + } + + return 0; +} + +/** + * Add to @xpaths the paths (':'-separated list) from the file + * referenced by @fd at the given @offset. This function returns + * -errno if an error occured, otherwise 0. 
+ */
+static int add_xpaths(const Tracee *tracee, int fd, uint64_t offset, char **xpaths)
+{
+	char *paths = NULL;
+	char *tmp;
+
+	size_t length;
+	size_t size;
+	ssize_t count;
+
+	/* Check the result as an off_t: once squeezed into an int,
+	 * a valid offset beyond INT_MAX could look like an error.  */
+	if (lseek(fd, offset, SEEK_SET) == (off_t) -1)
+		return -errno;
+
+	/* Read the complete list of paths, 1024 bytes at a time,
+	 * until its terminating NUL byte or the end of the file.  */
+	length = 0;
+	do {
+		size = length + 1024;
+
+		tmp = talloc_realloc(tracee->ctx, paths, char, size);
+		if (!tmp)
+			return -ENOMEM;
+		paths = tmp;
+
+		count = read(fd, paths + length, 1024);
+		if (count < 0)
+			return -errno;
+
+		/* Only the bytes that were actually read may be
+		 * inspected: the remainder of the chunk was not
+		 * initialized by talloc_realloc().  */
+		length += strnlen(paths + length, (size_t) count);
+	} while (length == size);
+
+	/* Here length < size.  Either paths[length] is already the
+	 * NUL byte found above, or the file ended before any NUL byte
+	 * and the list has to be terminated explicitly before it is
+	 * handed to strcpy(3)/strcat(3).  */
+	paths[length] = '\0';
+
+	/* Concatenate this list of paths to xpaths.  */
+	if (!*xpaths) {
+		*xpaths = talloc_array(tracee->ctx, char, length + 1);
+		if (!*xpaths)
+			return -ENOMEM;
+
+		strcpy(*xpaths, paths);
+	}
+	else {
+		length += strlen(*xpaths);
+		length++; /* ":" separator */
+
+		tmp = talloc_realloc(tracee->ctx, *xpaths, char, length + 1);
+		if (!tmp)
+			return -ENOMEM;
+		*xpaths = tmp;
+
+		strcat(*xpaths, ":");
+		strcat(*xpaths, paths);
+	}
+
+	/* I don't know if DT_R*PATH entries are unique.  In
+	 * doubt I support multiple entries.  */
+	return 0;
+}
+
+/**
+ * Put the RPATH and RUNPATH dynamic entries from the file referenced
+ * by @fd -- which has the provided @elf_header -- in @rpaths and
+ * @runpaths respectively.  This function returns -errno if an error
+ * occurred, otherwise 0.
+ */ +static int read_ldso_rpaths(const Tracee* tracee, int fd, const ElfHeader *elf_header, + char **rpaths, char **runpaths) +{ + ProgramHeader dynamic_segment; + ProgramHeader strtab_segment; + struct find_program_header_data data; + uint64_t strtab_address = (uint64_t) -1; + off_t strtab_offset; + int status; + size_t i; + + uint64_t offsetof_dynamic_segment; + uint64_t sizeof_dynamic_segment; + size_t sizeof_dynamic_entry; + + data.program_header = &dynamic_segment; + data.type = PT_DYNAMIC; + data.address = (uint64_t) -1; + + status = iterate_program_headers(tracee, fd, elf_header, find_program_header, &data); + if (status <= 0) + return status; + + offsetof_dynamic_segment = PROGRAM_FIELD(*elf_header, dynamic_segment, offset); + sizeof_dynamic_segment = PROGRAM_FIELD(*elf_header, dynamic_segment, filesz); + + if (IS_CLASS32(*elf_header)) + sizeof_dynamic_entry = sizeof(DynamicEntry32); + else + sizeof_dynamic_entry = sizeof(DynamicEntry64); + + if (sizeof_dynamic_segment % sizeof_dynamic_entry != 0) + return -ENOEXEC; + +/** + * Invoke @embedded_code on each dynamic entry of the given @type. + */ +#define FOREACH_DYNAMIC_ENTRY(type, embedded_code) \ + for (i = 0; i < sizeof_dynamic_segment / sizeof_dynamic_entry; i++) { \ + DynamicEntry dynamic_entry; \ + uint64_t value; \ + \ + /* embedded_code may change the file offset. */ \ + status = (int) lseek(fd, offsetof_dynamic_segment + i * sizeof_dynamic_entry, \ + SEEK_SET); \ + if (status < 0) \ + return -errno; \ + \ + status = read(fd, &dynamic_entry, sizeof_dynamic_entry); \ + if (status < 0) \ + return status; \ + \ + if (DYNAMIC_FIELD(*elf_header, dynamic_entry, tag) != type) \ + continue; \ + \ + value = DYNAMIC_FIELD(*elf_header, dynamic_entry, val); \ + \ + embedded_code \ + } + + /* Get the address of the *first* string table. The ELF + * specification doesn't mention if it may have several string + * table references. 
*/ + FOREACH_DYNAMIC_ENTRY(DT_STRTAB, { + strtab_address = value; + break; + }) + + if (strtab_address == (uint64_t) -1) + return 0; + + data.program_header = &strtab_segment; + data.type = PT_LOAD; + data.address = strtab_address; + + /* Search the program header that contains the given string table. */ + status = iterate_program_headers(tracee, fd, elf_header, find_program_header, &data); + if (status < 0) + return status; + + strtab_offset = PROGRAM_FIELD(*elf_header, strtab_segment, offset) + + (strtab_address - PROGRAM_FIELD(*elf_header, strtab_segment, vaddr)); + + FOREACH_DYNAMIC_ENTRY(DT_RPATH, { + if (strtab_offset < 0 || (uint64_t) strtab_offset > UINT64_MAX - value) + return -ENOEXEC; + + status = add_xpaths(tracee, fd, strtab_offset + value, rpaths); + if (status < 0) + return status; + }) + + FOREACH_DYNAMIC_ENTRY(DT_RUNPATH, { + if (strtab_offset < 0 || (uint64_t) strtab_offset > UINT64_MAX - value) + return -ENOEXEC; + + status = add_xpaths(tracee, fd, strtab_offset + value, runpaths); + if (status < 0) + return status; + }) + +#undef FOREACH_DYNAMIC_ENTRY + + return 0; +} + +/** + * Rebuild the variable LD_LIBRARY_PATH in @envp for the program + * @host_path according to its RPATH, RUNPATH, and the initial + * LD_LIBRARY_PATH. This function returns -errno if an error occured, + * 1 if RPATH/RUNPATH entries were found, 0 otherwise. + */ +int rebuild_host_ldso_paths(Tracee *tracee, const char host_path[PATH_MAX], ArrayOfXPointers *envp) +{ + static char *initial_ldso_paths = NULL; + ElfHeader elf_header; + + char host_ldso_paths[ARG_MAX] = ""; + bool rpath_found = false; + + char *rpaths = NULL; + char *runpaths = NULL; + + size_t length1; + size_t length2; + + size_t index; + int status; + int fd; + + fd = open_elf(host_path, &elf_header); + if (fd < 0) + return fd; + + status = read_ldso_rpaths(tracee, fd, &elf_header, &rpaths, &runpaths); + close(fd); + if (status < 0) + return status; + + /* 1. 
DT_RPATH */ + if (rpaths && !runpaths) { + status = add_host_ldso_paths(host_ldso_paths, rpaths); + if (status < 0) + return 0; /* Not fatal. */ + rpath_found = true; + } + + /* 2. LD_LIBRARY_PATH */ + if (initial_ldso_paths == NULL) + initial_ldso_paths = strdup(getenv("LD_LIBRARY_PATH") ?: "/"); + if (initial_ldso_paths != NULL && initial_ldso_paths[0] != '\0') { + status = add_host_ldso_paths(host_ldso_paths, initial_ldso_paths); + if (status < 0) + return 0; /* Not fatal. */ + } + + /* 3. DT_RUNPATH */ + if (runpaths) { + status = add_host_ldso_paths(host_ldso_paths, runpaths); + if (status < 0) + return 0; /* Not fatal. */ + rpath_found = true; + } + + /* 4. /etc/ld.so.cache NYI. */ + + /* 5. /lib[32|64], /usr/lib[32|64] + /usr/local/lib[32|64] */ + /* 6. /lib, /usr/lib + /usr/local/lib */ + if (IS_CLASS32(elf_header)) + status = add_host_ldso_paths(host_ldso_paths, +#if defined(ARCH_X86) || defined(ARCH_X86_64) + "/lib/i386-linux-gnu:/usr/lib/i386-linux-gnu:" +#endif + "/lib32:/usr/lib32:/usr/local/lib32" + ":/lib:/usr/lib:/usr/local/lib"); + else + status = add_host_ldso_paths(host_ldso_paths, +#if defined(ARCH_X86_64) + "/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:" +#elif defined(ARCH_ARM64) + "/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu:" +#endif + "/lib64:/usr/lib64:/usr/local/lib64" + ":/lib:/usr/lib:/usr/local/lib"); + if (status < 0) + return 0; /* Not fatal. */ + + status = find_xpointee(envp, "LD_LIBRARY_PATH"); + if (status < 0) + return 0; /* Not fatal. */ + index = (size_t) status; + + if (index == envp->length) { + /* Allocate a new entry at the end of envp[] when + * LD_LIBRARY_PATH was not found. */ + + index = (envp->length > 0 ? envp->length - 1 : 0); + status = resize_array_of_xpointers(envp, index, 1); + if (status < 0) + return 0; /* Not fatal. */ + } + else if (tracee->guest_ldso_paths == NULL) { + /* Remember guest LD_LIBRARY_PATH in order to restore + * it when a host program will execute a guest + * program. 
*/ + char *env; + + /* Errors are not fatal here. */ + status = read_xpointee_as_string(envp, index, &env); + if (status >= 0) + tracee->guest_ldso_paths = talloc_strdup(tracee, env); + } + + /* Forge the new LD_LIBRARY_PATH variable from + * host_ldso_paths. */ + length1 = strlen("LD_LIBRARY_PATH="); + length2 = strlen(host_ldso_paths); + if (ARG_MAX - length2 - 1 < length1) + return 0; /* Not fatal. */ + + memmove(host_ldso_paths + length1, host_ldso_paths, length2 + 1); + memcpy(host_ldso_paths, "LD_LIBRARY_PATH=" , length1); + + write_xpointee(envp, index, host_ldso_paths); + + /* The guest LD_LIBRARY_PATH will be restored only if the host + * program didn't change it explicitly, so remember its + * initial value. */ + if (tracee->host_ldso_paths == NULL) + tracee->host_ldso_paths = talloc_strdup(tracee, host_ldso_paths); + + return (int) rpath_found; +} diff --git a/proot/proot_linux/execve/ldso.h b/proot/proot_linux/execve/ldso.h new file mode 100644 index 0000000..43782c1 --- /dev/null +++ b/proot/proot_linux/execve/ldso.h @@ -0,0 +1,42 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#ifndef LDSO_H
+#define LDSO_H
+
+#include <linux/limits.h> /* PATH_MAX, */
+#include <stdbool.h>      /* bool, */
+
+#include "execve/aoxp.h"
+#include "execve/elf.h"
+
+/* Move the LD_* variables found in @envp to the command-line @argv, at
+ * the given @offset, using the options @define and @undefine.  This
+ * function returns -errno if an error occurred, otherwise 0.  */
+extern int ldso_env_passthru(const Tracee *tracee, ArrayOfXPointers *envp, ArrayOfXPointers *argv,
+			const char *define, const char *undefine, size_t offset);
+
+/* Rebuild LD_LIBRARY_PATH in @envp for the program @host_path.  This
+ * function returns -errno if an error occurred, 1 if RPATH/RUNPATH
+ * entries were found, 0 otherwise.  */
+extern int rebuild_host_ldso_paths(Tracee *tracee, const char host_path[PATH_MAX],
+				ArrayOfXPointers *envp);
+
+/* Return 1 if the entry of @envp at @index is the variable named
+ * @reference, 0 if it is not, -errno if an error occurred.  */
+extern int compare_xpointee_env(ArrayOfXPointers *envp, size_t index, const char *reference);
+
+/* Check if the "NAME=value" string @variable is named @name.  */
+extern bool is_env_name(const char *variable, const char *name);
+
+#endif /* LDSO_H */
diff --git a/proot/proot_linux/execve/shebang.c b/proot/proot_linux/execve/shebang.c
new file mode 100644
index 0000000..83ceeda
--- /dev/null
+++ b/proot/proot_linux/execve/shebang.c
@@ -0,0 +1,307 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#include /* open(2), */ +#include /* open(2), */ +#include /* open(2), */ +#include /* PATH_MAX, */ +#include /* BINPRM_BUF_SIZE, */ +#include /* read(2), close(2), */ +#include /* -E*, */ +#include /* MAXSYMLINKS, */ +#include /* bool, */ +#include /* assert(3), */ + +#include "execve/shebang.h" +#include "execve/execve.h" +#include "execve/aoxp.h" +#include "tracee/tracee.h" +#include "attribute.h" + +/** + * Extract into @user_path and @argument the shebang from @host_path. + * This function returns -errno if an error occured, 1 if a shebang + * was found and extracted, otherwise 0. + * + * Extract from "man 2 execve": + * + * On Linux, the entire string following the interpreter name is + * passed as a *single* argument to the interpreter, and this + * string can include white space. + */ +static int extract_shebang(const Tracee *tracee UNUSED, const char *host_path, + char user_path[PATH_MAX], char argument[BINPRM_BUF_SIZE]) +{ + char tmp2[2]; + char tmp; + + size_t current_length; + size_t i; + + int status; + int fd; + + /* Assumption. */ + assert(BINPRM_BUF_SIZE < PATH_MAX); + + argument[0] = '\0'; + + /* Inspect the executable. */ + fd = open(host_path, O_RDONLY); + if (fd < 0) + return -errno; + + status = read(fd, tmp2, 2 * sizeof(char)); + if (status < 0) { + status = -errno; + goto end; + } + if ((size_t) status < 2 * sizeof(char)) { /* EOF */ + status = 0; + goto end; + } + + /* Check if it really is a script text. */ + if (tmp2[0] != '#' || tmp2[1] != '!') { + status = 0; + goto end; + } + current_length = 2; + user_path[0] = '\0'; + + /* Skip leading spaces. */ + do { + status = read(fd, &tmp, sizeof(char)); + if (status < 0) { + status = -errno; + goto end; + } + if ((size_t) status < sizeof(char)) { /* EOF */ + status = -ENOEXEC; + goto end; + } + + current_length++; + } while ((tmp == ' ' || tmp == '\t') && current_length < BINPRM_BUF_SIZE); + + /* Slurp the interpreter path until the first space or end-of-line. 
*/ + for (i = 0; current_length < BINPRM_BUF_SIZE; current_length++, i++) { + switch (tmp) { + case ' ': + case '\t': + /* Remove spaces in between the interpreter + * and the hypothetical argument. */ + user_path[i] = '\0'; + break; + + case '\n': + case '\r': + /* There is no argument. */ + user_path[i] = '\0'; + argument[0] = '\0'; + status = 1; + goto end; + + default: + /* There is an argument if the previous + * character in user_path[] is '\0'. */ + if (i > 1 && user_path[i - 1] == '\0') + goto argument; + else + user_path[i] = tmp; + break; + } + + status = read(fd, &tmp, sizeof(char)); + if (status < 0) { + status = -errno; + goto end; + } + if ((size_t) status < sizeof(char)) { /* EOF */ + user_path[i] = '\0'; + argument[0] = '\0'; + status = 1; + goto end; + } + } + + /* The interpreter path is too long, truncate it. */ + user_path[i] = '\0'; + argument[0] = '\0'; + status = 1; + goto end; + +argument: + + /* Slurp the argument until the end-of-line. */ + for (i = 0; current_length < BINPRM_BUF_SIZE; current_length++, i++) { + switch (tmp) { + case '\n': + case '\r': + argument[i] = '\0'; + + /* Remove trailing spaces. */ + for (i--; i > 0 && (argument[i] == ' ' || argument[i] == '\t'); i--) + argument[i] = '\0'; + + status = 1; + goto end; + + default: + argument[i] = tmp; + break; + } + + status = read(fd, &tmp, sizeof(char)); + if (status < 0) { + status = -errno; + goto end; + } + if ((size_t) status < sizeof(char)) { /* EOF */ + argument[0] = '\0'; + status = 1; + goto end; + } + } + + /* The argument is too long, truncate it. */ + argument[i] = '\0'; + status = 1; + +end: + close(fd); + + /* Did an error occur or isn't a script? */ + if (status <= 0) + return status; + + return 1; +} + +/** + * Expand in argv[] the shebang of @user_path, if any. This function + * returns -errno if an error occurred, 1 if a shebang was found and + * extracted, otherwise 0. 
On success, both @host_path and @user_path + * point to the program to execute (respectively from host + * point-of-view and as-is), and @tracee's argv[] (pointed to by + * SYSARG_2) is correctly updated. + */ +int expand_shebang(Tracee *tracee, char host_path[PATH_MAX], char user_path[PATH_MAX]) +{ + ArrayOfXPointers *argv = NULL; + bool has_shebang = false; + + char argument[BINPRM_BUF_SIZE]; + int status; + size_t i; + + /* "The interpreter must be a valid pathname for an executable + * which is not itself a script [1]. If the filename + * argument of execve() specifies an interpreter script, then + * interpreter will be invoked with the following arguments: + * + * interpreter [optional-arg] filename arg... + * + * where arg... is the series of words pointed to by the argv + * argument of execve()." -- man 2 execve + * + * [1]: as of this writing (3.10.17) this is true only for the + * ELF interpreter; ie. a script can use a script as + * interpreter. + */ + for (i = 0; i < MAXSYMLINKS; i++) { + char *old_user_path; + + /* Translate this path (user -> host), then check it is executable. */ + status = translate_and_check_exec(tracee, host_path, user_path); + if (status < 0) + return status; + + /* Remember the initial user path. */ + old_user_path = talloc_strdup(tracee->ctx, user_path); + if (old_user_path == NULL) + return -ENOMEM; + + /* Extract into user_path and argument the shebang from host_path. */ + status = extract_shebang(tracee, host_path, user_path, argument); + if (status < 0) + return status; + + /* No more shebang. */ + if (status == 0) + break; + has_shebang = true; + + /* Translate new path (user -> host), then check it is executable. */ + status = translate_and_check_exec(tracee, host_path, user_path); + if (status < 0) + return status; + + /* Fetch argv[] only on demand. 
*/ + if (argv == NULL) { + status = fetch_array_of_xpointers(tracee, &argv, SYSARG_2, 0); + if (status < 0) + return status; + } + + /* Assuming the shebang of "script" is "#!/bin/sh -x", + * a call to: + * + * execve("./script", { "script.sh", NULL }, ...) + * + * becomes: + * + * execve("/bin/sh", { "/bin/sh", "-x", "./script", NULL }, ...) + * + * See commit 8c8fbe85 about "argv->length == 1". */ + if (argument[0] != '\0') { + status = resize_array_of_xpointers(argv, 0, 2 + (argv->length == 1)); + if (status < 0) + return status; + + status = write_xpointees(argv, 0, 3, user_path, argument, old_user_path); + if (status < 0) + return status; + } + else { + status = resize_array_of_xpointers(argv, 0, 1 + (argv->length == 1)); + if (status < 0) + return status; + + status = write_xpointees(argv, 0, 2, user_path, old_user_path); + if (status < 0) + return status; + } + } + + if (i == MAXSYMLINKS) + return -ELOOP; + + /* Push argv[] only on demand. */ + if (argv != NULL) { + status = push_array_of_xpointers(argv, SYSARG_2); + if (status < 0) + return status; + } + + return (has_shebang ? 1 : 0); +} diff --git a/proot/proot_linux/execve/shebang.h b/proot/proot_linux/execve/shebang.h new file mode 100644 index 0000000..1ae63be --- /dev/null +++ b/proot/proot_linux/execve/shebang.h @@ -0,0 +1,32 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef SHEBANG_H +#define SHEBANG_H + +#include /* PATH_MAX, ARG_MAX, */ + +#include "tracee/tracee.h" + +extern int expand_shebang(Tracee *tracee, char host_path[PATH_MAX], char user_path[PATH_MAX]); + +#endif /* SHEBANG_H */ diff --git a/proot/proot_linux/extension/care/archive.c b/proot/proot_linux/extension/care/archive.c new file mode 100644 index 0000000..7d5c0e6 --- /dev/null +++ b/proot/proot_linux/extension/care/archive.c @@ -0,0 +1,568 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
/**
 * Move *@cursor backward -- within the given @string -- if it reads
 * @suffix once moved.  *@cursor must point into @string (or at its
 * terminating null byte).  This function returns true and updates
 * *@cursor when @suffix was found right before it, otherwise it
 * returns false and leaves *@cursor untouched.
 */
static bool slurp_suffix(const char *string, const char **cursor, const char *suffix)
{
	/* Compare lengths rather than pointers: computing
	 * "*cursor - length" when it lies before @string is undefined
	 * behaviour.  */
	size_t available = (size_t) (*cursor - string);
	size_t length = strlen(suffix);

	if (length > available || strncmp(*cursor - length, suffix, length) != 0)
		return false;

	*cursor -= length;
	return true;
}
+ */ +static int parse_suffix(const Tracee* tracee, Format *format, + const char *string, size_t *suffix_length) +{ + const char *cursor; + bool found; + + bool no_wrapper_found = false; + bool no_filter_found = false; + bool no_format_found = false; + + cursor = string + strlen(string); + bzero(format, sizeof(Format)); + +/* parse_special: */ + + found = slurp_suffix(string, &cursor, "/"); + if (found) + goto end; + + found = slurp_suffix(string, &cursor, ".raw"); + if (found) { + format->special = RAW; + goto parse_filter; + } + + found = slurp_suffix(string, &cursor, ".bin"); + if (found) { +#if defined(CARE_BINARY_IS_PORTABLE) + format->special = SELF_EXTRACTING; + goto parse_filter; +#else + note(tracee, ERROR, USER, "This version of CARE was built " + "without self-extracting (.bin) support"); + return -1; +#endif + } + + no_wrapper_found = true; + +parse_filter: + + found = slurp_suffix(string, &cursor, ".gz"); + if (found) { + format->add_filter = archive_write_add_filter_gzip; + format->options = "gzip:compression-level=1"; + goto parse_format; + } + + found = slurp_suffix(string, &cursor, ".lzo"); + if (found) { + format->add_filter = archive_write_add_filter_lzop; + format->options = "lzop:compression-level=1"; + goto parse_format; + } + + found = slurp_suffix(string, &cursor, ".tgz"); + if (found) { + format->add_filter = archive_write_add_filter_gzip; + format->options = "gzip:compression-level=1"; + format->set_format = archive_write_set_format_gnutar; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_TAR_GNUTAR; + goto sanity_checks; + } + + found = slurp_suffix(string, &cursor, ".tzo"); + if (found) { + format->add_filter = archive_write_add_filter_lzop; + format->options = "lzop:compression-level=1"; + format->set_format = archive_write_set_format_gnutar; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_TAR_GNUTAR; + goto sanity_checks; + } + + no_filter_found = true; + +parse_format: + + found = slurp_suffix(string, &cursor, ".cpio"); + 
if (found) { + format->set_format = archive_write_set_format_cpio; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_CPIO_POSIX; + goto sanity_checks; + } + + found = slurp_suffix(string, &cursor, ".tar"); + if (found) { + format->set_format = archive_write_set_format_gnutar; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_TAR_GNUTAR; + goto sanity_checks; + } + + no_format_found = true; + +sanity_checks: + + if (no_filter_found && no_format_found) { + format->add_filter = archive_write_add_filter_lzop; + format->options = "lzop:compression-level=1"; + format->set_format = archive_write_set_format_gnutar; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_TAR_GNUTAR; + + if (no_wrapper_found) { +#if defined(CARE_BINARY_IS_PORTABLE) + format->special = SELF_EXTRACTING; + note(tracee, WARNING, USER, + "unknown suffix, assuming self-extracting format."); +#else + format->special = RAW; + note(tracee, WARNING, USER, + "unknown suffix, assuming raw format."); +#endif + } + + no_wrapper_found = false; + no_filter_found = false; + no_format_found = false; + } + + if (no_format_found) { + note(tracee, WARNING, USER, "unknown format, assuming tar format."); + format->set_format = archive_write_set_format_gnutar; + format->hardlink_resolver_strategy = ARCHIVE_FORMAT_TAR_GNUTAR; + + no_format_found = false; + } + +end: + *suffix_length = strlen(cursor); + return 0; +} + +/** + * Copy "/proc/self/exe" into @destination. This function returns -1 + * if an error occured, otherwise the file descriptor of the + * destination. 
+ */ +static int copy_self_exe(const Tracee *tracee, const char *destination) +{ + int output_fd; + int input_fd; + int status; + + input_fd = open("/proc/self/exe", O_RDONLY); + if (input_fd < 0) { + note(tracee, ERROR, SYSTEM, "can't open '/proc/self/exe'"); + return -1; + } + + output_fd = open(destination, O_RDWR|O_CREAT|O_TRUNC, S_IRWXU|S_IRGRP|S_IXGRP); + if (output_fd < 0) { + note(tracee, ERROR, SYSTEM, "can't open/create '%s'", destination); + status = -1; + goto end; + } + + while (1) { + uint8_t buffer[4 * 1024]; + ssize_t size; + + status = read(input_fd, buffer, sizeof(buffer)); + if (status < 0) { + note(tracee, ERROR, SYSTEM, "can't read '/proc/self/exe'"); + goto end; + } + + if (status == 0) + break; + + size = status; + status = write(output_fd, buffer, size); + if (status < 0) { + note(tracee, ERROR, SYSTEM, "can't write '%s'", destination); + goto end; + } + if (status != size) + note(tracee, WARNING, INTERNAL, + "wrote %zd bytes instead of %zd", (size_t) status, size); + } + +end: + (void) close(input_fd); + + if (status < 0) { + (void) close(output_fd); + return -1; + } + + return output_fd; +} + +/** + * Create a new archive structure (memory allocation attached to + * @context) for the given @output file. This function returns NULL + * on error, otherwise the newly allocated archive structure. See + * parse_suffix() for the meaning of @suffix_length. + */ +Archive *new_archive(TALLOC_CTX *context, const Tracee* tracee, + const char *output, size_t *suffix_length) +{ + Format format; + Archive *archive; + int status; + + assert(output != NULL); + + status = parse_suffix(tracee, &format, output, suffix_length); + if (status < 0) + return NULL; + + archive = talloc_zero(context, Archive); + if (archive == NULL) { + note(tracee, ERROR, INTERNAL, "can't allocate archive structure"); + return NULL; + } + archive->fd = -1; + + /* No format was set, content will be copied into a directory + * instead of being archived. 
*/ + if (format.set_format == NULL) { + int flags = ARCHIVE_EXTRACT_PERM + | ARCHIVE_EXTRACT_TIME + | ARCHIVE_EXTRACT_ACL + | ARCHIVE_EXTRACT_FFLAGS + | ARCHIVE_EXTRACT_XATTR + | (geteuid() == 0 ? ARCHIVE_EXTRACT_OWNER : 0); + + archive->handle = archive_write_disk_new(); + if (archive->handle == NULL) { + note(tracee, WARNING, INTERNAL, "can't initialize archive structure"); + return NULL; + } + + status = archive_write_disk_set_options(archive->handle, flags); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "set archive options: %s", + archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't set archive options: %s", + archive_error_string(archive->handle)); + return NULL; + } + + status = archive_write_disk_set_standard_lookup(archive->handle); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "set archive lookup: %s", + archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't set archive lookup: %s", + archive_error_string(archive->handle)); + return NULL; + } + + archive->hardlink_resolver = archive_entry_linkresolver_new(); + if (archive->hardlink_resolver != NULL) + archive_entry_linkresolver_set_strategy(archive->hardlink_resolver, + ARCHIVE_FORMAT_TAR); + + return archive; + } + + archive->handle = archive_write_new(); + if (archive->handle == NULL) { + note(tracee, WARNING, INTERNAL, "can't initialize archive structure"); + return NULL; + } + + assert(format.set_format != NULL); + status = format.set_format(archive->handle); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "set archive format: %s", + archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't set archive format: %s", + archive_error_string(archive->handle)); + return NULL; + } + + if (format.hardlink_resolver_strategy != 0) { + archive->hardlink_resolver = 
archive_entry_linkresolver_new(); + if (archive->hardlink_resolver != NULL) + archive_entry_linkresolver_set_strategy(archive->hardlink_resolver, + format.hardlink_resolver_strategy); + } + + if (format.add_filter != NULL) { + status = format.add_filter(archive->handle); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "add archive filter: %s", + archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't add archive filter: %s", + archive_error_string(archive->handle)); + return NULL; + } + } + + if (format.options != NULL) { + status = archive_write_set_options(archive->handle, format.options); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "set archive options: %s", + archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't set archive options: %s", + archive_error_string(archive->handle)); + return NULL; + } + } + + switch (format.special) { + case SELF_EXTRACTING: + archive->fd = copy_self_exe(tracee, output); + if (archive->fd < 0) + return NULL; + + /* Remember where the CARE binary ends. */ + archive->offset = lseek(archive->fd, 0, SEEK_CUR); + + status = archive_write_open_fd(archive->handle, archive->fd); + break; + + case RAW: + archive->fd = open(output, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP); + if (archive->fd < 0) { + note(tracee, ERROR, SYSTEM, "can't open/create '%s'", output); + return NULL; + } + + status = write(archive->fd, "RAW", strlen("RAW")); + if (status != strlen("RAW")) { + note(tracee, ERROR, SYSTEM, "can't write '%s'", output); + (void) close(archive->fd); + return NULL; + } + + /* Remember where the "RAW" string ends. 
*/ + archive->offset = lseek(archive->fd, 0, SEEK_CUR); + + status = archive_write_open_fd(archive->handle, archive->fd); + break; + + default: + status = archive_write_open_filename(archive->handle, output); + break; + } + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "open archive '%s': %s", + output, archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't open archive '%s': %s", + output, archive_error_string(archive->handle)); + return NULL; + } + + return archive; +} + +/** + * Finalize the given @archive. This function returns -1 if an error + * occurred, otherwise 0. + */ +int finalize_archive(Archive *archive) +{ + int status; + + if (archive == NULL || archive->handle == NULL) + return -1; + + if (archive->hardlink_resolver != NULL) + archive_entry_linkresolver_free(archive->hardlink_resolver); + + status = archive_write_close(archive->handle); + if (status != ARCHIVE_OK && status != ARCHIVE_WARN) + return -1; + + status = archive_write_free(archive->handle); + if (status != ARCHIVE_OK && status != ARCHIVE_WARN) + return -1; + + return 0; +} + +/** + * Put the content of @path into @archive, with the specified @statl + * status, at the given @alternate_path (NULL if unchanged). This + * function returns -1 if an error occurred, otherwise 0. Note: this + * function can be called with @tracee == NULL. 
+ */ +int archive(const Tracee* tracee, Archive *archive, + const char *path, const char *alternate_path, const struct stat *statl) +{ + struct archive_entry *entry = NULL; + ssize_t status; + mode_t type; + size_t size; + int fd = -1; + + if (archive == NULL || archive->handle == NULL) + return -1; + + entry = archive_entry_new(); + if (entry == NULL) { + note(tracee, WARNING, INTERNAL, "can't create archive entry for '%s': %s", + path, archive_error_string(archive->handle)); + status = -1; + goto end; + } + + archive_entry_set_pathname(entry, alternate_path ?: path); + archive_entry_copy_stat(entry, statl); + + if (archive->hardlink_resolver != NULL) { + struct archive_entry *unused; + archive_entry_linkify(archive->hardlink_resolver, &entry, &unused); + } + + /* Get status only once hardlinks were resolved. */ + size = archive_entry_size(entry); + type = archive_entry_filetype(entry); + + if (type == AE_IFLNK) { + char target[PATH_MAX]; + status = readlink(path, target, PATH_MAX); + if (status >= PATH_MAX) { + status = -1; + errno = ENAMETOOLONG; + } + if (status < 0) { + note(tracee, WARNING, SYSTEM, "can't readlink '%s'", path); + status = -1; + goto end; + } + target[status] = '\0'; + + /* Must be done before archive_write_header(). */ + archive_entry_set_symlink(entry, target); + } + + status = archive_write_header(archive->handle, entry); + if (status == ARCHIVE_WARN) { + note(tracee, WARNING, INTERNAL, "write header for '%s': %s", + path, archive_error_string(archive->handle)); + } + else if (status != ARCHIVE_OK) { + note(tracee, ERROR, INTERNAL, "can't write header for '%s': %s", + path, archive_error_string(archive->handle)); + status = -1; + goto end; + } + + /* No content to archive? 
*/ + if (type != AE_IFREG || size == 0) { + status = 0; + goto end; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno != EACCES) + note(tracee, WARNING, SYSTEM, "can't open '%s'", path); + status = -1; + goto end; + } + + /* Copy the content from the file into the archive. */ + do { + uint8_t buffer[4096]; + + status = read(fd, buffer, sizeof(buffer)); + if (status < 0) { + note(tracee, WARNING, SYSTEM, "can't read '%s'", path); + status = -1; + goto end; + } + + size = archive_write_data(archive->handle, buffer, status); + if ((size_t) status != size) { + note(tracee, WARNING, INTERNAL, "can't archive '%s' content: %s", + path, archive_error_string(archive->handle)); + status = -1; + goto end; + } + } while (status > 0); + status = 0; + +end: + if (fd >= 0) + (void) close(fd); + + if (entry != NULL) + archive_entry_free(entry); + + return status; +} diff --git a/proot/proot_linux/extension/care/archive.h b/proot/proot_linux/extension/care/archive.h new file mode 100644 index 0000000..c56d472 --- /dev/null +++ b/proot/proot_linux/extension/care/archive.h @@ -0,0 +1,47 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef ARCHIVE_H +#define ARCHIVE_H + +#include +#include +#include + +#include "tracee/tracee.h" + +typedef struct { + struct archive *handle; + struct archive_entry_linkresolver *hardlink_resolver; + + /* Information used to create an self-extracting archive. */ + off_t offset; + int fd; +} Archive; + +extern Archive *new_archive(TALLOC_CTX *context, const Tracee* tracee, + const char *output, size_t *prefix_length); +extern int finalize_archive(Archive *archive); +extern int archive(const Tracee* tracee, Archive *archive, + const char *path, const char *alternate_path, const struct stat *statl); + +#endif /* ARCHIVE_H */ diff --git a/proot/proot_linux/extension/care/care.c b/proot/proot_linux/extension/care/care.c new file mode 100644 index 0000000..cae1b28 --- /dev/null +++ b/proot/proot_linux/extension/care/care.c @@ -0,0 +1,604 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* struct stat, */ +#include /* struct stat, */ +#include /* lstat(2), */ +#include /* PATH_MAX, */ +#include /* strlen(3), */ +#include /* assert(3), */ +#include /* time(2), localtime(3), */ +#include /* offsetof(3), */ +#include /* talloc*, */ +#include /* STAILQ_*, */ +#include /* PRI*, */ +#include /* AT_*, */ + +#include "uthash.h" /* ut*, UT*, HASH*, */ +#include "extension/care/care.h" +#include "extension/care/final.h" +#include "extension/care/archive.h" +#include "extension/extension.h" +#include "tracee/tracee.h" +#include "tracee/mem.h" +#include "execve/auxv.h" +#include "path/canon.h" +#include "path/path.h" +#include "path/binding.h" +#include "cli/note.h" + +/* Make uthash use talloc. */ +#undef uthash_malloc +#undef uthash_free +#define uthash_malloc(size) talloc_size(care, size) +#define uthash_free(pointer, size) TALLOC_FREE(pointer) + +/* Hash entry. */ +typedef struct Entry { + UT_hash_handle hh; + char *path; +} Entry; + +/** + * Add a copy of @value at the end if the given @list. All the newly + * talloc'ed elements (duplicated value, item, list head) are attached + * to the given @context. This function returns NULL if an error + * occurred, otherwise the newly talloc'ed item. + */ +Item *queue_item(TALLOC_CTX *context, List **list, const char *value) +{ + Item *item; + + if (*list == NULL) { + *list = talloc_zero(context, List); + if (*list == NULL) + return NULL; + + STAILQ_INIT(*list); + } + + item = talloc_zero(*list, Item); + if (item == NULL) + return NULL; + + item->load = talloc_strdup(item, value); + if (item->load == NULL) + return NULL; + + STAILQ_INSERT_TAIL(*list, item, link); + return item; +} + +/** + * Generate a valid archive @care->output from @care. 
+ */ +static void generate_output_name(const Tracee *tracee, Care *care) +{ + struct tm *splitted_time; + time_t flat_time; + + flat_time = time(NULL); + splitted_time = localtime(&flat_time); + if (splitted_time == NULL) { + note(tracee, ERROR, INTERNAL, + "can't generate a valid output name from the current time, " + "please specify an ouput name explicitly"); + return; + } + + care->output = talloc_asprintf(care, "care-%02d%02d%02d%02d%02d%02d.%s", + splitted_time->tm_year - 100, splitted_time->tm_mon + 1, + splitted_time->tm_mday, splitted_time->tm_hour, + splitted_time->tm_min, splitted_time->tm_sec, +#if defined(CARE_BINARY_IS_PORTABLE) + "bin" +#else + "raw" +#endif + ); + if (care->output == NULL) { + note(tracee, ERROR, INTERNAL, + "can't generate a valid output name from the current time, " + "please specify an ouput name explicitly"); + return; + } +} + +/** + * Genereate @extension->config from @options. This function returns + * -1 if an error ocurred, otherwise 0. + */ +static int generate_care(Extension *extension, const Options *options) +{ + size_t suffix_length; + const char *cursor; + Tracee *tracee; + Item *item2; + Item *item; + Care *care; + + tracee = TRACEE(extension); + + extension->config = talloc_zero(extension, Care); + if (extension->config == NULL) + return -1; + care = extension->config; + + care->command = options->command; + care->ipc_are_volatile = !options->ignore_default_config; + + if (options->output != NULL) + care->output = talloc_strdup(care, options->output); + else + generate_output_name(tracee, care); + if (care->output == NULL) { + note(tracee, WARNING, INTERNAL, "can't get output name"); + return -1; + } + + care->initial_cwd = talloc_strdup(care, tracee->fs->cwd); + if (care->initial_cwd == NULL) { + note(tracee, WARNING, INTERNAL, "can't allocate cwd"); + return -1; + } + + care->archive = new_archive(care, tracee, care->output, &suffix_length); + if (care->archive == NULL) + return -1; + + cursor = 
strrchr(care->output, '/'); + if (cursor == NULL || strlen(cursor) == 1) + cursor = care->output; + else + cursor++; + + care->prefix = talloc_strndup(care, cursor, strlen(cursor) - suffix_length); + if (care->prefix == NULL) { + note(tracee, WARNING, INTERNAL, "can't allocate archive prefix"); + return -1; + } + + /* Copy & canonicalize volatile paths. */ + if (options->volatile_paths != NULL) { + char path[PATH_MAX]; + int status; + + STAILQ_FOREACH(item, options->volatile_paths, link) { + /* Initial state before canonicalization. */ + strcpy(path, "/"); + + status = canonicalize(tracee, (const char *) item->load, false, path, 0); + if (status < 0) + continue; + + /* Sanity check. */ + if (strcmp(path, "/") == 0) { + const char *string; + const char *name; + + name = talloc_get_name(item); + string = name == NULL || name[0] != '$' + ? talloc_asprintf(tracee->ctx, "'%s'", + (const char *) item->load) + : talloc_asprintf(tracee->ctx, "'%s' (%s)", + (const char *) item->load, name); + + note(tracee, WARNING, USER, + "path %s was declared volatile but it leads to '/', " + "as a consequence it will *not* be considered volatile.", + string); + continue; + } + + item2 = queue_item(care, &care->volatile_paths, path); + if (item2 == NULL) + continue; + + /* Preserve the non expanded form. */ + talloc_set_name_const(item2, talloc_get_name(item)); + + VERBOSE(tracee, 1, "volatile path: %s", (const char *) item2->load); + } + } + + /* Copy volatile env. variables. */ + if (options->volatile_envars != NULL) { + STAILQ_FOREACH(item, options->volatile_envars, link) { + item2 = queue_item(care, &care->volatile_envars, item->load); + if (item2 == NULL) + continue; + + VERBOSE(tracee, 1, "volatile envar: %s", (const char *) item2->load); + } + } + + /* Convert the limit from megabytes to bytes, as expected by + * handle_host_path(). */ + care->max_size = options->max_size * 1024 * 1024; + + /* handle_host_path() can now be safely used. 
*/ + care->is_ready = true; + + talloc_set_destructor(care, finalize_care); + return 0; +} + +/** + * Add @path_ to the list of @care->concealed_accesses. This function + * does *not* check for duplicated entries. + */ +static void register_concealed_access(const Tracee *tracee, Care *care, const char *path_) +{ + char path[PATH_MAX]; + size_t length; + int status; + + length = strlen(path_); + if (length >= PATH_MAX) + return; + memcpy(path, path_, length + 1); + + /* It was a concealed access if, and only if, the path was + * part of a asymmetric binding. */ + status = substitute_binding(tracee, HOST, path); + if (status != 1) + return; + + /* Do not register accesses that would not succeed even if the + * path was revealed, i.e. the path does not exist at all. */ + status = access(path, F_OK); + if (status < 0) + return; + + queue_item(care, &care->concealed_accesses, path); + VERBOSE(tracee, 1, "concealed: %s", path); +} + +/** + * Archive @path if needed. + */ +static void handle_host_path(Extension *extension, const char *path) +{ + struct stat statl; + bool as_dentries; + char *location; + Tracee *tracee; + Entry *entry; + Care *care; + int status; + + care = talloc_get_type_abort(extension->config, Care); + tracee = TRACEE(extension); + + if (!care->is_ready) + return; + + /* Don't archive if the path was already seen before. + * This ensures the rootfs is re-created as it was + * before any file creation or modification. */ + HASH_FIND_STR(care->entries, path, entry); + if (entry != NULL) + return; + + switch (get_sysnum(tracee, ORIGINAL)) { + case PR_getdents: + case PR_getdents64: + /* Don't archive if the dentry was already seen + * before, it would be useless. 
*/ + HASH_FIND_STR(care->dentries, path, entry); + if (entry != NULL) + return; + as_dentries = true; + break; + + default: + as_dentries = false; + break; + } + + entry = talloc_zero(care, Entry); + if (entry == NULL) { + note(tracee, WARNING, INTERNAL, "can't allocate entry for '%s'", path); + return; + } + + entry->path = talloc_strdup(entry, path); + if (entry->path == NULL) { + note(tracee, WARNING, INTERNAL, "can't allocate name for '%s'", path); + return; + } + + /* Remember this new entry. */ + if (as_dentries) + HASH_ADD_KEYPTR(hh, care->dentries, entry->path, strlen(entry->path), entry); + else + HASH_ADD_KEYPTR(hh, care->entries, entry->path, strlen(entry->path), entry); + + /* Don't use faccessat(2) here since it would require Linux >= + * 2.6.16 and Glibc >= 2.4, whereas CARE is supposed to work + * on any Linux 2.6 systems. */ + status = lstat(path, &statl); + if (status < 0) { + register_concealed_access(tracee, care, path); + return; + } + + /* FIFOs and Unix domain sockets should be volatile. */ + if (S_ISFIFO(statl.st_mode) || S_ISSOCK(statl.st_mode)) { + if (care->ipc_are_volatile) { + Item *item = queue_item(care, &care->volatile_paths, path); + if (item != NULL) + VERBOSE(tracee, 0, "volatile path: %s", path); + else + note(tracee, WARNING, USER, + "can't declare '%s' (fifo or socket) as volatile", path); + return; + } + else + note(tracee, WARNING, USER, + "'%1$s' might be explicitely declared volatile (-p %1$s)", path); + } + + /* Don't archive the content of dentries, this save a lot of + * space! */ + if (as_dentries) + statl.st_size = 0; + + if (care->volatile_paths != NULL) { + Item *item; + + STAILQ_FOREACH(item, care->volatile_paths, link) { + switch (compare_paths(item->load, path)) { + case PATHS_ARE_EQUAL: + /* It's a volatile path, archive it as + * empty to preserve its dentry. */ + statl.st_size = 0; + break; + + case PATH1_IS_PREFIX: + /* Don't archive it's a sub-part of a + * volatile path. 
*/ + return; + + default: + continue; + } + break; + } + } + + if (care->max_size >= 0 && statl.st_size > care->max_size) { + note(tracee, WARNING, USER, + "file '%s' is archived with a null size since it is bigger than %" + PRIi64 "MB, you can specify an alternate limit with the option -m.", + path, care->max_size / 1024 / 1024); + statl.st_size = 0; + } + + /* Format the location within the archive. */ + location = NULL; + assert(path[0] == '/'); + location = talloc_asprintf(tracee->ctx, "%s/rootfs%s", care->prefix, path); + if (location == NULL) { + note(tracee, WARNING, INTERNAL, "can't allocate location for '%s'", path); + return; + } + + status = archive(tracee, care->archive, path, location, &statl); + if (status == 0) + VERBOSE(tracee, 1, "archived: %s", path); +} + +typedef struct { + uint32_t d_ino; + uint32_t next; + uint16_t size; + char name[]; +} Dirent32; + +typedef struct { + uint64_t d_ino; + uint64_t next; + uint16_t size; + char name[]; +} Dirent64; + +typedef struct { + uint64_t inode; + int64_t next; + uint16_t size; + uint8_t type; + char name[]; +} NewDirent; + +/** + * Archive all the entries returned by getdents syscalls. 
+ */ +static void handle_getdents(Tracee *tracee, bool is_new_getdents) +{ + char component[PATH_MAX]; + char path[PATH_MAX]; + uint64_t offset; + int status; + + word_t result; + word_t buffer; + word_t fd; + + Dirent32 dirent32; + Dirent64 dirent64; + NewDirent new_dirent; + + result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + if ((int) result < 0) + return; + + fd = peek_reg(tracee, ORIGINAL, SYSARG_1); + buffer = peek_reg(tracee, ORIGINAL, SYSARG_2); + + offset = 0; + while (offset < result) { + word_t name_offset; + word_t address; + size_t size; + + address = buffer + offset; + + if (!is_new_getdents) { +#if defined(ARCH_X86_64) + const bool is_32bit = is_32on64_mode(tracee); +#else + const bool is_32bit = true; +#endif + if (is_32bit) { + name_offset = offsetof(Dirent32, name); + status = read_data(tracee, &dirent32, address, sizeof(dirent32)); + size = dirent32.size; + } + else { + name_offset = offsetof(Dirent64, name); + status = read_data(tracee, &dirent64, address, sizeof(dirent64)); + size = dirent64.size; + } + } else { + name_offset = offsetof(NewDirent, name); + status = read_data(tracee, &new_dirent, address, sizeof(new_dirent)); + size = new_dirent.size; + } + if (status < 0) { + note(tracee, WARNING, INTERNAL, "can't read dentry"); + break; + } + + status = read_string(tracee, component, address + name_offset, PATH_MAX); + if (status < 0 || status >= PATH_MAX) { + note(tracee, WARNING, INTERNAL, "can't read dentry" ); + goto next; + } + + /* Archive through the host_path notification. */ + strcpy(path, "/"); + translate_path(tracee, path, fd, component, false); + next: + offset += size; + } + + if (offset != result) + note(tracee, WARNING, INTERNAL, "dentry table out of sync."); +} + +/** + * Set AT_HWCAP to 0 to ensure no processor specific extensions will + * be used, for the sake of reproducibility across different CPUs. 
+ * This function assumes the "argv, envp, auxv" stuff is pointed to by + * @tracee's stack pointer, as expected right after a successful call + * to execve(2). + */ +static int adjust_elf_auxv(Tracee *tracee) +{ + ElfAuxVector *vectors; + ElfAuxVector *vector; + word_t vectors_address; + + vectors_address = get_elf_aux_vectors_address(tracee); + if (vectors_address == 0) + return 0; + + vectors = fetch_elf_aux_vectors(tracee, vectors_address); + if (vectors == NULL) + return 0; + + for (vector = vectors; vector->type != AT_NULL; vector++) { + if (vector->type == AT_HWCAP) + vector->value = 0; + } + + push_elf_aux_vectors(tracee, vectors, vectors_address); + + return 0; +} + +/* List of syscalls handled by this extensions. */ +static FilteredSysnum filtered_sysnums[] = { + { PR_getdents, FILTER_SYSEXIT }, + { PR_getdents64, FILTER_SYSEXIT }, + FILTERED_SYSNUM_END, +}; + +/** + * Handler for this @extension. It is triggered each time an @event + * occurred. See ExtensionEvent for the meaning of @data1 and @data2. 
+ */ +int care_callback(Extension *extension, ExtensionEvent event, + intptr_t data1, intptr_t data2 UNUSED) +{ + Tracee *tracee; + + switch (event) { + case INITIALIZATION: + extension->filtered_sysnums = filtered_sysnums; + return generate_care(extension, (Options *) data1); + + case NEW_STATUS: { + int status = (int) data1; + if (WIFEXITED(status)) { + Care *care = talloc_get_type_abort(extension->config, Care); + care->last_exit_status = WEXITSTATUS(status); + } + return 0; + } + + case HOST_PATH: + handle_host_path(extension, (const char *) data1); + return 0; + + case SYSCALL_EXIT_START: + tracee = TRACEE(extension); + + switch (get_sysnum(tracee, ORIGINAL)) { + case PR_getdents: + handle_getdents(tracee, false); + break; + + case PR_getdents64: + handle_getdents(tracee, true); + break; + + case PR_execve: { + word_t result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + + /* Note: this can be done only before PRoot pushes the + * load script into tracee's stack. */ + if ((int) result >= 0) + adjust_elf_auxv(tracee); + break; + } + + default: + break; + } + return 0; + + default: + return 0; + } +} diff --git a/proot/proot_linux/extension/care/care.h b/proot/proot_linux/extension/care/care.h new file mode 100644 index 0000000..b5411bb --- /dev/null +++ b/proot/proot_linux/extension/care/care.h @@ -0,0 +1,80 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef CARE_H +#define CARE_H + +#include +#include /* STAILQ_*, */ + +#include "extension/care/archive.h" + +/* Generic item for a STAILQ list. */ +typedef struct item { + const void *load; + STAILQ_ENTRY(item) link; +} Item; + +typedef STAILQ_HEAD(list, item) List; + +/* CARE CLI configuration. */ +typedef struct { + const char *output; + char *const *command; + + List *concealed_paths; + List *revealed_paths; + List *volatile_paths; + List *volatile_envars; + + bool ignore_default_config; + + int max_size; +} Options; + +/* CARE internal configuration. */ +typedef struct { + struct Entry *entries; + struct Entry *dentries; + + char *const *command; + List *volatile_paths; + List *volatile_envars; + List *concealed_accesses; + + const char *prefix; + const char *output; + const char *initial_cwd; + bool ipc_are_volatile; + + Archive *archive; + int64_t max_size; + + int last_exit_status; + + bool is_ready; +} Care; + +extern Item *queue_item(TALLOC_CTX *context, List **list, const char *value); + +#endif /* CARE_H */ + diff --git a/proot/proot_linux/extension/care/extract.c b/proot/proot_linux/extension/care/extract.c new file mode 100644 index 0000000..ca838c8 --- /dev/null +++ b/proot/proot_linux/extension/care/extract.c @@ -0,0 +1,351 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* open(2), fstat(2), lseek(2), */ +#include /* open(2), fstat(2), */ +#include /* open(2), */ +#include /* open(2), */ +#include /* *int*_t, *INT*_MAX, */ +#include /* fstat(2), read(2), lseek(2), */ +#include /* mmap(2), MAP_*, */ +#include /* bool, true, false, */ +#include /* assert(3), */ +#include /* errno(3), */ +#include /* strerror(3), */ +#include /* PRI*, */ +#include /* be64toh(3), */ +#include /* archive_*(3), */ +#include /* archive_entry*(3), */ + +#include "extension/care/extract.h" +#include "cli/note.h" + +/** + * Extract the given @archive into the current working directory. + * This function returns -1 if an error occured, otherwise 0. + */ +static int extract_archive(struct archive *archive) +{ + struct archive_entry *entry; + int result = 0; + int status; + + int flags = ARCHIVE_EXTRACT_PERM + | ARCHIVE_EXTRACT_TIME + | ARCHIVE_EXTRACT_ACL + | ARCHIVE_EXTRACT_FFLAGS + | ARCHIVE_EXTRACT_XATTR; + + /* Avoid spurious warnings. 
One should test for the CAP_CHOWN + * capability instead but libarchive only does this test: */ + if (geteuid() == 0) + flags |= ARCHIVE_EXTRACT_OWNER; + + while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { + status = archive_read_extract(archive, entry, flags); + switch (status) { + case ARCHIVE_WARN: + note(NULL, WARNING, INTERNAL, "%s: %s", + archive_error_string(archive), + strerror(archive_errno(archive))); + /* FALLTHROUGH */ + case ARCHIVE_OK: + note(NULL, INFO, USER, "extracted: %s", archive_entry_pathname(entry)); + break; + + default: + result = -1; + note(NULL, ERROR, INTERNAL, "%s: %s", + archive_error_string(archive), + strerror(archive_errno(archive))); + break; + } + } + + return result; +} + +/* Data used by archive_[open/read/close] callbacks. */ +typedef struct +{ + uint8_t buffer[4096]; + const char *path; + size_t size_remaining; + int fd; +} CallbackData; + +/** + * This callback is invoked by archive_open(). It returns ARCHIVE_OK + * if the underlying file or data source is successfully opened. If + * the open fails, it calls archive_set_error() to register an error + * code and message and returns ARCHIVE_FATAL. + * + * -- man 3 archive_read_open. + */ +static int open_callback(struct archive *archive, void *data_) +{ + CallbackData *data = talloc_get_type_abort(data_, CallbackData); + AutoExtractInfo info; + struct stat statf; + off_t offset; + int status; + + /* Note: data->fd will be closed by close_callback(). */ + data->fd = open(data->path, O_RDONLY); + if (data->fd < 0) { + archive_set_error(archive, errno, "can't open archive"); + return ARCHIVE_FATAL; + } + + status = fstat(data->fd, &statf); + if (status < 0) { + archive_set_error(archive, errno, "can't stat archive"); + return ARCHIVE_FATAL; + } + + /* Assume it's a regular archive if it physically can't be a + * self-extracting one. 
*/ + if (statf.st_size < (off_t) sizeof(AutoExtractInfo)) + return ARCHIVE_OK; + + offset = lseek(data->fd, statf.st_size - sizeof(AutoExtractInfo), SEEK_SET); + if (offset == (off_t) -1) { + archive_set_error(archive, errno, "can't seek in archive"); + return ARCHIVE_FATAL; + } + + status = read(data->fd, &info, sizeof(AutoExtractInfo)); + if (status < 0) { + archive_set_error(archive, errno, "can't read archive"); + return ARCHIVE_FATAL; + } + + if ( status == sizeof(AutoExtractInfo) + && strcmp(info.signature, AUTOEXTRACT_SIGNATURE) == 0) { + /* This is a self-extracting archive, retrieve it's + * offset and size. */ + + data->size_remaining = be64toh(info.size); + offset = statf.st_size - data->size_remaining - sizeof(AutoExtractInfo); + + note(NULL, INFO, USER, + "archive found: offset = %" PRIu64 ", size = %" PRIu64 "", + (uint64_t) offset, data->size_remaining); + } + else { + /* This is not a self-extracting archive, assume it's + * a regular one... */ + offset = 0; + data->size_remaining = SIZE_MAX; + + /* ... unless a self-extracting archive really was + * expected. */ + if (strcmp(data->path, "/proc/self/exe") == 0) + return ARCHIVE_FATAL; + } + + offset = lseek(data->fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + archive_set_error(archive, errno, "can't seek in archive"); + return ARCHIVE_FATAL; + } + + return ARCHIVE_OK; +} + +/** + * This callback is invoked whenever the library requires raw bytes + * from the archive. The read callback reads data into a buffer, set + * the @buffer argument to point to the available data, and return a + * count of the number of bytes available. The library will invoke + * the read callback again only after it has consumed this data. The + * library imposes no constraints on the size of the data blocks + * returned. On end-of-file, the read callback returns zero. On + * error, the read callback should invoke archive_set_error() to + * register an error code and message and returns -1. 
+ * + * -- man 3 archive_read_open. + */ +static ssize_t read_callback(struct archive *archive, void *data_, const void **buffer) +{ + CallbackData *data = talloc_get_type_abort(data_, CallbackData); + ssize_t size = sizeof(data->buffer); + + if (sizeof(data->buffer) > data->size_remaining) { + size = data->size_remaining; + if (size == 0) { + return 0; + } + } + + size = read(data->fd, data->buffer, size); + if (size < 0) { + archive_set_error(archive, errno, "can't read archive"); + return -1; + } + assert(size <= data->size_remaining); + data->size_remaining -= size; + + *buffer = data->buffer; + return size; +} + +/** + * This callback is invoked by archive_close() when the archive + * processing is complete. The callback returns ARCHIVE_OK on + * success. On failure, the callback invokes archive_set_error() to + * register an error code and message and returns ARCHIVE_FATAL. + * + * -- man 3 archive_read_open + */ +static int close_callback(struct archive *archive, void *data_) +{ + CallbackData *data = talloc_get_type_abort(data_, CallbackData); + int status; + + status = close(data->fd); + if (status < 0) { + archive_set_error(archive, errno, "can't close archive"); + return ARCHIVE_WARN; + } + + return ARCHIVE_OK; +} + +/** + * Extract the archive stored at the given @path. This function + * returns -1 if an error occurred, otherwise 0. 
+ */ +int extract_archive_from_file(const char *path) +{ + struct archive *archive = NULL; + CallbackData *data = NULL; + int status2; + int status; + + archive = archive_read_new(); + if (archive == NULL) { + note(NULL, ERROR, INTERNAL, "can't initialize archive structure"); + status = -1; + goto end; + } + + status = archive_read_support_format_cpio(archive); + if (status == ARCHIVE_WARN) { + note(NULL, WARNING, INTERNAL, "set archive format: %s", + archive_error_string(archive)); + } + else if (status != ARCHIVE_OK) { + note(NULL, ERROR, INTERNAL, "can't set archive format: %s", + archive_error_string(archive)); + status = -1; + goto end; + } + + status = archive_read_support_format_gnutar(archive); + if (status == ARCHIVE_WARN) { + note(NULL, WARNING, INTERNAL, "set archive format: %s", + archive_error_string(archive)); + } + else if (status != ARCHIVE_OK) { + note(NULL, ERROR, INTERNAL, "can't set archive format: %s", + archive_error_string(archive)); + status = -1; + goto end; + } + + status = archive_read_support_filter_gzip(archive); + if (status == ARCHIVE_WARN) { + note(NULL, WARNING, INTERNAL, "add archive filter: %s", + archive_error_string(archive)); + } + else if (status != ARCHIVE_OK) { + note(NULL, ERROR, INTERNAL, "can't add archive filter: %s", + archive_error_string(archive)); + status = -1; + goto end; + } + + status = archive_read_support_filter_lzop(archive); + if (status == ARCHIVE_WARN) { + note(NULL, WARNING, INTERNAL, "add archive filter: %s", + archive_error_string(archive)); + } + else if (status != ARCHIVE_OK) { + note(NULL, ERROR, INTERNAL, "can't add archive filter: %s", + archive_error_string(archive)); + status = -1; + goto end; + } + + data = talloc_zero(NULL, CallbackData); + if (data == NULL) { + note(NULL, ERROR, INTERNAL, "can't allocate callback data"); + status = -1; + goto end; + + } + + data->path = talloc_strdup(data, path); + if (data->path == NULL) { + note(NULL, ERROR, INTERNAL, "can't allocate callback data path"); + 
status = -1; + goto end; + + } + + status = archive_read_open(archive, data, open_callback, read_callback, close_callback); + if (status == ARCHIVE_WARN) { + if (archive_error_string(archive) != NULL) + note(NULL, WARNING, INTERNAL, "read archive: %s", + archive_error_string(archive)); + } + else if (status != ARCHIVE_OK) { + /* Don't complain if no error message were registered, + * ie. when testing for a self-extracting archive. */ + if (archive_error_string(archive) != NULL) + note(NULL, ERROR, INTERNAL, "can't read archive: %s", + archive_error_string(archive)); + status = -1; + goto end; + } + + status = extract_archive(archive); +end: + if (archive != NULL) { + status2 = archive_read_close(archive); + if (status2 != ARCHIVE_OK) { + note(NULL, WARNING, INTERNAL, "can't close archive: %s", + archive_error_string(archive)); + } + + status2 = archive_read_free(archive); + if (status2 != ARCHIVE_OK) { + note(NULL, WARNING, INTERNAL, "can't free archive: %s", + archive_error_string(archive)); + } + } + + TALLOC_FREE(data); + + return status; +} diff --git a/proot/proot_linux/extension/care/extract.h b/proot/proot_linux/extension/care/extract.h new file mode 100644 index 0000000..b29d07e --- /dev/null +++ b/proot/proot_linux/extension/care/extract.h @@ -0,0 +1,38 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef EXTRACT_H +#define EXTRACT_H + +#include +#include "attribute.h" + +#define AUTOEXTRACT_SIGNATURE "I_LOVE_PIZZA" + +typedef struct { + char signature[sizeof(AUTOEXTRACT_SIGNATURE)]; + uint64_t size; +} PACKED AutoExtractInfo; + +extern int WEAK extract_archive_from_file(const char *path); + +#endif /* EXTRACT_H */ diff --git a/proot/proot_linux/extension/care/final.c b/proot/proot_linux/extension/care/final.c new file mode 100644 index 0000000..965e92c --- /dev/null +++ b/proot/proot_linux/extension/care/final.c @@ -0,0 +1,476 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* lstat(2), readlink(2), getpid(2), wirte(2), lseek(2), get*id(2), */ +#include /* get*id(2), */ +#include /* struct stat, fchmod(2), */ +#include /* PATH_MAX, */ +#include /* uname(2), */ +#include /* fprintf(3), fclose(3), */ +#include /* errno, ENAMETOOLONG, */ +#include /* strcpy(3), */ +#include /* htobe64(3), */ +#include /* assert(3), */ + +#include "extension/care/final.h" +#include "extension/care/care.h" +#include "extension/care/extract.h" +#include "execve/ldso.h" +#include "path/path.h" +#include "path/temp.h" +#include "cli/note.h" + +/** + * Find in @care->volatile_envars the given @envar (format + * "name=value"). This function returns the name of the variable if + * found (format "name"), NULL otherwise. + */ +static const char *find_volatile_envar(const Care *care, const char *envar) +{ + const Item *volatile_envar; + + if (care->volatile_envars == NULL) + return NULL; + + STAILQ_FOREACH(volatile_envar, care->volatile_envars, link) { + if (is_env_name(envar, volatile_envar->load)) + return volatile_envar->load; + } + + return NULL; +} + +extern char **environ; + +/** + * Archive in @care->archive the content of @file with the given + * @name, then close it. This function returns < 0 if an error + * occured, otherwise 0. Note: this function is called in @care's + * destructor. + */ +static int archive_close_file(const Care *care, FILE *file, const char *name) +{ + char path[PATH_MAX]; + struct stat statl; + char *location; + int status; + int fd; + + /* Ensure everything is written into the file before archiving + * it. 
*/ + fflush(file); + + fd = fileno(file); + + status = fstat(fd, &statl); + if (status < 0) { + note(NULL, ERROR, SYSTEM, "can't get '%s' status", name); + goto end; + } + + location = talloc_asprintf(care, "%s/%s", care->prefix, name); + if (location == NULL) { + note(NULL, ERROR, INTERNAL, "can't allocate location for '%s'", name); + status = -1; + goto end; + } + + status = readlink_proc_pid_fd(getpid(), fd, path); + if (status < 0) { + note(NULL, ERROR, INTERNAL, "can't readlink(/proc/self/fd/%d)", fd); + goto end; + } + + status = archive(NULL, care->archive, path, location, &statl); +end: + (void) fclose(file); + return status; +} + +/** + * Return a copy -- attached to @context -- of @input with all ' + * (single quote) characters escaped. + */ +static const char *escape_quote(TALLOC_CTX *context, const char *input) +{ + char *output; + size_t length; + size_t i; + + output = talloc_strdup(context, ""); + if (output == NULL) + return NULL; + + length = strlen(input); + for (i = 0; i < length; i++) { + char buffer[2] = { input[i], '\0' }; + + if (buffer[0] == '\'') + output = talloc_strdup_append_buffer(output, "'\\''"); + else + output = talloc_strdup_append_buffer(output, buffer); + if (output == NULL) + return NULL; + } + + return output; +} + +/* Helpers for archive_* functions. */ +#define N(format, ...) \ + do { \ + if (fprintf(file, format "\n", ##__VA_ARGS__) < 0) { \ + note(NULL, ERROR, INTERNAL, "can't write file"); \ + (void) fclose(file); \ + return -1; \ + } \ + } while (0) + +#define C(format, ...) N(format " \\", ##__VA_ARGS__) + +/** + * Archive the "re-execute.sh" file, according to the given @care. + * This function returns < 0 if an error occured, 0 otherwise. Note: + * this function is called in @care's destructor. 
+ */ +static int archive_re_execute_sh(Care *care) +{ + struct utsname utsname; + const Item *item; + FILE *file; + int status; + int i; + + file = open_temp_file(NULL, "care"); + if (file == NULL) { + note(NULL, ERROR, INTERNAL, "can't create temporary file for 're-execute.sh'"); + return -1; + } + + status = fchmod(fileno(file), 0755); + if (status < 0) + note(NULL, WARNING, SYSTEM, "can't make 're-execute.sh' executable"); + + N("#! /bin/sh"); + N(""); + N("export XAUTHORITY=\"${XAUTHORITY:-$HOME/.Xauthority}\""); + N("export ICEAUTHORITY=\"${ICEAUTHORITY:-$HOME/.ICEauthority}\""); + N(""); + + N("nbargs=$#"); + C("[ $nbargs -ne 0 ] || set --"); + for (i = 0; care->command != NULL && care->command[i] != NULL; i++) + C("'%s'", care->command[i]); + N(""); + + N("PROOT=\"${PROOT-$(dirname $0)/proot}\""); + N(""); + + N("if [ ! -e ${PROOT} ]; then"); + N(" PROOT=$(which proot)"); + N("fi"); + N(""); + + N("if [ -z ${PROOT} ]; then"); + N(" echo '**********************************************************************'"); + N(" echo '\"proot\" command not found, please get it from https://proot-me.github.io'"); + N(" echo '**********************************************************************'"); + N(" exit 1"); + N("fi"); + N(""); + + N("if [ x$PROOT_NO_SECCOMP != x ]; then"); + N(" PROOT_NO_SECCOMP=\"PROOT_NO_SECCOMP=$PROOT_NO_SECCOMP\""); + N("fi"); + N(""); + + C("env --ignore-environment"); + C("PROOT_IGNORE_MISSING_BINDINGS=1"); + C("$PROOT_NO_SECCOMP"); + + for (i = 0; environ[i] != NULL; i++) { + const char *volatile_envar; + + volatile_envar = find_volatile_envar(care, environ[i]); + if (volatile_envar != NULL) + C("'%1$s'=\"$%1$s\" ", volatile_envar); + else { + const char *string = escape_quote(care, environ[i]); + C("'%s' ", string ?: environ[i]); + } + } + + C("\"${PROOT-$(dirname $0)/proot}\""); + + if (care->volatile_paths != NULL) { + /* If a volatile path is relative to $HOME, use an + * asymmetric binding. 
For instance: + * + * -b $HOME/.Xauthority:/home/user/.Xauthority + * + * where "/home/user" was the $HOME during the + * original execution. */ + STAILQ_FOREACH(item, care->volatile_paths, link) { + const char *name = talloc_get_name(item); + if (name[0] == '$') + C("-b \"%s:%s\" ", name, (char *) item->load); + else + C("-b \"%s\" ", (char *) item->load); + } + } + + status = uname(&utsname); + if (status < 0) { + note(NULL, WARNING, SYSTEM, "can't get kernel release"); + C("-k 3.17.0"); + } + else { + C("-k '\\%s\\%s\\%s\\%s\\%s\\%s\\0\\' ", + utsname.sysname, + utsname.nodename, + utsname.release, + utsname.version, + utsname.machine, + utsname.domainname); + } + + C("-i %d:%d", getuid(), getgid()); + C("-w '%s' ", care->initial_cwd); + C("-r \"$(dirname $0)/rootfs\""); + + /* In case the program retrieves its DSOs from /proc/self/maps + * (eg. VLC). */ + C("-b \"$(dirname $0)/rootfs\""); + N("${1+\"$@\"}"); + N(""); + + N("status=$?"); + N("if [ $status -ne %d ] && [ $nbargs -eq 0 ]; then", care->last_exit_status); + N("echo \"care: The reproduced execution didn't return the same exit status as the\""); + N("echo \"care: original execution. If it is unexpected, please report this bug\""); + N("echo \"care: to CARE/PRoot developers:\""); + N("echo \"care: * mailing list: reproducible@googlegroups.com; or\""); + N("echo \"care: * forum: https://groups.google.com/forum/?fromgroups#!forum/reproducible; or\""); + N("echo \"care: * issue tracker: https://github.com/cedric-vincent/PRoot/issues/\""); + N("fi"); + N(""); + N("exit $status"); + + return archive_close_file(care, file, "re-execute.sh"); +} + +/** + * Archive the "concealed-accesses.txt" file in @care->archive, + * according to the content of @care->concealed_accesses. This + * function returns < 0 if an error occured, 0 otherwise. Note: this + * function is called in @care's destructor. 
+ */ +static int archive_concealed_accesses_txt(const Care *care) +{ + const Item *item; + FILE *file; + + if (care->concealed_accesses == NULL) + return 0; + + file = open_temp_file(NULL, "care"); + if (file == NULL) { + note(NULL, WARNING, INTERNAL, + "can't create temporary file for 'concealed-accesses.txt'"); + return -1; + } + + STAILQ_FOREACH(item, care->concealed_accesses, link) + N("%s", (char *) item->load); + + return archive_close_file(care, file, "concealed-accesses.txt"); +} + +/** + * Archive the "README.txt" file in @care->archive. This function + * returns < 0 if an error occured, 0 otherwise. Note: this function + * is called in @care's destructor. + */ +static int archive_readme_txt(const Care *care) +{ + FILE *file; + + file = open_temp_file(NULL, "care"); + if (file == NULL) { + note(NULL, WARNING, INTERNAL, "can't create temporary file for 'README.txt'"); + return -1; + } + + N("This archive was created with CARE: https://proot-me.github.io. It contains:"); + N(""); + N("re-execute.sh"); + N(" start the re-execution of the initial command as originally"); + N(" specified. It is also possible to specify an alternate command."); + N(" For example, assuming gcc was archived, it can be re-invoked"); + N(" differently:"); + N(""); + N(" $ ./re-execute.sh gcc --version"); + N(" gcc (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2"); + N(""); + N(" $ echo 'int main(void) { return puts(\"OK\"); }' > rootfs/foo.c"); + N(" $ ./re-execute.sh gcc -Wall /foo.c"); + N(" $ foo.c: In function \"main\":"); + N(" $ foo.c:1:1: warning: implicit declaration of function \"puts\""); + N(""); + N("rootfs/"); + N(" directory where all the files used during the original execution"); + N(" were archived, they will be required for the reproduced execution."); + N(""); + N("proot"); + N(" virtualization tool invoked by re-execute.sh to confine the"); + N(" reproduced execution into the rootfs. 
It also emulates the"); + N(" missing kernel features if needed."); + N(""); + N("concealed-accesses.txt"); + N(" list of accessed paths that were concealed during the original"); + N(" execution. Its main purpose is to know what are the paths that"); + N(" should be revealed if the the original execution didn't go as"); + N(" expected. It is absolutely useless for the reproduced execution."); + N(""); + + return archive_close_file(care, file, "README.txt"); +} + +#undef N +#undef C + +#if !defined(CARE_BINARY_IS_PORTABLE) +static int archive_myself(const Care *care) UNUSED; +#endif + +/** + * Archive the content pointed to by "/proc/self/exe" in + * "@care->archive:@care->prefix/proot". Note: this function is + * called in @care's destructor. + */ +static int archive_myself(const Care *care) +{ + char path[PATH_MAX]; + struct stat statl; + char *location; + int status; + + status = readlink("/proc/self/exe", path, PATH_MAX); + if (status >= PATH_MAX) { + status = -1; + errno = ENAMETOOLONG; + } + if (status < 0) { + note(NULL, ERROR, SYSTEM, "can't readlink '/proc/self/exe'"); + return status; + } + path[status] = '\0'; + + status = lstat(path, &statl); + if (status < 0) { + note(NULL, ERROR, INTERNAL, "can't lstat '%s'", path); + return status; + } + + location = talloc_asprintf(care, "%s/proot", care->prefix); + if (location == NULL) { + note(NULL, ERROR, INTERNAL, "can't allocate location for 'proot'"); + return -1; + } + + return archive(NULL, care->archive, path, location, &statl); +} + +/** + * Archive "re-execute.sh" & "proot" from @care. This function + * always returns 0. Note: this is a Talloc destructor. + */ +int finalize_care(Care *care) +{ + char *extractor; + int status; + + /* Generate & archive the "re-execute.sh" script. */ + status = archive_re_execute_sh(care); + if (status < 0) + note(NULL, WARNING, INTERNAL, "can't archive 're-execute.sh'"); + + /* Generate & archive the "concealed-accesses.txt" file. 
*/ + status = archive_concealed_accesses_txt(care); + if (status < 0) + note(NULL, WARNING, INTERNAL, "can't archive 'concealed-accesses.txt'"); + + /* Generate & archive the "README.txt" file. */ + status = archive_readme_txt(care); + if (status < 0) + note(NULL, WARNING, INTERNAL, "can't archive 'README.txt'"); + +#if defined(CARE_BINARY_IS_PORTABLE) + /* Archive "care" as "proot", these are the same binary. */ + status = archive_myself(care); + if (status < 0) + note(NULL, WARNING, INTERNAL, "can't archive 'proot'"); +#endif + + finalize_archive(care->archive); + + /* Append self/raw extracting information if needed. */ + if (care->archive->fd >= 0 && care->archive->offset > 0) { + AutoExtractInfo info; + off_t position; + + strcpy(info.signature, AUTOEXTRACT_SIGNATURE); + + /* Compute the size of the archive. */ + position = lseek(care->archive->fd, 0, SEEK_CUR); + assert(position > care->archive->offset); + info.size = htobe64(position - care->archive->offset); + + status = write(care->archive->fd, &info, sizeof(info)); + if (status != sizeof(info)) + note(NULL, WARNING, SYSTEM, "can't write extracting information"); + + (void) close(care->archive->fd); + care->archive->fd = -1; + + if (care->archive->offset == strlen("RAW")) + extractor = talloc_asprintf(care, "`care -x %s`", care->output); + else + extractor = talloc_asprintf(care, "`%2$s%1$s` or `care -x %1$s`", + care->output, care->output[0] == '/' ? 
"" : "./"); + } + else if (care->output[strlen(care->output) - 1] != '/') + extractor = talloc_asprintf(care, "`care -x %s`", care->output); + else + extractor = NULL; + + note(NULL, INFO, USER, + "----------------------------------------------------------------------"); + note(NULL, INFO, USER, "Hints:"); + note(NULL, INFO, USER, + " - search for \"conceal\" in `care -h` if the execution didn't go as expected."); + + if (extractor != NULL) + note(NULL, INFO, USER, " - run %s to extract the output archive correctly.", extractor); + + return 0; +} diff --git a/proot/proot_linux/extension/care/final.h b/proot/proot_linux/extension/care/final.h new file mode 100644 index 0000000..d299a10 --- /dev/null +++ b/proot/proot_linux/extension/care/final.h @@ -0,0 +1,30 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef CARE_FINAL_H +#define CARE_FINAL_H + +#include "care.h" + +extern int finalize_care(Care *care); + +#endif /* CARE_FINAL_H */ diff --git a/proot/proot_linux/extension/extension.c b/proot/proot_linux/extension/extension.c new file mode 100644 index 0000000..f26bcf6 --- /dev/null +++ b/proot/proot_linux/extension/extension.c @@ -0,0 +1,170 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* assert(3), */ +#include /* talloc_*, */ +#include /* LIST_*, */ +#include /* bzero(3), */ + +#include "extension/extension.h" +#include "cli/note.h" +#include "build.h" + +#include "compat.h" +#include "extension/portmap/portmap.h" + +/** + * Remove an @extension from its tracee's list, then send it the + * "REMOVED" event. + * + * Note: this is a Talloc destructor. + */ +static int remove_extension(Extension *extension) +{ + LIST_REMOVE(extension, link); + extension->callback(extension, REMOVED, 0, 0); + + bzero(extension, sizeof(Extension)); + return 0; +} + +/** + * Allocate a new extension for the given @callback then attach it to + * its @tracee. This function returns NULL on error, otherwise the + * new extension. 
+ */ +static Extension *new_extension(Tracee *tracee, extension_callback_t callback) +{ + Extension *extension; + + /* Lazy allocation of the list head. */ + if (tracee->extensions == NULL) { + tracee->extensions = talloc_zero(tracee, Extensions); + if (tracee->extensions == NULL) + return NULL; + } + + /* Allocate a new extension in the talloc context of the list head. */ + extension = talloc_zero(tracee->extensions, Extension); + if (extension == NULL) + return NULL; + extension->callback = callback; + + /* Attach it to its tracee; remove_extension() is registered as its destructor. */ + LIST_INSERT_HEAD(tracee->extensions, extension, link); + talloc_set_destructor(extension, remove_extension); + + return extension; +} + +/** + * Retrieve from @tracee->extensions the extension for the given + * @callback, or NULL if there is none. + */ +Extension *get_extension(Tracee *tracee, extension_callback_t callback) +{ + Extension *extension; + + if (tracee->extensions == NULL) + return NULL; + + LIST_FOREACH(extension, tracee->extensions, link) { + if (extension->callback == callback) + return extension; + } + + return NULL; +} + +/** + * Initialize a new extension for the given @callback then attach it + * to its @tracee. The parameter @cli is its argument that was passed + * to the command-line interface. This function returns a negative value (-1, or the status reported by the callback) if an error + * occurred, otherwise 0. + */ +int initialize_extension(Tracee *tracee, extension_callback_t callback, const char *cli) +{ + Extension *extension; + int status; + + extension = new_extension(tracee, callback); + if (extension == NULL) { + note(tracee, WARNING, INTERNAL, "can't create a new extension"); + return -1; + } + + /* Remove the new extension if its initialization has failed. */ + status = extension->callback(extension, INITIALIZATION, (intptr_t) cli, 0); + if (status < 0) { + TALLOC_FREE(extension); + return status; + } + + return 0; +} + +/** + * Rebuild a new list of extensions for this @child from its @parent. + * The inheritance model is controlled by the @parent. 
+ */ +void inherit_extensions(Tracee *child, Tracee *parent, word_t clone_flags) +{ + Extension *parent_extension; + Extension *child_extension; + int status; + + if (parent->extensions == NULL) + return; + + /* Sanity check: the child may already have extensions only on a sub-reconfiguration (CLONE_RECONF). */ + assert(child->extensions == NULL || clone_flags == CLONE_RECONF); + + LIST_FOREACH(parent_extension, parent->extensions, link) { + /* Ask the parent how this extension is + * inheritable. */ + status = parent_extension->callback(parent_extension, INHERIT_PARENT, + (intptr_t)child, clone_flags); + + /* Not inheritable (negative answer). */ + if (status < 0) + continue; + + /* Inheritable... */ + child_extension = new_extension(child, parent_extension->callback); + if (child_extension == NULL) { + note(parent, WARNING, INTERNAL, + "can't create a new extension for pid %d", child->pid); + continue; + } + + if (status == 0) { + /* ... with a shared config (a talloc reference on the parent's one) or ... */ + child_extension->config = + talloc_reference(child_extension, parent_extension->config); + } + else { + /* ... with another inheritance model: the child's callback is handed the parent's extension. */ + child_extension->callback(child_extension, INHERIT_CHILD, + (intptr_t)parent_extension, clone_flags); + } + } +} diff --git a/proot/proot_linux/extension/extension.h b/proot/proot_linux/extension/extension.h new file mode 100644 index 0000000..29a44c8 --- /dev/null +++ b/proot/proot_linux/extension/extension.h @@ -0,0 +1,206 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef EXTENSION_H +#define EXTENSION_H + +#include <sys/queue.h> /* LIST_, */ +#include <stdint.h> /* intptr_t, */ + +#include "tracee/tracee.h" +#include "syscall/seccomp.h" +#include "extension/portmap/portmap.h" + +/* List of possible events. */ +typedef enum { + /* A guest path passed as an argument of the current syscall + * is about to be translated: "(char *) data1" is the base for + * "(char *) data2" -- the guest path -- if this latter is + * relative. If the extension returns > 0, then PRoot skips + * its own handling. If the extension returns < 0, then PRoot + * reports this errno as-is. */ + GUEST_PATH, + + /* A canonicalized host path is being accessed during the + * translation of a guest path: "(char *) data1" is the + * canonicalized host path and "(bool) data2" is true if it is + * the last iteration. Note that several host paths are accessed + * for a given guest path since PRoot has to walk along all + * parent directories and symlinks in order to translate it. + * If the extension returns < 0, then PRoot reports this errno + * as-is. */ + HOST_PATH, + + /* The tracee enters a syscall, and PRoot hasn't done anything + * yet. If the extension returns > 0, then PRoot skips its + * own handling. If the extension returns < 0, then PRoot + * cancels the syscall and reports this errno to the + * tracee. */ + SYSCALL_ENTER_START, + + /* The tracee enters a syscall, and PRoot has already handled + * it: "(int) data1" is the current status, it is < 0 when + * something went wrong. If the extension returns < 0, then + * PRoot cancels the syscall and reports this errno to the + * tracee. */ + SYSCALL_ENTER_END, + + /* The tracee exits a syscall, and PRoot hasn't done anything + * yet. 
If the extension returns > 0, then PRoot skips its + * own handling. If the extension returns < 0, then PRoot + * reports this errno to the tracee. */ + SYSCALL_EXIT_START, + + /* The tracee exits a syscall, and PRoot has already handled + * it. If the extension returns < 0, then PRoot reports this + * errno to the tracee. */ + SYSCALL_EXIT_END, + + /* The canonicalization succeeds: "(char *) data1" is the + * translated path from the host point-of-view. It can be + * substituted by the extension. If the extension returns < + * 0, then PRoot reports this errno as-is. */ + TRANSLATED_PATH, + + /* The tracee is stopped either because of a syscall or a + * signal: "(int) data1" is its new status as reported by + * waitpid(2). If the extension returns != 0, then PRoot + * skips its own handling. */ + NEW_STATUS, + + /* Ask how this extension is inheritable: "(Tracee *) data1" + * is the child tracee and "(word_t) data2" is the clone(2) + * flags (CLONE_RECONF for sub-reconfiguration). The meaning + * of the returned value is: + * + * < 0 : not inheritable + * == 0 : inheritable + shared configuration. + * > 0 : inheritable + call INHERIT_CHILD. */ + INHERIT_PARENT, + + /* Control the inheritance: "(Extension *) data1" is the + * extension of the parent and "(word_t) data2" is the clone(2) + * flags (CLONE_RECONF for sub-reconfiguration). For instance + * the extension for the child could use a configuration + * different from the parent's configuration. */ + INHERIT_CHILD, + + /* The tracee enters a "chained" syscall, that is, an + * unrequested syscall inserted by PRoot after an actual + * syscall. If the extension returns < 0, then PRoot cancels + * the syscall and reports this errno to the tracee. */ + SYSCALL_CHAINED_ENTER, + + /* The tracee exits a "chained" syscall, that is, an + * unrequested syscall inserted by PRoot after an actual + * syscall. 
+ */ + SYSCALL_CHAINED_EXIT, + + /* Initialize the extension: "(const char *) data1" is its + * argument that was passed to the command-line interface. If + * the extension returns < 0, then PRoot removes it. */ + INITIALIZATION, + + /* The extension is not attached to its tracee anymore + * (destructor). */ + REMOVED, + + /* Print the current configuration of the extension. See + * print_config() as an example. */ + PRINT_CONFIG, + + /* Print the usage of the extension: "(bool) data1" is true + * for a detailed usage. See print_usage() as an example. */ + PRINT_USAGE, + + /* Called for every already opened file descriptor: + * "(const char *) data1" is the path, "(int) data2" is the file descriptor. */ + ALREADY_OPENED_FD, +} ExtensionEvent; + +#define CLONE_RECONF ((word_t) -1) + +struct extension; +typedef int (*extension_callback_t)(struct extension *extension, ExtensionEvent event, + intptr_t data1, intptr_t data2); + +typedef struct extension { + /* Function to be called when any event occurred. */ + extension_callback_t callback; + + /* A chunk of memory allocated by any talloc functions. + * Mainly useful to store a configuration. */ + TALLOC_CTX *config; + + /* List of sysnum handled by this extension. */ + const FilteredSysnum *filtered_sysnums; + + /* Link to the next and previous extensions. Note the order + * is *never* guaranteed. */ + LIST_ENTRY(extension) link; +} Extension; + +typedef LIST_HEAD(extensions, extension) Extensions; + +extern int initialize_extension(Tracee *tracee, extension_callback_t callback, const char *cli); +extern void inherit_extensions(Tracee *child, Tracee *parent, word_t clone_flags); +extern Extension *get_extension(Tracee *tracee, extension_callback_t callback); + +/** + * Notify the extensions of @tracee that the given @event occurred, stopping at the first one that returns a non-zero status; that status is returned, 0 otherwise. + * See ExtensionEvent for the meaning of @data1 and @data2. 
+ */ +static inline int notify_extensions(Tracee *tracee, ExtensionEvent event, + intptr_t data1, intptr_t data2) +{ + Extension *extension; + + if (tracee->extensions == NULL) + return 0; + + LIST_FOREACH(extension, tracee->extensions, link) { + int status = extension->callback(extension, event, data1, data2); + if (status != 0) + return status; + } + + return 0; +} + +/* Built-in extensions. */ +extern int kompat_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int fake_id0_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int care_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int python_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int link2symlink_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); + +/* Added extensions. */ +/** + * We use a global variable in order to support multiple port mapping options, + * otherwise we would have a different extension instance for each (port_in, port_out) pair, + * which would be a waste of memory and performance. + * This variable is modified only once, in the INITIALIZATION event. + */ +extern Extension *global_portmap_extension; +extern int portmap_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); + +#endif /* EXTENSION_H */ diff --git a/proot/proot_linux/extension/extension/care/archive.h b/proot/proot_linux/extension/extension/care/archive.h new file mode 100644 index 0000000..c56d472 --- /dev/null +++ b/proot/proot_linux/extension/extension/care/archive.h @@ -0,0 +1,47 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef ARCHIVE_H +#define ARCHIVE_H + +#include +#include +#include /* NOTE(review): the header names of these three #include lines are missing here -- restore them from the upstream PRoot archive.h */ + +#include "tracee/tracee.h" + +typedef struct { + struct archive *handle; + struct archive_entry_linkresolver *hardlink_resolver; + + /* Information used to create a self-extracting archive: final.c appends its trailer only if @fd >= 0 and @offset > 0, and computes the archive size from @offset. */ + off_t offset; + int fd; +} Archive; + +extern Archive *new_archive(TALLOC_CTX *context, const Tracee* tracee, + const char *output, size_t *prefix_length); +extern int finalize_archive(Archive *archive); +extern int archive(const Tracee* tracee, Archive *archive, + const char *path, const char *alternate_path, const struct stat *statl); + +#endif /* ARCHIVE_H */ diff --git a/proot/proot_linux/extension/extension/care/care.h b/proot/proot_linux/extension/extension/care/care.h new file mode 100644 index 0000000..b5411bb --- /dev/null +++ b/proot/proot_linux/extension/extension/care/care.h @@ -0,0 +1,80 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef CARE_H +#define CARE_H + +#include /* NOTE(review): header name missing here; presumably <stdbool.h> since bool is used below -- confirm against upstream */ +#include <sys/queue.h> /* STAILQ_*, */ + +#include "extension/care/archive.h" + +/* Generic item for a STAILQ list; final.c reads @load as a (char *) string. */ +typedef struct item { + const void *load; + STAILQ_ENTRY(item) link; +} Item; + +typedef STAILQ_HEAD(list, item) List; + +/* CARE CLI configuration. */ +typedef struct { + const char *output; + char *const *command; + + List *concealed_paths; + List *revealed_paths; + List *volatile_paths; + List *volatile_envars; + + bool ignore_default_config; + + int max_size; +} Options; + +/* CARE internal configuration. 
+ */ +typedef struct { + struct Entry *entries; + struct Entry *dentries; + + char *const *command; + List *volatile_paths; /* emitted as "-b" bindings in re-execute.sh */ + List *volatile_envars; + List *concealed_accesses; /* dumped to "concealed-accesses.txt" by final.c */ + + const char *prefix; /* archive path prefix, e.g. "<prefix>/proot" */ + const char *output; /* output path, quoted in the extraction hint */ + const char *initial_cwd; /* passed as "-w" in re-execute.sh */ + bool ipc_are_volatile; + + Archive *archive; + int64_t max_size; + + int last_exit_status; /* re-execute.sh compares $? against it */ + + bool is_ready; +} Care; + +extern Item *queue_item(TALLOC_CTX *context, List **list, const char *value); + +#endif /* CARE_H */ + diff --git a/proot/proot_linux/extension/extension/care/extract.h b/proot/proot_linux/extension/extension/care/extract.h new file mode 100644 index 0000000..b29d07e --- /dev/null +++ b/proot/proot_linux/extension/extension/care/extract.h @@ -0,0 +1,38 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef EXTRACT_H +#define EXTRACT_H + +#include /* NOTE(review): header name missing here; presumably <stdint.h> since uint64_t is used below -- confirm against upstream */ +#include "attribute.h" + +#define AUTOEXTRACT_SIGNATURE "I_LOVE_PIZZA" + +typedef struct { + char signature[sizeof(AUTOEXTRACT_SIGNATURE)]; /* AUTOEXTRACT_SIGNATURE, terminating NUL included */ + uint64_t size; /* size of the archive in bytes; final.c stores it with htobe64() */ +} PACKED AutoExtractInfo; + +extern int WEAK extract_archive_from_file(const char *path); + +#endif /* EXTRACT_H */ diff --git a/proot/proot_linux/extension/extension/care/final.h b/proot/proot_linux/extension/extension/care/final.h new file mode 100644 index 0000000..d299a10 --- /dev/null +++ b/proot/proot_linux/extension/extension/care/final.h @@ -0,0 +1,30 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef CARE_FINAL_H +#define CARE_FINAL_H + +#include "care.h" + +extern int finalize_care(Care *care); + +#endif /* CARE_FINAL_H */ diff --git a/proot/proot_linux/extension/extension/extension.h b/proot/proot_linux/extension/extension/extension.h new file mode 100644 index 0000000..29a44c8 --- /dev/null +++ b/proot/proot_linux/extension/extension/extension.h @@ -0,0 +1,206 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef EXTENSION_H +#define EXTENSION_H + +#include <sys/queue.h> /* LIST_, */ +#include <stdint.h> /* intptr_t, */ + +#include "tracee/tracee.h" +#include "syscall/seccomp.h" +#include "extension/portmap/portmap.h" + +/* List of possible events. */ +typedef enum { + /* A guest path passed as an argument of the current syscall + * is about to be translated: "(char *) data1" is the base for + * "(char *) data2" -- the guest path -- if this latter is + * relative. If the extension returns > 0, then PRoot skips + * its own handling. If the extension returns < 0, then PRoot + * reports this errno as-is. */ + GUEST_PATH, + + /* A canonicalized host path is being accessed during the + * translation of a guest path: "(char *) data1" is the + * canonicalized host path and "(bool) data2" is true if it is + * the last iteration. Note that several host paths are accessed + * for a given guest path since PRoot has to walk along all + * parent directories and symlinks in order to translate it. + * If the extension returns < 0, then PRoot reports this errno + * as-is. */ + HOST_PATH, + + /* The tracee enters a syscall, and PRoot hasn't done anything + * yet. If the extension returns > 0, then PRoot skips its + * own handling. 
If the extension returns < 0, then PRoot + * cancels the syscall and reports this errno to the + * tracee. */ + SYSCALL_ENTER_START, + + /* The tracee enters a syscall, and PRoot has already handled + * it: "(int) data1" is the current status, it is < 0 when + * something went wrong. If the extension returns < 0, then + * PRoot cancels the syscall and reports this errno to the + * tracee. */ + SYSCALL_ENTER_END, + + /* The tracee exits a syscall, and PRoot hasn't done anything + * yet. If the extension returns > 0, then PRoot skips its + * own handling. If the extension returns < 0, then PRoot + * reports this errno to the tracee. */ + SYSCALL_EXIT_START, + + /* The tracee exits a syscall, and PRoot has already handled + * it. If the extension returns < 0, then PRoot reports this + * errno to the tracee. */ + SYSCALL_EXIT_END, + + /* The canonicalization succeeds: "(char *) data1" is the + * translated path from the host point-of-view. It can be + * substituted by the extension. If the extension returns < + * 0, then PRoot reports this errno as-is. */ + TRANSLATED_PATH, + + /* The tracee is stopped either because of a syscall or a + * signal: "(int) data1" is its new status as reported by + * waitpid(2). If the extension returns != 0, then PRoot + * skips its own handling. */ + NEW_STATUS, + + /* Ask how this extension is inheritable: "(Tracee *) data1" + * is the child tracee and "(word_t) data2" is the clone(2) + * flags (CLONE_RECONF for sub-reconfiguration). The meaning + * of the returned value is: + * + * < 0 : not inheritable + * == 0 : inheritable + shared configuration. + * > 0 : inheritable + call INHERIT_CHILD. */ + INHERIT_PARENT, + + /* Control the inheritance: "(Extension *) data1" is the + * extension of the parent and "(word_t) data2" is the clone(2) + * flags (CLONE_RECONF for sub-reconfiguration). For instance + * the extension for the child could use a configuration + * different from the parent's configuration. 
+ */ + INHERIT_CHILD, + + /* The tracee enters a "chained" syscall, that is, an + * unrequested syscall inserted by PRoot after an actual + * syscall. If the extension returns < 0, then PRoot cancels + * the syscall and reports this errno to the tracee. */ + SYSCALL_CHAINED_ENTER, + + /* The tracee exits a "chained" syscall, that is, an + * unrequested syscall inserted by PRoot after an actual + * syscall. */ + SYSCALL_CHAINED_EXIT, + + /* Initialize the extension: "(const char *) data1" is its + * argument that was passed to the command-line interface. If + * the extension returns < 0, then PRoot removes it. */ + INITIALIZATION, + + /* The extension is not attached to its tracee anymore + * (destructor). */ + REMOVED, + + /* Print the current configuration of the extension. See + * print_config() as an example. */ + PRINT_CONFIG, + + /* Print the usage of the extension: "(bool) data1" is true + * for a detailed usage. See print_usage() as an example. */ + PRINT_USAGE, + + /* Called for every already opened file descriptor: + * "(const char *) data1" is the path, "(int) data2" is the file descriptor. */ + ALREADY_OPENED_FD, +} ExtensionEvent; + +#define CLONE_RECONF ((word_t) -1) + +struct extension; +typedef int (*extension_callback_t)(struct extension *extension, ExtensionEvent event, + intptr_t data1, intptr_t data2); + +typedef struct extension { + /* Function to be called when any event occurred. */ + extension_callback_t callback; + + /* A chunk of memory allocated by any talloc functions. + * Mainly useful to store a configuration. */ + TALLOC_CTX *config; + + /* List of sysnum handled by this extension. */ + const FilteredSysnum *filtered_sysnums; + + /* Link to the next and previous extensions. Note the order + * is *never* guaranteed. 
+ */ + LIST_ENTRY(extension) link; +} Extension; + +typedef LIST_HEAD(extensions, extension) Extensions; + +extern int initialize_extension(Tracee *tracee, extension_callback_t callback, const char *cli); +extern void inherit_extensions(Tracee *child, Tracee *parent, word_t clone_flags); +extern Extension *get_extension(Tracee *tracee, extension_callback_t callback); + +/** + * Notify the extensions of @tracee that the given @event occurred, stopping at the first one that returns a non-zero status; that status is returned, 0 otherwise. + * See ExtensionEvent for the meaning of @data1 and @data2. + */ +static inline int notify_extensions(Tracee *tracee, ExtensionEvent event, + intptr_t data1, intptr_t data2) +{ + Extension *extension; + + if (tracee->extensions == NULL) + return 0; + + LIST_FOREACH(extension, tracee->extensions, link) { + int status = extension->callback(extension, event, data1, data2); + if (status != 0) + return status; + } + + return 0; +} + +/* Built-in extensions. */ +extern int kompat_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int fake_id0_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int care_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int python_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); +extern int link2symlink_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); + +/* Added extensions. */ +/** + * We use a global variable in order to support multiple port mapping options, + * otherwise we would have a different extension instance for each (port_in, port_out) pair, + * which would be a waste of memory and performance. + * This variable is modified only once, in the INITIALIZATION event. 
+ */ +extern Extension *global_portmap_extension; +extern int portmap_callback(Extension *extension, ExtensionEvent event, intptr_t d1, intptr_t d2); + +#endif /* EXTENSION_H */ diff --git a/proot/proot_linux/extension/extension/portmap/portmap.h b/proot/proot_linux/extension/extension/portmap/portmap.h new file mode 100644 index 0000000..65d7a30 --- /dev/null +++ b/proot/proot_linux/extension/extension/portmap/portmap.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2016 Vincent Hage + */ + +#ifndef PORTMAP_H +#define PORTMAP_H + +#include "extension/extension.h" + +#define PORTMAP_SIZE 4096 /* must be a power of 2 */ +#define PORTMAP_DEFAULT_VALUE 0 /* default value that indicates an unused entry */ +#define PORTMAP_VERBOSITY 1 + +typedef struct PortMapEntry { + uint16_t port_in; + uint16_t port_out; +} PortMapEntry; + +typedef struct PortMap { + PortMapEntry map[PORTMAP_SIZE]; + uint16_t table_mask; +} PortMap; + +void initialize_portmap(PortMap *portmap); +uint16_t get_index(PortMap *portmap, uint16_t key); +int add_entry(PortMap *portmap, uint16_t port_in, uint16_t port_out); +uint16_t get_port(PortMap *portmap, uint16_t port_in); + +int add_portmap_entry(uint16_t port_in, uint16_t port_out); +int activate_netcoop_mode(); + +#endif /* PORTMAP_H */ diff --git a/proot/proot_linux/extension/extension/python/proot.i b/proot/proot_linux/extension/extension/python/proot.i new file mode 100644 index 0000000..4f79373 --- /dev/null +++ b/proot/proot_linux/extension/extension/python/proot.i @@ -0,0 +1,106 @@ +%module proot +%{ +#define SWIG_FILE_WITH_INIT + +#include "arch.h" +#include "syscall/sysnum.h" +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include "extension/extension.h" + +/* define an internal global with correct PR number */ +#define SYSNUM(item) static const int PR_internal ## item = PR_ ## item; +#include "syscall/sysnums.list" +#undef SYSNUM +%} + +/* now say PR_item has value PR_internal */ +/* works but ugly. 
Another way to do this ? */ +#define SYSNUM(item) static const int PR_ ## item = PR_internal ## item; +%include "syscall/sysnums.list" +#undef SYSNUM + +/* python extension helper */ +%inline %{ +Tracee *get_tracee_from_extension(long extension_handle) +{ + Extension *extension = (Extension *)extension_handle; + Tracee *tracee = TRACEE(extension); + + return tracee; +} +%} + +/* arch.h */ +typedef unsigned long word_t; + +/* tracee/tracee.h */ +typedef enum { + CURRENT = 0, + ORIGINAL = 1, + MODIFIED = 2, + NB_REG_VERSION +} RegVersion; + +/* syscall/sysnum.h */ +typedef enum Sysnum; +extern Sysnum get_sysnum(const Tracee *tracee, RegVersion version); +extern void set_sysnum(Tracee *tracee, Sysnum sysnum); + +/* tracee/reg.h */ +typedef enum { + SYSARG_NUM = 0, + SYSARG_1, + SYSARG_2, + SYSARG_3, + SYSARG_4, + SYSARG_5, + SYSARG_6, + SYSARG_RESULT, + STACK_POINTER, + INSTR_POINTER, + RTLD_FINI, + STATE_FLAGS, + USERARG_1, +} Reg; + +extern word_t peek_reg(const Tracee *tracee, RegVersion version, Reg reg); +extern void poke_reg(Tracee *tracee, Reg reg, word_t value); + +/* tracee/mem.h */ +/* make read_data / write_data pythonic */ +%apply (char *STRING, size_t LENGTH) { (const void *src_tracer, word_t size2) }; +extern int write_data(const Tracee *tracee, word_t dest_tracee, const void *src_tracer, word_t size2); + + %include +%rename(read_data) read_data_for_python; +%cstring_output_withsize(void *dest_tracer, int *size2); +%inline %{ +void read_data_for_python(const Tracee *tracee, word_t src_tracee, void *dest_tracer, int *size2) +{ + int res = read_data(tracee, dest_tracer, src_tracee, *size2); + /* in case of error we return empty string */ + if (res) + *size2 = 0; +} +%} + +/* extension/extention.h */ +typedef enum { + GUEST_PATH, + HOST_PATH, + SYSCALL_ENTER_START, + SYSCALL_ENTER_END, + SYSCALL_EXIT_START, + SYSCALL_EXIT_END, + NEW_STATUS, + INHERIT_PARENT, + INHERIT_CHILD, + SYSCALL_CHAINED_ENTER, + SYSCALL_CHAINED_EXIT, + INITIALIZATION, + REMOVED, + 
PRINT_CONFIG, + PRINT_USAGE, +} ExtensionEvent; diff --git a/proot/proot_linux/extension/extension/python/python_extension.py b/proot/proot_linux/extension/extension/python/python_extension.py new file mode 100644 index 0000000..80c1058 --- /dev/null +++ b/proot/proot_linux/extension/extension/python/python_extension.py @@ -0,0 +1,19 @@ +from proot import * +import ctypes +import imp + +client = None + +def python_callback(extension, event, data1, data2): + global client + res = 0 + + if event == 11: + if client: + print "Already have a client => refuse to use %s" % (ctypes.string_at(data1)) + else: + client = imp.load_source('client', ctypes.string_at(data1)) + if client: + return client.python_callback(extension, event, data1, data2) + + return 0 diff --git a/proot/proot_linux/extension/fake_id0/fake_id0.c b/proot/proot_linux/extension/fake_id0/fake_id0.c new file mode 100644 index 0000000..b21b241 --- /dev/null +++ b/proot/proot_linux/extension/fake_id0/fake_id0.c @@ -0,0 +1,907 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* assert(3), */ +#include /* intptr_t, */ +#include /* E*, */ +#include /* chmod(2), stat(2) */ +#include /* uid_t, gid_t, get*id(2), */ +#include /* get*id(2), */ +#include /* linux.git:c0a3a20b */ +#include /* AUDIT_ARCH_*, */ +#include /* memcpy(3), */ +#include /* strtol(3), */ +#include /* AT_, */ + +#include "extension/extension.h" +#include "syscall/syscall.h" +#include "syscall/sysnum.h" +#include "syscall/seccomp.h" +#include "execve/execve.h" +#include "tracee/tracee.h" +#include "tracee/abi.h" +#include "tracee/mem.h" +#include "execve/auxv.h" +#include "path/binding.h" +#include "arch.h" + +typedef struct { + uid_t ruid; + uid_t euid; + uid_t suid; + uid_t fsuid; + + gid_t rgid; + gid_t egid; + gid_t sgid; + gid_t fsgid; +} Config; + +typedef struct { + char *path; + mode_t mode; +} ModifiedNode; + +/* List of syscalls handled by this extensions. */ +static FilteredSysnum filtered_sysnums[] = { + { PR_capset, FILTER_SYSEXIT }, + { PR_chmod, FILTER_SYSEXIT }, + { PR_chown, FILTER_SYSEXIT }, + { PR_chown32, FILTER_SYSEXIT }, + { PR_chroot, FILTER_SYSEXIT }, + { PR_execve, FILTER_SYSEXIT }, + { PR_fchmod, FILTER_SYSEXIT }, + { PR_fchmodat, FILTER_SYSEXIT }, + { PR_fchown, FILTER_SYSEXIT }, + { PR_fchown32, FILTER_SYSEXIT }, + { PR_fchownat, FILTER_SYSEXIT }, + { PR_fstat, FILTER_SYSEXIT }, + { PR_fstat, FILTER_SYSEXIT }, + { PR_fstat64, FILTER_SYSEXIT }, + { PR_fstatat64, FILTER_SYSEXIT }, + { PR_getegid, FILTER_SYSEXIT }, + { PR_getegid32, FILTER_SYSEXIT }, + { PR_geteuid, FILTER_SYSEXIT }, + { PR_geteuid32, FILTER_SYSEXIT }, + { PR_getgid, FILTER_SYSEXIT }, + { PR_getgid32, FILTER_SYSEXIT }, + { PR_getgroups, FILTER_SYSEXIT }, + { PR_getgroups32, FILTER_SYSEXIT }, + { PR_getresgid, FILTER_SYSEXIT }, + { PR_getresgid32, FILTER_SYSEXIT }, + { PR_getresuid, FILTER_SYSEXIT }, + { PR_getresuid32, FILTER_SYSEXIT }, + { PR_getuid, FILTER_SYSEXIT }, + { PR_getuid32, FILTER_SYSEXIT }, + { PR_lchown, FILTER_SYSEXIT }, + { PR_lchown32, 
FILTER_SYSEXIT }, + { PR_lstat, FILTER_SYSEXIT }, + { PR_lstat64, FILTER_SYSEXIT }, + { PR_mknod, FILTER_SYSEXIT }, + { PR_mknodat, FILTER_SYSEXIT }, + { PR_newfstatat, FILTER_SYSEXIT }, + { PR_oldlstat, FILTER_SYSEXIT }, + { PR_oldstat, FILTER_SYSEXIT }, + { PR_setfsgid, FILTER_SYSEXIT }, + { PR_setfsgid32, FILTER_SYSEXIT }, + { PR_setfsuid, FILTER_SYSEXIT }, + { PR_setfsuid32, FILTER_SYSEXIT }, + { PR_setgid, FILTER_SYSEXIT }, + { PR_setgid32, FILTER_SYSEXIT }, + { PR_setgroups, FILTER_SYSEXIT }, + { PR_setgroups32, FILTER_SYSEXIT }, + { PR_setregid, FILTER_SYSEXIT }, + { PR_setregid32, FILTER_SYSEXIT }, + { PR_setreuid, FILTER_SYSEXIT }, + { PR_setreuid32, FILTER_SYSEXIT }, + { PR_setresgid, FILTER_SYSEXIT }, + { PR_setresgid32, FILTER_SYSEXIT }, + { PR_setresuid, FILTER_SYSEXIT }, + { PR_setresuid32, FILTER_SYSEXIT }, + { PR_setuid, FILTER_SYSEXIT }, + { PR_setuid32, FILTER_SYSEXIT }, + { PR_setxattr, FILTER_SYSEXIT }, + { PR_setdomainname, FILTER_SYSEXIT }, + { PR_sethostname, FILTER_SYSEXIT }, + { PR_lsetxattr, FILTER_SYSEXIT }, + { PR_fsetxattr, FILTER_SYSEXIT }, + { PR_stat, FILTER_SYSEXIT }, + { PR_statx, FILTER_SYSEXIT }, + { PR_stat64, FILTER_SYSEXIT }, + { PR_statfs, FILTER_SYSEXIT }, + { PR_statfs64, FILTER_SYSEXIT }, + FILTERED_SYSNUM_END, +}; + +/** + * Restore the @node->mode for the given @node->path. + * + * Note: this is a Talloc destructor. + */ +static int restore_mode(ModifiedNode *node) +{ + (void) chmod(node->path, node->mode); + return 0; +} + +/** + * Force permissions of @path to "rwx" during the path translation of + * current @tracee's syscall, in order to simulate CAP_DAC_OVERRIDE. + * The original permissions are restored through talloc destructors. + * See canonicalize() for the meaning of @is_final. 
+ */
+/* Details: only the owner bits are widened (read + write, plus
+ * search/execute for directories). Nothing is done when stat(2)
+ * fails, when the mode already grants these bits, or when @path is
+ * the final component of a stat-like syscall. Failures are silently
+ * ignored: this is a best-effort helper. */
+static void override_permissions(const Tracee *tracee, const char *path, bool is_final)
+{
+	const mode_t perm_bits = S_IRWXU | S_IRWXG | S_IRWXO;
+	ModifiedNode *node;
+	struct stat perms;
+	mode_t restored_mode;
+	mode_t new_mode;
+
+	/* Get the meta-data. */
+	if (stat(path, &perms) < 0)
+		return;
+
+	/* Start from the current permissions, then add read and
+	 * write access for the owner; directories always get the 'x'
+	 * bit too. */
+	new_mode = (perms.st_mode & perm_bits) | S_IRUSR | S_IWUSR;
+	if (S_ISDIR(perms.st_mode))
+		new_mode |= S_IXUSR;
+
+	/* Patch the permissions only if needed. */
+	if (new_mode == (perms.st_mode & perm_bits))
+		return;
+
+	/* Work out which mode has to be put back afterwards. This is
+	 * decided before allocating anything, so that the stat-like
+	 * syscalls bail out without leaving an unused node attached
+	 * to the tracee's context. Non final components always get
+	 * their previous mode back. */
+	restored_mode = perms.st_mode;
+	if (is_final) {
+		switch (get_sysnum(tracee, ORIGINAL)) {
+		/* For chmod syscalls: restore the new mode of the final component. */
+		case PR_chmod:
+			restored_mode = peek_reg(tracee, ORIGINAL, SYSARG_2);
+			break;
+
+		case PR_fchmodat:
+			restored_mode = peek_reg(tracee, ORIGINAL, SYSARG_3);
+			break;
+
+		/* For stat syscalls: don't touch the mode of the final component. */
+		case PR_fstatat64:
+		case PR_lstat:
+		case PR_lstat64:
+		case PR_newfstatat:
+		case PR_oldlstat:
+		case PR_oldstat:
+		case PR_stat:
+		case PR_statx:
+		case PR_stat64:
+		case PR_statfs:
+		case PR_statfs64:
+			return;
+
+		/* Otherwise: restore the previous mode of the final component. */
+		default:
+			break;
+		}
+	}
+
+	node = talloc_zero(tracee->ctx, ModifiedNode);
+	if (node == NULL)
+		return;
+
+	node->mode = restored_mode;
+	node->path = talloc_strdup(node, path);
+	if (node->path == NULL) {
+		/* Keep only consistent nodes. */
+		TALLOC_FREE(node);
+		return;
+	}
+
+	/* The mode restoration works because Talloc destructors are
+	 * called in reverse order. */
+	talloc_set_destructor(node, restore_mode);
+
+	(void) chmod(path, new_mode);
+}
+
+/**
+ * Adjust current @tracee's syscall parameters according to @config.
+ * This function always returns 0. + */ +static int handle_sysenter_end(Tracee *tracee, const Config *config) +{ + word_t sysnum; + + sysnum = get_sysnum(tracee, ORIGINAL); + switch (sysnum) { + case PR_setuid: + case PR_setuid32: + case PR_setgid: + case PR_setgid32: + case PR_setreuid: + case PR_setreuid32: + case PR_setregid: + case PR_setregid32: + case PR_setresuid: + case PR_setresuid32: + case PR_setresgid: + case PR_setresgid32: + case PR_setfsuid: + case PR_setfsuid32: + case PR_setfsgid: + case PR_setfsgid32: + /* These syscalls are fully emulated. */ + set_sysnum(tracee, PR_void); + return 0; + + case PR_chown: + case PR_chown32: + case PR_lchown: + case PR_lchown32: + case PR_fchown: + case PR_fchown32: + case PR_fchownat: { + Reg uid_sysarg; + Reg gid_sysarg; + uid_t uid; + gid_t gid; + + if (sysnum == PR_fchownat) { + uid_sysarg = SYSARG_3; + gid_sysarg = SYSARG_4; + } + else { + uid_sysarg = SYSARG_2; + gid_sysarg = SYSARG_3; + } + + uid = peek_reg(tracee, ORIGINAL, uid_sysarg); + gid = peek_reg(tracee, ORIGINAL, gid_sysarg); + + /* Swap actual and emulated ids to get a chance of + * success. */ + if (uid == config->ruid) + poke_reg(tracee, uid_sysarg, getuid()); + if (gid == config->rgid) + poke_reg(tracee, gid_sysarg, getgid()); + + return 0; + } + + case PR_setgroups: + case PR_setgroups32: + case PR_getgroups: + case PR_getgroups32: + /* TODO */ + + default: + return 0; + } + + /* Never reached */ + assert(0); + return 0; + +} + +/** + * Copy config->@field to the tracee's memory location pointed to by @sysarg. + */ +#define POKE_MEM_ID(sysarg, field) do { \ + poke_uint32(tracee, peek_reg(tracee, ORIGINAL, sysarg), config->field); \ + if (errno != 0) \ + return -errno; \ +} while (0) + +/** + * Emulate setuid(2) and setgid(2). 
+ */
+/* Usage notes for SETXID(): @id is either "uid" or "gid"; it is
+ * pasted to build both the type name (uid_t/gid_t) and the Config
+ * field names (r/e/s/fs + @id). The macro expects "tracee" and
+ * "config" to be in scope and always returns from the enclosing
+ * function: -EPERM when the change is refused, 0 otherwise. */
+#define SETXID(id) do {							\
+	id ## _t id = peek_reg(tracee, ORIGINAL, SYSARG_1);		\
+	bool allowed;							\
+									\
+	/* "EPERM: The user is not privileged (does not have the	\
+	 * CAP_SETUID capability) and uid does not match the real UID	\
+	 * or saved set-user-ID of the calling process." -- man		\
+	 * setuid */							\
+	allowed = (config->euid == 0 /* TODO: || HAS_CAP(SETUID) */	\
+		|| id == config->r ## id				\
+		|| id == config->e ## id				\
+		|| id == config->s ## id);				\
+	if (!allowed)							\
+		return -EPERM;						\
+									\
+	/* "If the effective UID of the caller is root, the real UID	\
+	 * and saved set-user-ID are also set." -- man setuid */	\
+	if (config->e ## id == 0) {					\
+		config->r ## id = id;					\
+		config->s ## id = id;					\
+	}								\
+									\
+	/* "whenever the effective user ID is changed, fsuid will also	\
+	 * be changed to the new value of the effective user ID." --	\
+	 * man setfsuid */						\
+	config->e ## id = id;						\
+	config->fs ## id = id;						\
+									\
+	poke_reg(tracee, SYSARG_RESULT, 0);				\
+	return 0;							\
+} while (0)
+
+/**
+ * Check whether @id is unset, that is, whether it holds the special
+ * value -1 used by the set*id syscalls to mean "leave unchanged".
+ * The comparison is done against (uid_t) -1 for both uids and gids.
+ */
+#define UNSET_ID(id) (id == (uid_t) -1)
+
+/**
+ * Check whether @id is unchanged, that is, whether it is either unset
+ * or equal to the Config field of the same name ("config" has to be
+ * in scope).
+ */
+#define UNCHANGED_ID(id) (UNSET_ID(id) || id == config->id)
+
+/**
+ * Emulate setreuid(2) and setregid(2).
+ *
+ * @id is either "uid" or "gid". The macro expects "tracee" and
+ * "config" to be in scope and always returns from the enclosing
+ * function: -EPERM when the change is refused, 0 otherwise.
+ */
+#define SETREXID(id) do {						\
+	id ## _t r ## id = peek_reg(tracee, ORIGINAL, SYSARG_1);	\
+	id ## _t e ## id = peek_reg(tracee, ORIGINAL, SYSARG_2);	\
+	bool allowed;							\
+									\
+	/* "Unprivileged processes may only set the effective user ID	\
+	 * to the real user ID, the effective user ID, or the saved	\
+	 * set-user-ID.							\
+	 *								\
+	 * Unprivileged users may only set the real user ID to the	\
+	 * real user ID or the effective user ID."			\
+	 *								\
+	 * "EPERM: The calling process is not privileged (does not	\
+	 * have the CAP_SETUID) and a change other than:		\
+	 * 1. swapping the effective user ID with the real user ID,	\
+	 *    or;							\
+	 * 2. setting one to the value of the other, or ;		\
+	 * 3. setting the effective user ID to the value of the saved	\
+	 *    set-user-ID						\
+	 * was specified." -- man setreuid				\
+	 *								\
+	 * Is it possible to "ruid <- euid" and "euid <- suid" at the	\
+	 * same time? */						\
+	allowed = (config->euid == 0 /* TODO: || HAS_CAP(SETUID) */	\
+		|| (UNCHANGED_ID(e ## id) && UNCHANGED_ID(r ## id))	\
+		|| (r ## id == config->e ## id && (e ## id == config->r ## id || UNCHANGED_ID(e ## id))) \
+		|| (e ## id == config->r ## id && (r ## id == config->e ## id || UNCHANGED_ID(r ## id))) \
+		|| (e ## id == config->s ## id && UNCHANGED_ID(r ## id))); \
+	if (!allowed)							\
+		return -EPERM;						\
+									\
+	/* "Supplying a value of -1 for either the real or effective	\
+	 * user ID forces the system to leave that ID unchanged.	\
+	 * [...] If the real user ID is set or the effective user ID	\
+	 * is set to a value not equal to the previous real user ID,	\
+	 * the saved set-user-ID will be set to the new effective user	\
+	 * ID." -- man setreuid */					\
+	if (!UNSET_ID(e ## id)) {					\
+		if (e ## id != config->r ## id)				\
+			config->s ## id = e ## id;			\
+									\
+		config->e ## id = e ## id;				\
+		config->fs ## id = e ## id;				\
+	}								\
+									\
+	/* Since it changes the current ruid value, this has to be	\
+	 * done after euid handling. */					\
+	if (!UNSET_ID(r ## id)) {					\
+		if (!UNSET_ID(e ## id))					\
+			config->s ## id = e ## id;			\
+		config->r ## id = r ## id;				\
+	}								\
+									\
+	poke_reg(tracee, SYSARG_RESULT, 0);				\
+	return 0;							\
+} while (0)
+
+/**
+ * Check if @var is equal to any of config->r{@type}id,
+ * config->e{@type}id or config->s{@type}id, where @type is either
+ * "u" or "g" ("config" has to be in scope).
+ */
+#define EQUALS_ANY_ID(var, type) (var == config->r ## type ## id	\
+	|| var == config->e ## type ## id				\
+	|| var == config->s ## type ## id)
+
+/**
+ * Emulate setresuid(2) and setresgid(2).
+ */
+/* Usage notes for SETRESXID(): @type is either "u" or "g"; it is
+ * pasted to build both the type name (uid_t/gid_t) and the Config
+ * field names. The macro expects "tracee" and "config" to be in
+ * scope and always returns from the enclosing function: -EPERM when
+ * the change is refused, 0 otherwise. */
+#define SETRESXID(type) do {						\
+	type ## id_t r ## type ## id = peek_reg(tracee, ORIGINAL, SYSARG_1); \
+	type ## id_t e ## type ## id = peek_reg(tracee, ORIGINAL, SYSARG_2); \
+	type ## id_t s ## type ## id = peek_reg(tracee, ORIGINAL, SYSARG_3); \
+	bool allowed;							\
+									\
+	/* "Unprivileged user processes may change the real UID,	\
+	 * effective UID, and saved set-user-ID, each to one of: the	\
+	 * current real UID, the current effective UID or the current	\
+	 * saved set-user-ID.						\
+	 *								\
+	 * Privileged processes (on Linux, those having the CAP_SETUID	\
+	 * capability) may set the real UID, effective UID, and saved	\
+	 * set-user-ID to arbitrary values." -- man setresuid */	\
+	allowed = (config->euid == 0 /* || HAS_CAP(SETUID) */		\
+		|| ((UNSET_ID(r ## type ## id) || EQUALS_ANY_ID(r ## type ## id, type)) \
+		 && (UNSET_ID(e ## type ## id) || EQUALS_ANY_ID(e ## type ## id, type)) \
+		 && (UNSET_ID(s ## type ## id) || EQUALS_ANY_ID(s ## type ## id, type)))); \
+	if (!allowed)							\
+		return -EPERM;						\
+									\
+	/* "If one of the arguments equals -1, the corresponding value	\
+	 * is not changed." -- man setresuid */				\
+	if (!UNSET_ID(r ## type ## id))					\
+		config->r ## type ## id = r ## type ## id;		\
+									\
+	if (!UNSET_ID(e ## type ## id)) {				\
+		/* "the file system UID is always set to the same	\
+		 * value as the (possibly new) effective UID." -- man	\
+		 * setresuid */						\
+		config->e ## type ## id = e ## type ## id;		\
+		config->fs ## type ## id = e ## type ## id;		\
+	}								\
+									\
+	if (!UNSET_ID(s ## type ## id))					\
+		config->s ## type ## id = s ## type ## id;		\
+									\
+	poke_reg(tracee, SYSARG_RESULT, 0);				\
+	return 0;							\
+} while (0)
+
+/**
+ * Emulate setfsuid(2) and setfsgid(2).
+ */ +#define SETFSXID(type) do { \ + uid_t fs ## type ## id = peek_reg(tracee, ORIGINAL, SYSARG_1); \ + uid_t old_fs ## type ## id = config->fs ## type ## id; \ + bool allowed; \ + \ + /* "setfsuid() will succeed only if the caller is the \ + * superuser or if fsuid matches either the real user ID, \ + * effective user ID, saved set-user-ID, or the current value \ + * of fsuid." -- man setfsuid */ \ + allowed = (config->euid == 0 /* TODO: || HAS_CAP(SETUID) */ \ + || fs ## type ## id == config->fs ## type ## id \ + || EQUALS_ANY_ID(fs ## type ## id, type)); \ + if (allowed) \ + config->fs ## type ## id = fs ## type ## id; \ + \ + /* "On success, the previous value of fsuid is returned. On \ + * error, the current value of fsuid is returned." -- man \ + * setfsuid */ \ + poke_reg(tracee, SYSARG_RESULT, old_fs ## type ## id); \ + return 0; \ +} while (0) + +/** + * Adjust current @tracee's syscall result according to @config. This + * function returns -errno if an error occured, otherwise 0. 
+ */
+/* Overview of handle_sysexit_end():
+ *  - set*id syscalls are emulated against @config through the
+ *    SET*XID() macros, each of which returns from this function;
+ *  - get*id syscalls report the ids stored in @config;
+ *  - a set of privileged syscalls has its -EPERM result turned into
+ *    a success when the emulated effective uid is 0;
+ *  - stat-like syscalls get the uid/gid of the result patched when
+ *    the file is owned by the actual user/group;
+ *  - chroot(2) is reported as successful when the emulated effective
+ *    uid is 0 and the new root equals the current one. */
+static int handle_sysexit_end(Tracee *tracee, Config *config)
+{
+	word_t sysnum;
+	word_t result;
+
+	sysnum = get_sysnum(tracee, ORIGINAL);
+	switch (sysnum) {
+
+	/* The SET*XID() macros below end with a return statement, so
+	 * none of these cases falls through to the next one. */
+	case PR_setuid:
+	case PR_setuid32:
+		SETXID(uid);
+
+	case PR_setgid:
+	case PR_setgid32:
+		SETXID(gid);
+
+	case PR_setreuid:
+	case PR_setreuid32:
+		SETREXID(uid);
+
+	case PR_setregid:
+	case PR_setregid32:
+		SETREXID(gid);
+
+	case PR_setresuid:
+	case PR_setresuid32:
+		SETRESXID(u);
+
+	case PR_setresgid:
+	case PR_setresgid32:
+		SETRESXID(g);
+
+	case PR_setfsuid:
+	case PR_setfsuid32:
+		SETFSXID(u);
+
+	case PR_setfsgid:
+	case PR_setfsgid32:
+		SETFSXID(g);
+
+	case PR_getuid:
+	case PR_getuid32:
+		poke_reg(tracee, SYSARG_RESULT, config->ruid);
+		return 0;
+
+	case PR_getgid:
+	case PR_getgid32:
+		poke_reg(tracee, SYSARG_RESULT, config->rgid);
+		return 0;
+
+	case PR_geteuid:
+	case PR_geteuid32:
+		poke_reg(tracee, SYSARG_RESULT, config->euid);
+		return 0;
+
+	case PR_getegid:
+	case PR_getegid32:
+		poke_reg(tracee, SYSARG_RESULT, config->egid);
+		return 0;
+
+	/* POKE_MEM_ID() returns -errno from this function if the
+	 * write into the tracee's memory failed. */
+	case PR_getresuid:
+	case PR_getresuid32:
+		POKE_MEM_ID(SYSARG_1, ruid);
+		POKE_MEM_ID(SYSARG_2, euid);
+		POKE_MEM_ID(SYSARG_3, suid);
+		return 0;
+
+	case PR_getresgid:
+	case PR_getresgid32:
+		POKE_MEM_ID(SYSARG_1, rgid);
+		POKE_MEM_ID(SYSARG_2, egid);
+		POKE_MEM_ID(SYSARG_3, sgid);
+		return 0;
+
+	case PR_setdomainname:
+	case PR_sethostname:
+	case PR_setgroups:
+	case PR_setgroups32:
+	case PR_mknod:
+	case PR_mknodat:
+	case PR_capset:
+	case PR_setxattr:
+	case PR_lsetxattr:
+	case PR_fsetxattr:
+	case PR_chmod:
+	case PR_chown:
+	case PR_fchmod:
+	case PR_fchown:
+	case PR_lchown:
+	case PR_chown32:
+	case PR_fchown32:
+	case PR_lchown32:
+	case PR_fchmodat:
+	case PR_fchownat: {
+		/* NOTE(review): this declaration shadows the
+		 * function-level "result". */
+		word_t result;
+
+		/* Override only permission errors. */
+		result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
+		if ((int) result != -EPERM)
+			return 0;
+
+		/* Force success if the tracee was supposed to have
+		 * the capability. */
+		if (config->euid == 0) /* TODO: || HAS_CAP(...) */
+			poke_reg(tracee, SYSARG_RESULT, 0);
+
+		return 0;
+	}
+
+	case PR_fstatat64:
+	case PR_newfstatat:
+	case PR_stat64:
+	case PR_lstat64:
+	case PR_fstat64:
+	case PR_stat:
+	case PR_statx:
+	case PR_lstat:
+	case PR_fstat: {
+		word_t address;
+		Reg sysarg;
+		uid_t uid;
+		gid_t gid;
+		off_t uid_offset;
+		off_t gid_offset;
+
+		/* Override only if it succeeded. */
+		result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
+		if (result != 0)
+			return 0;
+
+		/* Get the address of the 'stat' structure: the
+		 * register holding it and the offsets of the uid/gid
+		 * fields depend on the syscall. */
+		if (sysnum == PR_statx) {
+			sysarg = SYSARG_5;
+			uid_offset = OFFSETOF_STATX_UID;
+			gid_offset = OFFSETOF_STATX_GID;
+		}
+		else {
+			if (sysnum == PR_fstatat64 || sysnum == PR_newfstatat)
+				sysarg = SYSARG_3;
+			else
+				sysarg = SYSARG_2;
+			uid_offset = offsetof_stat_uid(tracee);
+			gid_offset = offsetof_stat_gid(tracee);
+		}
+
+		address = peek_reg(tracee, ORIGINAL, sysarg);
+
+		/* Sanity checks. */
+		assert(__builtin_types_compatible_p(uid_t, uint32_t));
+		assert(__builtin_types_compatible_p(gid_t, uint32_t));
+
+		/* Get the uid & gid values from the 'stat' structure. */
+		uid = peek_uint32(tracee, address + uid_offset);
+		if (errno != 0)
+			uid = 0; /* Not fatal. */
+
+		gid = peek_uint32(tracee, address + gid_offset);
+		if (errno != 0)
+			gid = 0; /* Not fatal. */
+
+		/* Override only if the file is owned by the current user.
+		 * Errors are not fatal here. Note that the saved ids of
+		 * @config are the ones written back. */
+		if (uid == getuid())
+			poke_uint32(tracee, address + uid_offset, config->suid);
+
+		if (gid == getgid())
+			poke_uint32(tracee, address + gid_offset, config->sgid);
+
+		return 0;
+	}
+
+	case PR_chroot: {
+		char path[PATH_MAX];
+		char abspath[PATH_MAX];
+		word_t input;
+		int status;
+
+		if (config->euid != 0) /* TODO: && !HAS_CAP(SYS_CHROOT) */
+			return 0;
+
+		/* Override only permission errors. */
+		result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
+		if ((int) result != -EPERM)
+			return 0;
+
+		input = peek_reg(tracee, MODIFIED, SYSARG_1);
+
+		status = read_path(tracee, path, input);
+		if (status < 0)
+			return status;
+
+		/* Resolve relative path segments. */
+		if (!realpath(path, abspath))
+			return 0;
+
+		/* Only "new rootfs == current rootfs" is supported yet. */
+		status = compare_paths(get_root(tracee), abspath);
+		if (status != PATHS_ARE_EQUAL)
+			return 0;
+
+		/* Force success. */
+		poke_reg(tracee, SYSARG_RESULT, 0);
+		return 0;
+	}
+
+	default:
+		return 0;
+	}
+}
+
+/* The emulation macros are only meaningful inside
+ * handle_sysexit_end(). */
+#undef POKE_MEM_ID
+#undef SETXID
+#undef UNSET_ID
+#undef UNCHANGED_ID
+#undef SETREXID
+#undef EQUALS_ANY_ID
+#undef SETRESXID
+#undef SETFSXID
+
+/**
+ * Adjust some ELF auxiliary vectors. This function assumes the
+ * "argv, envp, auxv" stuff is pointed to by @tracee's stack pointer,
+ * as expected right after a successful call to execve(2).
+ *
+ * AT_UID, AT_EUID, AT_GID and AT_EGID are replaced with the real and
+ * effective ids stored in @config; other vectors are left as is.
+ * This function always returns 0, even when the vectors could not be
+ * located or fetched.
+ */
+static int adjust_elf_auxv(Tracee *tracee, Config *config)
+{
+	ElfAuxVector *vectors;
+	ElfAuxVector *vector;
+	word_t vectors_address;
+
+	vectors_address = get_elf_aux_vectors_address(tracee);
+	if (vectors_address == 0)
+		return 0;
+
+	vectors = fetch_elf_aux_vectors(tracee, vectors_address);
+	if (vectors == NULL)
+		return 0;
+
+	/* The list of vectors is terminated by an AT_NULL entry. */
+	for (vector = vectors; vector->type != AT_NULL; vector++) {
+		switch (vector->type) {
+		case AT_UID:
+			vector->value = config->ruid;
+			break;
+
+		case AT_EUID:
+			vector->value = config->euid;
+			break;
+
+		case AT_GID:
+			vector->value = config->rgid;
+			break;
+
+		case AT_EGID:
+			vector->value = config->egid;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* Write the patched vectors back; the status of this
+	 * operation is not checked. */
+	push_elf_aux_vectors(tracee, vectors, vectors_address);
+
+	return 0;
+}
+
+/**
+ * Handler for this @extension. It is triggered each time an @event
+ * occurred. See ExtensionEvent for the meaning of @data1 and @data2.
+ */
+/* Events handled by fake_id0_callback():
+ *  - INITIALIZATION: parse "uid[:gid]" from @data1 and build the
+ *    configuration; returns -1 if the allocation failed;
+ *  - INHERIT_PARENT / INHERIT_CHILD: give the child its own copy of
+ *    the parent's configuration;
+ *  - HOST_PATH: relax permissions when the emulated euid is 0;
+ *  - SYSCALL_ENTER_END / SYSCALL_EXIT_END: emulate the id-related
+ *    syscalls;
+ *  - SYSCALL_EXIT_START: post-process a successful execve(2).
+ * Any other event is ignored (0 is returned). */
+int fake_id0_callback(Extension *extension, ExtensionEvent event, intptr_t data1, intptr_t data2)
+{
+	switch (event) {
+	case INITIALIZATION: {
+		const char *uid_string = (const char *) data1;
+		const char *gid_string;
+		Config *config;
+		int uid, gid;
+
+		/* Fallback to the current uid if an error occurred. */
+		errno = 0;
+		uid = strtol(uid_string, NULL, 10);
+		if (errno != 0)
+			uid = getuid();
+
+		/* The gid, if any, follows the first ':'. A missing
+		 * gid is handled like a parsing error. */
+		gid_string = strchr(uid_string, ':');
+		if (gid_string == NULL) {
+			errno = EINVAL;
+		}
+		else {
+			errno = 0;
+			gid = strtol(gid_string + 1, NULL, 10);
+		}
+		/* Fallback to the current gid if an error occurred. */
+		if (errno != 0)
+			gid = getgid();
+
+		extension->config = talloc(extension, Config);
+		if (extension->config == NULL)
+			return -1;
+
+		/* All the emulated ids start with the same value. */
+		config = talloc_get_type_abort(extension->config, Config);
+		config->ruid = uid;
+		config->euid = uid;
+		config->suid = uid;
+		config->fsuid = uid;
+		config->rgid = gid;
+		config->egid = gid;
+		config->sgid = gid;
+		config->fsgid = gid;
+
+		extension->filtered_sysnums = filtered_sysnums;
+		return 0;
+	}
+
+	case INHERIT_PARENT: /* Inheritable for sub reconfiguration ... */
+		return 1;
+
+	case INHERIT_CHILD: {
+		/* Copy the parent configuration to the child. The
+		 * structure should not be shared as uid/gid changes
+		 * in one process should not affect other processes.
+		 * This assertion is not true for POSIX threads
+		 * sharing the same group, however Linux threads never
+		 * share uid/gid information. As a consequence, the
+		 * GlibC emulates the POSIX behavior on Linux by
+		 * sending a signal to all group threads to cause them
+		 * to invoke the system call too. Finally, PRoot
+		 * doesn't have to worry about clone flags.
+		 */
+
+		Extension *parent = (Extension *) data1;
+		extension->config = talloc_zero(extension, Config);
+		if (extension->config == NULL)
+			return -1;
+
+		memcpy(extension->config, parent->config, sizeof(Config));
+		return 0;
+	}
+
+	case HOST_PATH: {
+		Tracee *tracee = TRACEE(extension);
+		Config *config = talloc_get_type_abort(extension->config, Config);
+
+		/* Force permissions if the tracee was supposed to
+		 * have the capability. Here @data1 is the path and
+		 * @data2 tells whether it is the final component. */
+		if (config->euid == 0) /* TODO: || HAS_CAP(DAC_OVERRIDE) */
+			override_permissions(tracee, (char*) data1, (bool) data2);
+		return 0;
+	}
+
+	case SYSCALL_ENTER_END: {
+		Tracee *tracee = TRACEE(extension);
+		Config *config = talloc_get_type_abort(extension->config, Config);
+
+		return handle_sysenter_end(tracee, config);
+	}
+
+	case SYSCALL_EXIT_END: {
+		Tracee *tracee = TRACEE(extension);
+		Config *config = talloc_get_type_abort(extension->config, Config);
+
+		return handle_sysexit_end(tracee, config);
+	}
+
+	case SYSCALL_EXIT_START: {
+		Tracee *tracee = TRACEE(extension);
+		Config *config = talloc_get_type_abort(extension->config, Config);
+		word_t result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
+		word_t sysnum = get_sysnum(tracee, ORIGINAL);
+		struct stat mode;
+		int status;
+
+		/* Only a successful execve(2) is of interest here. */
+		if ((int) result < 0 || sysnum != PR_execve)
+			return 0;
+
+		/* This has to be done before PRoot pushes the load
+		 * script into tracee's stack. */
+		adjust_elf_auxv(tracee, config);
+
+		status = stat(tracee->load_info->host_path, &mode);
+		if (status < 0)
+			return 0; /* Not fatal. */
+
+		/* Emulate set-user-ID and set-group-ID programs: the
+		 * effective and saved ids become 0. */
+		if ((mode.st_mode & S_ISUID) != 0) {
+			config->euid = 0;
+			config->suid = 0;
+		}
+
+		if ((mode.st_mode & S_ISGID) != 0) {
+			config->egid = 0;
+			config->sgid = 0;
+		}
+
+		return 0;
+	}
+
+	default:
+		return 0;
+	}
+}
diff --git a/proot/proot_linux/extension/kompat/kompat.c b/proot/proot_linux/extension/kompat/kompat.c
new file mode 100644
index 0000000..102a9a0
--- /dev/null
+++ b/proot/proot_linux/extension/kompat/kompat.c
@@ -0,0 +1,1074 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#include /* intptr_t, */ +#include /* strtoul(3), */ +#include /* KERNEL_VERSION, */ +#include /* assert(3), */ +#include /* uname(2), utsname, */ +#include /* str*(3), memcpy(3), */ +#include /* talloc_*, */ +#include /* AT_*, */ +#include /* linux.git:c0a3a20b */ +#include /* errno, */ +#include /* AT_, */ +#include /* FUTEX_PRIVATE_FLAG */ +#include /* MIN, */ + +#include "extension/extension.h" +#include "syscall/seccomp.h" +#include "syscall/sysnum.h" +#include "syscall/chain.h" +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/abi.h" +#include "tracee/mem.h" +#include "execve/auxv.h" +#include "cli/note.h" +#include "arch.h" + +#include "attribute.h" +#include "compat.h" + +#define MAX_ARG_SHIFT 2 +typedef struct { + int expected_release; + word_t new_sysarg_num; + struct { + Reg sysarg; /* first argument to be moved. */ + size_t nb_args; /* number of arguments to be moved. */ + int offset; /* offset to be applied. */ + } shifts[MAX_ARG_SHIFT]; +} Modif; + +#define NONE {{0, 0, 0}} + +typedef struct { + int actual_release; + int virtual_release; + struct utsname utsname; + word_t hwcap; +} Config; + +/** + * Return whether the @expected_release is newer than + * @config->actual_release and older than @config->virtual_release. + */ +static bool needs_kompat(const Config *config, int expected_release) +{ + return (expected_release > config->actual_release + && expected_release <= config->virtual_release); +} + +/** + * Modify the current syscall of @tracee as described by @modif + * regarding the given @config. This function returns whether the + * syscall was modified or not. + */ +static bool modify_syscall(Tracee *tracee, const Config *config, const Modif *modif) +{ + size_t i, j; + word_t syscall; + + assert(config != NULL); + + if (!needs_kompat(config, modif->expected_release)) + return false; + + /* Check if this syscall is supported on this architecture. 
*/ + syscall = detranslate_sysnum(get_abi(tracee), modif->new_sysarg_num); + if (syscall == SYSCALL_AVOIDER) + return false; + + set_sysnum(tracee, modif->new_sysarg_num); + + /* Shift syscall arguments. */ + for (i = 0; i < MAX_ARG_SHIFT; i++) { + Reg sysarg = modif->shifts[i].sysarg; + size_t nb_args = modif->shifts[i].nb_args; + int offset = modif->shifts[i].offset; + + for (j = 0; j < nb_args; j++) { + word_t arg = peek_reg(tracee, CURRENT, sysarg + j); + poke_reg(tracee, sysarg + j + offset, arg); + } + } + + return true; +} + +/** + * Return the numeric value for the given kernel @release. + */ +static int parse_kernel_release(const char *release) +{ + unsigned long major = 0; + unsigned long minor = 0; + unsigned long revision = 0; + char *cursor = (char *)release; + + major = strtoul(cursor, &cursor, 10); + + if (*cursor == '.') { + cursor++; + minor = strtoul(cursor, &cursor, 10); + } + + if (*cursor == '.') { + cursor++; + revision = strtoul(cursor, &cursor, 10); + } + + return KERNEL_VERSION(major, minor, revision); +} + +/** + * Remove @discarded_flags from the given @tracee's @sysarg register + * if the actual kernel release is not compatible with the + * @expected_release. + */ +static void discard_fd_flags(Tracee *tracee, const Config *config, + int discarded_flags, int expected_release, Reg sysarg) +{ + word_t flags; + + if (!needs_kompat(config, expected_release)) + return; + + flags = peek_reg(tracee, CURRENT, sysarg); + poke_reg(tracee, sysarg, flags & ~discarded_flags); +} + +/** + * Replace current @tracee's syscall with an older and compatible one + * whenever it's required, i.e. when the syscall is supported by the + * kernel as specified by @config->virtual_release but it isn't + * supported by the actual kernel. + */ +static int handle_sysenter_end(Tracee *tracee, Config *config) +{ + /* Note: syscalls like "openat" can be replaced by "open" since PRoot + * has canonicalized "fd + path" into "path". 
*/ + switch (get_sysnum(tracee, ORIGINAL)) { + case PR_accept4: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,28), + .new_sysarg_num = PR_accept, + .shifts = NONE + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_dup3: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_dup2, + .shifts = NONE + }; + + /* "If oldfd equals newfd, then dup3() fails with the + * error EINVAL" -- man dup3 */ + if (peek_reg(tracee, CURRENT, SYSARG_1) == peek_reg(tracee, CURRENT, SYSARG_2)) + return -EINVAL; + + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_epoll_create1: { + bool modified; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_epoll_create, + .shifts = NONE + }; + + /* "the size argument is ignored, but must be greater + * than zero" -- man epoll_create */ + modified = modify_syscall(tracee, config, &modif); + if (modified) + poke_reg(tracee, SYSARG_1, 1); + return 0; + } + + case PR_epoll_pwait: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,19), + .new_sysarg_num = PR_epoll_wait, + .shifts = NONE + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_eventfd2: { + bool modified; + word_t flags; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_eventfd, + .shifts = NONE + }; + + modified = modify_syscall(tracee, config, &modif); + if (modified) { + /* EFD_SEMAPHORE can't be emulated with eventfd. 
*/ + flags = peek_reg(tracee, CURRENT, SYSARG_2); + if ((flags & EFD_SEMAPHORE) != 0) + return -EINVAL; + } + return 0; + } + + case PR_faccessat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_access, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 2, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_fchmodat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_chmod, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 2, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_fchownat: { + word_t flags; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 3, + .offset = -1 } + } + }; + + flags = peek_reg(tracee, CURRENT, SYSARG_5); + modif.new_sysarg_num = ((flags & AT_SYMLINK_NOFOLLOW) != 0 + ? PR_lchown + : PR_chown); + + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_fcntl: { + word_t command; + + if (!needs_kompat(config, KERNEL_VERSION(2,6,24))) + return 0; + + command = peek_reg(tracee, ORIGINAL, SYSARG_2); + if (command == F_DUPFD_CLOEXEC) + poke_reg(tracee, SYSARG_2, F_DUPFD); + + return 0; + } + + case PR_newfstatat: + case PR_fstatat64: { + word_t flags; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 2, + .offset = -1 } + } + }; + + if (config->actual_release == 0) + return 0; + + flags = peek_reg(tracee, CURRENT, SYSARG_4); +#if defined(ARCH_X86_64) + if ((flags & AT_SYMLINK_NOFOLLOW) != 0) + modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? PR_lstat : PR_lstat64); + else + modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? 
PR_stat : PR_stat64); +#else + if ((flags & AT_SYMLINK_NOFOLLOW) != 0) + modif.new_sysarg_num = PR_lstat64; + else + modif.new_sysarg_num = PR_stat64; +#endif + + if (modify_syscall(tracee, config, &modif)) { + // Do this check only if we are patching this syscall. + // New flags have been added since 2.6.38 + // that we should not error on. + if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0) { + return -EINVAL; /* Exposed by LTP. */ + } + } + return 0; + } + + case PR_futex: { + word_t operation; + static bool warned = false; + + if (!needs_kompat(config, KERNEL_VERSION(2,6,22)) || config->actual_release == 0) + return 0; + + operation = peek_reg(tracee, CURRENT, SYSARG_2); + if ((operation & FUTEX_PRIVATE_FLAG) == 0) + return 0; + + if (!warned) { + warned = true; + note(tracee, WARNING, USER, + "kompat: this kernel doesn't support private futexes " + "and PRoot can't emulate them. Expect some troubles..."); + } + + poke_reg(tracee, SYSARG_2, operation & ~FUTEX_PRIVATE_FLAG); + return 0; + } + + case PR_futimesat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_utimes, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 2, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_inotify_init1: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_inotify_init, + .shifts = NONE + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_linkat: { + word_t flags; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_link, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 1, + .offset = -1 }, + [1] = { + .sysarg = SYSARG_4, + .nb_args = 1, + .offset = -2 } + } + }; + + flags = peek_reg(tracee, CURRENT, SYSARG_5); + if ((flags & ~AT_SYMLINK_FOLLOW) != 0) + return -EINVAL; /* Exposed by LTP. 
*/ + + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_mkdirat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_mkdir, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 2, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_mknodat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_mknod, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 3, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_openat: { + bool modified; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_open, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 3, + .offset = -1 } + } + }; + modified = modify_syscall(tracee, config, &modif); + discard_fd_flags(tracee, config, O_CLOEXEC, KERNEL_VERSION(2,6,23), + modified ? SYSARG_2 : SYSARG_3); + return 0; + } + + case PR_open: + discard_fd_flags(tracee, config, O_CLOEXEC, KERNEL_VERSION(2,6,23), SYSARG_2); + return 0; + + case PR_pipe2: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_pipe, + .shifts = NONE + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_pselect6: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .shifts = NONE + }; +#if defined(ARCH_X86_64) + modif.new_sysarg_num = (get_abi(tracee) != ABI_2 ? 
PR_select : PR__newselect); +#else + modif.new_sysarg_num = PR__newselect; +#endif + + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_readlinkat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_readlink, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 3, + .offset = -1} + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_renameat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_rename, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 1, + .offset =-1 }, + [1] = { + .sysarg = SYSARG_4, + .nb_args = 1, + .offset = -2 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_renameat2: { + Modif modif = { + .expected_release = KERNEL_VERSION(3,15,0), + .new_sysarg_num = PR_rename, + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 1, + .offset =-1 }, + [1] = { + .sysarg = SYSARG_4, + .nb_args = 1, + .offset = -2 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_signalfd4: { + bool modified; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,27), + .new_sysarg_num = PR_signalfd, + .shifts = NONE + }; + + /* "In Linux up to version 2.6.26, the flags argument + * is unused, and must be specified as zero." 
-- man + * signalfd */ + modified = modify_syscall(tracee, config, &modif); + if (modified) + poke_reg(tracee, SYSARG_4, 0); + return 0; + } + + case PR_socket: + case PR_socketpair: + case PR_timerfd_create: + discard_fd_flags(tracee, config, O_CLOEXEC | O_NONBLOCK, + KERNEL_VERSION(2,6,27), SYSARG_2); + return 0; + + case PR_symlinkat: { + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .new_sysarg_num = PR_symlink, + .shifts = { [0] = { + .sysarg = SYSARG_3, + .nb_args = 1, + .offset = -1 } + } + }; + modify_syscall(tracee, config, &modif); + return 0; + } + + case PR_unlinkat: { + word_t flags; + Modif modif = { + .expected_release = KERNEL_VERSION(2,6,16), + .shifts = { [0] = { + .sysarg = SYSARG_2, + .nb_args = 1, + .offset = -1 + } + } + }; + + flags = peek_reg(tracee, CURRENT, SYSARG_3); + modif.new_sysarg_num = ((flags & AT_REMOVEDIR) != 0 + ? PR_rmdir + : PR_unlink); + + modify_syscall(tracee, config, &modif); + return 0; + } + + default: + return 0; + } +} + +/** + * Adjust some ELF auxiliary vectors to improve the compatibility. + * This function assumes the "argv, envp, auxv" stuff is pointed to by + * @tracee's stack pointer, as expected right after a successful call + * to execve(2). + */ +static void adjust_elf_auxv(Tracee *tracee, Config *config) +{ + ElfAuxVector *vectors; + ElfAuxVector *vector; + word_t vectors_address; + word_t stack_pointer; + void *argv_envp; + size_t size; + size_t reserve_size; + int status; + + vectors_address = get_elf_aux_vectors_address(tracee); + if (vectors_address == 0) + return; + + vectors = fetch_elf_aux_vectors(tracee, vectors_address); + if (vectors == NULL) + return; + + for (vector = vectors; vector->type != AT_NULL; vector++) { + switch (vector->type) { + /* Discard AT_SYSINFO* vectors: they can be used to + * get the OS release number from memory instead of + * from the uname syscall, and only this latter is + * currently hooked by PRoot. 
*/ + case AT_SYSINFO_EHDR: + case AT_SYSINFO: + vector->type = AT_IGNORE; + vector->value = 0; + break; + + case AT_HWCAP: + if (config->hwcap != (word_t) -1) + vector->value = config->hwcap; + break; + + case AT_RANDOM: + /* Skip only if not in forced mode. */ + if (config->actual_release != 0) + goto end; + break; + + default: + break; + } + } + + /* Add the AT_RANDOM vector only if needed. */ + if (!needs_kompat(config, KERNEL_VERSION(2,6,29))) + goto end; + + status = add_elf_aux_vector(&vectors, AT_RANDOM, vectors_address); + if (status < 0) + goto end; /* Not fatal. */ + + /* Since a new vector needs to be added, the ELF auxiliary + * vectors array can't be pushed in place. As a consequence, + * argv[] and envp[] arrays are moved one vector downward to + * make room for the new ELF auxiliary vectors array. + * Remember, the stack layout is as follow right after execve: + * + * argv[], envp[], auxv[] + */ + stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); + size = vectors_address - stack_pointer; + argv_envp = talloc_size(tracee->ctx, size); + if (argv_envp == NULL) + goto end; + + status = read_data(tracee, argv_envp, stack_pointer, size); + if (status < 0) + goto end; + + /* Allocate enough room in tracee's stack for the new ELF + * auxiliary vector. */ + reserve_size = 2 * sizeof_word(tracee); + /* Make sure the stack is still aligned */ + reserve_size = ((reserve_size - 1) / STACK_ALIGNMENT + 1) * STACK_ALIGNMENT; + stack_pointer -= reserve_size; + vectors_address -= reserve_size; + + /* Note that it is safe to update the stack pointer manually + * since we are in execve sysexit. However it should be done + * before transfering data since the kernel might not allow + * page faults below the stack pointer. 
*/ + poke_reg(tracee, STACK_POINTER, stack_pointer); + + status = write_data(tracee, stack_pointer, argv_envp, size); + if (status < 0) + return; + +end: + push_elf_aux_vectors(tracee, vectors, vectors_address); + return; +} + +/** + * Append to the @tracee's current syscall enough calls to fcntl(@fd) + * in order to set the flags from the original @sysarg register, if + * there are also set in @emulated_flags. + */ +static void emulate_fd_flags(Tracee *tracee, word_t fd, Reg sysarg, int emulated_flags) +{ + word_t flags; + + flags = peek_reg(tracee, ORIGINAL, sysarg); + if (flags == 0) + return; + + if ((emulated_flags & flags & O_CLOEXEC) != 0) + register_chained_syscall(tracee, PR_fcntl, fd, F_SETFD, FD_CLOEXEC, 0, 0, 0); + + if ((emulated_flags & flags & O_NONBLOCK) != 0) + register_chained_syscall(tracee, PR_fcntl, fd, F_SETFL, O_NONBLOCK, 0, 0, 0); + + force_chain_final_result(tracee, peek_reg(tracee, CURRENT, SYSARG_RESULT)); +} + +/** + * Adjust the results/output parameters for syscalls that were + * modified in handle_sysenter_end(). This function returns -errno if + * an error occured, otherwise 0. + */ +static int handle_sysexit_end(Tracee *tracee, Config *config) +{ + word_t result; + word_t sysnum; + int status; + + result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + sysnum = get_sysnum(tracee, ORIGINAL); + + /* Error reported by the kernel. */ + status = (int) result; + if (status < 0) + return 0; + + switch (sysnum) { + case PR_uname: { + word_t address; + + address = peek_reg(tracee, ORIGINAL, SYSARG_1); + + /* The layout of struct utsname does not depend on the + * architecture, it only depends on the kernel + * version. In this regards, this structure is stable + * since < 2.6.0. 
*/ + status = write_data(tracee, address, &config->utsname, sizeof(config->utsname)); + if (status < 0) + return status; + return 0; + } + + case PR_setdomainname: + case PR_sethostname: { + word_t address; + word_t length; + char *name; + + name = (sysnum == PR_setdomainname + ? config->utsname.domainname + : config->utsname.nodename); + + length = peek_reg(tracee, ORIGINAL, SYSARG_2); + if (length > sizeof(config->utsname.domainname) - 1) + return -EINVAL; + + /* Because of the test above. */ + assert(sizeof(config->utsname.domainname) == sizeof(config->utsname.nodename)); + + address = peek_reg(tracee, ORIGINAL, SYSARG_1); + status = read_data(tracee, name, address, length); + if (status < 0) + return status; + + /* "name does not require a terminating null byte." -- + * man 2 set{domain,host}name. */ + name[length] = '\0'; + + return 0; + } + + case PR_accept4: + if (get_sysnum(tracee, MODIFIED) == PR_accept) + emulate_fd_flags(tracee, result, SYSARG_4, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_dup3: + if (get_sysnum(tracee, MODIFIED) == PR_dup2) + emulate_fd_flags(tracee, peek_reg(tracee, ORIGINAL, SYSARG_2), + SYSARG_3, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_epoll_create1: + if (get_sysnum(tracee, MODIFIED) == PR_epoll_create) + emulate_fd_flags(tracee, result, SYSARG_1, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_eventfd2: + if (get_sysnum(tracee, MODIFIED) == PR_eventfd) + emulate_fd_flags(tracee, result, SYSARG_2, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_fcntl: { + word_t command; + + if (!needs_kompat(config, KERNEL_VERSION(2,6,24))) + return 0; + + command = peek_reg(tracee, ORIGINAL, SYSARG_2); + if (command != F_DUPFD_CLOEXEC) + return 0; + + register_chained_syscall(tracee, PR_fcntl, result, F_SETFD, FD_CLOEXEC, 0, 0, 0); + force_chain_final_result(tracee, peek_reg(tracee, CURRENT, SYSARG_RESULT)); + return 0; + } + + case PR_inotify_init1: + if (get_sysnum(tracee, MODIFIED) == PR_inotify_init) + emulate_fd_flags(tracee, 
result, SYSARG_1, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_open: + if (needs_kompat(config, KERNEL_VERSION(2,6,23))) + emulate_fd_flags(tracee, result, SYSARG_2, O_CLOEXEC); + return 0; + + case PR_openat: + if (needs_kompat(config, KERNEL_VERSION(2,6,23))) + emulate_fd_flags(tracee, result, SYSARG_3, O_CLOEXEC); + return 0; + + case PR_pipe2: { + int fds[2]; + + if (get_sysnum(tracee, MODIFIED) != PR_pipe) + return 0; + + status = read_data(tracee, fds, peek_reg(tracee, MODIFIED, SYSARG_1), sizeof(fds)); + if (status < 0) + return 0; + + emulate_fd_flags(tracee, fds[0], SYSARG_2, O_CLOEXEC | O_NONBLOCK); + emulate_fd_flags(tracee, fds[1], SYSARG_2, O_CLOEXEC | O_NONBLOCK); + + return 0; + } + + case PR_signalfd4: + if (get_sysnum(tracee, MODIFIED) == PR_signalfd) + emulate_fd_flags(tracee, result, SYSARG_4, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_socket: + case PR_timerfd_create: + if (needs_kompat(config, KERNEL_VERSION(2,6,27))) + emulate_fd_flags(tracee, result, SYSARG_2, O_CLOEXEC | O_NONBLOCK); + return 0; + + case PR_socketpair: { + int fds[2]; + + if (!needs_kompat(config, KERNEL_VERSION(2,6,27))) + return 0; + + status = read_data(tracee, fds, peek_reg(tracee, MODIFIED, SYSARG_4), sizeof(fds)); + if (status < 0) + return 0; + + emulate_fd_flags(tracee, fds[0], SYSARG_2, O_CLOEXEC | O_NONBLOCK); + emulate_fd_flags(tracee, fds[1], SYSARG_2, O_CLOEXEC | O_NONBLOCK); + + return 0; + } + + default: + return 0; + } + + return 0; +} + +/** + * Fill @config->utsname and @config->hwcap according to the content + * of @string. This function returns -1 if there is a parsing error, + * otherwise 0. 
+ */ +static int parse_utsname(Config *config, const char *string) +{ + struct utsname utsname; + int status; + + assert(string != NULL); + + status = uname(&utsname); + if (status < 0 || getenv("PROOT_FORCE_KOMPAT") != NULL) + config->actual_release = 0; + else + config->actual_release = parse_kernel_release(utsname.release); + + /* Check whether it is the simple format (ie. release number), + * or the complex one: + * + * '\sysname\nodename\release\version\machine\domainname\hwcap\' + * + * This complex format is ugly on purpose: it ain't to be used + * directly by users. */ + if (string[0] == '\\') { + const char *start; + const char *end; + char *end2; + + /* Initial state of the parser. */ + end = string; + +#define PARSE(field) do { \ + size_t length; \ + \ + start = end + 1; \ + end = strchr(start, '\\'); \ + if (end == NULL) { \ + note(NULL, ERROR, USER, \ + "can't find %s field in '%s'", #field, string); \ + return -1; \ + } \ + \ + length = end - start; \ + length = MIN(length, sizeof(config->utsname.field) - 1); \ + strncpy(config->utsname.field, start, length); \ + config->utsname.field[length] = '\0'; \ + } while(0) + + PARSE(sysname); + PARSE(nodename); + PARSE(release); + PARSE(version); + PARSE(machine); + PARSE(domainname); + +#undef PARSE + + /* The hwcap field is parsed as an hexadecimal value. */ + errno = 0; + config->hwcap = strtol(end + 1, &end2, 16); + if (errno != 0 || end2[0] != '\\') { + note(NULL, ERROR, USER, "can't find hwcap field in '%s'", string); + return -1; + } + } + else { + size_t length; + + memcpy(&config->utsname, &utsname, sizeof(config->utsname)); + + length = MIN(strlen(string), sizeof(config->utsname.release) - 1); + strncpy(config->utsname.release, string, length); + config->utsname.release[length] = '\0'; + + config->hwcap = (word_t) -1; + } + + config->virtual_release = parse_kernel_release(config->utsname.release); + + return 0; +} + +/* List of syscalls handled by this extensions. 
*/ +static FilteredSysnum filtered_sysnums[] = { + { PR_accept4, FILTER_SYSEXIT }, + { PR_dup3, FILTER_SYSEXIT }, + { PR_epoll_create1, FILTER_SYSEXIT }, + { PR_epoll_pwait, 0 }, + { PR_eventfd2, FILTER_SYSEXIT }, + { PR_execve, FILTER_SYSEXIT }, + { PR_faccessat, 0 }, + { PR_fchmodat, 0 }, + { PR_fchownat, 0 }, + { PR_fcntl, FILTER_SYSEXIT }, + { PR_fstatat64, 0 }, + { PR_futimesat, 0 }, + { PR_futex, 0 }, + { PR_inotify_init1, FILTER_SYSEXIT }, + { PR_linkat, 0 }, + { PR_mkdirat, 0 }, + { PR_mknodat, 0 }, + { PR_newfstatat, 0 }, + { PR_open, FILTER_SYSEXIT }, + { PR_openat, FILTER_SYSEXIT }, + { PR_pipe2, FILTER_SYSEXIT }, + { PR_pselect6, 0 }, + { PR_readlinkat, 0 }, + { PR_renameat, 0 }, + { PR_renameat2, 0 }, + { PR_setdomainname, FILTER_SYSEXIT }, + { PR_sethostname, FILTER_SYSEXIT }, + { PR_signalfd4, FILTER_SYSEXIT }, + { PR_socket, FILTER_SYSEXIT }, + { PR_socketpair, FILTER_SYSEXIT }, + { PR_symlinkat, 0 }, + { PR_timerfd_create, FILTER_SYSEXIT }, + { PR_uname, FILTER_SYSEXIT }, + { PR_unlinkat, 0 }, + FILTERED_SYSNUM_END, +}; + +/** + * Handler for this @extension. It is triggered each time an @event + * occured. See ExtensionEvent for the meaning of @data1 and @data2. + */ +int kompat_callback(Extension *extension, ExtensionEvent event, + intptr_t data1, intptr_t data2 UNUSED) +{ + int status; + + switch (event) { + case INITIALIZATION: { + Config *config; + + extension->config = talloc_zero(extension, Config); + if (extension->config == NULL) + return -1; + config = extension->config; + + status = parse_utsname(config, (const char *) data1); + if (status < 0) + return -1; + + extension->filtered_sysnums = filtered_sysnums; + return 0; + } + + case SYSCALL_ENTER_END: { + Tracee *tracee = TRACEE(extension); + Config *config = talloc_get_type_abort(extension->config, Config); + + /* Nothing to do if this syscall is being discarded + * (because of an error detected by PRoot). 
*/ + if ((int) data1 < 0) + return 0; + + return handle_sysenter_end(tracee, config); + } + + case SYSCALL_EXIT_END: { + Tracee *tracee = TRACEE(extension); + Config *config = talloc_get_type_abort(extension->config, Config); + + return handle_sysexit_end(tracee, config); + } + + case SYSCALL_EXIT_START: { + Tracee *tracee = TRACEE(extension); + Config *config = talloc_get_type_abort(extension->config, Config); + word_t result = peek_reg(tracee, CURRENT, SYSARG_RESULT);; + word_t sysnum = get_sysnum(tracee, ORIGINAL); + + /* Note: this can be done only before PRoot pushes the + * load script into tracee's stack. */ + if ((int) result >= 0 && sysnum == PR_execve) + adjust_elf_auxv(tracee, config); + return 0; + } + + default: + return 0; + } +} diff --git a/proot/proot_linux/extension/link2symlink/link2symlink.c b/proot/proot_linux/extension/link2symlink/link2symlink.c new file mode 100644 index 0000000..3484da5 --- /dev/null +++ b/proot/proot_linux/extension/link2symlink/link2symlink.c @@ -0,0 +1,557 @@ +#include /* rename(2), */ +#include /* atoi */ +#include /* symlink(2), symlinkat(2), readlink(2), lstat(2), unlink(2), unlinkat(2)*/ +#include /* str*, strrchr, strcat, strcpy, strncpy, strncmp */ +#include /* lstat(2), */ +#include /* lstat(2), */ +#include /* E*, */ +#include /* PATH_MAX, */ + +#include "extension/extension.h" +#include "tracee/tracee.h" +#include "tracee/mem.h" +#include "syscall/syscall.h" +#include "syscall/sysnum.h" +#include "path/path.h" +#include "arch.h" +#include "attribute.h" + +#define PREFIX ".l2s." +#define DELETED_SUFFIX " (deleted)" + +/** + * Copy the contents of the @symlink into @value (nul terminated). + * This function returns -errno if an error occured, otherwise 0. 
+ */ +static int my_readlink(const char symlink[PATH_MAX], char value[PATH_MAX]) +{ + ssize_t size; + + size = readlink(symlink, value, PATH_MAX); + if (size < 0) + return size; + if (size >= PATH_MAX) + return -ENAMETOOLONG; + value[size] = '\0'; + + return 0; +} + +/** + * Move the path pointed to by @tracee's @sysarg to a new location, + * symlink the original path to this new one, make @tracee's @sysarg + * point to the new location. This function returns -errno if an + * error occured, otherwise 0. + */ +static int move_and_symlink_path(Tracee *tracee, Reg sysarg) +{ + char original[PATH_MAX]; + char intermediate[PATH_MAX]; + char new_intermediate[PATH_MAX]; + char final[PATH_MAX]; + char new_final[PATH_MAX]; + char * name; + struct stat statl; + ssize_t size; + int status; + int link_count; + int first_link = 1; + int intermediate_suffix = 1; + + /* Note: this path was already canonicalized. */ + size = read_string(tracee, original, peek_reg(tracee, CURRENT, sysarg), PATH_MAX); + if (size < 0) + return size; + if (size >= PATH_MAX) + return -ENAMETOOLONG; + + /* Sanity check: directories can't be linked. */ + status = lstat(original, &statl); + if (status < 0) + return status; + if (S_ISDIR(statl.st_mode)) + return -EPERM; + + /* Check if it is a symbolic link. 
*/ + if (S_ISLNK(statl.st_mode)) { + /* get name */ + size = my_readlink(original, intermediate); + if (size < 0) + return size; + + name = strrchr(intermediate, '/'); + if (name == NULL) + name = intermediate; + else + name++; + + if (strncmp(name, PREFIX, strlen(PREFIX)) == 0) + first_link = 0; + } else { + /* compute new name */ + if (strlen(PREFIX) + strlen(original) + 5 >= PATH_MAX) + return -ENAMETOOLONG; + + name = strrchr(original,'/'); + if (name == NULL) + name = original; + else + name++; + + strncpy(intermediate, original, strlen(original) - strlen(name)); + intermediate[strlen(original) - strlen(name)] = '\0'; + strcat(intermediate, PREFIX); + strcat(intermediate, name); + } + + if (first_link) { + /*Move the original content to the new path. */ + do { + sprintf(new_intermediate, "%s%04d", intermediate, intermediate_suffix); + intermediate_suffix++; + } while ((access(new_intermediate,F_OK) != -1) && (intermediate_suffix < 1000)); + strcpy(intermediate, new_intermediate); + + strcpy(final, intermediate); + strcat(final, ".0002"); + status = rename(original, final); + if (status < 0) + return status; + + /* Symlink the intermediate to the final file. */ + status = symlink(final, intermediate); + if (status < 0) + return status; + + /* Symlink the original path to the intermediate one. */ + status = symlink(intermediate, original); + if (status < 0) + return status; + } else { + /*Move the original content to new location, by incrementing count at end of path. */ + size = my_readlink(intermediate, final); + if (size < 0) + return size; + + link_count = atoi(final + strlen(final) - 4); + link_count++; + + strncpy(new_final, final, strlen(final) - 4); + sprintf(new_final + strlen(final) - 4, "%04d", link_count); + + status = rename(final, new_final); + if (status < 0) + return status; + strcpy(final, new_final); + /* Symlink the intermediate to the final file. 
*/ + status = unlink(intermediate); + if (status < 0) + return status; + status = symlink(final, intermediate); + if (status < 0) + return status; + } + + status = set_sysarg_path(tracee, intermediate, sysarg); + if (status < 0) + return status; + + return 0; +} + + +/* If path points a file that is a symlink to a file that begins + * with PREFIX, let the file be deleted, but also delete the + * symlink that was created and decremnt the count that is tacked + * to end of original file. + */ +static int decrement_link_count(Tracee *tracee, Reg sysarg) +{ + char original[PATH_MAX]; + char intermediate[PATH_MAX]; + char final[PATH_MAX]; + char new_final[PATH_MAX]; + char * name; + struct stat statl; + ssize_t size; + int status; + int link_count; + + /* Note: this path was already canonicalized. */ + size = read_string(tracee, original, peek_reg(tracee, CURRENT, sysarg), PATH_MAX); + if (size < 0) + return size; + if (size >= PATH_MAX) + return -ENAMETOOLONG; + + /* Check if it is a converted link already. */ + status = lstat(original, &statl); + if (status < 0) + return 0; + + if (!S_ISLNK(statl.st_mode)) + return 0; + + size = my_readlink(original, intermediate); + if (size < 0) + return size; + + name = strrchr(intermediate, '/'); + if (name == NULL) + name = intermediate; + else + name++; + + /* Check if an l2s file is pointed to */ + if (strncmp(name, PREFIX, strlen(PREFIX)) != 0) + return 0; + + size = my_readlink(intermediate, final); + if (size < 0) + return size; + + link_count = atoi(final + strlen(final) - 4); + link_count--; + + /* Check if it is or is not the last link to delete */ + if (link_count > 0) { + strncpy(new_final, final, strlen(final) - 4); + sprintf(new_final + strlen(final) - 4, "%04d", link_count); + + status = rename(final, new_final); + if (status < 0) + return status; + + strcpy(final, new_final); + + /* Symlink the intermediate to the final file. 
*/ + status = unlink(intermediate); + if (status < 0) + return status; + + status = symlink(final, intermediate); + if (status < 0) + return status; + } else { + /* If it is the last, delete the intermediate and final */ + status = unlink(intermediate); + if (status < 0) + return status; + status = unlink(final); + if (status < 0) + return status; + } + + return 0; +} + +/** + * Make it so fake hard links look like real hard link with respect to number of links and inode + * This function returns -errno if an error occured, otherwise 0. + */ +static int handle_sysexit_end(Tracee *tracee) +{ + word_t sysnum; + + sysnum = get_sysnum(tracee, ORIGINAL); + + switch (sysnum) { + + case PR_fstatat64: //int fstatat(int dirfd, const char *pathname, struct stat *buf, int flags); + case PR_newfstatat: //int fstatat(int dirfd, const char *pathname, struct stat *buf, int flags); + case PR_stat64: //int stat(const char *path, struct stat *buf); + case PR_lstat64: //int lstat(const char *path, struct stat *buf); + case PR_fstat64: //int fstat(int fd, struct stat *buf); + case PR_stat: //int stat(const char *path, struct stat *buf); + case PR_statx: //int statx(int fd, const char *path, unsigned flags, unsigned mask, struct statx *buf); + case PR_lstat: //int lstat(const char *path, struct stat *buf); + case PR_fstat: { //int fstat(int fd, struct stat *buf); + word_t result; + Reg sysarg_stat; + Reg sysarg_path; + int status; + struct stat statl; + ssize_t size; + char original[PATH_MAX]; + char intermediate[PATH_MAX]; + char final[PATH_MAX]; + char * name; + struct stat finalStat; + + /* Override only if it succeed. 
*/ + result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + if (result != 0) + return 0; + + if (sysnum == PR_fstat64 || sysnum == PR_fstat) { + status = readlink_proc_pid_fd(tracee->pid, peek_reg(tracee, MODIFIED, SYSARG_1), original); + if (strcmp(original + strlen(original) - strlen(DELETED_SUFFIX), DELETED_SUFFIX) == 0) + original[strlen(original) - strlen(DELETED_SUFFIX)] = '\0'; + if (status < 0) + return status; + } else { + if (sysnum == PR_fstatat64 || sysnum == PR_newfstatat || sysnum == PR_statx) + sysarg_path = SYSARG_2; + else + sysarg_path = SYSARG_1; + size = read_string(tracee, original, peek_reg(tracee, MODIFIED, sysarg_path), PATH_MAX); + if (size < 0) + return size; + if (size >= PATH_MAX) + return -ENAMETOOLONG; + } + + name = strrchr(original, '/'); + if (name == NULL) + name = original; + else + name++; + + /* Check if it is a link */ + status = lstat(original, &statl); + + if (strncmp(name, PREFIX, strlen(PREFIX)) == 0) { + if (S_ISLNK(statl.st_mode)) { + strcpy(intermediate,original); + goto intermediate_proc; + } else { + strcpy(final,original); + goto final_proc; + } + } + + if (!S_ISLNK(statl.st_mode)) + return 0; + + size = my_readlink(original, intermediate); + if (size < 0) + return size; + + name = strrchr(intermediate, '/'); + if (name == NULL) + name = intermediate; + else + name++; + + if (strncmp(name, PREFIX, strlen(PREFIX)) != 0) + return 0; + + intermediate_proc: size = my_readlink(intermediate, final); + if (size < 0) + return size; + + final_proc: status = lstat(final,&finalStat); + if (status < 0) + return status; + + finalStat.st_nlink = atoi(final + strlen(final) - 4); + + /* Get the address of the 'stat' structure. 
*/ + if (sysnum == PR_fstatat64 || sysnum == PR_newfstatat) + sysarg_stat = SYSARG_3; + else if (sysnum == PR_statx) + sysarg_stat = SYSARG_5; + else + sysarg_stat = SYSARG_2; + + status = write_data(tracee, peek_reg(tracee, ORIGINAL, sysarg_stat), &finalStat, sizeof(finalStat)); + if (status < 0) + return status; + + return 0; + } + + default: + return 0; + } +} + +/** + * When @translated_path is a faked hard-link, replace it with the + * point it (internally) points to. + */ +static void translated_path(char translated_path[PATH_MAX]) +{ + char path2[PATH_MAX]; + char path[PATH_MAX]; + char *component; + int status; + + status = my_readlink(translated_path, path); + if (status < 0) + return; + + component = strrchr(path, '/'); + if (component == NULL) + return; + component++; + + if (strncmp(component, PREFIX, strlen(PREFIX)) != 0) + return; + + status = my_readlink(path, path2); + if (status < 0) + return; + +#if 0 /* Sanity check. */ + component = strrchr(path, '/'); + if (component == NULL) + return; + component++; + + if (strncmp(component, PREFIX, strlen(PREFIX)) != 0) + return; +#endif + + strcpy(translated_path, path2); + return; +} + +/** + * Handler for this @extension. It is triggered each time an @event + * occurred. See ExtensionEvent for the meaning of @data1 and @data2. + */ +int link2symlink_callback(Extension *extension, ExtensionEvent event, + intptr_t data1, intptr_t data2 UNUSED) +{ + int status; + + switch (event) { + case INITIALIZATION: { + /* List of syscalls handled by this extensions. 
*/ + static FilteredSysnum filtered_sysnums[] = { + { PR_link, FILTER_SYSEXIT }, + { PR_linkat, FILTER_SYSEXIT }, + { PR_unlink, FILTER_SYSEXIT }, + { PR_unlinkat, FILTER_SYSEXIT }, + { PR_fstat, FILTER_SYSEXIT }, + { PR_fstat64, FILTER_SYSEXIT }, + { PR_fstatat64, FILTER_SYSEXIT }, + { PR_lstat, FILTER_SYSEXIT }, + { PR_lstat64, FILTER_SYSEXIT }, + { PR_newfstatat, FILTER_SYSEXIT }, + { PR_stat, FILTER_SYSEXIT }, + { PR_statx, FILTER_SYSEXIT }, + { PR_stat64, FILTER_SYSEXIT }, + { PR_rename, FILTER_SYSEXIT }, + { PR_renameat, FILTER_SYSEXIT }, + FILTERED_SYSNUM_END, + }; + extension->filtered_sysnums = filtered_sysnums; + return 0; + } + + case SYSCALL_ENTER_END: { + Tracee *tracee = TRACEE(extension); + + switch (get_sysnum(tracee, ORIGINAL)) { + case PR_rename: + /*int rename(const char *oldpath, const char *newpath); + *If newpath is a psuedo hard link decrement the link count. + */ + + status = decrement_link_count(tracee, SYSARG_2); + if (status < 0) + return status; + + break; + + case PR_renameat: + /*int renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); + *If newpath is a psuedo hard link decrement the link count. + */ + + status = decrement_link_count(tracee, SYSARG_4); + if (status < 0) + return status; + + break; + + case PR_unlink: + /* If path points a file that is an symlink to a file that begins + * with PREFIX, let the file be deleted, but also decrement the + * hard link count, if it is greater than 1, otherwise delete + * the original file and intermediate file too. + */ + + status = decrement_link_count(tracee, SYSARG_1); + if (status < 0) + return status; + + break; + + case PR_unlinkat: + /* If path points a file that is a symlink to a file that begins + * with PREFIX, let the file be deleted, but also delete the + * symlink that was created and decremnt the count that is tacked + * to end of original file. 
+ */ + + status = decrement_link_count(tracee, SYSARG_2); + if (status < 0) + return status; + + break; + + case PR_link: + /* Convert: + * + * int link(const char *oldpath, const char *newpath); + * + * into: + * + * int symlink(const char *oldpath, const char *newpath); + */ + + status = move_and_symlink_path(tracee, SYSARG_1); + if (status < 0) + return status; + + set_sysnum(tracee, PR_symlink); + break; + + case PR_linkat: + /* Convert: + * + * int linkat(int olddirfd, const char *oldpath, + * int newdirfd, const char *newpath, int flags); + * + * into: + * + * int symlink(const char *oldpath, const char *newpath); + * + * Note: PRoot has already canonicalized + * linkat() paths this way: + * + * olddirfd + oldpath -> oldpath + * newdirfd + newpath -> newpath + */ + + status = move_and_symlink_path(tracee, SYSARG_2); + if (status < 0) + return status; + + poke_reg(tracee, SYSARG_1, peek_reg(tracee, CURRENT, SYSARG_2)); + poke_reg(tracee, SYSARG_2, peek_reg(tracee, CURRENT, SYSARG_4)); + + set_sysnum(tracee, PR_symlink); + break; + + default: + break; + } + return 0; + } + + case SYSCALL_EXIT_END: { + return handle_sysexit_end(TRACEE(extension)); + } + + case TRANSLATED_PATH: + translated_path((char *) data1); + return 0; + + default: + return 0; + } +} diff --git a/proot/proot_linux/extension/portmap/map.c b/proot/proot_linux/extension/portmap/map.c new file mode 100644 index 0000000..dede10a --- /dev/null +++ b/proot/proot_linux/extension/portmap/map.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2016 Vincent Hage + */ + +#include /* inet_ntop */ + +#include "cli/note.h" +#include "extension/portmap/portmap.h" + +/** + * Set all entries empty by setting their key and values to PORTMAP_DEFAULT_VALUE. + * The table mask is used in get_index as a fast way of doing the modulus operation. 
+ */
+void initialize_portmap(PortMap *portmap)
+{
+	int i;
+
+	/* every slot starts out unused: both key and value hold the sentinel */
+	for(i = 0; i < PORTMAP_SIZE; i++) {
+		portmap->map[i].port_in = PORTMAP_DEFAULT_VALUE;
+		portmap->map[i].port_out = PORTMAP_DEFAULT_VALUE;
+	}
+
+	portmap->table_mask = PORTMAP_SIZE - 1;
+}
+
+/**
+ * Find the slot for @key: either the slot that already holds @key or
+ * the first unused slot met while probing linearly from the slot
+ * selected by (key & table_mask), wrapping around once.
+ * Return the slot index if successful, or PORTMAP_SIZE when the whole
+ * table has been visited without finding such a slot.
+ */
+uint16_t get_index(PortMap *portmap, uint16_t key)
+{
+	int i = 0;
+	uint16_t index;
+
+	/* the table mask is used instead of the mod operation
+	 * to remove the unnecessary bits of a number, to get an index. */
+	index = key & portmap->table_mask;
+
+	/* we go through the map until either:
+	 * 1. the end of the map is reached
+	 * 2. an empty entry is reached
+	 * 3. an entry with the same key is found
+	 * 'i' counts the slots visited so far, for the wrap-around below.
+	 */
+	while(index < PORTMAP_SIZE &&
+			portmap->map[index].port_in != PORTMAP_DEFAULT_VALUE &&
+			portmap->map[index].port_in != key) {
+		index++;
+		i++;
+	}
+
+	/* if a good entry has been found, we can return it directly */
+	if(index < PORTMAP_SIZE)
+		return index;
+
+	/* otherwise, we loop back from the beginning */
+	index = 0;
+
+	/* we go through the map until either:
+	 * 1. i == PORTMAP_SIZE (the whole map has been explored)
+	 * 2. an empty entry is reached
+	 * 3. an entry with the same key is found
+	 */
+	while(i < PORTMAP_SIZE &&
+			portmap->map[index].port_in != PORTMAP_DEFAULT_VALUE &&
+			portmap->map[index].port_in != key) {
+		index++;
+		i++;
+	}
+
+	if(i < PORTMAP_SIZE)
+		/* a good entry has been found */
+		return index;
+	else
+		/* the map is full */
+		return PORTMAP_SIZE;
+}
+
+/**
+ * Add an entry to the port map by either finding an available entry,
+ * or write on an existing one with the same key.
+ * Return 0 if successful, or -1 when no slot is available.
+ */
+int add_entry(PortMap *portmap, uint16_t port_in, uint16_t port_out)
+{
+	Tracee *tracee = TRACEE(global_portmap_extension);
+	uint16_t index = get_index(portmap, port_in);
+
+	/* no available entry has been found */
+	if(index == PORTMAP_SIZE)
+		return -1;
+
+	/* the ports are stored exactly as given, no byte-order conversion here */
+	portmap->map[index].port_in = port_in;
+	portmap->map[index].port_out = port_out;
+
+	/* only the log line converts, with ntohs(), for display */
+	VERBOSE(tracee, PORTMAP_VERBOSITY, "new port mapping entry: %d -> %d", ntohs(port_in), ntohs(port_out));
+
+	return 0;
+}
+
+/**
+ * Find the entry corresponding to port_in,
+ * and returns the associated port_out.
+ * If no entry is found, return PORTMAP_DEFAULT_VALUE.
+ */
+uint16_t get_port(PortMap *portmap, uint16_t port_in)
+{
+	uint16_t index = get_index(portmap, port_in);
+
+	/* no corresponding entry has been found */
+	if(index == PORTMAP_SIZE)
+		return PORTMAP_DEFAULT_VALUE;
+
+	/* get_index() also stops on an unused slot: check the key really matches */
+	if(portmap->map[index].port_in == port_in)
+		return portmap->map[index].port_out;
+	else
+		return PORTMAP_DEFAULT_VALUE;
+}
diff --git a/proot/proot_linux/extension/portmap/portmap.c b/proot/proot_linux/extension/portmap/portmap.c
new file mode 100644
index 0000000..39c1e22
--- /dev/null
+++ b/proot/proot_linux/extension/portmap/portmap.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2016 Vincent Hage
+ */
+
+#include /* intptr_t, */
+#include /* strtoul(3), */
+#include /* memset */
+#include /* strncpy */
+#include /* AF_UNIX, AF_INET */
+#include /* inet_ntop */
+#include /* SYS_*, */
+#include "cli/note.h"
+#include "extension/extension.h"
+#include "tracee/mem.h" /* read_data */
+#include "syscall/chain.h" /* register_chained_syscall */
+#include "extension/portmap/portmap.h"
+
+/* The single portmap extension instance; set by portmap_callback() on
+ * INITIALIZATION and read by add_portmap_entry(), activate_netcoop_mode()
+ * and add_entry(). */
+Extension *global_portmap_extension = NULL;
+
+typedef struct Config {
+	PortMap portmap; /* the port_in -> port_out table */
+	bool netcoop_mode; /* when set, bind() on an unmapped port is rewritten to port 0 */
+	bool need_to_check_new_port; /* a bind() was rewritten to port 0 and the assigned port is not recorded yet */
+	uint16_t old_port; /* port the tracee originally asked for, as read from the socket address */
+	word_t sockfd; /* descriptor of the socket whose bind() was rewritten */
+} Config;
+
+/**
+ * Change the port of the socket address, if it maps with an entry.
+ * Return 0 if no relevant entry is found, and 1 if the port has been changed.
+ */ +int change_inet_socket_port(Tracee *tracee, Config *config, word_t sockfd, struct sockaddr_in *sockaddr, bool bind_mode) { + uint16_t port_in, port_out; + + port_in = sockaddr->sin_port; + port_out = get_port(&config->portmap, port_in); + + if(port_out == PORTMAP_DEFAULT_VALUE) { + if (bind_mode && config->netcoop_mode && !config->need_to_check_new_port) { + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv4 netcoop mode with: %d", htons(port_in)); + sockaddr->sin_port = 0; // the system will assign an available port + config->old_port = port_in; // we keep this one for adding a new entry + config->need_to_check_new_port = true; + config->sockfd = sockfd; + return 1; + } + + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv4 port ignored: %d ", htons(port_in)); + return 0; + } + + sockaddr->sin_port = port_out; + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv4 port translation: %d -> %d (NOT GUARANTEED: bind might still fail on target port)", htons(port_in), htons(port_out)); + + return 1; +} + +/** + * Change the port of the socket address, if it maps with an entry. + * Return 0 if no relevant entry is found, and 1 if the port has been changed. 
+ */ +int change_inet6_socket_port(Tracee *tracee, Config *config, word_t sockfd, struct sockaddr_in6 *sockaddr, bool bind_mode) { + uint16_t port_in, port_out; + + port_in = sockaddr->sin6_port; + port_out = get_port(&config->portmap, port_in); + + if(port_out == PORTMAP_DEFAULT_VALUE) { + if (bind_mode && config->netcoop_mode && !config->need_to_check_new_port) { + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv6 netcoop mode with: %d", htons(port_in)); + sockaddr->sin6_port = 0; // the system will assign an available port + config->old_port = port_in; // we keep this one for adding a new entry + config->need_to_check_new_port = true; + config->sockfd = sockfd; + return 1; + } + + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv6 port ignored: %d ", htons(port_in)); + return 0; + } + + sockaddr->sin6_port = port_out; + VERBOSE(tracee, PORTMAP_VERBOSITY, "ipv6 port translation: %d -> %d (NOT GUARANTEED: bind might still fail on target port)", htons(port_in), htons(port_out)); + + return 1; +} + +int prepare_getsockname_chained_syscall(Tracee *tracee, Config *config, word_t sockfd, int is_socketcall) { + int status = 0; + word_t sock_addr, size_addr; + struct sockaddr_un sockaddr; + socklen_t size; + + size = sizeof(sockaddr); + + /* we check that it's the correct socket */ + if(sockfd != config->sockfd) + return 0; + + /* we allocate a memory place to store the socket address. + * This is a buffer that will be filled by the getsockname() syscall. 
+ */ + sock_addr = alloc_mem(tracee, sizeof(sockaddr)); + if (sock_addr == 0) + return -EFAULT; + size_addr = alloc_mem(tracee, sizeof(socklen_t)); + if(size_addr == 0) + return -EFAULT; + + memset(&sockaddr, '\0', sizeof(sockaddr)); + + /* we write the modified socket in this new address */ + status = write_data(tracee, sock_addr, &sockaddr, sizeof(sockaddr)); + if (status < 0) + return status; + status = write_data(tracee, size_addr, &size, sizeof(size)); + if (status < 0) + return status; + + /* Only by using getsockname can we retrieve the port automatically + * assigned by the system to the socket. + */ + if (!is_socketcall) { + status = register_chained_syscall( + tracee, PR_getsockname, + sockfd, // SYS_ARG1, socket file descriptor. + sock_addr, + size_addr, + 0, 0, 0 + ); + } else { + unsigned long args[6]; + + args[0] = sockfd; + args[1] = sock_addr; + args[2] = size_addr; + args[3] = 0; + args[4] = 0; + args[5] = 0; + + /* We allocate a little bloc of memory to store socketcall's arguments */ + word_t args_addr = alloc_mem(tracee, 6 * sizeof_word(tracee)); + + status = write_data(tracee, args_addr, &args, 6 * sizeof_word(tracee)); + + status = register_chained_syscall( + tracee, PR_socketcall, + SYS_GETSOCKNAME, + args_addr, // SYS_ARG1, socket file descriptor. 
+ 0, + 0, + 0, 0 + ); + } + + if (status < 0) + return status; + //status = restart_original_syscall(tracee); + //if (status < 0) + // return status; + + return 0; +} + +int translate_port(Tracee *tracee, Config *config, word_t sockfd, word_t *sock_addr, int size, int is_bind_syscall) { + struct sockaddr_un sockaddr; + int status; + + if (sock_addr == 0) + return 0; + + /* Essential step, we clean the structure before adding data to it */ + memset(&sockaddr, '\0', sizeof(sockaddr)); + + /* Next, we read the socket address structure from the tracee's memory */ + status = read_data(tracee, &sockaddr, *sock_addr, size); + + if (status < 0) + return status; + + //if(sysnum == PR_connect || sysnum == PR_bind) { + /* Before binding to a socket, the system does some connect() + * to the NSCD (Name Service Cache Daemon) on its own. + * These connect calls are for sockets in the AF_FILE domain; + * remember that AF_FILE and AF_UNIX have the same value. + * Their path is always '/var/run/nscd/socket'. 
+ */ + + status = 0; + if (sockaddr.sun_family == AF_INET) { + status = change_inet_socket_port(tracee, config, sockfd, (struct sockaddr_in *) &sockaddr, is_bind_syscall); + } + else if (sockaddr.sun_family == AF_INET6) { + status = change_inet6_socket_port(tracee, config, sockfd, (struct sockaddr_in6 *) &sockaddr, is_bind_syscall); + } + + if (status <= 0) { + /* the socket has been ignored, or an error occured */ + return status; + } + + /* we allocate a new memory place for the modified socket address */ + *sock_addr = alloc_mem(tracee, sizeof(sockaddr)); + if (sock_addr == 0) + return -EFAULT; + + /* we write the modified socket in this new address */ + status = write_data(tracee, *sock_addr, &sockaddr, sizeof(sockaddr)); + if (status < 0) + return status; + + return 0; +} + +static int handle_sysenter_end(Tracee *tracee, Config *config) +{ + int status; + int sysnum; + + sysnum = get_sysnum(tracee, CURRENT); + + switch(sysnum) { +#define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) + +#define PEEK_WORD(addr, forced_errno) \ + peek_word(tracee, addr); \ + if (errno != 0) { \ + status = forced_errno ?: -errno; \ + break; \ + } + +#define POKE_WORD(addr, value) \ + poke_word(tracee, addr, value); \ + if (errno != 0) { \ + status = -errno; \ + break; \ + } + + case PR_socketcall: { + word_t sockfd; + word_t args_addr; + word_t sock_addr_saved; + word_t sock_addr; + word_t size; + word_t call; + int is_bind_syscall = sysnum == PR_bind; + + call = peek_reg(tracee, CURRENT, SYSARG_1); + is_bind_syscall = call == SYS_BIND; + args_addr = peek_reg(tracee, CURRENT, SYSARG_2); + + switch(call) { + case SYS_BIND: + case SYS_CONNECT: { + /* Remember: PEEK_WORD puts -errno in status and breaks if an + * error occured. 
*/ + sockfd = PEEK_WORD(SYSARG_ADDR(1), 0); + sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); + size = PEEK_WORD(SYSARG_ADDR(3), 0); + + sock_addr_saved = sock_addr; + status = translate_port(tracee, config, sockfd, &sock_addr, size, is_bind_syscall); + if (status < 0) + break; + + /* These parameters are used/restored at the exit stage. */ + poke_reg(tracee, SYSARG_5, sock_addr_saved); + poke_reg(tracee, SYSARG_6, size); + + /* Remember: POKE_WORD puts -errno in status and breaks if an + * error occured. */ + POKE_WORD(SYSARG_ADDR(2), sock_addr); + POKE_WORD(SYSARG_ADDR(3), sizeof(struct sockaddr_un)); + return 0; + } + case SYS_LISTEN: { + word_t sockfd; + + if(!config->netcoop_mode || !config->need_to_check_new_port) + return 0; + + /* we retrieve this one from the listen() system call */ + sockfd = PEEK_WORD(SYSARG_ADDR(1), 0); + + status = prepare_getsockname_chained_syscall(tracee, config, sockfd, true); + + return status; + } + default: + return 0; + } + break; + } + +#undef SYSARG_ADDR +#undef PEEK_WORD +#undef POKE_WORD + + case PR_connect: + case PR_bind: { + int size; + int is_bind_syscall; + word_t sockfd, sock_addr; + + /* + * Get the reg address of the socket, and the size of the structure. + * Note that the sockaddr and addrlen are at the same position for all 4 of these syscalls. 
+ */ + sockfd = peek_reg(tracee, CURRENT, SYSARG_1); + sock_addr = peek_reg(tracee, CURRENT, SYSARG_2); + size = (int) peek_reg(tracee, CURRENT, SYSARG_3); + is_bind_syscall = sysnum == PR_bind; + + status = translate_port(tracee, config, sockfd, &sock_addr, size, is_bind_syscall); + if (status < 0) { + return status; + } + + /* then we modify the syscall argument so that it uses the modified socket address */ + poke_reg(tracee, SYSARG_2, sock_addr); + //poke_reg(tracee, SYSARG_3, size); + poke_reg(tracee, SYSARG_3, sizeof(struct sockaddr_un)); + + return 0; + } + case PR_listen: { + word_t sockfd; + + if(!config->netcoop_mode || !config->need_to_check_new_port) + return 0; + + /* we retrieve this one from the listen() system call */ + sockfd = peek_reg(tracee, CURRENT, SYSARG_1); + + status = prepare_getsockname_chained_syscall(tracee, config, sockfd, false); + return status; + } + default: + return 0; + } + + return 0; +} + +int add_changed_port_as_entry(Tracee *tracee, Config *config, word_t sockfd, word_t sock_addr, int result) { + int status; + struct sockaddr_un sockaddr; + struct sockaddr_in *sockaddr_in; + struct sockaddr_in6 *sockaddr_in6; + uint16_t port_in, port_out; + + if(!config->need_to_check_new_port) + return 0; + + + if (sock_addr == 0) + return 0; + + if (sockfd != config->sockfd) + return 0; + + if (result < 0) + return -result; + + /* Essential step, we clean the structure before adding data to it */ + memset(&sockaddr, '\0', sizeof(sockaddr)); + + /* Next, we read the socket address structure from the tracee's memory */ + status = read_data(tracee, &sockaddr, sock_addr, sizeof(sockaddr)); + if (status < 0) + return status; + + port_in = config->old_port; + + if (sockaddr.sun_family == AF_INET) { + sockaddr_in = (struct sockaddr_in *) &sockaddr; + port_out = sockaddr_in->sin_port; + } + else if (sockaddr.sun_family == AF_INET6) { + sockaddr_in6 = (struct sockaddr_in6 *) &sockaddr; + port_out = sockaddr_in6->sin6_port; + } + else + return 0; + + 
add_portmap_entry(htons(port_in), htons(port_out)); + config->need_to_check_new_port = false; + config->sockfd = 0; + + return 0; +} + +static int handle_syschained_exit(Tracee *tracee, Config *config) +{ + int sysnum; + + sysnum = get_sysnum(tracee, CURRENT); + + switch(sysnum) { + +#define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) + +#define PEEK_WORD(addr, forced_errno) \ + peek_word(tracee, addr); \ + if (errno != 0) { \ + status = forced_errno ?: -errno; \ + break; \ + } + +#define POKE_WORD(addr, value) \ + poke_word(tracee, addr, value); \ + if (errno != 0) { \ + status = -errno; \ + break; \ + } + + case PR_socketcall: { + word_t args_addr; + word_t call; + + call = peek_reg(tracee, CURRENT, SYSARG_1); + args_addr = peek_reg(tracee, CURRENT, SYSARG_2); + + switch(call) { + case SYS_GETSOCKNAME:{ + word_t sockfd, sock_addr; + int result, status; + + sockfd = PEEK_WORD(SYSARG_ADDR(1), 0); + sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); + result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + + status = add_changed_port_as_entry(tracee, config, sockfd, sock_addr, result); + return status; + } + default: + return 0; + } + return 0; + } + +#undef SYSARG_ADDR +#undef PEEK_WORD +#undef POKE_WORD + + case PR_getsockname:{ + word_t sockfd, sock_addr; + int result; + + // On AArch64 and ARM, SYSARG_1 is the same as SYSARG_RESULT (r0/x0) + // so `peek_reg(tracee, CURRENT, SYSARG_1)` would have returned + // the result of the syscall instead. + // Since the chained call is currently only used for syscall with + // `sockfd` as the first argument, we can just use + // the ORIGINAL version to get the fd. + sockfd = peek_reg(tracee, ORIGINAL, SYSARG_1); + sock_addr = peek_reg(tracee, CURRENT, SYSARG_2); + result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + + return add_changed_port_as_entry(tracee, config, sockfd, sock_addr, result); + } + default: + return 0; + } +} + +/* List of syscalls handled by this extension. 
*/ +static FilteredSysnum filtered_sysnums[] = { + { PR_bind, 0 }, + { PR_connect, 0 }, + { PR_listen, FILTER_SYSEXIT }, /* the exit stage is required to chain syscalls */ +// { PR_getsockname, FILTER_SYSEXIT }, /* not needed here, see CHAINED EXIT event */ + { PR_socketcall, 0 }, /* for x86 processors with kernel < 4.3 */ + FILTERED_SYSNUM_END, +}; + +int add_portmap_entry(uint16_t port_in, uint16_t port_out) { + if(global_portmap_extension == NULL) + return 0; + else { + Config *config = talloc_get_type_abort(global_portmap_extension->config, Config); + /* careful with little/big endian numbers */ + return add_entry(&config->portmap, ntohs(port_in), ntohs(port_out)); + } +} + +int activate_netcoop_mode() { + if(global_portmap_extension != NULL) { + Config *config = talloc_get_type_abort(global_portmap_extension->config, Config); + config->netcoop_mode = true; + } + + return 0; +} + +/** + * Handler for this @extension. It is triggered each time an @event + * occured. See ExtensionEvent for the meaning of @data1 and @data2. + */ +int portmap_callback(Extension *extension, ExtensionEvent event, + intptr_t data1 UNUSED, intptr_t data2 UNUSED) +{ + switch (event) { + case INITIALIZATION: { + Config *config; + + if(global_portmap_extension != NULL) + return -1; + + extension->config = talloc_zero(extension, Config); + if (extension->config == NULL) + return -1; + + config = talloc_get_type_abort(extension->config, Config); + initialize_portmap(&config->portmap); + config->netcoop_mode = false; + config->need_to_check_new_port = false; + config->sockfd = 0; + + extension->filtered_sysnums = filtered_sysnums; + + global_portmap_extension = extension; + return 0; + } + case SYSCALL_ENTER_END: { + /* As PRoot only translate unix sockets, + * it doesn't actually matter whether we do this + * on the ENTER_START or ENTER_END stage. 
*/ + Tracee *tracee = TRACEE(extension); + Config *config = talloc_get_type_abort(extension->config, Config); + return handle_sysenter_end(tracee, config); + } + case SYSCALL_CHAINED_EXIT: { + Tracee *tracee = TRACEE(extension); + Config *config = talloc_get_type_abort(extension->config, Config); + return handle_syschained_exit(tracee, config); + } + case INHERIT_PARENT: { + /* Shared configuration with the parent, + * as port maps do not change from tracee to tracee. */ + return 0; + } + default: + return 0; + } +} diff --git a/proot/proot_linux/extension/portmap/portmap.h b/proot/proot_linux/extension/portmap/portmap.h new file mode 100644 index 0000000..65d7a30 --- /dev/null +++ b/proot/proot_linux/extension/portmap/portmap.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2016 Vincent Hage + */ + +#ifndef PORTMAP_H +#define PORTMAP_H + +#include "extension/extension.h" + +#define PORTMAP_SIZE 4096 /* must be a power of 2 */ +#define PORTMAP_DEFAULT_VALUE 0 /* default value that indicates an unused entry */ +#define PORTMAP_VERBOSITY 1 + +typedef struct PortMapEntry { + uint16_t port_in; + uint16_t port_out; +} PortMapEntry; + +typedef struct PortMap { + PortMapEntry map[PORTMAP_SIZE]; + uint16_t table_mask; +} PortMap; + +void initialize_portmap(PortMap *portmap); +uint16_t get_index(PortMap *portmap, uint16_t key); +int add_entry(PortMap *portmap, uint16_t port_in, uint16_t port_out); +uint16_t get_port(PortMap *portmap, uint16_t port_in); + +int add_portmap_entry(uint16_t port_in, uint16_t port_out); +int activate_netcoop_mode(); + +#endif /* PORTMAP_H */ diff --git a/proot/proot_linux/extension/python/proot.i b/proot/proot_linux/extension/python/proot.i new file mode 100644 index 0000000..4f79373 --- /dev/null +++ b/proot/proot_linux/extension/python/proot.i @@ -0,0 +1,106 @@ +%module proot +%{ +#define SWIG_FILE_WITH_INIT + +#include "arch.h" +#include "syscall/sysnum.h" +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include 
"extension/extension.h" + +/* define an internal global with correct PR number */ +#define SYSNUM(item) static const int PR_internal ## item = PR_ ## item; +#include "syscall/sysnums.list" +#undef SYSNUM +%} + +/* now say PR_item has value PR_internal */ +/* works but ugly. Another way to do this ? */ +#define SYSNUM(item) static const int PR_ ## item = PR_internal ## item; +%include "syscall/sysnums.list" +#undef SYSNUM + +/* python extension helper */ +%inline %{ +Tracee *get_tracee_from_extension(long extension_handle) +{ + Extension *extension = (Extension *)extension_handle; + Tracee *tracee = TRACEE(extension); + + return tracee; +} +%} + +/* arch.h */ +typedef unsigned long word_t; + +/* tracee/tracee.h */ +typedef enum { + CURRENT = 0, + ORIGINAL = 1, + MODIFIED = 2, + NB_REG_VERSION +} RegVersion; + +/* syscall/sysnum.h */ +typedef enum Sysnum; +extern Sysnum get_sysnum(const Tracee *tracee, RegVersion version); +extern void set_sysnum(Tracee *tracee, Sysnum sysnum); + +/* tracee/reg.h */ +typedef enum { + SYSARG_NUM = 0, + SYSARG_1, + SYSARG_2, + SYSARG_3, + SYSARG_4, + SYSARG_5, + SYSARG_6, + SYSARG_RESULT, + STACK_POINTER, + INSTR_POINTER, + RTLD_FINI, + STATE_FLAGS, + USERARG_1, +} Reg; + +extern word_t peek_reg(const Tracee *tracee, RegVersion version, Reg reg); +extern void poke_reg(Tracee *tracee, Reg reg, word_t value); + +/* tracee/mem.h */ +/* make read_data / write_data pythonic */ +%apply (char *STRING, size_t LENGTH) { (const void *src_tracer, word_t size2) }; +extern int write_data(const Tracee *tracee, word_t dest_tracee, const void *src_tracer, word_t size2); + + %include +%rename(read_data) read_data_for_python; +%cstring_output_withsize(void *dest_tracer, int *size2); +%inline %{ +void read_data_for_python(const Tracee *tracee, word_t src_tracee, void *dest_tracer, int *size2) +{ + int res = read_data(tracee, dest_tracer, src_tracee, *size2); + /* in case of error we return empty string */ + if (res) + *size2 = 0; +} +%} + +/* 
extension/extention.h */ +typedef enum { + GUEST_PATH, + HOST_PATH, + SYSCALL_ENTER_START, + SYSCALL_ENTER_END, + SYSCALL_EXIT_START, + SYSCALL_EXIT_END, + NEW_STATUS, + INHERIT_PARENT, + INHERIT_CHILD, + SYSCALL_CHAINED_ENTER, + SYSCALL_CHAINED_EXIT, + INITIALIZATION, + REMOVED, + PRINT_CONFIG, + PRINT_USAGE, +} ExtensionEvent; diff --git a/proot/proot_linux/extension/python/python.c b/proot/proot_linux/extension/python/python.c new file mode 100644 index 0000000..f3ba13c --- /dev/null +++ b/proot/proot_linux/extension/python/python.c @@ -0,0 +1,190 @@ +#include +#include +#include +#include + +#include "extension/extension.h" +#include "cli/note.h" +#include "path/temp.h" + +/* FIXME: need to handle error code properly */ + +static PyObject *python_callback_func; + +//static bool is_seccomp_disabling_done = false; +/* List of syscalls handled by this extensions. */ +static FilteredSysnum filtered_sysnums[] = { + FILTERED_SYSNUM_END, +}; + +/* build by swig */ +extern void init_proot(void); +extern void PyInit__proot(void); + +/* create python files */ +extern unsigned char _binary_python_extension_py_start; +extern unsigned char _binary_python_extension_py_end; +extern unsigned char _binary_proot_py_start; +extern unsigned char _binary_proot_py_end; + +static int create_python_file(const char *tmp_dir, const char *python_file_name, unsigned char *start_file, unsigned char *end_file) +{ + void *start = (void *) start_file; + size_t size = end_file - start_file; + char python_full_file_name[PATH_MAX]; + int fd; + int status; + + status = snprintf(python_full_file_name, PATH_MAX, "%s/%s", tmp_dir, python_file_name); + if (status < 0 || status >= PATH_MAX) { + status = -1; + } else { + fd = open(python_full_file_name, O_WRONLY | O_CREAT, S_IRWXU); + if (fd >= 0) { + status = write(fd, start, size); + close(fd); + } + } + + return status>0?0:-1; +} + +static int create_python_extension(const char *tmp_dir) +{ + return create_python_file(tmp_dir, "python_extension.py", 
+ &_binary_python_extension_py_start, + &_binary_python_extension_py_end); +} + +static int create_proot(const char *tmp_dir) +{ + return create_python_file(tmp_dir, "proot.py", + &_binary_proot_py_start, + &_binary_proot_py_end); +} + +/* init python once */ +void init_python_env() +{ + static bool is_done = false; + + if (!is_done) { + char path_insert[PATH_MAX]; + PyObject *pName, *pModule; + const char *tmp_dir; + int status; + + tmp_dir = create_temp_directory(NULL, "proot-python"); + status = snprintf(path_insert, PATH_MAX, "sys.path.insert(0, '%s')", tmp_dir); + if (status < 0 || status >= PATH_MAX) { + note(NULL, ERROR, USER, "Unable to create tmp directory\n"); + } else if (create_python_extension(tmp_dir) || create_proot(tmp_dir)) { + note(NULL, ERROR, USER, "Unable to create python file\n"); + is_done = true; + } else { + Py_Initialize(); +#if PY_VERSION_HEX >= 0x03000000 + PyInit__proot(); +#else + init_proot(); +#endif + PyRun_SimpleString("import sys"); + PyRun_SimpleString(path_insert); + pName = PyUnicode_FromString("python_extension"); + if (pName) { + pModule = PyImport_Import(pName); + Py_DECREF(pName); + if (pModule) { + python_callback_func = PyObject_GetAttrString(pModule, "python_callback"); + if (python_callback_func && PyCallable_Check(python_callback_func)) + ;//note(NULL, INFO, USER, "python_callback find\n"); + else + note(NULL, ERROR, USER, "python_callback_func error\n"); + } else { + PyErr_Print(); + note(NULL, ERROR, USER, "pModule error\n"); + } + } else + note(NULL, ERROR, USER, "pName error\n"); + is_done = true; + } + } +} + +/* call python callback */ +static int python_callback_func_wrapper(Extension *extension, ExtensionEvent event, intptr_t data1, intptr_t data2) +{ + int res = 0; + PyObject *pArgs; + PyObject *pValue; + + pArgs = PyTuple_New(4); + if (pArgs) { + /* setargs */ + pValue = PyLong_FromLong((long) extension); + if (!pValue) + note(NULL, ERROR, USER, "pValue allocation failure\n"); + PyTuple_SetItem(pArgs, 0, 
pValue); + + pValue = PyLong_FromLong(event); + if (!pValue) + note(NULL, ERROR, USER, "pValue allocation failure\n"); + PyTuple_SetItem(pArgs, 1, pValue); + + pValue = PyLong_FromLong(data1); + if (!pValue) + note(NULL, ERROR, USER, "pValue allocation failure\n"); + PyTuple_SetItem(pArgs, 2, pValue); + + pValue = PyLong_FromLong(data2); + if (!pValue) + note(NULL, ERROR, USER, "pValue allocation failure\n"); + PyTuple_SetItem(pArgs, 3, pValue); + + /* call function */ + pValue = PyObject_CallObject(python_callback_func, pArgs); + if (pValue != NULL) { + res = PyLong_AsLong(pValue); + Py_DECREF(pValue); + } else { + PyErr_Print(); + note(NULL, ERROR, USER, "fail to call callback\n"); + } + Py_DECREF(pArgs); + } else + note(NULL, ERROR, USER, "pArgs allocation failure\n"); + + return res; +} + +/** + * Handler for this @extension. It is triggered each time an @event + * occurred. See ExtensionEvent for the meaning of @data1 and @data2. + */ +int python_callback(Extension *extension, ExtensionEvent event, intptr_t data1, intptr_t data2) +{ + int res = 0; + + switch (event) { + case INITIALIZATION: + { + /* not working. 
Use 'export PROOT_NO_SECCOMP=1' */ + /*if (!is_seccomp_disabling_done) { + Tracee *tracee = TRACEE(extension); + + if (tracee->seccomp == ENABLED) + tracee->seccomp = DISABLING; + is_seccomp_disabling_done = true; + }*/ + init_python_env(); + res = python_callback_func_wrapper(extension, event, data1, data2); + + extension->filtered_sysnums = filtered_sysnums; + } + break; + default: + res = python_callback_func_wrapper(extension, event, data1, data2); + } + + return res; +} diff --git a/proot/proot_linux/extension/python/python_extension.py b/proot/proot_linux/extension/python/python_extension.py new file mode 100644 index 0000000..80c1058 --- /dev/null +++ b/proot/proot_linux/extension/python/python_extension.py @@ -0,0 +1,19 @@ +from proot import * +import ctypes +import imp + +client = None + +def python_callback(extension, event, data1, data2): + global client + res = 0 + + if event == 11: + if client: + print "Already have a client => refuse to use %s" % (ctypes.string_at(data1)) + else: + client = imp.load_source('client', ctypes.string_at(data1)) + if client: + return client.python_callback(extension, event, data1, data2) + + return 0 diff --git a/proot/proot_linux/loader/assembly-arm.h b/proot/proot_linux/loader/assembly-arm.h new file mode 100644 index 0000000..59a7fe0 --- /dev/null +++ b/proot/proot_linux/loader/assembly-arm.h @@ -0,0 +1,93 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/* According to the ARM EABI, all registers have undefined values at
 * program startup except:
 *
 * - the instruction pointer (r15)
 * - the stack pointer (r13)
 * - the rtld_fini pointer (r0)
 *
 * BRANCH() loads @stack_pointer into sp, zeroes r0 and moves
 * @destination into pc.  Control does not come back, hence the
 * trailing __builtin_unreachable().
 */
#define BRANCH(stack_pointer, destination) do { \
	asm volatile ( \
		"// Restore initial stack pointer. \n\t" \
		"mov sp, %0 \n\t" \
		" \n\t" \
		"// Clear rtld_fini. \n\t" \
		"mov r0, #0 \n\t" \
		" \n\t" \
		"// Start the program. \n\t" \
		"mov pc, %1 \n" \
		: /* no output */ \
		: "r" (stack_pointer), "r" (destination) \
		: "memory", "sp", "r0", "pc"); \
	__builtin_unreachable(); \
	} while (0)

/* PREPARE_ARGS_<n>() declares the local register variables arg1..arg<n>,
 * bound to r0..r5 in order, and initialises them with the given values.
 * Each variant builds on the previous one.  The blank line following a
 * trailing backslash terminates the macro and must be kept. */
#define PREPARE_ARGS_1(arg1_) \
	register word_t arg1 asm("r0") = arg1_; \

#define PREPARE_ARGS_3(arg1_, arg2_, arg3_) \
	PREPARE_ARGS_1(arg1_) \
	register word_t arg2 asm("r1") = arg2_; \
	register word_t arg3 asm("r2") = arg3_; \

#define PREPARE_ARGS_6(arg1_, arg2_, arg3_, arg4_, arg5_, arg6_) \
	PREPARE_ARGS_3(arg1_, arg2_, arg3_) \
	register word_t arg4 asm("r3") = arg4_; \
	register word_t arg5 asm("r4") = arg5_; \
	register word_t arg6 asm("r5") = arg6_;

/* Operand lists naming arg1..arg<n> with an "r" constraint.  Despite
 * the OUTPUT_ prefix, SYSCALL() pastes them into the *input* operand
 * section of its asm statement, right after "r" (number). */
#define OUTPUT_CONTRAINTS_1 \
	"r" (arg1)

#define OUTPUT_CONTRAINTS_3 \
	OUTPUT_CONTRAINTS_1, \
	"r" (arg2), "r" (arg3)

#define OUTPUT_CONTRAINTS_6 \
	OUTPUT_CONTRAINTS_3, \
	"r" (arg4), "r" (arg5), "r" (arg6)

/* Issue the system call @number_ with @nb_args arguments (1, 3 or 6,
 * i.e. the variants for which PREPARE_ARGS_<n> exists).  The number
 * goes in r7, "svc" traps into the kernel, and the value left in r0 is
 * the value of the whole statement expression. */
#define SYSCALL(number_, nb_args, args...) \
	({ \
		register word_t number asm("r7") = number_; \
		register word_t result asm("r0"); \
		PREPARE_ARGS_##nb_args(args) \
		asm volatile ( \
			"svc #0x00000000 \n\t" \
			: "=r" (result) \
			: "r" (number), \
			OUTPUT_CONTRAINTS_##nb_args \
			: "memory"); \
		result; \
	})

/* System call numbers handed to SYSCALL() as @number_.
 * NOTE(review): MMAP_OFFSET_SHIFT is 12 here while loader.c defaults it
 * to 0; presumably 192 is mmap2(), whose offset is expressed in
 * 4096-byte units -- confirm against the kernel syscall table. */
#define OPEN 5
#define CLOSE 6
#define MMAP 192
#define MMAP_OFFSET_SHIFT 12
#define EXECVE 11
#define EXIT 1
#define PRCTL 172
#define MPROTECT 125
/* PREPARE_ARGS_<n>() declares the local register variables arg1..arg<n>,
 * bound to x0..x5 in order, and initialises them with the given values.
 * Each variant builds on the previous one.  The blank line following a
 * trailing backslash terminates the macro and must be kept. */
#define PREPARE_ARGS_1(arg1_) \
	register word_t arg1 asm("x0") = arg1_; \

#define PREPARE_ARGS_3(arg1_, arg2_, arg3_) \
	PREPARE_ARGS_1(arg1_) \
	register word_t arg2 asm("x1") = arg2_; \
	register word_t arg3 asm("x2") = arg3_; \

#define PREPARE_ARGS_4(arg1_, arg2_, arg3_, arg4_) \
	PREPARE_ARGS_3(arg1_, arg2_, arg3_) \
	register word_t arg4 asm("x3") = arg4_; \

#define PREPARE_ARGS_6(arg1_, arg2_, arg3_, arg4_, arg5_, arg6_) \
	PREPARE_ARGS_4(arg1_, arg2_, arg3_, arg4_) \
	register word_t arg5 asm("x4") = arg5_; \
	register word_t arg6 asm("x5") = arg6_;

/* Operand lists naming arg1..arg<n> with an "r" constraint.  Despite
 * the OUTPUT_ prefix, SYSCALL() pastes them into the *input* operand
 * section of its asm statement, right after "r" (number). */
#define OUTPUT_CONTRAINTS_1 \
	"r" (arg1)

#define OUTPUT_CONTRAINTS_3 \
	OUTPUT_CONTRAINTS_1, \
	"r" (arg2), "r" (arg3)

#define OUTPUT_CONTRAINTS_4 \
	OUTPUT_CONTRAINTS_3, \
	"r" (arg4)

#define OUTPUT_CONTRAINTS_6 \
	OUTPUT_CONTRAINTS_4, \
	"r" (arg5), "r" (arg6)

/* Issue the system call @number_ with @nb_args arguments (1, 3, 4 or
 * 6, i.e. the variants for which PREPARE_ARGS_<n> exists).  The number
 * goes in x8, "svc" traps into the kernel, and the value left in x0 is
 * the value of the whole statement expression. */
#define SYSCALL(number_, nb_args, args...) \
	({ \
		register word_t number asm("x8") = number_; \
		register word_t result asm("x0"); \
		PREPARE_ARGS_##nb_args(args) \
		asm volatile ( \
			"svc #0x00000000 \n\t" \
			: "=r" (result) \
			: "r" (number), \
			OUTPUT_CONTRAINTS_##nb_args \
			: "memory"); \
		result; \
	})

/* System call numbers handed to SYSCALL() as @number_.  Unlike the
 * other architecture headers, this one defines OPENAT rather than OPEN
 * and does not define MMAP_OFFSET_SHIFT (loader.c then defaults it
 * to 0). */
#define OPENAT 56
#define CLOSE 57
#define MMAP 222
#define EXECVE 221
#define EXIT 93
#define PRCTL 167
#define MPROTECT 226
/* According to the x86 ABI, all registers have undefined values at
 * program startup except:
 *
 * - the instruction pointer (eip)
 * - the stack pointer (esp)
 * - the rtld_fini pointer (edx)
 * - the system flags (eflags)
 *
 * BRANCH() loads @stack_pointer into %esp, resets the flags by popping
 * a zero into them, zeroes %edx and jumps to @destination (which the
 * "a" constraint places in %eax).  Control does not come back, hence
 * the trailing __builtin_unreachable().
 */
#define BRANCH(stack_pointer, destination) do { \
	asm volatile ( \
		"// Restore initial stack pointer. \n\t" \
		"movl %0, %%esp \n\t" \
		" \n\t" \
		"// Clear state flags. \n\t" \
		"pushl $0 \n\t" \
		"popfl \n\t" \
		" \n\t" \
		"// Clear rtld_fini. \n\t" \
		"movl $0, %%edx \n\t" \
		" \n\t" \
		"// Start the program. \n\t" \
		"jmpl *%%eax \n" \
		: /* no output */ \
		: "irm" (stack_pointer), "a" (destination) \
		: "memory", "cc", "esp", "edx"); \
	__builtin_unreachable(); \
	} while (0)

/* System call trampolines implemented in loader/assembly.S.  The
 * numeric suffix is the number of system call arguments; SYSCALL()
 * selects one of them by pasting its nb_args parameter onto
 * "syscall_". */
extern word_t syscall_6(word_t number,
			word_t arg1, word_t arg2, word_t arg3,
			word_t arg4, word_t arg5, word_t arg6);

extern word_t syscall_3(word_t number, word_t arg1, word_t arg2, word_t arg3);

extern word_t syscall_1(word_t number, word_t arg1);
syscall_##nb_args(number, args) + +#define OPEN 5 +#define CLOSE 6 +#define MMAP 192 +#define MMAP_OFFSET_SHIFT 12 +#define EXECVE 11 +#define EXIT 1 +#define PRCTL 172 +#define MPROTECT 125 diff --git a/proot/proot_linux/loader/assembly-x86_64.h b/proot/proot_linux/loader/assembly-x86_64.h new file mode 100644 index 0000000..6f431be --- /dev/null +++ b/proot/proot_linux/loader/assembly-x86_64.h @@ -0,0 +1,96 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +/* According to the x86_64 ABI, all registers have undefined values at + * program startup except: + * + * - the instruction pointer (rip) + * - the stack pointer (rsp) + * - the rtld_fini pointer (rdx) + * - the system flags (rflags) + */ +#define BRANCH(stack_pointer, destination) do { \ + asm volatile ( \ + "// Restore initial stack pointer. \n\t" \ + "movq %0, %%rsp \n\t" \ + " \n\t" \ + "// Clear state flags. \n\t" \ + "pushq $0 \n\t" \ + "popfq \n\t" \ + " \n\t" \ + "// Clear rtld_fini. \n\t" \ + "movq $0, %%rdx \n\t" \ + " \n\t" \ + "// Start the program. 
#if defined(__i386__)
	.text

/*
	Register/stack mapping performed by the trampolines below.  The
	stack offsets are relative to %esp at function entry, where
	0(%esp) holds the return address.

	ABI	user-land	kernel-land
	======	=========	===========
	number	%eax		%eax
	arg1	%edx		%ebx
	arg2	%ecx		%ecx
	arg3	4(%esp)		%edx
	arg4	8(%esp)		%esi
	arg5	12(%esp)	%edi
	arg6	16(%esp)	%ebp
	result	N/A		%eax
*/

/* syscall_6: system call with six arguments.  Four registers are
 * saved first, so the stack arguments sit 0x10 bytes further away
 * than at entry (arg3 at 0x14(%esp), ..., arg6 at 0x20(%esp)).  */
.globl syscall_6
.type syscall_6, @function
syscall_6:
	/* Callee-saved registers.  */
	pushl %ebp	// %esp -= 0x04
	pushl %edi	// %esp -= 0x08
	pushl %esi	// %esp -= 0x0c
	pushl %ebx	// %esp -= 0x10

//	mov %eax, %eax		// number
	mov %edx, %ebx		// arg1
//	mov %ecx, %ecx		// arg2
	mov 0x14(%esp), %edx	// arg3
	mov 0x18(%esp), %esi	// arg4
	mov 0x1c(%esp), %edi	// arg5
	mov 0x20(%esp), %ebp	// arg6

	int $0x80

	/* Restore the saved registers in reverse push order.  */
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp

//	mov %eax, %eax		// result
	ret

/* syscall_3: system call with three arguments.  Only %ebx is saved,
 * so arg3 is found at 0x8(%esp).  */
.globl syscall_3
.type syscall_3, @function
syscall_3:
	pushl %ebx
	mov %edx, %ebx
	mov 0x8(%esp), %edx
	int $0x80
	popl %ebx
	ret

/* syscall_1: system call with a single argument, moved from %edx to
 * %ebx.  */
.globl syscall_1
.type syscall_1, @function
syscall_1:
	pushl %ebx
	mov %edx, %ebx
	int $0x80
	popl %ebx
	ret

#endif /* defined(__i386__) */
zN1(-3tIFAmMd%IAbi7llj1rJ=HtvPC6}%1T4C z*F;-GMzd)P=9SH1SHTf|Wnj$tzXFg=jL8VZ&S?9gCK#8>c`qWZ8S2UcHTBO4QoHGV zbKg3sDYFW%hj8)&43pfhU6_0^3`(f*7GiOC0+ZWaESW)AT>@ugu~pRiGD0 z+O5#Phb)wIKO4#+#^}{f}+I45e(mwVV?oD2fe7R zmGM89te*lNL>IK0dnKzHUKgQzDaHqQ(<7b-asTAWh_em+IJ#zlMJI}02~q3DapMm{ zvMq-e$X9b8rWDvR0qU7HpALGHwFpq`8OQkBBF#7j$k+xjzLhMRaOh}8Q&cD z9ppy-Fg^7YAY&U?soTIx-3C_bHn38+ft9)qkouC0)PLke-kvA+?-;Yo!!_P<1=6X>D}!;!q}AXter zdmXxbjdlWPNnnhpj%Zs;0|g~>4ZB7=Oi!VoSbs9(7C-LnZ*R6EF4tNrcTEu zo&$@D!c){z;-K+!zMOf`7{pFJjINghJPYD!0@VPoffzx@oo{VMDtF?j$bxv!OJxF4 zDh{e=)7HV=NpjVM&T7#4E|IeV+)u#$6?)DlfO#q&dg!_dPz$1xzIMPuLkIOxU87;XGOd5 z5WT2_7t$GN12SpNeZTM@$6BbpjNBN2w@Q$b;X{ ze8p~}0wR5VjOFF=%W{LhLGIhZb#>N7vuGUwq&)d@M*6`*%1(w%@~M)^@B_;QSvUCB`)+~b zX(?`fp~Jh#hh3YFbwNHOjFc=-S4^!5hJ2LP%yec5TC2*ZJ-R=geDVFb04tyT;xhdrMOhiwL)u0_*x-ug7asD2x2+9{f|Vs8hc1LQTf)LK~Z-P9?o|N3SSN4=Tv|Z|*|~ zb=Q4QVUvUT!70HhwBX80QB8Z-CW3tyRgMiB5&`&B^y`9R8W)ASa&b_o`dgs5n5Hk$ zd2Iu5uY+;TKu}lHKbx5w-7L%cSD9tiWDAe^|FMuOhy^eZS ziu_vW#6{5t{g9aAK2CxM#1zCYLSiznpE=OV3k57fUZ}$4rDBSeRuR%VE;rnk4#Lm+ zG`lpJP}2%Hv(xhMy%1a8Wy2D7K=j$8fX*xJ>OT z6G~U~3TA;Br+TCt$VsW#%<<7>< zJssU+Ozrw0Gm>M9U^*0oFnq7SV7u>+I=n52ZAa25W9b)VSGYQo>QQ z5y3V#$D^#P`Oavgm2b2!+F}P~U0uzaS#yio(bFySf~XBEmYFirkC~Jv*45l8({Qq- zIMKMh(C*$YX2zOZqOHiNxs~k9XcrbqS+=kUcxzLvyL)1-SYc0F8}dVeTJ4e%qlnnuialfVH}!Ny=f^u@o1(Fv zjo8L$*@F4)9VXUz=X~sEk2w!=Nz9MOT3C2K)h8OWr|<*^FCQpq7>BAQ*0I?HVc?@C zJ*=IkvAMinoBOVpueIMxITAdUnZ_REO39G+Apbz&>$MM*y&A#2+Uo-Fb#dH7@VMt4 z58ykV4<-E2^Qaear}teiqzq4g1&?+4fdJ8z(jjfG$#2)peok~h->m`dhEvNmQn?+0 z87F0U-;F=sh@>yvps9Ms;kT1-&^r4$Uk$fUbAYG$D+<>43K9pBdDqLlfsT7ldLTPV z*>6Dh?|ESNo@cL@G`$DCkR8Mt_y&BAp+9O06=lt>P&}%>rBWR2FEvcBi=PWjd%gR; zm|pM>(=_UR4HFe0qiJ*d_*$*6pR4iBRDmZ{z$etFREeKb-yr-30;uuTL)u&F9u0U8 z(wBIzCU+~%C)L;KOnaD?UZ?Hjhp7;U`KugHts(7IKB9oVS9@E-^tSenrv4eCzoUI$ z0DoURDI|VU93Xr^ylCMU1@^km+T*%ZOzm+x5e-&F8!Tyq_Ms=~<()PFqtCTE! 
zj>9e8aw~=B#NUzrZ}nX+9F<+JJubDE4{3W``v~uIJ#XRXT}KH2t?f3=hxR+yb8gi3 zoO_oC>F@IF@u((w@a!kN-}8cnU+^Fc`iPA0xL=5I7s|BTNm4S;iI>RFOYRTdu#=JM znDx4nGKJYmaY~37yJp&uc1nCfNA(M_U$;5?^`GmaR|qk9Nkh+pE3leG`(0`FA-^#t(1~ zjwxdbL_5GeDaJf9C@tp#jV?fqeL`PZ&ujGmbCd1MlfF4PZ_?@gXMk=bArRm@tpLQ= z@n>9sFP}cq7;dYLjT_@}`}x%vZEZH286LW=c*h&x(q%Sp#B9c_d6Pq#(LR%fyL-%N zxV^hKjJx*cXw2Ng!m%FdAROJ)*cPJ)P`IV1OFrJhtpQ)--ehFPqIm?*xyH()NIxJ{Ttn9&BUDK+9` zoY&;nS8yErVcUKVc^pb0qKo&sX6n_X|xpBL%7%?GW|Ms zjxPO7kjW$2?Y8}ajD$0F@3??{!Ik6%zc*wUWI)yjolG9tar(&%tt8_Ko~gSPI>gId zXK5)}GZu0-#)7F>j-N-t|2q4+1(v}E+s>IBJ7*rU?H#!@KWp1ZG7gMm=jiRdfc+QO zSQ^$1+|mAOt;k=-5~cEL`(Zd}o6`M`^qlQ1vibDW*H=GkFm}qezu4Aurp)~FE?_Tz On1k$P$IzZL?f(-eux+vc literal 0 HcmV?d00001 diff --git a/proot/proot_linux/loader/loader-m32 b/proot/proot_linux/loader/loader-m32 new file mode 100755 index 0000000000000000000000000000000000000000..b0be46a51c1288e088af5336cee293fda7c9b21f GIT binary patch literal 18544 zcmeHPdvH`$nm^~>b8Z?s?`}wdARPw~6*^ClNKgnSye0DX!Sr;Jn-@uUrn`fnqPUp= zJ#9vqa%-$wmAJLz&Q7t`JWw;)afgSCqF5DI*Tr3G2L;)%y0hZ4wM;E;@9#VJ-sIvq zTeDUBm({20ob$cTcfRj;&bhbm?M@!6U(>)jXHLuQmQk?^mD#@vC z_&CvmaDK1;9@2kBA^!-Et%v@r<|p3>GgC4MWDv+8kU=1WKn8&f0vQA{2xJh*Ado>I zgTU7gflbMexBQA8dN(IeZWtZW8GHQ1M|d{x`*iN>_2<$kuh*ZYtdrX=^xa|P?fVm$ zq_$m18L6lGs$szYA^p+74wl+<4$>id#2$SaCjCeA_77_0Cp)!SKvi+7lMT@ug3%|S z?t3jiS$~d<9wwuc9u>nU&mK5ge}<o0~V zdAgH5J51T6&(S=mv&{{B;W{cnLxUj5F|gaKW${%qgL;)?X8Q(sUh$RkyM4mc+3 zuB9}rdVepby7Xf;U$I-hjpW})Sm)|{pI$5lDSs)*`{tLu& zsnV^-vb>DYhX~HmYiI_DWqN}rc|3XI<~Nh?4AhCf5A~bhK$dqM$8Jm>OV*!Bp6NSx zt>(@6%;dI_)Y>!2`qS8tI0wmh`;POII;$D!y^?~e{&aHf85q>mzZbui^l$bZ6R%^T z7^^uJyIgZ7ukWNQoCC?@eMk98jn$lqUAc4^8hhP4=?o21kezy`);Qz^H98B8EBZUY zlGS)Vb4gM%^Tehtn>QriO>La%PbFp+ryiMElIofncze(N-wbU#VQDS46TbiEN1gA%H5a{cB%SU;E&1DY z`qOvP>0hEg1O71j64Xy2KaDyU^kKVy!tP1-|5+38i=P{)!XH}C_#+-ZEnA5FcsvR~ zT7o*~-_q%i@V0<0&GqzabvbhBKD}YDSk{L`={EpNP%nO-PE)R#l0hJYKn8&f0vQA{ z2xJh*Ado>IgFps>3<4PhzU~MtU$*S_z#KgJ-yEozAFK#g1S-ob=a?8n4LuQ*;yi&}f08btK}R@1paF(oPcIh!Hn>$rdvI 
ztQSdk1kG$qrrikoCPfSFQP6V~?a?|h?q)@2Ym~t)iq41MdB?$OxzGod41P;0ZlJ&j zjY>_@N0KGub`C2P%Zd0^hy9RopqKlcoL=R|7E|++J8WD!vvN8WC~KSiJQ>ri)WD2 z?5ClwBJGPLB;tQ>SoTHO+(w4){GJgk`z*9IWI{hF8287Da=`Nk=@%hthwx{9h*d8( zl+;Hwy~Clu`w+K7wkzw;A)>FaX)g%+Wx+#AMC-eQM6aQFSS9y&px!~9(%QoKZY8=5 z_B^VHqIpz_2=G~`Q`H>5fc){o7coZ{WcQ&?n@l3SBNE>PaV-1i(4B((=ct14I$m%a z6r~ztGf)LB)Cnc31bYjK=#A5BYV9_HU5}bK1TVL6Hpr=|#gH$&0nCwe3i2ixJb-Gv z2WAh5hlu$#nCCz|jmqTD02hIMI|gyy$QN+FUO%|iU~)Gc(!y=9G@9oyRQygRkKv3x z0>yTF$Rd(guzdWQN4X+56_9Bj{4yZ#GfYED3( z6{?XB=AS^EC1x6!Q4kl2nGNPDh|h?r05cUK;5aj*7EBPtEyOGbQx9S(DwA973`;QK z^%*`04P{tuYvyx)4=-nO#Lj%JCD%N=Nm0S%H7F?au%kH2Bo;@>dR?+Y8;S4PjvX}e zaq0S9h**j6a%~!N%B544OPhQ8cR=yx$?DL20yhP;C!1#zpS2o|IgzAUw7pvu$#R!L zd;?Nve2HH#pfkIqlWZ*fEBJ2yn2$qmRI>`7EzY{m7S}`{_j)a-r8*f;7@wt7lJ|*| z$-CEbj_dYJ=Jnp;)pKzL2=Y6pP)GvG(!86dtn}4RAs;z%e8Sv3<}DTEC~rEi@U7=V zb>0GAXlyOz_jCZ9$f z`7S|f@H{<;IW1FyAfR%(!7U8HY6d15b4WX@faMn*6j#k!l5He*8#R(Mi4DXC_5TE;`FuXVIG-d7n#vZ*oF%eg6SRx{_JsFd?@ph{X;f`O z3KhqzmYU0%iVezK>T7Kl^Q^}7wKU%cQu>TPl7`{gA#cFE1+}0TOR1MUC;bLOSR{+V zi?wpHe(_O&;+GR)rco#Xa>`{cctr+@-zw*#(~V(ER!cHtt1MN0KUxjui>^=3Zj8c?dM3vvr~$N#sfY*_PGs^ z=S3GneA{prR%@-H9}5EsVYFA`N(X5h4J0-2m^8M2 zhW;iSmIbiLsgnizLpFd#ve*X%xhxI>Q9bg8f;UG8zy*5LK{+3Kw^_`yO1ZMuAe+0_ zz|cEAt+tm*D<;!u|vX-l+|wt0J~C)yE7n2v4Ju7nw9;Y6sXy@h14jz}9b zBZ=59%aAqq#^TXfC7CEOc}eufZPO6ziA58Pyo3_0HxgIV2AbO`m|gMaa93Ao0qcrJ z+5%%rOwYnCYPRkKivE$$_!=$ynubQV>gxaV*s=MoFoMlv5#dx&jzR3r=2%Bh0)!LeyqyadxLAaIuYJR$^P&na z0D{fsjbiRwI^Qhb(noZC$Ie4yME@v@Z?>O*$1R`YTKOUI6n{q74mOHs^z(xF^Wv%` z{`Dh`Rc{hV(R{rpKz+>`tV#6F|HE6}|w z-WQ1MeenTtA7C1CPLqHo-+T*#rzh;Wr1*gB8gZc*FDf~GLK^ez)(&nJyPw3TaLn>+ zPMiuK{Z)yu(W`I|;xoIeh^^nkh@bISY2Y>e00s1l7^ZGmye;^8#Ph7d*HeyPDq`+_ z4oLSSHwEdF+=lp!1@Rd@C7@4<147@?ehA%J;?E*a#h({y32d(fpsR`>@Ye{c5q?=i z5a-2jD3;%d%R-M~`7VpS68v77l!{Nvlf<8tr!4-Iyh!{-dCB50$*bV;sR@362yoUo zxp489{HJ&X5kFOHzQSJX74{mhpdlBvYZ||YK624w{8&4pX*){95$!__D1E41B-Zi= zZ(prdyTh$L3y*MoksRX#`}4-z1hQ4Yy~5DtT+Pz1^ioioY;kBJ-D^k}*~_^^i&uV(@X| 
zmAP?ng+aO-F^FzcRK(mtUHk-xh~cz=92I*E@gtlIrM<3ExY-iHE~h7g4&Z6e5^+X< zO0fbTOC6rxC49@pPXV8gCqZ;5oJ~i2&NaRi{7+o`Ebs?h<8J{!=+e_&Hs8fpfj{T+ zzYzRU7r&UsyZAcrdtCf-@a--h>{y|EX|K`$#vma+g0m*{bkioj5E8nP>tdWgND53z z@8U>LPp9;7OTt3j&7UBm+~V`$@98h`hrY!B8Tee6e|0Z2V|0mKQ#l_^MFlnGXlhUw zqTYsDO@eCFs!D3eriw(BWK>C3l?zyC`I>cgOV@EcxeA)?q1G7Pa)WKrM6jm|*3Hpwbq5c&m`%NH zp>Ql5X~U-*o9&3SMjblb)D$y!I9wNQe-4fL9g@U|f1HmtcXh-QV-91HHjAf4`NPUI zn9-S`*KH_+^dqN9)~Qnhu&U6$r{kusE%~quSpQo^DbYAXszH-KvYP|DIRSM(z*)ty zqc>cR-4N`C0*sAcVaEM6*miVBpnXmMeokQv!y8*lbeGt4;NJsTYcJIi=os4aB-CoIt$`GnU6CbX_+ixQ5;^?S% ug~Rs4ZV /* bool, true, false, */ + +#define NO_LIBC_HEADER +#include "loader/script.h" +#include "compat.h" +#include "arch.h" + +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) + +#if GCC_VERSION < 40500 +#define __builtin_unreachable() +#endif + +#if defined(ARCH_X86_64) +# include "loader/assembly-x86_64.h" +#elif defined(ARCH_ARM_EABI) +# include "loader/assembly-arm.h" +#elif defined(ARCH_X86) +# include "loader/assembly-x86.h" +#elif defined(ARCH_ARM64) +# include "loader/assembly-arm64.h" +#else +# error "Unsupported architecture" +#endif + +#if !defined(MMAP_OFFSET_SHIFT) +# define MMAP_OFFSET_SHIFT 0 +#endif + +#define FATAL() do { \ + SYSCALL(EXIT, 1, 182); \ + __builtin_unreachable(); \ + } while (0) + +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/** + * Clear the memory from @start (inclusive) to @end (exclusive). + */ +static inline void clear(word_t start, word_t end) +{ + byte_t *start_misaligned; + byte_t *end_misaligned; + + word_t *start_aligned; + word_t *end_aligned; + + /* Compute the number of mis-aligned bytes. */ + word_t start_bytes = start % sizeof(word_t); + word_t end_bytes = end % sizeof(word_t); + + /* Compute aligned addresses. */ + start_aligned = (word_t *) (start_bytes ? start + sizeof(word_t) - start_bytes : start); + end_aligned = (word_t *) (end - end_bytes); + + /* Clear leading mis-aligned bytes. 
*/ + start_misaligned = (byte_t *) start; + while (start_misaligned < (byte_t *) start_aligned) + *start_misaligned++ = 0; + + /* Clear aligned bytes. */ + while (start_aligned < end_aligned) + *start_aligned++ = 0; + + /* Clear trailing mis-aligned bytes. */ + end_misaligned = (byte_t *) end_aligned; + while (end_misaligned < (byte_t *) end) + *end_misaligned++ = 0; +} + +/** + * Return the address of the last path component of @string_. Note + * that @string_ is not modified. + */ +static inline word_t __basename(word_t string_) +{ + byte_t *string = (byte_t *) string_; + byte_t *cursor; + + for (cursor = string; *cursor != 0; cursor++) + ; + + for (; *cursor != (byte_t) '/' && cursor > string; cursor--) + ; + + if (cursor != string) + cursor++; + + return (word_t) cursor; +} diff --git a/proot/proot_linux/loader/script.h b/proot/proot_linux/loader/script.h new file mode 100644 index 0000000..6ae7621 --- /dev/null +++ b/proot/proot_linux/loader/script.h @@ -0,0 +1,78 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
/* One statement of a load script.  @action holds one of the
 * LOAD_ACTION_* values below; the anonymous union carries the operands
 * for that action.  The structure is PACKED and made of word_t fields
 * only.  NOTE(review): the mapping between each LOAD_ACTION_* value
 * and the union member it uses is not visible in this header --
 * confirm against loader.c and the code that builds the script.  */
struct load_statement {
	word_t action;

	union {
		/* presumably the address of the path to open -- confirm */
		struct {
			word_t string_address;
		} open;

		/* operands of a memory mapping; @clear_length presumably
		 * is the number of bytes to zero afterwards -- confirm */
		struct {
			word_t addr;
			word_t length;
			word_t prot;
			word_t offset;
			word_t clear_length;
		} mmap;

		struct {
			word_t start;
		} make_stack_exec;

		/* initial stack pointer and entry point, plus values whose
		 * names mirror the AT_* auxiliary vector entries --
		 * TODO confirm how the loader uses them */
		struct {
			word_t stack_pointer;
			word_t entry_point;
			word_t at_phdr;
			word_t at_phent;
			word_t at_phnum;
			word_t at_entry;
			word_t at_execfn;
		} start;
	};
} PACKED;

typedef struct load_statement LoadStatement;

/* Size in bytes of a statement that uses the union member named @type:
 * the action word plus that member only, not the whole union.  */
#define LOAD_STATEMENT_SIZE(statement, type) \
	(sizeof((statement).action) + sizeof((statement).type))

/* Don't use enum, since sizeof(enum) doesn't have to be equal to
 * sizeof(word_t).  Keep values in the same order as their respective
 * actions appear in loader.c to get a chance GCC produces a jump
 * table.  */
#define LOAD_ACTION_OPEN_NEXT 0
#define LOAD_ACTION_OPEN 1
#define LOAD_ACTION_MMAP_FILE 2
#define LOAD_ACTION_MMAP_ANON 3
#define LOAD_ACTION_MAKE_STACK_EXEC 4
#define LOAD_ACTION_START_TRACED 5
#define LOAD_ACTION_START 6
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* lstat(2), */ +#include /* getcwd(2), lstat(2), */ +#include /* string(3), */ +#include /* bzero(3), */ +#include /* assert(3), */ +#include /* PATH_MAX, */ +#include /* E* */ +#include /* CIRCLEQ_*, */ +#include /* talloc_*, */ + +#include "path/binding.h" +#include "path/path.h" +#include "path/canon.h" +#include "cli/note.h" + +#include "compat.h" + +#define HEAD(tracee, side) \ + (side == GUEST \ + ? (tracee)->fs->bindings.guest \ + : (side == HOST \ + ? (tracee)->fs->bindings.host \ + : (tracee)->fs->bindings.pending)) + +#define NEXT(binding, side) \ + (side == GUEST \ + ? CIRCLEQ_NEXT(binding, link.guest) \ + : (side == HOST \ + ? CIRCLEQ_NEXT(binding, link.host) \ + : CIRCLEQ_NEXT(binding, link.pending))) + +#define CIRCLEQ_FOREACH_(tracee, binding, side) \ + for (binding = CIRCLEQ_FIRST(HEAD(tracee, side)); \ + binding != (void *) HEAD(tracee, side); \ + binding = NEXT(binding, side)) + +#define CIRCLEQ_INSERT_AFTER_(tracee, previous, binding, side) do { \ + switch (side) { \ + case GUEST: CIRCLEQ_INSERT_AFTER(HEAD(tracee, side), previous, binding, link.guest); break; \ + case HOST: CIRCLEQ_INSERT_AFTER(HEAD(tracee, side), previous, binding, link.host); break; \ + default: CIRCLEQ_INSERT_AFTER(HEAD(tracee, side), previous, binding, link.pending); break; \ + } \ + (void) talloc_reference(HEAD(tracee, side), binding); \ +} while (0) + +#define CIRCLEQ_INSERT_BEFORE_(tracee, next, binding, side) do { \ + switch (side) { \ + case GUEST: CIRCLEQ_INSERT_BEFORE(HEAD(tracee, side), next, binding, link.guest); break; \ + case HOST: CIRCLEQ_INSERT_BEFORE(HEAD(tracee, side), next, binding, link.host); break; \ + default: CIRCLEQ_INSERT_BEFORE(HEAD(tracee, side), next, binding, link.pending); break; \ + } \ + (void) 
talloc_reference(HEAD(tracee, side), binding); \ +} while (0) + +#define CIRCLEQ_INSERT_HEAD_(tracee, binding, side) do { \ + switch (side) { \ + case GUEST: CIRCLEQ_INSERT_HEAD(HEAD(tracee, side), binding, link.guest); break; \ + case HOST: CIRCLEQ_INSERT_HEAD(HEAD(tracee, side), binding, link.host); break; \ + default: CIRCLEQ_INSERT_HEAD(HEAD(tracee, side), binding, link.pending); break; \ + } \ + (void) talloc_reference(HEAD(tracee, side), binding); \ +} while (0) + +#define IS_LINKED(binding, link) \ + ((binding)->link.cqe_next != NULL && (binding)->link.cqe_prev != NULL) + +#define CIRCLEQ_REMOVE_(tracee, binding, name) do { \ + CIRCLEQ_REMOVE((tracee)->fs->bindings.name, binding, link.name);\ + (binding)->link.name.cqe_next = NULL; \ + (binding)->link.name.cqe_prev = NULL; \ + talloc_unlink((tracee)->fs->bindings.name, binding); \ +} while (0) + + +/** + * Print all bindings (verbose purpose). + */ +static void print_bindings(const Tracee *tracee) +{ + const Binding *binding; + + if (tracee->fs->bindings.guest == NULL) + return; + + CIRCLEQ_FOREACH_(tracee, binding, GUEST) { + if (compare_paths(binding->host.path, binding->guest.path) == PATHS_ARE_EQUAL) + note(tracee, INFO, USER, "binding = %s", binding->host.path); + else + note(tracee, INFO, USER, "binding = %s:%s", + binding->host.path, binding->guest.path); + } +} + +/** + * Get the binding for the given @path (relatively to the given + * binding @side). + */ +Binding *get_binding(const Tracee *tracee, Side side, const char path[PATH_MAX]) +{ + Binding *binding; + size_t path_length = strlen(path); + + /* Sanity checks. 
*/ + assert(path != NULL && path[0] == '/'); + + CIRCLEQ_FOREACH_(tracee, binding, side) { + Comparison comparison; + const Path *ref; + + switch (side) { + case GUEST: + ref = &binding->guest; + break; + + case HOST: + ref = &binding->host; + break; + + default: + assert(0); + return NULL; + } + + comparison = compare_paths2(ref->path, ref->length, path, path_length); + if ( comparison != PATHS_ARE_EQUAL + && comparison != PATH1_IS_PREFIX) + continue; + + /* Avoid false positive when a prefix of the rootfs is + * used as an asymmetric binding, ex.: + * + * proot -m /usr:/location /usr/local/slackware + */ + if ( side == HOST + && compare_paths(get_root(tracee), "/") != PATHS_ARE_EQUAL + && belongs_to_guestfs(tracee, path)) + continue; + + return binding; + } + + return NULL; +} + +/** + * Get the binding path for the given @path (relatively to the given + * binding @side). + */ +const char *get_path_binding(const Tracee *tracee, Side side, const char path[PATH_MAX]) +{ + const Binding *binding; + + binding = get_binding(tracee, side, path); + if (!binding) + return NULL; + + switch (side) { + case GUEST: + return binding->guest.path; + + case HOST: + return binding->host.path; + + default: + assert(0); + return NULL; + } +} + +/** + * Return the path to the guest rootfs for the given @tracee, from the + * host point-of-view obviously. Depending on whether + * initialize_bindings() was called or not, the path is retrieved from + * the "bindings.guest" list or from the "bindings.pending" list, + * respectively. 
+ */ +const char *get_root(const Tracee* tracee) +{ + const Binding *binding; + + if (tracee == NULL || tracee->fs == NULL) + return NULL; + + if (tracee->fs->bindings.guest == NULL) { + if (tracee->fs->bindings.pending == NULL + || CIRCLEQ_EMPTY(tracee->fs->bindings.pending)) + return NULL; + + binding = CIRCLEQ_LAST(tracee->fs->bindings.pending); + if (compare_paths(binding->guest.path, "/") != PATHS_ARE_EQUAL) + return NULL; + + return binding->host.path; + } + + assert(!CIRCLEQ_EMPTY(tracee->fs->bindings.guest)); + + binding = CIRCLEQ_LAST(tracee->fs->bindings.guest); + + assert(strcmp(binding->guest.path, "/") == 0); + + return binding->host.path; +} + +/** + * Substitute the guest path (if any) with the host path in @path. + * This function returns: + * + * * -errno if an error occured + * + * * 0 if it is a binding location but no substitution is needed + * ("symetric" binding) + * + * * 1 if it is a binding location and a substitution was performed + * ("asymmetric" binding) + */ +int substitute_binding(const Tracee *tracee, Side side, char path[PATH_MAX]) +{ + const Path *reverse_ref; + const Path *ref; + const Binding *binding; + + binding = get_binding(tracee, side, path); + if (!binding) + return -ENOENT; + + /* Is it a "symetric" binding? */ + if (!binding->need_substitution) + return 0; + + switch (side) { + case GUEST: + ref = &binding->guest; + reverse_ref = &binding->host; + break; + + case HOST: + ref = &binding->host; + reverse_ref = &binding->guest; + break; + + default: + assert(0); + return -EACCES; + } + + substitute_path_prefix(path, ref->length, reverse_ref->path, reverse_ref->length); + + return 1; +} + +/** + * Remove @binding from all the @tracee's lists of bindings it belongs to. 
+ */ +void remove_binding_from_all_lists(const Tracee *tracee, Binding *binding) +{ + if (IS_LINKED(binding, link.pending)) + CIRCLEQ_REMOVE_(tracee, binding, pending); + + if (IS_LINKED(binding, link.guest)) + CIRCLEQ_REMOVE_(tracee, binding, guest); + + if (IS_LINKED(binding, link.host)) + CIRCLEQ_REMOVE_(tracee, binding, host); +} + +/** + * Insert @binding into the list of @bindings, in a sorted manner so + * as to make the substitution of nested bindings determistic, ex.: + * + * -b /bin:/foo/bin -b /usr/bin/more:/foo/bin/more + * + * Note: "nested" from the @side point-of-view. + */ +static void insort_binding(const Tracee *tracee, Side side, Binding *binding) +{ + Binding *iterator; + Binding *previous = NULL; + Binding *next = CIRCLEQ_FIRST(HEAD(tracee, side)); + + /* Find where it should be added in the list. */ + CIRCLEQ_FOREACH_(tracee, iterator, side) { + Comparison comparison; + const Path *binding_path; + const Path *iterator_path; + + switch (side) { + case PENDING: + case GUEST: + binding_path = &binding->guest; + iterator_path = &iterator->guest; + break; + + case HOST: + binding_path = &binding->host; + iterator_path = &iterator->host; + break; + + default: + assert(0); + return; + } + + comparison = compare_paths2(binding_path->path, binding_path->length, + iterator_path->path, iterator_path->length); + switch (comparison) { + case PATHS_ARE_EQUAL: + if (side == HOST) { + previous = iterator; + break; + } + + if (tracee->verbose > 0 && getenv("PROOT_IGNORE_MISSING_BINDINGS") == NULL) { + note(tracee, WARNING, USER, + "both '%s' and '%s' are bound to '%s', " + "only the last binding is active.", + iterator->host.path, binding->host.path, + binding->guest.path); + } + + /* Replace this iterator with the new binding. */ + CIRCLEQ_INSERT_AFTER_(tracee, iterator, binding, side); + remove_binding_from_all_lists(tracee, iterator); + return; + + case PATH1_IS_PREFIX: + /* The new binding contains the iterator. 
*/ + previous = iterator; + break; + + case PATH2_IS_PREFIX: + /* The iterator contains the new binding. + * Use the deepest container. */ + if (next == (void *) HEAD(tracee, side)) + next = iterator; + break; + + case PATHS_ARE_NOT_COMPARABLE: + break; + + default: + assert(0); + return; + } + } + + /* Insert this binding in the list. */ + if (previous != NULL) + CIRCLEQ_INSERT_AFTER_(tracee, previous, binding, side); + else if (next != (void *) HEAD(tracee, side)) + CIRCLEQ_INSERT_BEFORE_(tracee, next, binding, side); + else + CIRCLEQ_INSERT_HEAD_(tracee, binding, side); +} + +/** + * c.f. function above. + */ +static void insort_binding2(const Tracee *tracee, Binding *binding) +{ + binding->need_substitution = + compare_paths(binding->host.path, binding->guest.path) != PATHS_ARE_EQUAL; + + insort_binding(tracee, GUEST, binding); + insort_binding(tracee, HOST, binding); +} + +/** + * Create and insert a new binding (@host_path:@guest_path) into the + * list of @tracee's bindings. The Talloc parent of this new binding + * is @context. This function returns NULL if an error occurred, + * otherwise a pointer to the newly created binding. + */ +Binding *insort_binding3(const Tracee *tracee, const TALLOC_CTX *context, + const char host_path[PATH_MAX], + const char guest_path[PATH_MAX]) +{ + Binding *binding; + + binding = talloc_zero(context, Binding); + if (binding == NULL) + return NULL; + + strcpy(binding->host.path, host_path); + strcpy(binding->guest.path, guest_path); + + binding->host.length = strlen(binding->host.path); + binding->guest.length = strlen(binding->guest.path); + + insort_binding2(tracee, binding); + + return binding; +} + +/** + * Free all bindings from @bindings. + * + * Note: this is a Talloc destructor. + */ +static int remove_bindings(Bindings *bindings) +{ + Binding *binding; + Tracee *tracee; + + /* Unlink all bindings from the @link list. 
*/ +#define CIRCLEQ_REMOVE_ALL(name) do { \ + binding = CIRCLEQ_FIRST(bindings); \ + while (binding != (void *) bindings) { \ + Binding *next = CIRCLEQ_NEXT(binding, link.name);\ + CIRCLEQ_REMOVE_(tracee, binding, name); \ + binding = next; \ + } \ +} while (0) + + /* Search which link is used by this list. */ + tracee = TRACEE(bindings); + if (bindings == tracee->fs->bindings.pending) + CIRCLEQ_REMOVE_ALL(pending); + else if (bindings == tracee->fs->bindings.guest) + CIRCLEQ_REMOVE_ALL(guest); + else if (bindings == tracee->fs->bindings.host) + CIRCLEQ_REMOVE_ALL(host); + + bzero(bindings, sizeof(Bindings)); + + return 0; +} + +/** + * Allocate a new binding "@host:@guest" and attach it to + * @tracee->fs->bindings.pending. This function complains about + * missing @host path only if @must_exist is true. This function + * returns the allocated binding on success, NULL on error. + */ +Binding *new_binding(Tracee *tracee, const char *host, const char *guest, bool must_exist) +{ + Binding *binding; + char base[PATH_MAX]; + int status; + + /* Lasy allocation of the list of bindings specified by the + * user. This list will be used by initialize_bindings(). */ + if (tracee->fs->bindings.pending == NULL) { + tracee->fs->bindings.pending = talloc_zero(tracee->fs, Bindings); + if (tracee->fs->bindings.pending == NULL) + return NULL; + CIRCLEQ_INIT(tracee->fs->bindings.pending); + talloc_set_destructor(tracee->fs->bindings.pending, remove_bindings); + } + + /* Allocate an empty binding. */ + binding = talloc_zero(tracee->ctx, Binding); + if (binding == NULL) + return NULL; + + /* Canonicalize the host part of the binding, as expected by + * get_binding(). 
*/ + status = realpath2(tracee->reconf.tracee, binding->host.path, host, true); + if (status < 0) { + if (must_exist && getenv("PROOT_IGNORE_MISSING_BINDINGS") == NULL) + note(tracee, WARNING, INTERNAL, "can't sanitize binding \"%s\": %s", + host, strerror(-status)); + goto error; + } + binding->host.length = strlen(binding->host.path); + + /* Symetric binding? */ + guest = guest ?: host; + + /* When not absolute, assume the guest path is relative to the + * current working directory, as with ``-b .`` for instance. */ + if (guest[0] != '/') { + status = getcwd2(tracee->reconf.tracee, base); + if (status < 0) { + note(tracee, WARNING, INTERNAL, "can't sanitize binding \"%s\": %s", + binding->guest.path, strerror(-status)); + goto error; + } + } + else + strcpy(base, "/"); + + status = join_paths(2, binding->guest.path, base, guest); + if (status < 0) { + note(tracee, WARNING, SYSTEM, "can't sanitize binding \"%s\"", + binding->guest.path); + goto error; + } + binding->guest.length = strlen(binding->guest.path); + + /* Keep the list of bindings specified by the user ordered, + * for the sake of consistency. For instance binding to "/" + * has to be the last in the list. */ + insort_binding(tracee, PENDING, binding); + + return binding; + +error: + TALLOC_FREE(binding); + return NULL; +} + +/** + * Canonicalize the guest part of the given @binding, insert it into + * @tracee->fs->bindings.guest and @tracee->fs->bindings.host. This + * function returns -1 if an error occured, 0 otherwise. + */ +static void initialize_binding(Tracee *tracee, Binding *binding) +{ + char path[PATH_MAX]; + struct stat statl; + int status; + + /* All bindings but "/" must be canonicalized. The exception + * for "/" is required to bootstrap the canonicalization. 
*/ + if (compare_paths(binding->guest.path, "/") != PATHS_ARE_EQUAL) { + bool dereference; + size_t length; + + strcpy(path, binding->guest.path); + length = strlen(path); + assert(length > 0); + + /* Does the user explicitly tell not to dereference + * guest path? */ + dereference = (path[length - 1] != '!'); + if (!dereference) + path[length - 1] = '\0'; + + /* Initial state before canonicalization. */ + strcpy(binding->guest.path, "/"); + + /* Remember the type of the final component, it will + * be used in build_glue() later. */ + status = lstat(binding->host.path, &statl); + tracee->glue_type = (status < 0 || S_ISBLK(statl.st_mode) || S_ISCHR(statl.st_mode) + ? S_IFREG : statl.st_mode & S_IFMT); + + /* Sanitize the guest path of the binding within the + alternate rootfs since it is assumed by + substitute_binding(). */ + status = canonicalize(tracee, path, dereference, binding->guest.path, 0); + if (status < 0) { + note(tracee, WARNING, INTERNAL, + "sanitizing the guest path (binding) \"%s\": %s", + path, strerror(-status)); + return; + } + + /* Remove the trailing "/" or "/." as expected by + * substitute_binding(). */ + chop_finality(binding->guest.path); + + /* Disable definitively the creation of the glue for + * this binding. */ + tracee->glue_type = 0; + } + + binding->guest.length = strlen(binding->guest.path); + + insort_binding2(tracee, binding); +} + +/** + * Add bindings induced by @new_binding when @tracee is being sub-reconfigured. + * For example, if the previous configuration ("-r /rootfs1") contains this + * binding: + * + * -b /home/ced:/usr/local/ced + * + * and if the current configuration ("-r /rootfs2") introduces such a new + * binding: + * + * -b /usr:/media + * + * then the following binding is induced: + * + * -b /home/ced:/media/local/ced + */ +static void add_induced_bindings(Tracee *tracee, const Binding *new_binding) +{ + Binding *old_binding; + char path[PATH_MAX]; + int status; + + /* Only for reconfiguration. 
*/ + if (tracee->reconf.tracee == NULL) + return; + + /* From the example, PRoot has already converted "-b /usr:/media" into + * "-b /rootfs1/usr:/media" in order to ensure the host part is really a + * host path. Here, the host part is converted back to "/usr" since the + * comparison can't be made on "/rootfs1/usr". + */ + strcpy(path, new_binding->host.path); + status = detranslate_path(tracee->reconf.tracee, path, NULL); + if (status < 0) + return; + + CIRCLEQ_FOREACH_(tracee->reconf.tracee, old_binding, GUEST) { + Binding *induced_binding; + Comparison comparison; + char path2[PATH_MAX]; + size_t prefix_length; + + /* Check if there's an induced binding by searching a common + * path prefix in between new/old bindings: + * + * -b /home/ced:[/usr]/local/ced + * -b [/usr]:/media + */ + comparison = compare_paths(path, old_binding->guest.path); + if (comparison != PATH1_IS_PREFIX) + continue; + + /* Convert the path of this induced binding to the new + * filesystem namespace. From the example, "/usr/local/ced" is + * converted into "/media/local/ced". Note: substitute_binding + * can't be used in this case since it would expect + * "/rootfs1/usr/local/ced instead". + */ + prefix_length = strlen(path); + if (prefix_length == 1) + prefix_length = 0; + + status = join_paths(2, path2, new_binding->guest.path, old_binding->guest.path + prefix_length); + if (status < 0) + continue; + + /* Install the induced binding. 
From the example: + * + * -b /home/ced:/media/local/ced + */ + induced_binding = talloc_zero(tracee->ctx, Binding); + if (induced_binding == NULL) + continue; + + strcpy(induced_binding->host.path, old_binding->host.path); + strcpy(induced_binding->guest.path, path2); + + induced_binding->host.length = strlen(induced_binding->host.path); + induced_binding->guest.length = strlen(induced_binding->guest.path); + + VERBOSE(tracee, 2, "induced binding: %s:%s (old) & %s:%s (new) -> %s:%s (induced)", + old_binding->host.path, old_binding->guest.path, path, new_binding->guest.path, + induced_binding->host.path, induced_binding->guest.path); + + insort_binding2(tracee, induced_binding); + } +} + +/** + * Allocate @tracee->fs->bindings.guest and + * @tracee->fs->bindings.host, then call initialize_binding() on each + * binding listed in @tracee->fs->bindings.pending. + */ +int initialize_bindings(Tracee *tracee) +{ + Binding *binding; + + /* Sanity checks. */ + assert(get_root(tracee) != NULL); + assert(tracee->fs->bindings.pending != NULL); + assert(tracee->fs->bindings.guest == NULL); + assert(tracee->fs->bindings.host == NULL); + + /* Allocate @tracee->fs->bindings.guest and + * @tracee->fs->bindings.host. */ + tracee->fs->bindings.guest = talloc_zero(tracee->fs, Bindings); + tracee->fs->bindings.host = talloc_zero(tracee->fs, Bindings); + if (tracee->fs->bindings.guest == NULL || tracee->fs->bindings.host == NULL) { + note(tracee, ERROR, INTERNAL, "can't allocate enough memory"); + TALLOC_FREE(tracee->fs->bindings.guest); + TALLOC_FREE(tracee->fs->bindings.host); + return -1; + } + + CIRCLEQ_INIT(tracee->fs->bindings.guest); + CIRCLEQ_INIT(tracee->fs->bindings.host); + + talloc_set_destructor(tracee->fs->bindings.guest, remove_bindings); + talloc_set_destructor(tracee->fs->bindings.host, remove_bindings); + + /* The binding to "/" has to be installed before other + * bindings since this former is required to canonicalize + * these latters. 
*/ + binding = CIRCLEQ_LAST(tracee->fs->bindings.pending); + assert(compare_paths(binding->guest.path, "/") == PATHS_ARE_EQUAL); + + /* Call initialize_binding() on each pending binding in + * reverse order: the last binding "/" is used to bootstrap + * the canonicalization. */ + while (binding != (void *) tracee->fs->bindings.pending) { + Binding *previous; + previous = CIRCLEQ_PREV(binding, link.pending); + + /* Canonicalize then insert this binding into + * tracee->fs->bindings.guest/host. */ + initialize_binding(tracee, binding); + + /* Add induced bindings on sub-reconfiguration. */ + add_induced_bindings(tracee, binding); + + binding = previous; + } + + TALLOC_FREE(tracee->fs->bindings.pending); + + if (tracee->verbose > 0) + print_bindings(tracee); + + return 0; +} diff --git a/proot/proot_linux/path/binding.h b/proot/proot_linux/path/binding.h new file mode 100644 index 0000000..b7f8d46 --- /dev/null +++ b/proot/proot_linux/path/binding.h @@ -0,0 +1,58 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef BINDING_H +#define BINDING_H + +#include /* PATH_MAX, */ +#include + +#include "tracee/tracee.h" +#include "path.h" + +typedef struct binding { + Path host; + Path guest; + + bool need_substitution; + bool must_exist; + + struct { + CIRCLEQ_ENTRY(binding) pending; + CIRCLEQ_ENTRY(binding) guest; + CIRCLEQ_ENTRY(binding) host; + } link; +} Binding; + +typedef CIRCLEQ_HEAD(bindings, binding) Bindings; + +extern Binding *insort_binding3(const Tracee *tracee, const TALLOC_CTX *context, + const char host_path[PATH_MAX], const char guest_path[PATH_MAX]); +extern Binding *new_binding(Tracee *tracee, const char *host, const char *guest, bool must_exist); +extern int initialize_bindings(Tracee *tracee); +extern const char *get_path_binding(const Tracee* tracee, Side side, const char path[PATH_MAX]); +extern Binding *get_binding(const Tracee *tracee, Side side, const char path[PATH_MAX]); +extern const char *get_root(const Tracee* tracee); +extern int substitute_binding(const Tracee* tracee, Side side, char path[PATH_MAX]); +extern void remove_binding_from_all_lists(const Tracee *tracee, Binding *binding); + +#endif /* BINDING_H */ diff --git a/proot/proot_linux/path/canon.c b/proot/proot_linux/path/canon.c new file mode 100644 index 0000000..4b3c006 --- /dev/null +++ b/proot/proot_linux/path/canon.c @@ -0,0 +1,372 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* pid_t */ +#include /* PATH_MAX, */ +#include /* MAXSYMLINKS, */ +#include /* E*, */ +#include /* lstat(2), S_ISREG(), */ +#include /* access(2), lstat(2), */ +#include /* string(3), */ +#include /* assert(3), */ +#include /* sscanf(3), */ + +#include "path/canon.h" +#include "path/path.h" +#include "path/binding.h" +#include "path/glue.h" +#include "path/proc.h" +#include "extension/extension.h" + +/** + * Put an end-of-string ('\0') right before the last component of @path. + */ +static inline void pop_component(char *path) +{ + int offset; + + /* Sanity checks. */ + assert(path != NULL); + + offset = strlen(path) - 1; + assert(offset >= 0); + + /* Don't pop over "/", it doesn't mean anything. */ + if (offset == 0) { + assert(path[0] == '/' && path[1] == '\0'); + return; + } + + /* Skip trailing path separators. */ + while (offset > 1 && path[offset] == '/') + offset--; + + /* Search for the previous path separator. */ + while (offset > 1 && path[offset] != '/') + offset--; + + /* Cut the end of the string before the last component. */ + path[offset] = '\0'; + assert(path[0] == '/'); +} + +/** + * Copy in @component the first path component pointed to by @cursor, + * this later is updated to point to the next component for a further + * call. This function returns: + * + * - -errno if an error occured. + * + * - FINAL_SLASH if it the last component of the path but we + * really expect a directory. + * + * - FINAL_NORMAL if it the last component of the path. + * + * - 0 otherwise. + */ +static inline Finality next_component(char component[NAME_MAX], const char **cursor) +{ + const char *start; + ptrdiff_t length; + bool want_dir; + + /* Sanity checks. 
*/ + assert(component != NULL); + assert(cursor != NULL); + + /* Skip leading path separators. */ + while (**cursor != '\0' && **cursor == '/') + (*cursor)++; + + /* Find the next component. */ + start = *cursor; + while (**cursor != '\0' && **cursor != '/') + (*cursor)++; + length = *cursor - start; + + if (length >= NAME_MAX) + return -ENAMETOOLONG; + + /* Extract the component. */ + strncpy(component, start, length); + component[length] = '\0'; + + /* Check if a [link to a] directory is expected. */ + want_dir = (**cursor == '/'); + + /* Skip trailing path separators. */ + while (**cursor != '\0' && **cursor == '/') + (*cursor)++; + + if (**cursor == '\0') + return (want_dir + ? FINAL_SLASH + : FINAL_NORMAL); + + return NOT_FINAL; +} + +/** + * Resolve bindings (if any) in @guest_path and copy the translated + * path into @host_path. Also, this function checks that a non-final + * component is either a directory (returned value is 0) or a symlink + * (returned value is 1), otherwise it returns -errno or -ENOTDIR. + */ +static inline int substitute_binding_stat(Tracee *tracee, Finality finality, unsigned int recursion_level, + const char guest_path[PATH_MAX], char host_path[PATH_MAX]) +{ + struct stat statl; + int status; + + strcpy(host_path, guest_path); + status = substitute_binding(tracee, GUEST, host_path); + if (status < 0) + return status; + + /* Don't notify extensions during the initialization of a binding. */ + if (tracee->glue_type == 0) { + status = notify_extensions(tracee, HOST_PATH, (intptr_t)host_path, + IS_FINAL(finality) && recursion_level == 0); + if (status < 0) + return status; + } + + statl.st_mode = 0; + status = lstat(host_path, &statl); + + /* Build the glue between the hostfs and the guestfs during + * the initialization of a binding. 
*/ + if (status < 0 && tracee->glue_type != 0) { + statl.st_mode = build_glue(tracee, guest_path, host_path, finality); + if (statl.st_mode == 0) + status = -1; + } + + /* Return an error if a non-final component isn't a directory + * nor a symlink. The error depends on why the component + * could not be accessed (ENOENT, EACCES, ...), otherwise the + * error is "Not a directory". */ + if (!IS_FINAL(finality) && !S_ISDIR(statl.st_mode) && !S_ISLNK(statl.st_mode)) + return (status < 0 ? -errno : -ENOTDIR); + + return (S_ISLNK(statl.st_mode) ? 1 : 0); +} + +/** + * Copy in @guest_path the canonicalization (see `man 3 realpath`) of + * @user_path regarding to @tracee->root. The path to canonicalize + * could be either absolute or relative to @guest_path. When the last + * component of @user_path is a link, it is dereferenced only if + * @deref_final is true -- it is useful for syscalls like lstat(2). + * The parameter @recursion_level should be set to 0 unless you know + * what you are doing. This function returns -errno if an error + * occured, otherwise it returns 0. + */ +int canonicalize(Tracee *tracee, const char *user_path, bool deref_final, + char guest_path[PATH_MAX], unsigned int recursion_level) +{ + char scratch_path[PATH_MAX]; + char host_path[PATH_MAX]; + Finality finality; + const char *cursor; + int status; + + /* Avoid infinite loop on circular links. */ + if (recursion_level > MAXSYMLINKS) + return -ELOOP; + + /* Sanity checks. */ + assert(user_path != NULL); + assert(guest_path != NULL); + assert(user_path != guest_path); + + if (strnlen(guest_path, PATH_MAX) >= PATH_MAX) + return -ENAMETOOLONG; + + if (user_path[0] != '/') { + /* Ensure 'guest_path' contains an absolute base of + * the relative `user_path`. */ + if (guest_path[0] != '/') + return -EINVAL; + } + else + strcpy(guest_path, "/"); + + + /* Resolve bindings for the initial '/' component or user_path, + * which is not handled in the loop below. 
+ * In particular HOST_PATH extensions are called from there. */ + status = substitute_binding_stat(tracee, NOT_FINAL, recursion_level, guest_path, host_path); + if (status < 0) + return status; + + /* Canonicalize recursely 'user_path' into 'guest_path'. */ + cursor = user_path; + finality = NOT_FINAL; + while (!IS_FINAL(finality)) { + Comparison comparison; + char component[NAME_MAX]; + + finality = next_component(component, &cursor); + status = (int) finality; + if (status < 0) + return status; + + if (strcmp(component, ".") == 0) { + if (IS_FINAL(finality)) + finality = FINAL_DOT; + continue; + } + + if (strcmp(component, "..") == 0) { + pop_component(guest_path); + if (IS_FINAL(finality)) + finality = FINAL_SLASH; + continue; + } + + status = join_paths(2, scratch_path, guest_path, component); + if (status < 0) + return status; + + /* Resolve bindings and check that a non-final + * component exists and either is a directory or is a + * symlink. For this latter case, we check that the + * symlink points to a directory once it is + * canonicalized, at the end of this loop. */ + status = substitute_binding_stat(tracee, finality, recursion_level, scratch_path, host_path); + if (status < 0) + return status; + + /* Nothing special to do if it's not a link or if we + * explicitly ask to not dereference 'user_path', as + * required by syscalls like lstat(2). Obviously, this + * later condition does not apply to intermediate path + * components. Errors are explicitly ignored since + * they should be handled by the caller. */ + if (status <= 0 || (finality == FINAL_NORMAL && !deref_final)) { + strcpy(scratch_path, guest_path); + status = join_paths(2, guest_path, scratch_path, component); + if (status < 0) + return status; + continue; + } + + /* It's a link, so we have to dereference *and* + * canonicalize to ensure we are not going outside the + * new root. 
*/ + comparison = compare_paths("/proc", guest_path); + switch (comparison) { + case PATHS_ARE_EQUAL: + case PATH1_IS_PREFIX: + /* Some links in "/proc" are generated + * dynamically by the kernel. PRoot has to + * emulate some of them. */ + status = readlink_proc(tracee, scratch_path, + guest_path, component, comparison); + switch (status) { + case CANONICALIZE: + /* The symlink is already dereferenced, + * now canonicalize it. */ + goto canon; + + case DONT_CANONICALIZE: + /* If and only very final, this symlink + * shouldn't be dereferenced nor canonicalized. */ + if (finality == FINAL_NORMAL) { + strcpy(guest_path, scratch_path); + return 0; + } + break; + + default: + if (status < 0) + return status; + } + + default: + break; + } + + status = readlink(host_path, scratch_path, sizeof(scratch_path)); + if (status < 0) + return status; + else if (status == sizeof(scratch_path)) + return -ENAMETOOLONG; + scratch_path[status] = '\0'; + + /* Remove the leading "root" part if needed, it's + * useful for "/proc/self/cwd/" for instance. */ + status = detranslate_path(tracee, scratch_path, host_path); + if (status < 0) + return status; + + canon: + /* Canonicalize recursively the referee in case it + * is/contains a link, moreover if it is not an + * absolute link then it is relative to + * 'guest_path'. */ + status = canonicalize(tracee, scratch_path, true, guest_path, recursion_level + 1); + if (status < 0) + return status; + + /* Check that a non-final canonicalized/dereferenced + * symlink exists and is a directory. */ + status = substitute_binding_stat(tracee, finality, recursion_level, guest_path, host_path); + if (status < 0) + return status; + + /* Here, 'guest_path' shouldn't be a symlink anymore, + * unless it is a named file descriptor. 
*/ + assert(status != 1 || sscanf(guest_path, "/proc/%*d/fd/%d", &status) == 1); + } + + /* At the exit stage of the first level of recursion, + * `guest_path` is fully canonicalized but a terminating '/' + * or a terminating '.' may be required to keep the initial + * semantic of `user_path`. */ + if (recursion_level == 0) { + switch (finality) { + case FINAL_NORMAL: + break; + + case FINAL_SLASH: + strcpy(scratch_path, guest_path); + status = join_paths(2, guest_path, scratch_path, ""); + if (status < 0) + return status; + break; + + case FINAL_DOT: + strcpy(scratch_path, guest_path); + status = join_paths(2, guest_path, scratch_path, "."); + if (status < 0) + return status; + break; + + default: + assert(0); + } + } + + return 0; +} diff --git a/proot/proot_linux/path/canon.h b/proot/proot_linux/path/canon.h new file mode 100644 index 0000000..fa6546f --- /dev/null +++ b/proot/proot_linux/path/canon.h @@ -0,0 +1,34 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef CANON_H +#define CANON_H + +#include +#include + +#include "tracee/tracee.h" + +extern int canonicalize(Tracee *tracee, const char *user_path, bool deref_final, + char guest_path[PATH_MAX], unsigned int nb_recursion); + +#endif /* CANON_H */ diff --git a/proot/proot_linux/path/glue.c b/proot/proot_linux/path/glue.c new file mode 100644 index 0000000..816a339 --- /dev/null +++ b/proot/proot_linux/path/glue.c @@ -0,0 +1,192 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* mkdir(2), lstat(2), */ +#include /* mkdir(2), lstat(2), */ +#include /* mknod(2), */ +#include /* mknod(2), lstat(2), unlink(2), rmdir(2), */ +#include /* string(3), */ +#include /* assert(3), */ +#include /* PATH_MAX, */ +#include /* errno, E* */ +#include /* talloc_*, */ + +#include "path/binding.h" +#include "path/path.h" +#include "path/temp.h" +#include "cli/note.h" + +#include "compat.h" + +/** + * Remove @path if it is empty only. + * + * Note: this is a Talloc destructor. + */ +static int remove_placeholder(char *path) +{ + struct stat statl; + int status; + + status = lstat(path, &statl); + if (status) + return 0; /* Not fatal. 
*/ + + if (!S_ISDIR(statl.st_mode)) { + if (statl.st_size != 0) + return 0; /* Not fatal. */ + status = unlink(path); + } + else + status = rmdir(path); + if (status) + return 0; /* Not fatal. */ + + return 0; +} + +/** + * Attach a copy of @path to the autofree context, and set its + * destructor to remove_placeholder(). + */ +static void set_placeholder_destructor(const char *path) +{ + TALLOC_CTX *autofreed; + char *placeholder; + + autofreed = talloc_autofree_context(); + if (autofreed == NULL) + return; + + placeholder = talloc_strdup(autofreed, path); + if (placeholder == NULL) + return; + + talloc_set_destructor(placeholder, remove_placeholder); +} + +/** + * Build in a temporary filesystem the glue between the guest part and + * the host part of the @binding_path. This function returns the type + * of the bound path, otherwise 0 if an error occured. + * + * For example, assuming the host path "/opt" is mounted/bound to the + * guest path "/black/holes/and/revelations", and assuming this path + * can't be created in the guest rootfs (eg. permission denied), then + * it is created in a temporary rootfs and all these paths are glued + * that way: + * + * $GUEST/black/ --> $GLUE/black/ + * ./holes + * ./holes/and + * ./holes/and/revelations --> $HOST/opt/ + * + * This glue allows operations on paths that do not exist in the guest + * rootfs but that were specified as the guest part of a binding. + */ +mode_t build_glue(Tracee *tracee, const char *guest_path, char host_path[PATH_MAX], + Finality finality) +{ + bool belongs_to_gluefs; + Comparison comparison; + Binding *binding; + mode_t type; + mode_t mode; + int status; + + assert(tracee->glue_type != 0); + + /* Create the temporary directory where the "glue" rootfs will + * lie. 
*/ + if (tracee->glue == NULL) { + tracee->glue = create_temp_directory(NULL, tracee->tool_name); + if (tracee->glue == NULL) { + note(tracee, ERROR, INTERNAL, "can't create glue rootfs"); + return 0; + } + talloc_set_name_const(tracee->glue, "$glue"); + } + + comparison = compare_paths(tracee->glue, host_path); + belongs_to_gluefs = (comparison == PATHS_ARE_EQUAL || comparison == PATH1_IS_PREFIX); + + /* If it's not a final component then it is a directory. I definitely + * hate how the potential type of the final component is propagated + * from initialize_binding() down to here, sadly there's no elegant way + * to know its type at this stage. */ + if (IS_FINAL(finality)) { + type = tracee->glue_type; + mode = (belongs_to_gluefs ? 0777 : 0); + } + else { + type = S_IFDIR; + mode = 0777; + } + + if (getenv("PROOT_DONT_POLLUTE_ROOTFS") != NULL && !belongs_to_gluefs) + goto create_binding; + + /* Try to create this component into the "guest" or "glue" + * rootfs (depending if there were a glue previously). */ + if (S_ISDIR(type)) + status = mkdir(host_path, mode); + else /* S_IFREG, S_IFCHR, S_IFBLK, S_IFIFO or S_IFSOCK. */ + status = mknod(host_path, mode | type, 0); + + /* Remove placeholders from the guest rootfs once PRoot is + * terminated. */ + if (status >= 0 && !belongs_to_gluefs) + set_placeholder_destructor(host_path); + + /* Nothing else to do if the path already exists or if it is + * the final component since it will be pointed to by the + * binding being initialized (from the example, + * "$GUEST/black/holes/and/revelations" -> "$HOST/opt"). */ + if (status >= 0 || errno == EEXIST || IS_FINAL(finality)) + return type; + + /* mkdir/mknod are supposed to always succeed in + * tracee->glue. */ + if (belongs_to_gluefs) { + note(tracee, WARNING, SYSTEM, "mkdir/mknod"); + return 0; + } + +create_binding: + /* Sanity checks. 
*/ + if ( strnlen(tracee->glue, PATH_MAX) >= PATH_MAX + || strnlen(guest_path, PATH_MAX) >= PATH_MAX) { + note(tracee, WARNING, INTERNAL, "installing the binding: guest path too long"); + return 0; + } + + /* From the example, create the binding "/black" -> + * "$GLUE/black". */ + binding = insort_binding3(tracee, tracee->glue, tracee->glue, guest_path); + if (binding == NULL) + return 0; + + /* TODO: emulation of getdents(parent(guest_path)) to finalize + * the glue, "black" in getdents("/") from the example. */ + + return type; +} diff --git a/proot/proot_linux/path/glue.h b/proot/proot_linux/path/glue.h new file mode 100644 index 0000000..cebf3a9 --- /dev/null +++ b/proot/proot_linux/path/glue.h @@ -0,0 +1,34 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef GLUE_H +#define GLUE_H + +#include /* PATH_MAX, */ + +#include "tracee/tracee.h" +#include "path.h" + +extern mode_t build_glue(Tracee *tracee, const char *guest_path, char host_path[PATH_MAX], + Finality finality); + +#endif /* GLUE_H */ diff --git a/proot/proot_linux/path/path.c b/proot/proot_linux/path/path.c new file mode 100644 index 0000000..bc6c49a --- /dev/null +++ b/proot/proot_linux/path/path.c @@ -0,0 +1,739 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* string(3), */ +#include /* va_*(3), */ +#include /* assert(3), */ +#include /* AT_*, */ +#include /* readlink*(2), *stat(2), getpid(2), */ +#include /* pid_t, */ +#include /* S_ISDIR, */ +#include /* opendir(3), readdir(3), */ +#include /* snprintf(3), */ +#include /* E*, */ +#include /* ptrdiff_t, */ +#include /* PRI*, */ + +#include "path/path.h" +#include "path/binding.h" +#include "path/canon.h" +#include "path/proc.h" +#include "extension/extension.h" +#include "cli/note.h" +#include "build.h" + +#include "compat.h" + +/** + * Copy in @result the concatenation of several paths (@number_paths) + * and adds a path separator ('/') in between when needed. 
This + * function returns -errno if an error occured, otherwise it returns 0. + */ +int join_paths(int number_paths, char result[PATH_MAX], ...) +{ + va_list paths; + size_t length; + int status; + int i; + + result[0] = '\0'; + length = 0; + status = 0; + + /* Parse the list of variadic arguments. */ + va_start(paths, result); + for (i = 0; i < number_paths; i++) { + const char *path; + size_t path_length; + size_t new_length; + + path = va_arg(paths, const char *); + if (path == NULL) + continue; + path_length = strlen(path); + + /* A new path separator is needed. */ + if (length > 0 && result[length - 1] != '/' && path[0] != '/') { + new_length = length + path_length + 1; + if (new_length + 1 >= PATH_MAX) { + status = -ENAMETOOLONG; + break; + } + strcat(result + length, "/"); + strcat(result + length, path); + length = new_length; + } + /* There are already two path separators. */ + else if (length > 0 && result[length - 1] == '/' && path[0] == '/') { + new_length = length + path_length - 1; + if (new_length + 1 >= PATH_MAX) { + status = -ENAMETOOLONG; + break; + } + strcat(result + length, path + 1); + length += path_length - 1; + } + /* There's already one path separator or result[] is empty. */ + else { + new_length = length + path_length; + if (new_length + 1 >= PATH_MAX) { + status = -ENAMETOOLONG; + break; + } + strcat(result + length, path); + length += path_length; + } + + status = 0; + } + va_end(paths); + + return status; +} + +/** + * Put in @host_path the full path to the given shell @command. The + * @command is searched in @paths if not null, otherwise in $PATH + * (relatively to the @tracee's file-system name-space). This + * function always returns -1 on error, otherwise 0. 
+ *
+ * Note: when @command is explicit (it contains a '/') and resolves to
+ * something that is not a regular file, or that lacks the owner
+ * execute bit, -EACCES is returned rather than -1.
+ */
+int which(Tracee *tracee, const char *paths, char host_path[PATH_MAX], const char *command)
+{
+	char path[PATH_MAX];
+	const char *cursor;
+	struct stat statr;
+	int status;
+
+	bool is_explicit;
+	bool found;
+
+	assert(command != NULL);
+	is_explicit = (strchr(command, '/') != NULL);
+
+	/* Is the command available without any $PATH look-up? */
+	status = realpath2(tracee, host_path, command, true);
+	if (status == 0 && stat(host_path, &statr) == 0) {
+		if (is_explicit && !S_ISREG(statr.st_mode)) {
+			note(tracee, ERROR, USER, "'%s' is not a regular file", command);
+			return -EACCES;
+		}
+
+		if (is_explicit && (statr.st_mode & S_IXUSR) == 0) {
+			note(tracee, ERROR, USER, "'%s' is not executable", command);
+			return -EACCES;
+		}
+
+		found = true;
+
+		/* Don't dereference the final component to preserve
+		 * argv0 in case it is a symlink to script. */
+		(void) realpath2(tracee, host_path, command, false);
+	}
+	else
+		found = false;
+
+	/* Was the explicit command found? */
+	if (is_explicit) {
+		if (found)
+			return 0;
+		else
+			goto not_found;
+	}
+
+	/* Otherwise search the command in $PATH. */
+	paths = paths ?: getenv("PATH");
+	if (paths == NULL || strcmp(paths, "") == 0)
+		goto not_found;
+
+	cursor = paths;
+	do {
+		size_t length;
+
+		/* @length is the size of the current ':'-delimited
+		 * entry; @cursor is moved just past its delimiter. */
+		length = strcspn(cursor, ":");
+		cursor += length + 1;
+
+		if (length >= PATH_MAX)
+			continue;
+		else if (length == 0)
+			/* An empty entry means the current directory. */
+			strcpy(path, ".");
+		else {
+			strncpy(path, cursor - length - 1, length);
+			path[length] = '\0';
+		}
+
+		/* Avoid buffer-overflow. */
+		if (length + strlen(command) + 2 >= PATH_MAX)
+			continue;
+
+		strcat(path, "/");
+		strcat(path, command);
+
+		status = realpath2(tracee, host_path, path, true);
+		if (status == 0
+		    && stat(host_path, &statr) == 0
+		    && S_ISREG(statr.st_mode)
+		    && (statr.st_mode & S_IXUSR) != 0) {
+			/* Don't dereference the final component to preserve
+			 * argv0 in case it is a symlink to script. */
+			(void) realpath2(tracee, host_path, path, false);
+			return 0;
+		}
+	/* The byte before @cursor is either the ':' just skipped or
+	 * the end-of-string terminator of @paths. */
+	} while (*(cursor - 1) != '\0');
+
+not_found:
+	/* NOTE(review): the empty fallback literal below may be an
+	 * extraction artefact (a "<...>" placeholder looks stripped)
+	 * -- confirm against upstream. */
+	status = getcwd2(tracee, path);
+	if (status < 0)
+		strcpy(path, "");
+
+	/* @paths is still NULL here when an explicit command was not
+	 * found without a caller-supplied list, or when neither
+	 * @paths nor $PATH is available: never hand a NULL pointer to
+	 * a "%s" conversion. */
+	note(tracee, ERROR, USER, "'%s' not found (root = %s, cwd = %s, $PATH=%s)",
+		command, get_root(tracee), path, paths != NULL ? paths : "(null)");
+
+	/* Check if the command was found without any $PATH look-up
+	 * but it didn't contain "/". */
+	if (found && !is_explicit)
+		note(tracee, ERROR, USER,
+			"to execute a local program, use the './' prefix, for example: ./%s", command);
+
+	return -1;
+}
+
+/**
+ * Put in @host_path the canonicalized form of @path. In the nominal
+ * case (@tracee == NULL), this function simply calls realpath(3),
+ * but when doing sub-reconfiguration, the path is translated
+ * relatively to the current @tracee's file-system name-space with
+ * translate_path(); @deref_final is only used in that second case.
+ * This function returns -errno on error, otherwise 0.
+ */
+int realpath2(Tracee *tracee, char host_path[PATH_MAX], const char *path, bool deref_final)
+{
+	int status;
+
+	if (tracee == NULL)
+		status = (realpath(path, host_path) == NULL ? -errno : 0);
+	else
+		status = translate_path(tracee, host_path, AT_FDCWD, path, deref_final);
+	return status;
+}
+
+/**
+ * Put in @guest_path the current working directory. In the nominal
+ * case (@tracee == NULL), this function simply calls getcwd(3), but
+ * when doing sub-reconfiguration, it copies the working directory
+ * recorded for the current @tracee (@tracee->fs->cwd). This function
+ * returns -errno on error (-ENAMETOOLONG if the recorded directory
+ * does not fit in PATH_MAX bytes), otherwise 0.
+ */
+int getcwd2(Tracee *tracee, char guest_path[PATH_MAX])
+{
+	if (tracee == NULL) {
+		if (getcwd(guest_path, PATH_MAX) == NULL)
+			return -errno;
+	}
+	else {
+		if (strlen(tracee->fs->cwd) >= PATH_MAX)
+			return -ENAMETOOLONG;
+
+		strcpy(guest_path, tracee->fs->cwd);
+	}
+
+	return 0;
+}
+
+/**
+ * Remove the trailing "/" or "/.".
+ *
+ * @path is modified in place. An empty @path is left untouched; the
+ * root forms "/" and "/." both end up as "/".
+ */
+void chop_finality(char *path)
+{
+	size_t length = strlen(path);
+
+	/* Nothing to chop, and path[length - 1] would be out of
+	 * bounds. */
+	if (length == 0)
+		return;
+
+	if (path[length - 1] == '.') {
+		assert(length >= 2);
+		/* Special case for "/." */
+		if (length == 2)
+			path[length - 1] = '\0';
+		else
+			path[length - 2] = '\0';
+	}
+	else if (path[length - 1] == '/') {
+		/* Special case for "/" */
+		if (length > 1)
+			path[length - 1] = '\0';
+	}
+}
+
+/**
+ * Put in @path the result of readlink(/proc/@pid/fd/@fd). This
+ * function returns -errno if an error occurred, otherwise 0:
+ * -EBADF when the link name can't be formatted or read,
+ * -ENAMETOOLONG when its target doesn't fit in PATH_MAX bytes.
+ */
+int readlink_proc_pid_fd(pid_t pid, int fd, char path[PATH_MAX])
+{
+	/* "/proc/" + pid + "/fd/" + fd + NUL needs at most 31 bytes
+	 * for two non-negative values (assuming 32-bit pid_t/int);
+	 * any truncation is detected right below anyway. */
+	char link[32];
+	int status;
+
+	/* Format the path to the "virtual" link. */
+	status = snprintf(link, sizeof(link), "/proc/%d/fd/%d", pid, fd);
+	if (status < 0)
+		return -EBADF;
+	if ((size_t) status >= sizeof(link))
+		return -EBADF;
+
+	/* Read the value of this "virtual" link. readlink(2) doesn't
+	 * terminate the string, hence the explicit '\0' below. */
+	status = readlink(link, path, PATH_MAX);
+	if (status < 0)
+		return -EBADF;
+	if (status >= PATH_MAX)
+		return -ENAMETOOLONG;
+	path[status] = '\0';
+
+	return 0;
+}
+
+/**
+ * Copy in @result the equivalent of "@tracee->root + canon(@dir_fd +
+ * @user_path)". If @user_path is not absolute then it is relative to
+ * the directory referred by the descriptor @dir_fd (AT_FDCWD is for
+ * the current working directory). See the documentation of
+ * canonicalize() for the meaning of @deref_final. This function
+ * returns -errno if an error occurred, otherwise 0.
+ */
+int translate_path(Tracee *tracee, char result[PATH_MAX], int dir_fd,
+		const char *user_path, bool deref_final)
+{
+	char guest_path[PATH_MAX];
+	int status;
+
+	/* Use "/" as the base if it is an absolute guest path. */
+	if (user_path[0] == '/') {
+		strcpy(result, "/");
+	}
+	/* It is relative to a directory referred by a descriptor, see
+	 * openat(2) for details. */
+	else if (dir_fd != AT_FDCWD) {
+		/* /proc/@tracee->pid/fd/@dir_fd -> result. */
+		status = readlink_proc_pid_fd(tracee->pid, dir_fd, result);
+		if (status < 0)
+			return status;
+
+		/* Named file descriptors may reference special
+		 * objects like pipes, sockets, inodes, ... Such
+		 * objects do not belong to the file-system. */
+		if (result[0] != '/')
+			return -ENOTDIR;
+
+		/* Remove the leading "root" part of the base
+		 * (required!). */
+		status = detranslate_path(tracee, result, NULL);
+		if (status < 0)
+			return status;
+	}
+	/* It is relative to the current working directory. */
+	else {
+		status = getcwd2(tracee, result);
+		if (status < 0)
+			return status;
+	}
+
+	VERBOSE(tracee, 2, "vpid %" PRIu64 ": translate(\"%s\" + \"%s\")",
+		tracee != NULL ? tracee->vpid : 0, result, user_path);
+
+	/* A strictly positive status from the extensions skips the
+	 * canonicalization and the binding substitution below. */
+	status = notify_extensions(tracee, GUEST_PATH, (intptr_t) result, (intptr_t) user_path);
+	if (status < 0)
+		return status;
+	if (status > 0)
+		goto skip;
+
+	/* So far "result" was used as a base path, it's time to join
+	 * it to the user path. */
+	assert(result[0] == '/');
+	status = join_paths(2, guest_path, result, user_path);
+	if (status < 0)
+		return status;
+	strcpy(result, "/");
+
+	/* Canonicalize regarding the new root. */
+	status = canonicalize(tracee, guest_path, deref_final, result, 0);
+	if (status < 0)
+		return status;
+
+	/* Final binding substitution to convert "result" into a host
+	 * path, since canonicalize() works from the guest
+	 * point-of-view. */
+	status = substitute_binding(tracee, GUEST, result);
+	if (status < 0)
+		return status;
+
+skip:
+	VERBOSE(tracee, 2, "vpid %" PRIu64 ": -> \"%s\"",
+		tracee != NULL ? tracee->vpid : 0, result);
+
+	status = notify_extensions(tracee, TRANSLATED_PATH, (intptr_t) result, 0);
+	if (status < 0)
+		return status;
+
+	return 0;
+}
+
+/**
+ * Remove/substitute the leading part of a "translated" @path. It
+ * returns 0 if no transformation is required (ie. symmetric binding),
+ * otherwise it returns the size in bytes of the updated @path,
+ * including the end-of-string terminator.
On error it returns
+ * -errno.
+ *
+ * @t_referrer is NULL when @path is not the target of a symbolic
+ * link; otherwise it is the translated path of the link @path was
+ * read from. Only in the NULL case is @path required to lie within
+ * the guest rootfs or a binding (-EPERM otherwise).
+ */
+int detranslate_path(Tracee *tracee, char path[PATH_MAX], const char t_referrer[PATH_MAX])
+{
+	size_t prefix_length;
+	ssize_t new_length;
+
+	bool sanity_check;
+	bool follow_binding;
+
+	/* Sanity check. */
+	if (strnlen(path, PATH_MAX) >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	/* Don't try to detranslate relative paths (typically the
+	 * target of a relative symbolic link). */
+	if (path[0] != '/')
+		return 0;
+
+	/* Is it a symlink? */
+	if (t_referrer != NULL) {
+		Comparison comparison;
+
+		sanity_check = false;
+		follow_binding = false;
+
+		/* In some cases bindings have to be resolved. */
+		comparison = compare_paths("/proc", t_referrer);
+		if (comparison == PATH1_IS_PREFIX) {
+			/* Some links in "/proc" are generated
+			 * dynamically by the kernel. PRoot has to
+			 * emulate some of them. */
+			char proc_path[PATH_MAX];
+			/* Work on a copy so @path is only overwritten
+			 * when the link was actually emulated. */
+			strcpy(proc_path, path);
+			new_length = readlink_proc2(tracee, proc_path, t_referrer);
+			if (new_length < 0)
+				return new_length;
+			if (new_length != 0) {
+				strcpy(path, proc_path);
+				/* + 1 for the end-of-string terminator. */
+				return new_length + 1;
+			}
+
+			/* Always resolve bindings for symlinks in
+			 * "/proc", they always point to the emulated
+			 * file-system namespace by design. */
+			follow_binding = true;
+		}
+		else if (!belongs_to_guestfs(tracee, t_referrer)) {
+			const char *binding_referree;
+			const char *binding_referrer;
+
+			binding_referree = get_path_binding(tracee, HOST, path);
+			binding_referrer = get_path_binding(tracee, HOST, t_referrer);
+			assert(binding_referrer != NULL);
+
+			/* Resolve bindings for symlinks that belong
+			 * to a binding and point to the same binding.
+			 * For example, if "-b /lib:/foo" is specified
+			 * and the symlink "/lib/a -> /lib/b" exists
+			 * in the host rootfs namespace, then it
+			 * should appear as "/foo/a -> /foo/b" in the
+			 * guest rootfs namespace for consistency
+			 * reasons. */
+			if (binding_referree != NULL) {
+				comparison = compare_paths(binding_referree, binding_referrer);
+				follow_binding = (comparison == PATHS_ARE_EQUAL);
+			}
+		}
+	}
+	else {
+		sanity_check = true;
+		follow_binding = true;
+	}
+
+	if (follow_binding) {
+		/* 0 and 1 are final answers; any other status falls
+		 * through to the "root" prefix removal below. */
+		switch (substitute_binding(tracee, HOST, path)) {
+		case 0:
+			return 0;
+		case 1:
+			return strlen(path) + 1;
+		default:
+			break;
+		}
+	}
+
+	switch (compare_paths(get_root(tracee), path)) {
+	case PATH1_IS_PREFIX:
+		/* Remove the leading part, that is, the "root". */
+		prefix_length = strlen(get_root(tracee));
+
+		/* Special case when path to the guest rootfs == "/". */
+		if (prefix_length == 1)
+			prefix_length = 0;
+
+		new_length = strlen(path) - prefix_length;
+		memmove(path, path + prefix_length, new_length);
+
+		path[new_length] = '\0';
+		break;
+
+	case PATHS_ARE_EQUAL:
+		/* Special case when path == root. */
+		new_length = 1;
+		strcpy(path, "/");
+		break;
+
+	default:
+		/* Ensure the path is within the new root. */
+		if (sanity_check)
+			return -EPERM;
+		else
+			return 0;
+	}
+
+	/* Size of the updated path, terminator included. */
+	return new_length + 1;
+}
+
+/**
+ * Check if the translated @host_path belongs to the guest rootfs,
+ * that is, isn't from a binding. The test is purely lexical: the
+ * root of @tracee has to be equal to, or a prefix of, @host_path.
+ */
+bool belongs_to_guestfs(const Tracee *tracee, const char *host_path)
+{
+	Comparison comparison;
+
+	comparison = compare_paths(get_root(tracee), host_path);
+	return (comparison == PATHS_ARE_EQUAL || comparison == PATH1_IS_PREFIX);
+}
+
+/**
+ * Compare @path1 with @path2, which are respectively @length1 and
+ * @length2 long.
+ *
+ * This function works only with paths canonicalized in the same
+ * namespace (host/guest)!
+ *
+ * A single trailing '/' is ignored on both sides, and a path is only
+ * reported as a prefix of the other when it stops on a component
+ * boundary.
+ */
+Comparison compare_paths2(const char *path1, size_t length1, const char *path2, size_t length2)
+{
+	const char *longer;
+	size_t common;
+
+#if defined DEBUG_OPATH
+	assert(length(path1) == length1);
+	assert(length(path2) == length2);
+#endif
+	assert(length1 > 0);
+	assert(length2 > 0);
+
+	/* Still refuse empty paths when assertions are compiled out. */
+	if (length1 == 0 || length2 == 0)
+		return PATHS_ARE_NOT_COMPARABLE;
+
+	/* Ignore one trailing '/' on each side. */
+	if (path1[length1 - 1] == '/')
+		length1--;
+
+	if (path2[length2 - 1] == '/')
+		length2--;
+
+	/* Only the first @common bytes need to be compared, @longer is
+	 * the path that continues (or not) after them. */
+	if (length1 < length2) {
+		common = length1;
+		longer = path2;
+	}
+	else {
+		common = length2;
+		longer = path1;
+	}
+
+	/* Cheap rejection: the byte following the common part has to
+	 * be a component boundary. */
+	switch (longer[common]) {
+	case '/':
+	case '\0':
+		break;
+
+	default:
+		return PATHS_ARE_NOT_COMPARABLE;
+	}
+
+	if (strncmp(path1, path2, common) != 0)
+		return PATHS_ARE_NOT_COMPARABLE;
+
+	if (length1 < length2)
+		return PATH1_IS_PREFIX;
+
+	if (length1 > length2)
+		return PATH2_IS_PREFIX;
+
+	return PATHS_ARE_EQUAL;
+}
+
+/**
+ * Same as compare_paths2(), the lengths being computed with
+ * strlen(3).
+ */
+Comparison compare_paths(const char *path1, const char *path2)
+{
+	const size_t length1 = strlen(path1);
+	const size_t length2 = strlen(path2);
+
+	return compare_paths2(path1, length1, path2, length2);
+}
+
+typedef int (*foreach_fd_t)(const Tracee *tracee, int fd, char path[PATH_MAX]);
+
+/**
+ * Call @callback on each open file descriptor of @tracee->pid that
+ * points to an absolute path. It returns the status of the first
+ * failure, that is, the first value lesser than 0 returned by
+ * @callback, otherwise 0. A "/proc/<pid>/fd" directory that can't be
+ * opened isn't reported as an error.
+ */
+static int foreach_fd(const Tracee *tracee, foreach_fd_t callback)
+{
+	char fd_dir[32]; /* 32 > sizeof("/proc//fd") + sizeof(#ULONG_MAX) */
+	char target[PATH_MAX];
+	struct dirent *entry;
+	int result = 0;
+	int size;
+	DIR *dir;
+
+	/* Format the path to the "virtual" directory. */
+	size = snprintf(fd_dir, sizeof(fd_dir), "/proc/%d/fd", tracee->pid);
+	if (size < 0 || (size_t) size >= sizeof(fd_dir))
+		return 0;
+
+	/* Open the virtual directory "/proc/$pid/fd". */
+	dir = opendir(fd_dir);
+	if (dir == NULL)
+		return 0;
+
+	while ((entry = readdir(dir)) != NULL) {
+		char link[PATH_MAX];
+		int status;
+
+		/* Read the value of this "virtual" link. Don't use
+		 * readlinkat(2) here since it would require Linux >=
+		 * 2.6.16 and Glibc >= 2.4, whereas PRoot is supposed
+		 * to work on any Linux 2.6 systems. */
+		if (strlen(fd_dir) + strlen(entry->d_name) + 1 >= PATH_MAX)
+			continue;
+
+		strcpy(link, fd_dir);
+		strcat(link, "/");
+		strcat(link, entry->d_name);
+
+		size = readlink(link, target, PATH_MAX);
+		if (size < 0 || size >= PATH_MAX)
+			continue;
+		target[size] = '\0';
+
+		/* Ensure it points to a path (not a socket or
+		 * something like that). */
+		if (target[0] != '/')
+			continue;
+
+		status = callback(tracee, atoi(entry->d_name), target);
+		if (status < 0) {
+			/* First failure: stop here and report it. */
+			result = status;
+			break;
+		}
+	}
+
+	closedir(dir);
+	return result;
+}
+
+/**
+ * Helper for list_open_fd(): report the already opened @fd (pointing
+ * to @path) in verbose mode and to the extensions. It always returns
+ * 0 so the enumeration is never interrupted.
+ */
+static int list_open_fd_callback(const Tracee *tracee, int fd, char path[PATH_MAX])
+{
+	VERBOSE(tracee, 1, "pid %d: access to \"%s\" (fd %d) won't be translated until closed",
+		tracee->pid, path, fd);
+	notify_extensions((Tracee*)tracee, ALREADY_OPENED_FD, (intptr_t)path, (intptr_t)fd);
+	return 0;
+}
+
+/**
+ * Warn for files that are open. It is useful right after PRoot has
+ * attached a process.
+ */
+int list_open_fd(const Tracee *tracee)
+{
+	return foreach_fd(tracee, list_open_fd_callback);
+}
+
+/**
+ * Substitute the first @old_prefix_length bytes of @path with
+ * @new_prefix (the caller has to provides a correct
+ * @new_prefix_length). This function returns the new length of
+ * @path. Note: this function takes care about special cases (like
+ * "/").
+ *
+ * When the result wouldn't fit in PATH_MAX bytes, -ENAMETOOLONG
+ * converted to size_t is returned and @path is left unchanged.
+ */
+size_t substitute_path_prefix(char path[PATH_MAX], size_t old_prefix_length,
+			const char *new_prefix, size_t new_prefix_length)
+{
+	const size_t old_length = strlen(path);
+	size_t kept_length;
+	size_t result_length;
+
+	assert(old_prefix_length < PATH_MAX);
+	assert(new_prefix_length < PATH_MAX);
+
+	if (new_prefix_length == 1) {
+		/* Special case: "/foo" -> "/". Substitute "/foo/bin"
+		 * with "/bin" not "//bin". */
+		kept_length = old_length - old_prefix_length;
+		if (kept_length == 0) {
+			/* Special case: "/". */
+			path[0] = '/';
+			result_length = 1;
+		}
+		else {
+			memmove(path, path + old_prefix_length, kept_length);
+			result_length = kept_length;
+		}
+	}
+	else if (old_prefix_length == 1) {
+		/* Special case: "/" -> "/foo". Substitute "/bin" with
+		 * "/foo/bin" not "/foobin". */
+		if (new_prefix_length + old_length >= PATH_MAX)
+			return -ENAMETOOLONG;
+
+		if (old_length > 1) {
+			/* The whole old path, leading '/' included,
+			 * is kept after the new prefix. */
+			memmove(path + new_prefix_length, path, old_length);
+			result_length = new_prefix_length + old_length;
+		}
+		else {
+			/* Special case: "/". */
+			result_length = new_prefix_length;
+		}
+		memcpy(path, new_prefix, new_prefix_length);
+	}
+	else {
+		/* Generic case. */
+		kept_length = old_length - old_prefix_length;
+		result_length = kept_length + new_prefix_length;
+		if (result_length >= PATH_MAX)
+			return -ENAMETOOLONG;
+
+		memmove(path + new_prefix_length, path + old_prefix_length, kept_length);
+		memcpy(path, new_prefix, new_prefix_length);
+	}
+
+	assert(result_length < PATH_MAX);
+	path[result_length] = '\0';
+
+	return result_length;
+}
diff --git a/proot/proot_linux/path/path.h b/proot/proot_linux/path/path.h
new file mode 100644
index 0000000..d455894
--- /dev/null
+++ b/proot/proot_linux/path/path.h
@@ -0,0 +1,99 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef PATH_H +#define PATH_H + +#include /* pid_t, */ +#include /* AT_FDCWD, */ +#include /* PATH_MAX, */ +#include + +#include "tracee/tracee.h" + +/* File type. */ +typedef enum { + REGULAR, + SYMLINK, +} Type; + +/* Path point-of-view. */ +typedef enum { + GUEST, + HOST, + + /* Used for bindings as specified by the user but not + * canonicalized yet (new_binding, initialize_binding). */ + PENDING, +} Side; + +/* Path with cached attributes. */ +typedef struct { + char path[PATH_MAX]; + size_t length; + Side side; +} Path; + +/* Path ending type. */ +typedef enum { + NOT_FINAL, + FINAL_NORMAL, + FINAL_SLASH, + FINAL_DOT +} Finality; + +#define IS_FINAL(a) ((a) != NOT_FINAL) + +/* Comparison between two paths. 
*/ +typedef enum Comparison { + PATHS_ARE_EQUAL, + PATH1_IS_PREFIX, + PATH2_IS_PREFIX, + PATHS_ARE_NOT_COMPARABLE, +} Comparison; + +extern int which(Tracee *tracee, const char *paths, char host_path[PATH_MAX], const char *command); +extern int realpath2(Tracee *tracee, char host_path[PATH_MAX], const char *path, bool deref_final); +extern int getcwd2(Tracee *tracee, char guest_path[PATH_MAX]); +extern void chop_finality(char *path); + +extern int translate_path(Tracee *tracee, char host_path[PATH_MAX], + int dir_fd, const char *guest_path, bool deref_final); + +extern int detranslate_path(Tracee *tracee, char path[PATH_MAX], const char t_referrer[PATH_MAX]); +extern bool belongs_to_guestfs(const Tracee *tracee, const char *path); + +extern int join_paths(int number_paths, char result[PATH_MAX], ...); +extern int list_open_fd(const Tracee *tracee); + +extern Comparison compare_paths(const char *path1, const char *path2); +extern Comparison compare_paths2(const char *path1, size_t length1, const char *path2, size_t length2); + +extern size_t substitute_path_prefix(char path[PATH_MAX], size_t old_prefix_length, + const char *new_prefix, size_t new_prefix_length); + +extern int readlink_proc_pid_fd(pid_t pid, int fd, char path[PATH_MAX]); + +/* Check if path interpretable relatively to dirfd, see openat(2) for details. */ +#define AT_FD(dirfd, path) ((dirfd) != AT_FDCWD && ((path) != NULL && (path)[0] != '/')) + +#endif /* PATH_H */ diff --git a/proot/proot_linux/path/proc.c b/proot/proot_linux/path/proc.c new file mode 100644 index 0000000..dc49611 --- /dev/null +++ b/proot/proot_linux/path/proc.c @@ -0,0 +1,195 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <stdio.h>  /* snprintf(3), */
+#include <string.h> /* strcmp(3), */
+#include <stdlib.h> /* atoi(3), strtol(3), */
+#include <errno.h>  /* E*, */
+#include <assert.h> /* assert(3), */
+
+#include "path/proc.h"
+#include "tracee/tracee.h"
+#include "path/path.h"
+#include "path/binding.h"
+
+/**
+ * This function emulates the @result of readlink("@base/@component")
+ * with respect to @tracee, where @base belongs to "/proc" (according
+ * to @comparison, which has to be equal to compare_paths("/proc",
+ * @base)). This function returns -errno on error, an enum @Action
+ * otherwise (see path/proc.h).
+ *
+ * Unlike readlink(), this function includes the nul terminating byte
+ * to @result.
+ */
+Action readlink_proc(const Tracee *tracee, char result[PATH_MAX],
+		const char base[PATH_MAX], const char component[NAME_MAX],
+		Comparison comparison)
+{
+	const Tracee *known_tracee;
+	char proc_path[64]; /* Room for "/proc/<pid>/fd"; overflows are checked after each snprintf(). */
+	int status;
+	pid_t pid;
+
+	assert(comparison == compare_paths("/proc", base));
+
+	/* Remember: comparison = compare_paths("/proc", base) */
+	switch (comparison) {
+	case PATHS_ARE_EQUAL:
+		/* Substitute "/proc/self" with "/proc/<pid>". */
+		if (strcmp(component, "self") != 0)
+			return DEFAULT;
+
+		status = snprintf(result, PATH_MAX, "/proc/%d", tracee->pid);
+		if (status < 0 || status >= PATH_MAX)
+			return -EPERM;
+
+		return CANONICALIZE;
+
+	case PATH1_IS_PREFIX:
+		/* Handle "/proc/<pid>" below, where <pid> is a process
+		 * monitored by PRoot. */
+		break;
+
+	default:
+		return DEFAULT;
+	}
+
+	/* atoi() yields 0 when the component that follows "/proc/"
+	 * isn't a number: this isn't a per-process entry. */
+	pid = atoi(base + strlen("/proc/"));
+	if (pid == 0)
+		return DEFAULT;
+
+	/* Handle links in "/proc/<pid>/". */
+	status = snprintf(proc_path, sizeof(proc_path), "/proc/%d", pid);
+	if (status < 0 || (size_t) status >= sizeof(proc_path))
+		return -EPERM;
+
+	comparison = compare_paths(proc_path, base);
+	switch (comparison) {
+	case PATHS_ARE_EQUAL:
+		known_tracee = get_tracee(tracee, pid, false);
+		if (known_tracee == NULL)
+			return DEFAULT;
+
+/* Leave readlink_proc() with @string copied into @result when
+ * @component is @name, do nothing otherwise. */
+#define SUBSTITUTE(name, string)					\
+		do {							\
+			if (strcmp(component, #name) != 0)		\
+				break;					\
+									\
+			status = strlen(string);			\
+			if (status >= PATH_MAX)				\
+				return -EPERM;				\
+									\
+			strncpy(result, string, status + 1);		\
+			return CANONICALIZE;				\
+		} while (0)
+
+		/* Substitute link "/proc/<pid>/???" with the content
+		 * of tracee->???. */
+		SUBSTITUTE(exe, known_tracee->exe);
+		SUBSTITUTE(cwd, known_tracee->fs->cwd);
+		SUBSTITUTE(root, get_root(known_tracee));
+#undef SUBSTITUTE
+		return DEFAULT;
+
+	case PATH1_IS_PREFIX:
+		/* Handle "/proc/<pid>/???" below. */
+		break;
+
+	default:
+		return DEFAULT;
+	}
+
+	/* Handle links in "/proc/<pid>/fd/". */
+	status = snprintf(proc_path, sizeof(proc_path), "/proc/%d/fd", pid);
+	if (status < 0 || (size_t) status >= sizeof(proc_path))
+		return -EPERM;
+
+	comparison = compare_paths(proc_path, base);
+	switch (comparison) {
+		char *end_ptr;
+
+	case PATHS_ARE_EQUAL:
+		/* Sanity check: a number is expected. */
+		errno = 0;
+		(void) strtol(component, &end_ptr, 10);
+		if (errno != 0 || end_ptr == component)
+			return -EPERM;
+
+		/* Don't dereference "/proc/<pid>/fd/???" now: they
+		 * can point to anonymous pipe, socket, ... otherwise
+		 * they point to a path already canonicalized by the
+		 * kernel.
+		 *
+		 * Note they are still correctly detranslated in
+		 * syscall/exit.c if a monitored process uses
+		 * readlink() against any of them. */
+		status = snprintf(result, PATH_MAX, "%s/%s", base, component);
+		if (status < 0 || status >= PATH_MAX)
+			return -EPERM;
+
+		return DONT_CANONICALIZE;
+
+	default:
+		break;
+	}
+
+	return DEFAULT;
+}
+
+/**
+ * This function emulates the @result of readlink("@referer") with
+ * respect to @tracee, where @referer is a strict subpath of "/proc".
+ * This function returns -ENAMETOOLONG if @referer doesn't fit in
+ * PATH_MAX bytes, the length of @result if the readlink was emulated,
+ * 0 otherwise (this includes the errors reported by readlink_proc()).
+ *
+ * Unlike readlink(), this function includes the nul terminating byte
+ * to @result (but this byte is not counted in the returned value).
+ */
+ssize_t readlink_proc2(const Tracee *tracee, char result[PATH_MAX], const char referer[PATH_MAX])
+{
+	Action action;
+	char base[PATH_MAX];
+	char *component;
+
+	/* Sanity check. */
+	if (strnlen(referer, PATH_MAX) >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	assert(compare_paths("/proc", referer) == PATH1_IS_PREFIX);
+
+	/* It's safe to use strrchr() here since @referer was
+	 * previously canonicalized. */
+	strcpy(base, referer);
+	component = strrchr(base, '/');
+
+	/* These cases are not possible: @referer is supposed to be a
+	 * canonicalized subpath of "/proc". */
+	assert(component != NULL && component != base);
+
+	/* Split "base/component" in place. */
+	component[0] = '\0';
+	component++;
+	if (component[0] == '\0')
+		return 0;
+
+	/* NOTE(review): PATH1_IS_PREFIX is passed unconditionally
+	 * whereas readlink_proc() asserts that its last argument is
+	 * compare_paths("/proc", base); this doesn't hold for a
+	 * direct child of "/proc" (base == "/proc") -- confirm that
+	 * callers never pass such a @referer. */
+	action = readlink_proc(tracee, result, base, component, PATH1_IS_PREFIX);
+	return (action == CANONICALIZE ? strlen(result) : 0);
+}
diff --git a/proot/proot_linux/path/proc.h b/proot/proot_linux/path/proc.h
new file mode 100644
index 0000000..7081dd7
--- /dev/null
+++ b/proot/proot_linux/path/proc.h
@@ -0,0 +1,44 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef PROC_H +#define PROC_H + +#include + +#include "tracee/tracee.h" +#include "path/path.h" + +/* Action to do after a call to readlink_proc(). */ +typedef enum { + DEFAULT, /* Nothing special to do, treat it as a regular link. */ + CANONICALIZE, /* The symlink was dereferenced, now canonicalize it. */ + DONT_CANONICALIZE, /* The symlink shouldn't be dereferenced nor canonicalized. 
*/
+} Action;
+
+
+extern Action readlink_proc(const Tracee *tracee, char result[PATH_MAX], const char path[PATH_MAX],
+			const char component[NAME_MAX], Comparison comparison);
+
+extern ssize_t readlink_proc2(const Tracee *tracee, char result[PATH_MAX], const char path[PATH_MAX]);
+
+#endif /* PROC_H */
diff --git a/proot/proot_linux/path/temp.c b/proot/proot_linux/path/temp.c
new file mode 100644
index 0000000..998024f
--- /dev/null
+++ b/proot/proot_linux/path/temp.c
@@ -0,0 +1,393 @@
+#include <sys/types.h> /* stat(2), opendir(3), */
+#include <sys/stat.h>  /* stat(2), chmod(2), */
+#include <unistd.h>    /* stat(2), rmdir(2), unlink(2), readlink(2), */
+#include <errno.h>     /* errno(2), */
+#include <dirent.h>    /* readdir(3), opendir(3), */
+#include <string.h>    /* strcmp(3), */
+#include <stdlib.h>    /* free(3), getenv(3), */
+#include <stdio.h>     /* P_tmpdir, */
+#include <talloc.h>    /* talloc(3), */
+
+#include "cli/note.h"
+
+/**
+ * Return the path to a directory where temporary files should be
+ * created: the canonicalized value of $PROOT_TMP_DIR when it is set
+ * and can be resolved, P_tmpdir otherwise. The answer is computed
+ * once, then cached for the subsequent calls.
+ */
+const char *get_temp_directory()
+{
+	static const char *temp_directory = NULL;
+	char *tmp;
+
+	if (temp_directory != NULL)
+		return temp_directory;
+
+	temp_directory = getenv("PROOT_TMP_DIR");
+	if (temp_directory == NULL) {
+		temp_directory = P_tmpdir;
+		return temp_directory;
+	}
+
+	tmp = realpath(temp_directory, NULL);
+	if (tmp == NULL) {
+		note(NULL, WARNING, SYSTEM,
+			"can't canonicalize %s, using %s instead of PROOT_TMP_DIR",
+			temp_directory, P_tmpdir);
+
+		temp_directory = P_tmpdir;
+		return temp_directory;
+	}
+
+	/* Prefer a talloc'ed copy attached to the autofree context;
+	 * if that allocation fails, keep the malloc'ed string
+	 * returned by realpath(3) instead (it is then never freed). */
+	temp_directory = talloc_strdup(talloc_autofree_context(), tmp);
+	if (temp_directory == NULL)
+		temp_directory = tmp;
+	else
+		free(tmp);
+
+	return temp_directory;
+}
+
+/**
+ * Handle the return of d_type = DT_UNKNOWN by readdir(3)
+ * Not all filesystems support returning d_type in readdir(3)
+ *
+ * The type of @de is then guessed with lstat(2), relatively to the
+ * current working directory. DT_UNKNOWN is returned when @de is NULL,
+ * when lstat(2) fails, or when the entry is neither a regular file,
+ * a directory nor a symbolic link.
+ */
+static int get_dtype(struct dirent *de)
+{
+	struct stat st;
+	int dtype;
+
+	/* Don't dereference a NULL entry in the fallback below. */
+	if (de == NULL)
+		return DT_UNKNOWN;
+
+	dtype = de->d_type;
+	if (dtype != DT_UNKNOWN)
+		return dtype;
+	if (lstat(de->d_name, &st))
+		return dtype;
+	if (S_ISREG(st.st_mode))
+		return DT_REG;
+	if (S_ISDIR(st.st_mode))
+		return DT_DIR;
+	if (S_ISLNK(st.st_mode))
+		return DT_LNK;
+	return dtype;
+}
+
+/**
+ * Remove recursively the content of the current working directory.
+ * This latter has to lie in temp_directory (ie. "/tmp" on most
+ * systems). This function returns -1 if a fatal error occurred
+ * (ie. the recursion must be stopped), the number of non-fatal errors
+ * otherwise.
+ *
+ * WARNING: this function changes the current working directory for
+ * the calling process.
+ */
+static int clean_temp_cwd()
+{
+	const char *temp_directory = get_temp_directory();
+	const size_t length_temp_directory = strlen(temp_directory);
+	char *prefix = NULL;
+	int nb_errors = 0;
+	DIR *dir = NULL;
+	int status;
+
+	prefix = talloc_size(NULL, length_temp_directory + 1);
+	if (prefix == NULL) {
+		note(NULL, WARNING, INTERNAL, "can't allocate memory");
+		nb_errors++;
+		goto end;
+	}
+
+	/* Sanity check: ensure the current directory lies in
+	 * "/tmp". Only the first bytes of the current directory are
+	 * read, as many as the length of the temporary directory. */
+	status = readlink("/proc/self/cwd", prefix, length_temp_directory);
+	if (status < 0) {
+		note(NULL, WARNING, SYSTEM, "can't readlink '/proc/self/cwd'");
+		nb_errors++;
+		goto end;
+	}
+	prefix[status] = '\0';
+
+	if (strncmp(prefix, temp_directory, length_temp_directory) != 0) {
+		note(NULL, ERROR, INTERNAL,
+			"trying to remove a directory outside of '%s', "
+			"please report this error.\n", temp_directory);
+		nb_errors++;
+		goto end;
+	}
+
+	dir = opendir(".");
+	if (dir == NULL) {
+		note(NULL, WARNING, SYSTEM, "can't open '.'");
+		nb_errors++;
+		goto end;
+	}
+
+	while (1) {
+		struct dirent *entry;
+		int dtype;
+
+		/* readdir(3) returns NULL both at the end of the
+		 * directory and on error, errno tells them apart
+		 * after the loop. */
+		errno = 0;
+		entry = readdir(dir);
+		if (entry == NULL)
+			break;
+
+		if (   strcmp(entry->d_name, ".") == 0
+		    || strcmp(entry->d_name, "..") == 0)
+			continue;
+
+		dtype = get_dtype(entry);
+
+		/* chmod(2) follows symbolic links: it would fail on a
+		 * dangling link (which then could never be removed)
+		 * and it would change the mode of whatever a valid
+		 * link points to, possibly outside of the temporary
+		 * directory. A link is removed as is. */
+		if (dtype != DT_LNK) {
+			status = chmod(entry->d_name, 0700);
+			if (status < 0) {
+				note(NULL, WARNING, SYSTEM, "cant chmod '%s'", entry->d_name);
+				nb_errors++;
+				continue;
+			}
+		}
+
+		if (dtype == DT_DIR) {
+			status = chdir(entry->d_name);
+			if (status < 0) {
+				note(NULL, WARNING, SYSTEM, "can't chdir '%s'", entry->d_name);
+				nb_errors++;
+				continue;
+			}
+
+			/* Recurse. */
+			status = clean_temp_cwd();
+			if (status < 0) {
+				nb_errors = -1;
+				goto end;
+			}
+			nb_errors += status;
+
+			/* Not being able to come back is fatal: the
+			 * remaining entries would be looked up in
+			 * the wrong directory. */
+			status = chdir("..");
+			if (status < 0) {
+				note(NULL, ERROR, SYSTEM, "can't chdir to '..'");
+				nb_errors = -1;
+				goto end;
+			}
+
+			status = rmdir(entry->d_name);
+		}
+		else {
+			status = unlink(entry->d_name);
+		}
+		if (status < 0) {
+			note(NULL, WARNING, SYSTEM, "can't remove '%s'", entry->d_name);
+			nb_errors++;
+			continue;
+		}
+	}
+	if (errno != 0) {
+		note(NULL, WARNING, SYSTEM, "can't readdir '.'");
+		nb_errors++;
+	}
+
+end:
+	TALLOC_FREE(prefix);
+
+	if (dir != NULL)
+		(void) closedir(dir);
+
+	return nb_errors;
+}
+
+/**
+ * Remove recursively @path. This latter has to be a directory lying
+ * in temp_directory (ie. "/tmp" on most systems). This function
+ * returns -1 on error, otherwise 0.
+ */ +static int remove_temp_directory2(const char *path) +{ + int result; + int status; + char *cwd; + +#ifdef __ANDROID__ + cwd = malloc(PATH_MAX); + getcwd(cwd, PATH_MAX); +#else + cwd = get_current_dir_name(); +#endif + + status = chmod(path, 0700); + if (status < 0) { + note(NULL, ERROR, SYSTEM, "can't chmod '%s'", path); + result = -1; + goto end; + } + + status = chdir(path); + if (status < 0) { + note(NULL, ERROR, SYSTEM, "can't chdir to '%s'", path); + result = -1; + goto end; + } + + status = clean_temp_cwd(); + result = (status == 0 ? 0 : -1); + + /* Try to remove path even if something went wrong. */ + status = chdir(".."); + if (status < 0) { + note(NULL, ERROR, SYSTEM, "can't chdir to '..'"); + result = -1; + goto end; + } + + status = rmdir(path); + if (status < 0) { + note(NULL, ERROR, SYSTEM, "cant remove '%s'", path); + result = -1; + goto end; + } + +end: + if (cwd != NULL) { + status = chdir(cwd); + if (status < 0) { + result = -1; + note(NULL, ERROR, SYSTEM, "can't chdir to '%s'", cwd); + } + free(cwd); + } + + return result; +} + +/** + * Like remove_temp_directory2() but always return 0. + * + * Note: this is a talloc destructor. + */ +static int remove_temp_directory(char *path) +{ + (void) remove_temp_directory2(path); + return 0; +} + +/** + * Remove the file @path. This function always returns 0. + * + * Note: this is a talloc destructor. + */ +static int remove_temp_file(char *path) +{ + int status; + + status = unlink(path); + if (status < 0) + note(NULL, ERROR, SYSTEM, "can't remove '%s'", path); + + return 0; +} + +/** + * Create a path name with the following format: + * "/tmp/@prefix-$PID-XXXXXX". The returned C string is either + * auto-freed if @context is NULL. This function returns NULL if an + * error occurred. 
+ */ +char *create_temp_name(TALLOC_CTX *context, const char *prefix) +{ + const char *temp_directory = get_temp_directory(); + char *name; + + if (context == NULL) + context = talloc_autofree_context(); + + name = talloc_asprintf(context, "%s/%s-%d-XXXXXX", temp_directory, prefix, getpid()); + if (name == NULL) { + note(NULL, ERROR, INTERNAL, "can't allocate memory"); + return NULL; + } + + return name; +} + +/** + * Create a directory that will be automatically removed either on + * PRoot termination if @context is NULL, or once its path name + * (attached to @context) is freed. This function returns NULL on + * error, otherwise the absolute path name to the created directory + * (@prefix-ed). + */ +const char *create_temp_directory(TALLOC_CTX *context, const char *prefix) +{ + char *name; + + name = create_temp_name(context, prefix); + if (name == NULL) + return NULL; + + name = mkdtemp(name); + if (name == NULL) { + note(NULL, ERROR, SYSTEM, "can't create temporary directory"); + note(NULL, INFO, USER, "Please set PROOT_TMP_DIR env. variable " + "to an alternate location (with write permission)."); + return NULL; + } + + talloc_set_destructor(name, remove_temp_directory); + + return name; +} + +/** + * Create a file that will be automatically removed either on PRoot + * termination if @context is NULL, or once its path name (attached to + * @context) is freed. This function returns NULL on error, + * otherwise the absolute path name to the created file (@prefix-ed). + */ +const char *create_temp_file(TALLOC_CTX *context, const char *prefix) +{ + char *name; + int fd; + + name = create_temp_name(context, prefix); + if (name == NULL) + return NULL; + + fd = mkstemp(name); + if (fd < 0) { + note(NULL, ERROR, SYSTEM, "can't create temporary file"); + note(NULL, INFO, USER, "Please set PROOT_TMP_DIR env. 
variable " + "to an alternate location (with write permission)."); + return NULL; + } + close(fd); + + talloc_set_destructor(name, remove_temp_file); + + return name; +} + +/** + * Like create_temp_file() but returns an open file stream to the + * created file. It's up to the caller to close returned stream. + */ +FILE* open_temp_file(TALLOC_CTX *context, const char *prefix) +{ + char *name; + FILE *file; + int fd; + + name = create_temp_name(context, prefix); + if (name == NULL) + return NULL; + + fd = mkstemp(name); + if (fd < 0) + goto error; + + talloc_set_destructor(name, remove_temp_file); + + file = fdopen(fd, "w"); + if (file == NULL) + goto error; + + return file; + +error: + if (fd >= 0) + close(fd); + note(NULL, ERROR, SYSTEM, "can't create temporary file"); + note(NULL, INFO, USER, "Please set PROOT_TMP_DIR env. variable " + "to an alternate location (with write permission)."); + return NULL; +} diff --git a/proot/proot_linux/path/temp.h b/proot/proot_linux/path/temp.h new file mode 100644 index 0000000..27fca9f --- /dev/null +++ b/proot/proot_linux/path/temp.h @@ -0,0 +1,34 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef TEMP_H +#define TEMP_H + +#include + +extern char *create_temp_name(TALLOC_CTX *context, const char *prefix); +extern const char *create_temp_directory(TALLOC_CTX *context, const char *prefix); +extern const char *create_temp_file(TALLOC_CTX *context, const char *prefix); +extern FILE* open_temp_file(TALLOC_CTX *context, const char *prefix); +extern const char *get_temp_directory(); + +#endif /* TEMP_H */ diff --git a/proot/proot_linux/proot.go b/proot/proot_linux/proot.go new file mode 100644 index 0000000..10bf6f1 --- /dev/null +++ b/proot/proot_linux/proot.go @@ -0,0 +1,128 @@ +package prootlinux + +/* +#cgo CPPFLAGS: -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -I. +#cgo CFLAGS: -I./ -Wall -Wextra -O2 +#cgo LDFLAGS: -ltalloc -Wl,-z,noexecstack + +#ifndef CGD +#define CGD +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__GLIBC__) +#include +#endif + +// #include "cli/cli.c" +#include "cli/note.c" +// #include "cli/proot.c" +#include "execve/aoxp.c" +#include "execve/auxv.c" +#include "execve/elf.c" +#include "execve/enter.c" +#include "execve/exit.c" +#include "execve/ldso.c" +#include "execve/shebang.c" +#include "loader/loader.c" +#include "path/binding.c" +#include "path/canon.c" +#include "path/glue.c" +#include "path/path.c" +#include "path/proc.c" +#include "path/temp.c" +#include "ptrace/user.c" +#include "ptrace/ptrace.c" +#include "ptrace/wait.c" +#include "syscall/chain.c" +#include "syscall/enter.c" +#include "syscall/exit.c" +#include "syscall/heap.c" +#include "syscall/rlimit.c" +#include "syscall/seccomp.c" +#include "syscall/socket.c" +#include "syscall/syscall.c" +#include "syscall/sysnum.c" +#include "tracee/mem.c" +#include "tracee/event.c" +#include "tracee/reg.c" +#include "tracee/tracee.c" +// #include "extension/fake_id0/fake_id0.c" +// #include "extension/kompat/kompat.c" +// #include "extension/link2symlink/link2symlink.c" +// #include 
"extension/portmap/map.c" +// #include "extension/portmap/portmap.c" +// #include "extension/extension.c" + +#include "attribute.h" + +#endif +*/ +import "C" + +import ( + "fmt" + "unsafe" +) + +type Tracee struct{ Pointer unsafe.Pointer } + +func NewCli() (*Tracee, error) { + // Pre-create the first tracee (pid == 0). + tracee := C.get_tracee(nil, 0, true) + if tracee == nil { + return nil, fmt.Errorf("cannot alloc new tarcee (pid == 0)") + } + tracee.verbose = 3 + + return &Tracee{Pointer: unsafe.Pointer(tracee)}, nil +} + +func (tracee *Tracee) SetPID(pid int) { + n := (*C.Tracee)(tracee.Pointer) + n.pid = C.pid_t(pid) +} + +func (tracee *Tracee) SetRootfs(chroot string) bool { + n := (*C.Tracee)(tracee.Pointer) + b := C.CString(chroot) + defer C.free(unsafe.Pointer(b)) + + d := C.CString("/") + defer C.free(unsafe.Pointer(d)) + + // new_binding(tracee, value, "/", true) + return C.new_binding(n, b, d, true) != nil +} + +func (tracee *Tracee) SetMountBind(host, guest string) bool { + n := (*C.Tracee)(tracee.Pointer) + b := C.CString(host) + defer C.free(unsafe.Pointer(b)) + + d := C.CString(guest) + defer C.free(unsafe.Pointer(d)) + + // new_binding(tracee, value, "/", true) + return C.new_binding(n, b, d, true) != nil +} + +func (tracee *Tracee) SetWorkdir(workdir string) bool { + n := (*C.Tracee)(tracee.Pointer) + + n.fs.cwd = C.CString(workdir) + return true +} + +func (tracee *Tracee) LoopEvent() { + C.event_loop() +} diff --git a/proot/proot_linux/ptrace/ptrace.c b/proot/proot_linux/ptrace/ptrace.c new file mode 100644 index 0000000..8085b31 --- /dev/null +++ b/proot/proot_linux/ptrace/ptrace.c @@ -0,0 +1,670 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* PTRACE_*, */ +#include /* E*, */ +#include /* assert(3), */ +#include /* bool, true, false, */ +#include /* siginfo_t, */ +#include /* struct iovec, */ +#include /* MIN(), MAX(), */ +#include /* memcpy(3), */ + +#include "ptrace/ptrace.h" +#include "ptrace/user.h" +#include "tracee/tracee.h" +#include "syscall/sysnum.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include "tracee/abi.h" +#include "tracee/event.h" +#include "cli/note.h" +#include "arch.h" + +#include "compat.h" + +#if defined(ARCH_X86_64) || defined(ARCH_X86) +#include /* struct user_desc, */ +#endif + +#if defined(ARCH_X86_64) +#include /* ARCH_{G,S}ET_{F,G}S, */ +#endif + +#if defined(ARCH_ARM_EABI) +#define user_fpregs_struct user_fpregs +#endif + +#if defined(ARCH_ARM64) +#define user_fpregs_struct user_fpsimd_struct +#endif + +static const char *stringify_ptrace(PTRACE_REQUEST_TYPE request) +{ +#define CASE_STR(a) case a: return #a; break; + switch ((int) request) { + CASE_STR(PTRACE_TRACEME) CASE_STR(PTRACE_PEEKTEXT) CASE_STR(PTRACE_PEEKDATA) + CASE_STR(PTRACE_PEEKUSER) CASE_STR(PTRACE_POKETEXT) CASE_STR(PTRACE_POKEDATA) + CASE_STR(PTRACE_POKEUSER) CASE_STR(PTRACE_CONT) CASE_STR(PTRACE_KILL) + 
CASE_STR(PTRACE_SINGLESTEP) CASE_STR(PTRACE_GETREGS) CASE_STR(PTRACE_SETREGS) + CASE_STR(PTRACE_GETFPREGS) CASE_STR(PTRACE_SETFPREGS) CASE_STR(PTRACE_ATTACH) + CASE_STR(PTRACE_DETACH) CASE_STR(PTRACE_GETFPXREGS) CASE_STR(PTRACE_SETFPXREGS) + CASE_STR(PTRACE_SYSCALL) CASE_STR(PTRACE_SETOPTIONS) CASE_STR(PTRACE_GETEVENTMSG) + CASE_STR(PTRACE_GETSIGINFO) CASE_STR(PTRACE_SETSIGINFO) CASE_STR(PTRACE_GETREGSET) + CASE_STR(PTRACE_SETREGSET) CASE_STR(PTRACE_SEIZE) CASE_STR(PTRACE_INTERRUPT) + CASE_STR(PTRACE_LISTEN) CASE_STR(PTRACE_SET_SYSCALL) + CASE_STR(PTRACE_GET_THREAD_AREA) CASE_STR(PTRACE_SET_THREAD_AREA) + CASE_STR(PTRACE_GETVFPREGS) CASE_STR(PTRACE_SINGLEBLOCK) CASE_STR(PTRACE_ARCH_PRCTL) + default: return "PTRACE_???"; } +} + +/** + * Translate the ptrace syscall made by @tracee into a "void" syscall + * in order to emulate the ptrace mechanism within PRoot. This + * function returns -errno if an error occured (unsupported request), + * otherwise 0. + */ +int translate_ptrace_enter(Tracee *tracee) +{ + /* The ptrace syscall have to be emulated since it can't be nested. */ + set_sysnum(tracee, PR_void); + return 0; +} + +/** + * Set @ptracee's tracer to @ptracer, and increment ptracees counter + * of this later. + */ +void attach_to_ptracer(Tracee *ptracee, Tracee *ptracer) +{ + bzero(&(PTRACEE), sizeof(PTRACEE)); + PTRACEE.ptracer = ptracer; + + PTRACER.nb_ptracees++; +} + +/** + * Unset @ptracee's tracer, and decrement ptracees counter of this + * later. + */ +void detach_from_ptracer(Tracee *ptracee) +{ + Tracee *ptracer = PTRACEE.ptracer; + + PTRACEE.ptracer = NULL; + + assert(PTRACER.nb_ptracees > 0); + PTRACER.nb_ptracees--; +} + +/** + * Emulate the ptrace syscall made by @tracee. This function returns + * -errno if an error occured (unsupported request), otherwise 0. 
+ */ +int translate_ptrace_exit(Tracee *tracee) +{ + word_t request, pid, address, data, result; + Tracee *ptracee, *ptracer; + int forced_signal = -1; + int signal; + int status; + + /* Read ptrace parameters. */ + request = peek_reg(tracee, ORIGINAL, SYSARG_1); + pid = peek_reg(tracee, ORIGINAL, SYSARG_2); + address = peek_reg(tracee, ORIGINAL, SYSARG_3); + data = peek_reg(tracee, ORIGINAL, SYSARG_4); + + /* Propagate signedness for this special value. */ + if (is_32on64_mode(tracee) && pid == 0xFFFFFFFF) + pid = (word_t) -1; + + /* The TRACEME request is the only one used by a tracee. */ + if (request == PTRACE_TRACEME) { + ptracer = tracee->parent; + ptracee = tracee; + + /* The emulated ptrace in PRoot has the same + * limitation as the real ptrace in the Linux kernel: + * only one tracer per process. */ + if (PTRACEE.ptracer != NULL || ptracee == ptracer) + return -EPERM; + + attach_to_ptracer(ptracee, ptracer); + + /* Detect when the ptracer has gone to wait before the + * ptracee did the ptrace(ATTACHME) request. */ + if (PTRACER.waits_in == WAITS_IN_KERNEL) { + status = kill(ptracer->pid, SIGSTOP); + if (status < 0) + note(tracee, WARNING, INTERNAL, + "can't wake ptracer %d", ptracer->pid); + else { + ptracer->sigstop = SIGSTOP_IGNORED; + PTRACER.waits_in = WAITS_IN_PROOT; + } + } + + /* Disable seccomp acceleration for this tracee and + * all its children since we can't assume what are the + * syscalls its tracer is interested with. */ + if (tracee->seccomp == ENABLED) + tracee->seccomp = DISABLING; + + return 0; + } + + /* The ATTACH, SEIZE, and INTERRUPT requests are the only ones + * where the ptracee is in an unknown state. */ + if (request == PTRACE_ATTACH) { + ptracer = tracee; + ptracee = get_tracee(ptracer, pid, false); + if (ptracee == NULL) + return -ESRCH; + + /* The emulated ptrace in PRoot has the same + * limitation as the real ptrace in the Linux kernel: + * only one tracer per process. 
*/ + if (PTRACEE.ptracer != NULL || ptracee == ptracer) + return -EPERM; + + attach_to_ptracer(ptracee, ptracer); + + /* The tracee is sent a SIGSTOP, but will not + * necessarily have stopped by the completion of this + * call. + * + * -- man 2 ptrace. */ + kill(pid, SIGSTOP); + + return 0; + } + + /* Here, the tracee is a ptracer. Also, the requested ptracee + * has to be in the "stopped for ptracer" state. */ + ptracer = tracee; + ptracee = get_stopped_ptracee(ptracer, pid, false, __WALL); + if (ptracee == NULL) { + static bool warned = false; + + /* Ensure we didn't get there only because inheritance + * mechanism has missed this one. */ + ptracee = get_tracee(tracee, pid, false); + if (ptracee != NULL && ptracee->exe == NULL && !warned) { + warned = true; + note(ptracer, WARNING, INTERNAL, "ptrace request to an unexpected ptracee"); + } + + return -ESRCH; + } + + /* Sanity checks. */ + if ( PTRACEE.is_zombie + || PTRACEE.ptracer != ptracer + || pid == (word_t) -1) + return -ESRCH; + + switch (request) { + case PTRACE_SYSCALL: + PTRACEE.ignore_syscalls = false; + forced_signal = (int) data; + status = 0; + break; /* Restart the ptracee. */ + + case PTRACE_CONT: + PTRACEE.ignore_syscalls = true; + forced_signal = (int) data; + status = 0; + break; /* Restart the ptracee. */ + + case PTRACE_SINGLESTEP: + ptracee->restart_how = PTRACE_SINGLESTEP; + forced_signal = (int) data; + status = 0; + break; /* Restart the ptracee. */ + + case PTRACE_SINGLEBLOCK: + ptracee->restart_how = PTRACE_SINGLEBLOCK; + forced_signal = (int) data; + status = 0; + break; /* Restart the ptracee. */ + + case PTRACE_DETACH: + detach_from_ptracer(ptracee); + status = 0; + break; /* Restart the ptracee. */ + + case PTRACE_KILL: + status = ptrace(request, pid, NULL, NULL); + break; /* Restart the ptracee. 
*/ + + case PTRACE_SETOPTIONS: + if (data & PTRACE_O_TRACESECCOMP) { + /* We don't really support forwarding seccomp traps */ + note(ptracer, WARNING, INTERNAL, + "ptrace option PTRACE_O_TRACESECCOMP " + "not supported yet"); + return -EINVAL; + } + PTRACEE.options = data; + return 0; /* Don't restart the ptracee. */ + + case PTRACE_GETEVENTMSG: { + status = ptrace(request, pid, NULL, &result); + if (status < 0) + return -errno; + + poke_word(ptracer, data, result); + if (errno != 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_PEEKUSER: + if (is_32on64_mode(ptracer)) { + address = convert_user_offset(address); + if (address == (word_t) -1) + return -EIO; + } + /* Fall through. */ + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + errno = 0; + result = (word_t) ptrace(request, pid, address, NULL); + if (errno != 0) + return -errno; + + poke_word(ptracer, data, result); + if (errno != 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + + case PTRACE_POKEUSER: + if (is_32on64_mode(ptracer)) { + address = convert_user_offset(address); + if (address == (word_t) -1) + return -EIO; + } + + status = ptrace(request, pid, address, data); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + if (is_32on64_mode(ptracer)) { + word_t tmp; + + errno = 0; + tmp = (word_t) ptrace(PTRACE_PEEKDATA, ptracee->pid, address, NULL); + if (errno != 0) + return -errno; + + data |= (tmp & 0xFFFFFFFF00000000ULL); + } + + status = ptrace(request, pid, address, data); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + + case PTRACE_GETSIGINFO: { + siginfo_t siginfo; + + status = ptrace(request, pid, NULL, &siginfo); + if (status < 0) + return -errno; + + status = write_data(ptracer, data, &siginfo, sizeof(siginfo)); + if (status < 0) + return status; + + return 0; /* Don't restart the ptracee. 
*/ + } + + case PTRACE_SETSIGINFO: { + siginfo_t siginfo; + + status = read_data(ptracer, &siginfo, data, sizeof(siginfo)); + if (status < 0) + return status; + + status = ptrace(request, pid, NULL, &siginfo); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_GETREGS: { + size_t size; + union { + struct user_regs_struct regs; + uint32_t regs32[USER32_NB_REGS]; + } buffer; + + status = ptrace(request, pid, NULL, &buffer); + if (status < 0) + return -errno; + + if (is_32on64_mode(tracee)) { + struct user_regs_struct regs64; + + memcpy(®s64, &buffer.regs, sizeof(struct user_regs_struct)); + convert_user_regs_struct(false, (uint64_t *) ®s64, buffer.regs32); + + size = sizeof(buffer.regs32); + } + else + size = sizeof(buffer.regs); + + status = write_data(ptracer, data, &buffer, size); + if (status < 0) + return status; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_SETREGS: { + size_t size; + union { + struct user_regs_struct regs; + uint32_t regs32[USER32_NB_REGS]; + } buffer; + + size = (is_32on64_mode(ptracer) + ? sizeof(buffer.regs32) + : sizeof(buffer.regs)); + + status = read_data(ptracer, &buffer, data, size); + if (status < 0) + return status; + + if (is_32on64_mode(ptracer)) { + uint32_t regs32[USER32_NB_REGS]; + + memcpy(regs32, buffer.regs32, sizeof(regs32)); + convert_user_regs_struct(true, (uint64_t *) &buffer.regs, regs32); + } + + status = ptrace(request, pid, NULL, &buffer); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. 
*/ + } + + case PTRACE_GETFPREGS: { + size_t size; + union { + struct user_fpregs_struct fpregs; + uint32_t fpregs32[USER32_NB_FPREGS]; + } buffer; + + status = ptrace(request, pid, NULL, &buffer); + if (status < 0) + return -errno; + + if (is_32on64_mode(tracee)) { +#if 0 /* TODO */ + struct user_fpregs_struct fpregs64; + + memcpy(&fpregs64, &buffer.fpregs, sizeof(struct user_fpregs_struct)); + convert_user_fpregs_struct(false, (uint64_t *) &fpregs64, buffer.fpregs32); +#else + static bool warned = false; + if (!warned) + note(ptracer, WARNING, INTERNAL, + "ptrace 32-bit request '%s' not supported on 64-bit yet", + stringify_ptrace(request)); + warned = true; + bzero(&buffer, sizeof(buffer)); +#endif + size = sizeof(buffer.fpregs32); + } + else + size = sizeof(buffer.fpregs); + + status = write_data(ptracer, data, &buffer, size); + if (status < 0) + return status; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_SETFPREGS: { + size_t size; + union { + struct user_fpregs_struct fpregs; + uint32_t fpregs32[USER32_NB_FPREGS]; + } buffer; + + size = (is_32on64_mode(ptracer) + ? sizeof(buffer.fpregs32) + : sizeof(buffer.fpregs)); + + status = read_data(ptracer, &buffer, data, size); + if (status < 0) + return status; + + if (is_32on64_mode(ptracer)) { +#if 0 /* TODO */ + uint32_t fpregs32[USER32_NB_FPREGS]; + + memcpy(fpregs32, buffer.fpregs32, sizeof(fpregs32)); + convert_user_fpregs_struct(true, (uint64_t *) &buffer.fpregs, fpregs32); +#else + static bool warned = false; + if (!warned) + note(ptracer, WARNING, INTERNAL, + "ptrace 32-bit request '%s' not supported on 64-bit yet", + stringify_ptrace(request)); + warned = true; + return -ENOTSUP; +#endif + } + + status = ptrace(request, pid, NULL, &buffer); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. 
*/ + } + +#if defined(ARCH_X86_64) || defined(ARCH_X86) + case PTRACE_GET_THREAD_AREA: { + struct user_desc user_desc; + + status = ptrace(request, pid, address, &user_desc); + if (status < 0) + return -errno; + + status = write_data(ptracer, data, &user_desc, sizeof(user_desc)); + if (status < 0) + return status; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_SET_THREAD_AREA: { + struct user_desc user_desc; + + status = read_data(ptracer, &user_desc, data, sizeof(user_desc)); + if (status < 0) + return status; + + status = ptrace(request, pid, address, &user_desc); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. */ + } +#endif + + case PTRACE_GETREGSET: { + struct iovec local_iovec; + word_t remote_iovec_base; + word_t remote_iovec_len; + + remote_iovec_base = peek_word(ptracer, data); + if (errno != 0) + return -errno; + + remote_iovec_len = peek_word(ptracer, data + sizeof_word(ptracer)); + if (errno != 0) + return -errno; + + /* Sanity check. */ + assert(sizeof(local_iovec.iov_len) == sizeof(word_t)); + + local_iovec.iov_len = remote_iovec_len; + local_iovec.iov_base = talloc_zero_size(ptracer->ctx, remote_iovec_len); + if (local_iovec.iov_base == NULL) + return -ENOMEM; + + status = ptrace(PTRACE_GETREGSET, pid, address, &local_iovec); + if (status < 0) + return status; + + remote_iovec_len = local_iovec.iov_len = + MIN(remote_iovec_len, local_iovec.iov_len); + + /* Update remote vector content. */ + status = writev_data(ptracer, remote_iovec_base, &local_iovec, 1); + if (status < 0) + return status; + + /* Update remote vector length. */ + poke_word(ptracer, data + sizeof_word(ptracer), remote_iovec_len); + if (errno != 0) + return -errno; + + return 0; /* Don't restart the ptracee. 
*/ + } + + case PTRACE_SETREGSET: { + struct iovec local_iovec; + word_t remote_iovec_base; + word_t remote_iovec_len; + + remote_iovec_base = peek_word(ptracer, data); + if (errno != 0) + return -errno; + + remote_iovec_len = peek_word(ptracer, data + sizeof_word(ptracer)); + if (errno != 0) + return -errno; + + /* Sanity check. */ + assert(sizeof(local_iovec.iov_len) == sizeof(word_t)); + + local_iovec.iov_len = remote_iovec_len; + local_iovec.iov_base = talloc_zero_size(ptracer->ctx, remote_iovec_len); + if (local_iovec.iov_base == NULL) + return -ENOMEM; + + /* Copy remote content into the local vector. */ + status = read_data(ptracer, local_iovec.iov_base, + remote_iovec_base, local_iovec.iov_len); + if (status < 0) + return status; + + status = ptrace(PTRACE_SETREGSET, pid, address, &local_iovec); + if (status < 0) + return status; + + return 0; /* Don't restart the ptracee. */ + } + + case PTRACE_GETVFPREGS: + case PTRACE_GETFPXREGS: { + static bool warned = false; + if (!warned) + note(ptracer, WARNING, INTERNAL, "ptrace request '%s' not supported yet", + stringify_ptrace(request)); + warned = true; + return -ENOTSUP; + } + +#if defined(ARCH_X86_64) + case PTRACE_ARCH_PRCTL: + switch (data) { + case ARCH_GET_GS: + case ARCH_GET_FS: + status = ptrace(request, pid, &result, data); + if (status < 0) + return -errno; + + poke_word(ptracer, address, result); + if (errno != 0) + return -errno; + break; + + case ARCH_SET_GS: + case ARCH_SET_FS: { + static bool warned = false; + if (!warned) + note(ptracer, WARNING, INTERNAL, + "ptrace request '%s' ARCH_SET_{G,F}S not supported yet", + stringify_ptrace(request)); + return -ENOTSUP; + } + + default: + return -ENOTSUP; + } + + return 0; /* Don't restart the ptracee. */ +#endif + + case PTRACE_SET_SYSCALL: + status = ptrace(request, pid, address, data); + if (status < 0) + return -errno; + + return 0; /* Don't restart the ptracee. 
*/ + + default: + note(ptracer, WARNING, INTERNAL, "ptrace request '%s' not supported yet", + stringify_ptrace(request)); + return -ENOTSUP; + } + + /* Now, the initial tracee's event can be handled. */ + signal = PTRACEE.event4.proot.pending + ? handle_tracee_event(ptracee, PTRACEE.event4.proot.value) + : PTRACEE.event4.proot.value; + + /* The restarting signal from the ptracer overrides the + * restarting signal from PRoot. */ + if (forced_signal != -1) + signal = forced_signal; + + (void) restart_tracee(ptracee, signal); + + return status; +} diff --git a/proot/proot_linux/ptrace/ptrace.h b/proot/proot_linux/ptrace/ptrace.h new file mode 100644 index 0000000..9083ea9 --- /dev/null +++ b/proot/proot_linux/ptrace/ptrace.h @@ -0,0 +1,36 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef PTRACE_H +#define PTRACE_H + +#include "tracee/tracee.h" + +extern int translate_ptrace_enter(Tracee *tracee); +extern int translate_ptrace_exit(Tracee *tracee); +extern void attach_to_ptracer(Tracee *ptracee, Tracee *ptracer); +extern void detach_from_ptracer(Tracee *ptracee); + +#define PTRACEE (ptracee->as_ptracee) +#define PTRACER (ptracer->as_ptracer) + +#endif /* PTRACE_H */ diff --git a/proot/proot_linux/ptrace/user.c b/proot/proot_linux/ptrace/user.c new file mode 100644 index 0000000..669dbe6 --- /dev/null +++ b/proot/proot_linux/ptrace/user.c @@ -0,0 +1,166 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include + +#include "ptrace/user.h" +#include "cli/note.h" + +#if defined(ARCH_X86_64) + +/** + * Return the index in the "regs" field of a 64-bit "user" area that + * corresponds to the specified @index in the "regs" field of a 32-bit + * "user" area. 
+ */
+static inline size_t convert_user_regs_index(size_t index)
+{
+	/* One entry per 32-bit register, in 32-bit "user" order; each
+	 * value is the matching index in the 64-bit "regs" field.  The
+	 * table is never written, hence "const".  Values are spelled in
+	 * decimal: a leading zero would turn them into octal literals.  */
+	static const size_t mapping[USER32_NB_REGS] = {
+		5,	/* ?bx */
+		11,	/* ?cx */
+		12,	/* ?dx */
+		13,	/* ?si */
+		14,	/* ?di */
+		4,	/* ?bp */
+		10,	/* ?ax */
+		23,	/* ds */
+		24,	/* es */
+		25,	/* fs */
+		26,	/* gs */
+		15,	/* orig_?ax */
+		16,	/* ?ip */
+		17,	/* cs */
+		18,	/* eflags */
+		19,	/* ?sp */
+		20,	/* ss */
+	};
+
+	/* Sanity check: callers must pass an index inside the table.  */
+	assert(index < USER32_NB_REGS);
+
+	return mapping[index];
+}
+
+/* Layout of a 32-bit "user" area: each offset is derived from the
+ * previous field's offset plus that field's size.  */
+#define USER32_REGS_OFFSET	0
+#define USER32_REGS_SIZE	(USER32_NB_REGS * sizeof(uint32_t))
+#define USER32_FPVALID_OFFSET	(USER32_REGS_OFFSET + USER32_REGS_SIZE)
+#define USER32_I387_OFFSET	(USER32_FPVALID_OFFSET + sizeof(uint32_t))
+#define USER32_I387_SIZE	(USER32_NB_FPREGS * sizeof(uint32_t))
+#define USER32_TSIZE_OFFSET	(USER32_I387_OFFSET + USER32_I387_SIZE)
+#define USER32_DSIZE_OFFSET	(USER32_TSIZE_OFFSET + sizeof(uint32_t))
+#define USER32_SSIZE_OFFSET	(USER32_DSIZE_OFFSET + sizeof(uint32_t))
+#define USER32_START_CODE_OFFSET	(USER32_SSIZE_OFFSET + sizeof(uint32_t))
+#define USER32_START_STACK_OFFSET	(USER32_START_CODE_OFFSET + sizeof(uint32_t))
+#define USER32_SIGNAL_OFFSET	(USER32_START_STACK_OFFSET + sizeof(uint32_t))
+#define USER32_RESERVED_OFFSET	(USER32_SIGNAL_OFFSET + sizeof(uint32_t))
+#define USER32_AR0_OFFSET	(USER32_RESERVED_OFFSET + sizeof(uint32_t))
+#define USER32_FPSTATE_OFFSET	(USER32_AR0_OFFSET + sizeof(uint32_t))
+#define USER32_MAGIC_OFFSET	(USER32_FPSTATE_OFFSET + sizeof(uint32_t))
+#define USER32_COMM_OFFSET	(USER32_MAGIC_OFFSET + sizeof(uint32_t))
+#define USER32_COMM_SIZE	(32 * sizeof(uint8_t))
+#define USER32_DEBUGREG_OFFSET	(USER32_COMM_OFFSET + USER32_COMM_SIZE)
+#define USER32_DEBUGREG_SIZE	(8 * sizeof(uint32_t))
+
+/**
+ * Return the offset in the "debugreg" field of a 64-bit "user" area
+ * that corresponds to the specified @offset in the "debugreg" field
+ * of a 32-bit "user" area.
+ */
+static inline size_t convert_user_debugreg_offset(size_t offset)
+{
+	size_t index;
+
+	/* Sanity check: @offset must lie inside the 32-bit "debugreg"
+	 * field.  */
+	assert(offset >= USER32_DEBUGREG_OFFSET
+	       && offset < USER32_DEBUGREG_OFFSET + USER32_DEBUGREG_SIZE);
+
+	/* Slots are uint32_t wide on the 32-bit side and uint64_t wide
+	 * on the 64-bit side, so convert through the slot index.  */
+	index = (offset - USER32_DEBUGREG_OFFSET) / sizeof(uint32_t);
+	return offsetof(struct user, u_debugreg) + index * sizeof(uint64_t);
+}
+
+/**
+ * Return the offset in a 64-bit "user" area that corresponds to the
+ * specified @offset in a 32-bit "user" area.  This function returns
+ * "(word_t) -1" if the specified @offset is invalid.
+ *
+ * Only the "regs" and "debugreg" fields are converted.  Every other
+ * recognized field, and any offset that matches no field, is reported
+ * with a warning and yields "(word_t) -1".
+ */
+word_t convert_user_offset(word_t offset)
+{
+	const char *area_name = NULL;
+
+	if (/* offset >= 0 && */ offset < USER32_REGS_OFFSET + USER32_REGS_SIZE) {
+		/* Sanity checks: registers can only be accessed on a
+		 * 32-bit boundary; this rejection is silent.  */
+		if ((offset % sizeof(uint32_t)) != 0)
+			return (word_t) -1;
+
+		return convert_user_regs_index(offset / sizeof(uint32_t)) * sizeof(uint64_t);
+	}
+	else if (offset == USER32_FPVALID_OFFSET)
+		area_name = "fpvalid"; /* Not yet supported.  */
+	else if (offset >= USER32_I387_OFFSET && offset < USER32_I387_OFFSET + USER32_I387_SIZE)
+		area_name = "i387"; /* Not yet supported.  */
+	else if (offset == USER32_TSIZE_OFFSET)
+		area_name = "tsize"; /* Not yet supported.  */
+	else if (offset == USER32_DSIZE_OFFSET)
+		area_name = "dsize"; /* Not yet supported.  */
+	else if (offset == USER32_SSIZE_OFFSET)
+		area_name = "ssize"; /* Not yet supported.  */
+	else if (offset == USER32_START_CODE_OFFSET)
+		area_name = "start_code"; /* Not yet supported.  */
+	else if (offset == USER32_START_STACK_OFFSET)
+		area_name = "start_stack"; /* Not yet supported.  */
+	else if (offset == USER32_SIGNAL_OFFSET)
+		area_name = "signal"; /* Not yet supported.  */
+	else if (offset == USER32_RESERVED_OFFSET)
+		area_name = "reserved"; /* Not yet supported.  */
+	else if (offset == USER32_AR0_OFFSET)
+		area_name = "ar0"; /* Not yet supported.  */
+	else if (offset == USER32_FPSTATE_OFFSET)
+		area_name = "fpstate"; /* Not yet supported.  */
+	else if (offset == USER32_MAGIC_OFFSET)
+		area_name = "magic"; /* Not yet supported.  */
+	else if (offset >= USER32_COMM_OFFSET && offset < USER32_COMM_OFFSET + USER32_COMM_SIZE)
+		area_name = "comm"; /* Not yet supported.  */
+	else if (offset >= USER32_DEBUGREG_OFFSET && offset < USER32_DEBUGREG_OFFSET + USER32_DEBUGREG_SIZE)
+		return convert_user_debugreg_offset(offset);
+	else
+		/* No field matches: give the warning below a visible
+		 * label instead of an empty pair of quotes.  */
+		area_name = "<unknown>";
+
+	note(NULL, WARNING, INTERNAL, "ptrace user area '%s' not supported yet", area_name);
+	return (word_t) -1; /* Unknown offset.  */
+}
+
+/**
+ * Convert the "regs" field from a 64-bit "user" area into a "regs"
+ * field from a 32-bit "user" area, or vice versa according to
+ * @reverse.
+ *
+ * When @reverse is false, each 64-bit value is truncated to 32 bits
+ * and stored in @user_regs32.  When it is true, each 32-bit value is
+ * zero-extended and stored in @user_regs64; 64-bit slots that have no
+ * 32-bit counterpart are left untouched.
+ */
+void convert_user_regs_struct(bool reverse, uint64_t *user_regs64,
+			uint32_t user_regs32[USER32_NB_REGS])
+{
+	size_t index32;
+
+	for (index32 = 0; index32 < USER32_NB_REGS; index32++) {
+		size_t index64 = convert_user_regs_index(index32);
+		assert(index64 != (size_t) -1);
+
+		if (reverse)
+			user_regs64[index64] = (uint64_t) user_regs32[index32];
+		else
+			user_regs32[index32] = (uint32_t) user_regs64[index64];
+	}
+}
+
+#endif /* ARCH_X86_64 */
diff --git a/proot/proot_linux/ptrace/user.h b/proot/proot_linux/ptrace/user.h
new file mode 100644
index 0000000..474b87a
--- /dev/null
+++ b/proot/proot_linux/ptrace/user.h
@@ -0,0 +1,56 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#ifndef PTRACE_USER_H
+#define PTRACE_USER_H
+
+#include <stdint.h>	/* uint32_t, uint64_t, */
+#include <stdbool.h>	/* bool, */
+#include <assert.h>	/* assert(3), */
+
+#include "arch.h"
+#include "attribute.h"
+
+#if defined(ARCH_X86_64)
+
+/* Number of 32-bit words in the "regs" and "i387" fields of a 32-bit
+ * "user" area.  */
+#define USER32_NB_REGS   17
+#define USER32_NB_FPREGS 27
+
+extern word_t convert_user_offset(word_t offset);
+extern void convert_user_regs_struct(bool reverse, uint64_t *user_regs64,
+				uint32_t user_regs32[USER32_NB_REGS]);
+
+#else
+
+/* No 32-bit "user" area is emulated when ARCH_X86_64 is not defined:
+ * the functions below are stubs that must never be called.  */
+#define USER32_NB_REGS   0
+#define USER32_NB_FPREGS 0
+
+/**
+ * Stub: abort in debug builds.  When assert(3) is compiled out
+ * (NDEBUG), return "(word_t) -1", the same "invalid offset" value the
+ * x86_64 implementation uses, rather than falling off the end of a
+ * non-void function.
+ */
+static inline word_t convert_user_offset(word_t offset UNUSED)
+{
+	assert(0);
+	return (word_t) -1;
+}
+
+/**
+ * Stub: abort in debug builds, do nothing otherwise.
+ */
+static inline void convert_user_regs_struct(bool reverse UNUSED,
+					uint64_t *user_regs64 UNUSED,
+					uint32_t user_regs32[USER32_NB_REGS] UNUSED)
+{
+	assert(0);
+}
+
+#endif
+
+#endif /* PTRACE_USER_H */
diff --git a/proot/proot_linux/ptrace/wait.c b/proot/proot_linux/ptrace/wait.c
new file mode 100644
index 0000000..4875315
--- /dev/null
+++ b/proot/proot_linux/ptrace/wait.c
@@ -0,0 +1,361 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <sys/ptrace.h>	/* PTRACE_*, */
+#include <errno.h>	/* E*, */
+#include <assert.h>	/* assert(3), */
+#include <stdbool.h>	/* bool, true, false, */
+#include <signal.h>	/* SIG*, */
+#include <talloc.h>	/* talloc*, */
+
+#include "ptrace/wait.h"
+#include "ptrace/ptrace.h"
+#include "syscall/sysnum.h"
+#include "syscall/chain.h"
+#include "tracee/tracee.h"
+#include "tracee/event.h"
+#include "tracee/reg.h"
+#include "tracee/mem.h"
+
+#include "attribute.h"
+
+/**
+ * Return a static, human-readable description of the wait status
+ * @event.  Debugging helper: it is declared UNUSED so the compiler
+ * does not warn when nothing calls it.
+ */
+static const char *stringify_event(int event) UNUSED;
+static const char *stringify_event(int event)
+{
+	if (WIFEXITED(event))
+		return "exited";
+	else if (WIFSIGNALED(event))
+		return "signaled";
+	else if (WIFCONTINUED(event))
+		return "continued";
+	else if (WIFSTOPPED(event)) {
+		/* Once shifted, the low byte holds the stop signal and
+		 * the bits above it hold the PTRACE_EVENT_* code, if
+		 * any.  */
+		switch ((event & 0xfff00) >> 8) {
+		case SIGTRAP:
+			return "stopped: SIGTRAP";
+		case SIGTRAP | 0x80:
+			return "stopped: SIGTRAP: 0x80";
+		case SIGTRAP | PTRACE_EVENT_VFORK << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_VFORK";
+		case SIGTRAP | PTRACE_EVENT_FORK << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_FORK";
+		case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_VFORK_DONE";
+		case SIGTRAP | PTRACE_EVENT_CLONE << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_CLONE";
+		case SIGTRAP | PTRACE_EVENT_EXEC << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_EXEC";
+		case SIGTRAP | PTRACE_EVENT_EXIT << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_EXIT";
+		case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_SECCOMP2";
+		case SIGTRAP | PTRACE_EVENT_SECCOMP << 8:
+			return "stopped: SIGTRAP: PTRACE_EVENT_SECCOMP";
+		case SIGSTOP:
+			return "stopped: SIGSTOP";
+		default:
+			return "stopped: unknown";
+		}
+	}
+	return "unknown";
+}
+
+/**
+ * Translate the wait syscall made by @ptracer into a "void" syscall
+ * if the expected pid is one of its ptracees, in order to emulate the
+ * ptrace mechanism within PRoot.  This function returns -errno if an
+ * error occurred (unsupported request), otherwise 0.
+ */
+int translate_wait_enter(Tracee *ptracer)
+{
+	Tracee *ptracee;
+	pid_t pid;
+
+	/* Default: the wait syscall is left to the kernel.  */
+	PTRACER.waits_in = WAITS_IN_KERNEL;
+
+	/* Don't emulate the ptrace mechanism if it's not a ptracer.  */
+	if (PTRACER.nb_ptracees == 0)
+		return 0;
+
+	/* Don't emulate the ptrace mechanism if the requested pid is
+	 * not a ptracee.  A pid of -1 skips this per-pid lookup.  */
+	pid = (pid_t) peek_reg(ptracer, ORIGINAL, SYSARG_1);
+	if (pid != -1) {
+		ptracee = get_ptracee(ptracer, pid, false, true,
+				peek_reg(ptracer, ORIGINAL, SYSARG_3));
+		if (ptracee == NULL || PTRACEE.ptracer != ptracer)
+			return 0;
+	}
+
+	/* This syscall is canceled at the enter stage in order to be
+	 * handled at the exit stage.  */
+	set_sysnum(ptracer, PR_void);
+	PTRACER.waits_in = WAITS_IN_PROOT;
+
+	return 0;
+}
+
+/**
+ * Update pid & wait status of @ptracer's wait(2) for the given
+ * @ptracee.  This function returns -errno if an error occurred, 0 if
+ * the wait syscall will be restarted (ie. the event is discarded),
+ * otherwise @ptracee's pid.
+ *
+ * Note: @ptracee may be freed by this function (when it is a zombie),
+ * so callers must not use it afterwards.
+ */
+static int update_wait_status(Tracee *ptracer, Tracee *ptracee)
+{
+	word_t address;
+	int result;
+
+	/* Special case: the Linux kernel reports the terminating
+	 * event issued by a process to both its parent and its
+	 * tracer, except when they are the same.  In this case the
+	 * Linux kernel reports the terminating event only once to the
+	 * tracing parent ...  */
+	if (PTRACEE.ptracer == ptracee->parent
+	    && (WIFEXITED(PTRACEE.event4.ptracer.value)
+	     || WIFSIGNALED(PTRACEE.event4.ptracer.value))) {
+		/* ... So hide this terminating event (toward its
+		 * tracer, ie. PRoot) and make the second one appear
+		 * (towards its parent, ie. the ptracer).  This will
+		 * ensure its exit status is collected from a kernel
+		 * point-of-view (ie. it doesn't stay a zombie
+		 * forever).  */
+		restart_original_syscall(ptracer);
+
+		/* Detach this ptracee from its ptracer, PRoot doesn't
+		 * have anything else to emulate.  */
+		detach_from_ptracer(ptracee);
+
+		/* Zombies can rest in peace once the ptracer is
+		 * notified.  */
+		if (PTRACEE.is_zombie)
+			TALLOC_FREE(ptracee);
+
+		return 0;
+	}
+
+	/* Write the wait status where the ptracer asked for it (2nd
+	 * syscall argument), unless it passed a null pointer.  */
+	address = peek_reg(ptracer, ORIGINAL, SYSARG_2);
+	if (address != 0) {
+		poke_int32(ptracer, address, PTRACEE.event4.ptracer.value);
+		if (errno != 0)
+			return -errno;
+	}
+
+	/* The event is now delivered to the ptracer.  */
+	PTRACEE.event4.ptracer.pending = false;
+
+	/* Be careful; ptracee might get freed before its pid is
+	 * returned.  */
+	result = ptracee->pid;
+
+	/* Zombies can rest in peace once the ptracer is notified.  */
+	if (PTRACEE.is_zombie) {
+		detach_from_ptracer(ptracee);
+		TALLOC_FREE(ptracee);
+	}
+
+	return result;
+}
+
+/**
+ * Emulate the wait* syscall made by @ptracer if it was in the context
+ * of the ptrace mechanism.  This function returns -errno if an error
+ * occurred, otherwise the pid of the expected tracee.
+ *
+ * @set_result is set to false when the returned value must not be
+ * written into the ptracer's result register (the original syscall is
+ * being restarted), and to true otherwise.
+ */
+int translate_wait_exit(Tracee *ptracer, bool *set_result)
+{
+	Tracee *ptracee;
+	word_t options;
+	int status;
+	pid_t pid;
+
+	*set_result = true;
+	assert(PTRACER.waits_in == WAITS_IN_PROOT);
+	PTRACER.waits_in = DOESNT_WAIT;
+
+	pid = (pid_t) peek_reg(ptracer, ORIGINAL, SYSARG_1);
+	options = peek_reg(ptracer, ORIGINAL, SYSARG_3);
+
+	/* Is there such a stopped ptracee with an event not yet
+	 * passed to its ptracer?  */
+	ptracee = get_stopped_ptracee(ptracer, pid, true, options);
+	if (ptracee == NULL) {
+		/* Is there still living ptracees?  */
+		if (PTRACER.nb_ptracees == 0)
+			return -ECHILD;
+
+		/* Non blocking wait(2) ?  */
+		if ((options & WNOHANG) != 0) {
+			/* if WNOHANG was specified and one or more
+			 * child(ren) specified by pid exist, but have
+			 * not yet changed state, then 0 is returned.
+			 * On error, -1 is returned.
+			 *
+			 * -- man 2 waitpid  */
+			return (has_ptracees(ptracer, pid, options) ? 0 : -ECHILD);
+		}
+
+		/* Otherwise put this ptracer in the "waiting for
+		 * ptracee" state, it will be woken up in
+		 * handle_ptracee_event() later.  */
+		PTRACER.wait_pid = pid;
+		PTRACER.wait_options = options;
+
+		return 0;
+	}
+
+	status = update_wait_status(ptracer, ptracee);
+	// If the syscall is restarted, don't touch the result.
+	// Not only is it unnecessary, it could overwrite syscall argument on ARM.
+	if (status == 0)
+		*set_result = false;
+	return status;
+}
+
+/**
+ * For the given @ptracee, pass its current @event to its ptracer if
+ * this latter is waiting for it, otherwise put the @ptracee in the
+ * "waiting for ptracer" state.  This function returns whether
+ * @ptracee shall be kept in the stop state or not.
+ */
+bool handle_ptracee_event(Tracee *ptracee, int event)
+{
+	bool handled_by_proot_first = false;
+	Tracee *ptracer = PTRACEE.ptracer;
+	bool keep_stopped;
+
+	assert(ptracer != NULL);
+
+	/* Remember what the event initially was, this will be
+	 * required by PRoot to handle this event later.  */
+	PTRACEE.event4.proot.value   = event;
+	PTRACEE.event4.proot.pending = true;
+
+	/* By default, this ptracee should be kept stopped until its
+	 * ptracer restarts it.  */
+	keep_stopped = true;
+
+	/* Not all events are expected for this ptracee.  */
+	if (WIFSTOPPED(event)) {
+		switch ((event & 0xfff00) >> 8) {
+		case SIGTRAP | 0x80:
+			if (PTRACEE.ignore_syscalls || PTRACEE.ignore_loader_syscalls)
+				return false;
+
+			/* Clear the 0x80 marker from the reported
+			 * status unless the ptracer asked for it with
+			 * PTRACE_O_TRACESYSGOOD.  */
+			if ((PTRACEE.options & PTRACE_O_TRACESYSGOOD) == 0)
+				event &= ~(0x80 << 8);
+
+			handled_by_proot_first = IS_IN_SYSEXIT(ptracee);
+			break;
+
+/* The option is spelled PTRACE_O_TRACEVFORKDONE but the event is
+ * spelled PTRACE_EVENT_VFORK_DONE: alias the latter so that a single
+ * token-pasted name works for both in CASE_FILTER_EVENT.  */
+#define PTRACE_EVENT_VFORKDONE PTRACE_EVENT_VFORK_DONE
+#define CASE_FILTER_EVENT(name) \
+		case SIGTRAP | PTRACE_EVENT_ ##name << 8:		\
+			if ((PTRACEE.options & PTRACE_O_TRACE ##name) == 0) \
+				return false;				\
+			PTRACEE.tracing_started = true;			\
+			handled_by_proot_first = true;			\
+			break;
+
+		CASE_FILTER_EVENT(FORK);
+		CASE_FILTER_EVENT(VFORK);
+		CASE_FILTER_EVENT(VFORKDONE);
+		CASE_FILTER_EVENT(CLONE);
+		CASE_FILTER_EVENT(EXIT);
+		CASE_FILTER_EVENT(EXEC);
+
+			/* Never reached.  */
+			assert(0);
+
+		case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8:
+		case SIGTRAP | PTRACE_EVENT_SECCOMP << 8:
+			/* These events are not supported [yet?] under
+			 * ptrace emulation.  */
+			return false;
+
+		default:
+			PTRACEE.tracing_started = true;
+			break;
+		}
+	}
+	/* In these cases, the ptracee isn't really alive anymore.  To
+	 * ensure it will not be in limbo, PRoot restarts it whether
+	 * its ptracer is waiting for it or not.  */
+	else if (WIFEXITED(event) || WIFSIGNALED(event)) {
+		PTRACEE.tracing_started = true;
+		keep_stopped = false;
+	}
+
+	/* A process is not traced right from the TRACEME request; it
+	 * is traced from the first received signal, whether it was
+	 * raised by the process itself (implicitly or explicitly), or
+	 * it was induced by a PTRACE_EVENT_*.  */
+	if (!PTRACEE.tracing_started)
+		return false;
+
+	/* Under some circumstances, the event must be handled by
+	 * PRoot first.  */
+	if (handled_by_proot_first) {
+		int signal;
+		signal = handle_tracee_event(ptracee, PTRACEE.event4.proot.value);
+		PTRACEE.event4.proot.value = signal;
+
+		/* The computed signal is always 0 since we can come
+		 * in this block only on sysexit and special events
+		 * (as for now).  */
+		assert(signal == 0);
+	}
+
+	/* Remember what the new event is, this will be required by
+	   the ptracer in translate_ptrace_exit() in order to restart
+	   this ptracee.  */
+	PTRACEE.event4.ptracer.value   = event;
+	PTRACEE.event4.ptracer.pending = true;
+
+	/* Notify asynchronously the ptracer about this event, as the
+	 * kernel does.  */
+	kill(ptracer->pid, SIGCHLD);
+
+	/* Note: wait_pid is set in translate_wait_exit() if no
+	 * ptracee event was pending when the ptracer started to
+	 * wait.  */
+	if (   (PTRACER.wait_pid == -1 || PTRACER.wait_pid == ptracee->pid)
+	    && EXPECTED_WAIT_CLONE(PTRACER.wait_options, ptracee)) {
+		bool restarted;
+		int status;
+
+		/* Caution: ptracee may have been freed once this call
+		 * returns; it is not used below.  */
+		status = update_wait_status(ptracer, ptracee);
+		if (status != 0)
+			poke_reg(ptracer, SYSARG_RESULT, (word_t) status);
+
+		/* Write ptracer's register cache back.  */
+		(void) push_regs(ptracer);
+
+		/* Restart the ptracer.  */
+		PTRACER.wait_pid = 0;
+		restarted = restart_tracee(ptracer, 0);
+		if (!restarted)
+			keep_stopped = false;
+
+		return keep_stopped;
+	}
+
+	return keep_stopped;
+}
diff --git a/proot/proot_linux/ptrace/wait.h b/proot/proot_linux/ptrace/wait.h
new file mode 100644
index 0000000..5bb11be
--- /dev/null
+++ b/proot/proot_linux/ptrace/wait.h
@@ -0,0 +1,47 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#ifndef PTRACE_WAIT_H
+#define PTRACE_WAIT_H
+
+#include "tracee/tracee.h"
+
+extern int translate_wait_enter(Tracee *ptracer);
+extern int translate_wait_exit(Tracee *ptracer, bool *set_result);
+extern bool handle_ptracee_event(Tracee *ptracee, int wait_status);
+
+/* __WCLONE: Wait for "clone" children only. If omitted then wait for
+ * "non-clone" children only.
(A "clone" child is one which delivers + * no signal, or a signal other than SIGCHLD to its parent upon + * termination.) This option is ignored if __WALL is also specified. + * + * __WALL: Wait for all children, regardless of type ("clone" or + * "non-clone"). + * + * -- wait(2) man-page + */ +#define EXPECTED_WAIT_CLONE(wait_options, tracee) \ + ((((wait_options) & __WALL) != 0) \ + || ((((wait_options) & __WCLONE) != 0) && (tracee)->clone) \ + || ((((wait_options) & __WCLONE) == 0) && !(tracee)->clone)) + +#endif /* PTRACE_WAIT_H */ diff --git a/proot/proot_linux/syscall/chain.c b/proot/proot_linux/syscall/chain.c new file mode 100644 index 0000000..6823332 --- /dev/null +++ b/proot/proot_linux/syscall/chain.c @@ -0,0 +1,161 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#include <talloc.h>	/* talloc*, */
+#include <sys/queue.h>	/* STAILQ_*, */
+#include <errno.h>	/* E*, */
+#include <assert.h>	/* assert(3), */
+
+#include "syscall/chain.h"
+#include "syscall/sysnum.h"
+#include "tracee/tracee.h"
+#include "tracee/reg.h"
+#include "arch.h"
+
+/* One pending "unrequested" syscall: its number, its six arguments,
+ * and its link in the per-tracee FIFO.  */
+struct chained_syscall {
+	Sysnum sysnum;
+	word_t sysargs[6];
+	STAILQ_ENTRY(chained_syscall) link;
+};
+
+STAILQ_HEAD(chained_syscalls, chained_syscall);
+
+/**
+ * Append a new syscall (@sysnum, @sysarg_*) to the list of
+ * "unrequested" syscalls for the given @tracee.  These new syscalls
+ * will be triggered in order once the current syscall is done.  The
+ * caller is free to force the last result of this syscall chain in
+ * @tracee->chain.final_result.  This function returns -errno if an
+ * error occurred, otherwise 0.
+ */
+int register_chained_syscall(Tracee *tracee, Sysnum sysnum,
+			word_t sysarg_1, word_t sysarg_2, word_t sysarg_3,
+			word_t sysarg_4, word_t sysarg_5, word_t sysarg_6)
+{
+	struct chained_syscall *syscall;
+
+	/* The list head is created lazily, on first registration, as a
+	 * talloc child of the tracee.  */
+	if (tracee->chain.syscalls == NULL) {
+		tracee->chain.syscalls = talloc_zero(tracee, struct chained_syscalls);
+		if (tracee->chain.syscalls == NULL)
+			return -ENOMEM;
+
+		STAILQ_INIT(tracee->chain.syscalls);
+	}
+
+	/* Each entry is a talloc child of the list, so freeing the
+	 * list releases any entry still attached to it.  */
+	syscall = talloc_zero(tracee->chain.syscalls, struct chained_syscall);
+	if (syscall == NULL)
+		return -ENOMEM;
+
+	syscall->sysnum     = sysnum;
+	syscall->sysargs[0] = sysarg_1;
+	syscall->sysargs[1] = sysarg_2;
+	syscall->sysargs[2] = sysarg_3;
+	syscall->sysargs[3] = sysarg_4;
+	syscall->sysargs[4] = sysarg_5;
+	syscall->sysargs[5] = sysarg_6;
+
+	/* FIFO order: new syscalls go to the tail.  */
+	STAILQ_INSERT_TAIL(tracee->chain.syscalls, syscall, link);
+
+	return 0;
+}
+
+/**
+ * Use/remove the first element of @tracee->chain.syscalls to forge a
+ * new syscall.  This function should be called only at the end of
+ * the sysexit stage.
+ */
+void chain_next_syscall(Tracee *tracee)
+{
+	struct chained_syscall *syscall;
+	word_t instr_pointer;
+	word_t sysnum;
+
+	/* Callers must only get here when a chain was registered.  */
+	assert(tracee->chain.syscalls != NULL);
+
+	/* No more chained syscalls: force the result of the initial
+	 * syscall (the one explicitly requested by the tracee).  */
+	if (STAILQ_EMPTY(tracee->chain.syscalls)) {
+		TALLOC_FREE(tracee->chain.syscalls);
+
+		if (tracee->chain.force_final_result)
+			poke_reg(tracee, SYSARG_RESULT, tracee->chain.final_result);
+
+		/* Reset the forced result so that it does not leak
+		 * into a later chain.  */
+		tracee->chain.force_final_result = false;
+		tracee->chain.final_result = 0;
+
+		return;
+	}
+
+	/* Original register values will be restored right after the
+	 * last chained syscall.  */
+	tracee->restore_original_regs = false;
+
+	/* The list of chained syscalls is a FIFO.  */
+	syscall = STAILQ_FIRST(tracee->chain.syscalls);
+	STAILQ_REMOVE_HEAD(tracee->chain.syscalls, link);
+
+	/* Load the arguments of the forged syscall.  */
+	poke_reg(tracee, SYSARG_1, syscall->sysargs[0]);
+	poke_reg(tracee, SYSARG_2, syscall->sysargs[1]);
+	poke_reg(tracee, SYSARG_3, syscall->sysargs[2]);
+	poke_reg(tracee, SYSARG_4, syscall->sysargs[3]);
+	poke_reg(tracee, SYSARG_5, syscall->sysargs[4]);
+	poke_reg(tracee, SYSARG_6, syscall->sysargs[5]);
+
+	/* Convert the syscall number with detranslate_sysnum() for
+	 * the tracee's ABI before writing it.  */
+	sysnum = detranslate_sysnum(get_abi(tracee), syscall->sysnum);
+	poke_reg(tracee, SYSTRAP_NUM, sysnum);
+
+	/* Move the instruction pointer back to the original trap.  */
+	instr_pointer = peek_reg(tracee, CURRENT, INSTR_POINTER);
+	poke_reg(tracee, INSTR_POINTER, instr_pointer - SYSTRAP_SIZE);
+}
+
+/**
+ * Force the last result of the @tracee's current syscall chain to be
+ * @forced_result.  The value is written to the result register by
+ * chain_next_syscall() once the chain is empty.
+ */
+void force_chain_final_result(Tracee *tracee, word_t forced_result)
+{
+	tracee->chain.force_final_result = true;
+	tracee->chain.final_result = forced_result;
+}
+
+/**
+ * Restart the original syscall of the given @tracee.  The result of
+ * the current syscall will be overwritten.  This function returns the
+ * same status as register_chained_syscall().
+ */
+int restart_original_syscall(Tracee *tracee)
+{
+	/* Argument registers, listed in the order they are rewritten.  */
+	static const Reg arguments[] = {
+		SYSARG_1, SYSARG_2, SYSARG_3,
+		SYSARG_4, SYSARG_5, SYSARG_6,
+	};
+	word_t trap_address;
+	size_t i;
+
+	/* Put back the arguments the tracee originally passed...  */
+	for (i = 0; i < sizeof(arguments) / sizeof(arguments[0]); i++)
+		poke_reg(tracee, arguments[i], peek_reg(tracee, ORIGINAL, arguments[i]));
+
+	/* ... and the syscall number it originally requested.  */
+	poke_reg(tracee, SYSTRAP_NUM, peek_reg(tracee, ORIGINAL, SYSARG_NUM));
+
+	/* Rewind the instruction pointer by the size of the trap
+	 * instruction, back to the original trap.  */
+	trap_address = peek_reg(tracee, CURRENT, INSTR_POINTER) - SYSTRAP_SIZE;
+	poke_reg(tracee, INSTR_POINTER, trap_address);
+
+	/* Nothing in this function can fail.  */
+	return 0;
+}
diff --git a/proot/proot_linux/syscall/chain.h b/proot/proot_linux/syscall/chain.h
new file mode 100644
index 0000000..8f6bd51
--- /dev/null
+++ b/proot/proot_linux/syscall/chain.h
@@ -0,0 +1,41 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#ifndef CHAIN_H +#define CHAIN_H + +#include "tracee/tracee.h" +#include "syscall/sysnum.h" +#include "arch.h" + +extern int register_chained_syscall(Tracee *tracee, Sysnum sysnum, + word_t sysarg_1, word_t sysarg_2, word_t sysarg_3, + word_t sysarg_4, word_t sysarg_5, word_t sysarg_6); + +extern void force_chain_final_result(Tracee *tracee, word_t forced_result); + +extern int restart_original_syscall(Tracee *tracee); + +extern void chain_next_syscall(Tracee *tracee); + + +#endif /* CHAIN_H */ diff --git a/proot/proot_linux/syscall/enter.c b/proot/proot_linux/syscall/enter.c new file mode 100644 index 0000000..6bee61a --- /dev/null +++ b/proot/proot_linux/syscall/enter.c @@ -0,0 +1,592 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#include <errno.h>	/* errno(3), E* */
+#include <talloc.h>	/* talloc_*, */
+#include <sys/un.h>	/* struct sockaddr_un, */
+#include <linux/net.h>	/* SYS_*, */
+#include <fcntl.h>	/* AT_FDCWD, */
+#include <limits.h>	/* PATH_MAX, */
+#include <string.h>	/* strcpy */
+#include <sys/prctl.h>	/* PR_SET_DUMPABLE */
+#include "syscall/syscall.h"
+#include "syscall/sysnum.h"
+#include "syscall/socket.h"
+#include "ptrace/ptrace.h"
+#include "ptrace/wait.h"
+#include "syscall/heap.h"
+#include "extension/extension.h"
+#include "execve/execve.h"
+#include "tracee/tracee.h"
+#include "tracee/reg.h"
+#include "tracee/mem.h"
+#include "tracee/abi.h"
+#include "path/path.h"
+#include "path/canon.h"
+#include "arch.h"
+
+/**
+ * Translate @path and put the result in the @tracee's memory address
+ * space pointed to by the @reg argument of the current syscall.  See
+ * the documentation of translate_path() about the meaning of
+ * @type.  This function returns -errno if an error occurred, otherwise
+ * 0.
+ */
+static int translate_path2(Tracee *tracee, int dir_fd, char path[PATH_MAX], Reg reg, Type type)
+{
+	char new_path[PATH_MAX];
+	int status;
+
+	/* Special case where the argument was NULL: @path is then
+	 * empty and the syscall argument is left untouched.  */
+	if (path[0] == '\0')
+		return 0;
+
+	/* Translate the original path.  The last argument of
+	 * translate_path() is false only when @type is SYMLINK.  */
+	status = translate_path(tracee, new_path, dir_fd, path, type != SYMLINK);
+	if (status < 0)
+		return status;
+
+	return set_sysarg_path(tracee, new_path, reg);
+}
+
+/**
+ * A helper, see the comment of the function above: the path is read
+ * from the @reg argument of the current syscall and translated
+ * relatively to AT_FDCWD.
+ */
+static int translate_sysarg(Tracee *tracee, Reg reg, Type type)
+{
+	char old_path[PATH_MAX];
+	int status;
+
+	/* Extract the original path.  */
+	status = get_sysarg_path(tracee, old_path, reg);
+	if (status < 0)
+		return status;
+
+	return translate_path2(tracee, AT_FDCWD, old_path, reg, type);
+}
+
+/**
+ * Translate the input arguments of the current @tracee's syscall in the
+ * @tracee->pid process area.  This function sets @tracee->status to
+ * -errno if an error occurred from the tracee's point-of-view (EFAULT
+ * for instance), otherwise 0.
+ */ +int translate_syscall_enter(Tracee *tracee) +{ + int flags; + int dirfd; + int olddirfd; + int newdirfd; + + int status; + int status2; + + char path[PATH_MAX]; + char oldpath[PATH_MAX]; + char newpath[PATH_MAX]; + + word_t syscall_number; + bool special = false; + + status = notify_extensions(tracee, SYSCALL_ENTER_START, 0, 0); + if (status < 0) + goto end; + if (status > 0) + return 0; + + /* Translate input arguments. */ + syscall_number = get_sysnum(tracee, ORIGINAL); + switch (syscall_number) { + default: + /* Nothing to do. */ + status = 0; + break; + + case PR_execve: + status = translate_execve_enter(tracee); + break; + + case PR_ptrace: + status = translate_ptrace_enter(tracee); + break; + + case PR_wait4: + case PR_waitpid: + status = translate_wait_enter(tracee); + break; + + case PR_brk: + translate_brk_enter(tracee); + status = 0; + break; + + case PR_getcwd: + set_sysnum(tracee, PR_void); + status = 0; + break; + + case PR_fchdir: + case PR_chdir: { + struct stat statl; + char *tmp; + + /* The ending "." ensures an error will be reported if + * path does not exist or if it is not a directory. */ + if (syscall_number == PR_chdir) { + status = get_sysarg_path(tracee, path, SYSARG_1); + if (status < 0) + break; + + status = join_paths(2, oldpath, path, "."); + if (status < 0) + break; + + dirfd = AT_FDCWD; + } + else { + strcpy(oldpath, "."); + dirfd = peek_reg(tracee, CURRENT, SYSARG_1); + } + + status = translate_path(tracee, path, dirfd, oldpath, true); + if (status < 0) + break; + + status = lstat(path, &statl); + if (status < 0) + break; + + /* Check this directory is accessible. 
*/ + if ((statl.st_mode & S_IXUSR) == 0) + return -EACCES; + + /* Sadly this method doesn't detranslate statefully, + * this means that there's an ambiguity when several + * bindings are from the same host path: + * + * $ proot -m /tmp:/a -m /tmp:/b fchdir_getcwd /a + * /b + * + * $ proot -m /tmp:/b -m /tmp:/a fchdir_getcwd /a + * /a + * + * A solution would be to follow each file descriptor + * just like it is done for cwd. + */ + + status = detranslate_path(tracee, path, NULL); + if (status < 0) + break; + + /* Remove the trailing "/" or "/.". */ + chop_finality(path); + + tmp = talloc_strdup(tracee->fs, path); + if (tmp == NULL) { + status = -ENOMEM; + break; + } + TALLOC_FREE(tracee->fs->cwd); + + tracee->fs->cwd = tmp; + talloc_set_name_const(tracee->fs->cwd, "$cwd"); + + set_sysnum(tracee, PR_void); + status = 0; + break; + } + + case PR_bind: + case PR_connect: { + word_t address; + word_t size; + + address = peek_reg(tracee, CURRENT, SYSARG_2); + size = peek_reg(tracee, CURRENT, SYSARG_3); + + status = translate_socketcall_enter(tracee, &address, size); + if (status <= 0) + break; + + poke_reg(tracee, SYSARG_2, address); + poke_reg(tracee, SYSARG_3, sizeof(struct sockaddr_un)); + + status = 0; + break; + } + +#define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) + +#define PEEK_WORD(addr, forced_errno) \ + peek_word(tracee, addr); \ + if (errno != 0) { \ + status = forced_errno ?: -errno; \ + break; \ + } + +#define POKE_WORD(addr, value) \ + poke_word(tracee, addr, value); \ + if (errno != 0) { \ + status = -errno; \ + break; \ + } + + case PR_accept: + case PR_accept4: + /* Nothing special to do if no sockaddr was specified. */ + if (peek_reg(tracee, ORIGINAL, SYSARG_2) == 0) { + status = 0; + break; + } + special = true; + /* Fall through. */ + case PR_getsockname: + case PR_getpeername:{ + int size; + + /* Remember: PEEK_WORD puts -errno in status and breaks if an + * error occured. 
*/ + size = (int) PEEK_WORD(peek_reg(tracee, ORIGINAL, SYSARG_3), special ? -EINVAL : 0); + + /* The "size" argument is both used as an input parameter + * (max. size) and as an output parameter (actual size). The + * exit stage needs to know the max. size to not overwrite + * anything, that's why it is copied in the 6th argument + * (unused) before the kernel updates it. */ + poke_reg(tracee, SYSARG_6, size); + + status = 0; + break; + } + + case PR_socketcall: { + word_t args_addr; + word_t sock_addr_saved; + word_t sock_addr; + word_t size_addr; + word_t size; + + args_addr = peek_reg(tracee, CURRENT, SYSARG_2); + + switch (peek_reg(tracee, CURRENT, SYSARG_1)) { + case SYS_BIND: + case SYS_CONNECT: + /* Handle these cases below. */ + status = 1; + break; + + case SYS_ACCEPT: + case SYS_ACCEPT4: + /* Nothing special to do if no sockaddr was specified. */ + sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); + if (sock_addr == 0) { + status = 0; + break; + } + special = true; + /* Fall through. */ + case SYS_GETSOCKNAME: + case SYS_GETPEERNAME: + /* Remember: PEEK_WORD puts -errno in status and breaks + * if an error occured. */ + size_addr = PEEK_WORD(SYSARG_ADDR(3), 0); + size = (int) PEEK_WORD(size_addr, special ? -EINVAL : 0); + + /* See case PR_accept for explanation. */ + poke_reg(tracee, SYSARG_6, size); + status = 0; + break; + + default: + status = 0; + break; + } + + /* An error occured or there's nothing else to do. */ + if (status <= 0) + break; + + /* Remember: PEEK_WORD puts -errno in status and breaks if an + * error occured. */ + sock_addr = PEEK_WORD(SYSARG_ADDR(2), 0); + size = PEEK_WORD(SYSARG_ADDR(3), 0); + + sock_addr_saved = sock_addr; + status = translate_socketcall_enter(tracee, &sock_addr, size); + if (status <= 0) + break; + + /* These parameters are used/restored at the exit stage. 
*/ + poke_reg(tracee, SYSARG_5, sock_addr_saved); + poke_reg(tracee, SYSARG_6, size); + + /* Remember: POKE_WORD puts -errno in status and breaks if an + * error occured. */ + POKE_WORD(SYSARG_ADDR(2), sock_addr); + POKE_WORD(SYSARG_ADDR(3), sizeof(struct sockaddr_un)); + + status = 0; + break; + } + +#undef SYSARG_ADDR +#undef PEEK_WORD +#undef POKE_WORD + + case PR_access: + case PR_acct: + case PR_chmod: + case PR_chown: + case PR_chown32: + case PR_chroot: + case PR_getxattr: + case PR_listxattr: + case PR_mknod: + case PR_oldstat: + case PR_creat: + case PR_removexattr: + case PR_setxattr: + case PR_stat: + case PR_stat64: + case PR_statfs: + case PR_statfs64: + case PR_swapoff: + case PR_swapon: + case PR_truncate: + case PR_truncate64: + case PR_umount: + case PR_umount2: + case PR_uselib: + case PR_utime: + case PR_utimes: + status = translate_sysarg(tracee, SYSARG_1, REGULAR); + break; + + case PR_open: + flags = peek_reg(tracee, CURRENT, SYSARG_2); + + if ( ((flags & O_NOFOLLOW) != 0) + || ((flags & O_EXCL) != 0 && (flags & O_CREAT) != 0)) + status = translate_sysarg(tracee, SYSARG_1, SYMLINK); + else + status = translate_sysarg(tracee, SYSARG_1, REGULAR); + break; + + case PR_fchownat: + case PR_fstatat64: + case PR_newfstatat: + case PR_statx: + case PR_utimensat: + case PR_utimensat_time64: + case PR_name_to_handle_at: + dirfd = peek_reg(tracee, CURRENT, SYSARG_1); + + status = get_sysarg_path(tracee, path, SYSARG_2); + if (status < 0) + break; + + flags = ( syscall_number == PR_fchownat + || syscall_number == PR_name_to_handle_at) + ? peek_reg(tracee, CURRENT, SYSARG_5) + : ((syscall_number == PR_statx) ? 
+ peek_reg(tracee, CURRENT, SYSARG_3) : + peek_reg(tracee, CURRENT, SYSARG_4)); + + if ((flags & AT_SYMLINK_NOFOLLOW) != 0) + status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); + else + status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); + break; + + case PR_fchmodat: + case PR_faccessat: + case PR_faccessat2: + case PR_futimesat: + case PR_mknodat: + dirfd = peek_reg(tracee, CURRENT, SYSARG_1); + + status = get_sysarg_path(tracee, path, SYSARG_2); + if (status < 0) + break; + + status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); + break; + + case PR_inotify_add_watch: + flags = peek_reg(tracee, CURRENT, SYSARG_3); + + if ((flags & IN_DONT_FOLLOW) != 0) + status = translate_sysarg(tracee, SYSARG_2, SYMLINK); + else + status = translate_sysarg(tracee, SYSARG_2, REGULAR); + break; + + case PR_readlink: + case PR_lchown: + case PR_lchown32: + case PR_lgetxattr: + case PR_llistxattr: + case PR_lremovexattr: + case PR_lsetxattr: + case PR_lstat: + case PR_lstat64: + case PR_oldlstat: + case PR_unlink: + case PR_rmdir: + case PR_mkdir: + status = translate_sysarg(tracee, SYSARG_1, SYMLINK); + break; + + case PR_pivot_root: + status = translate_sysarg(tracee, SYSARG_1, REGULAR); + if (status < 0) + break; + + status = translate_sysarg(tracee, SYSARG_2, REGULAR); + break; + + case PR_linkat: + olddirfd = peek_reg(tracee, CURRENT, SYSARG_1); + newdirfd = peek_reg(tracee, CURRENT, SYSARG_3); + flags = peek_reg(tracee, CURRENT, SYSARG_5); + + status = get_sysarg_path(tracee, oldpath, SYSARG_2); + if (status < 0) + break; + + status = get_sysarg_path(tracee, newpath, SYSARG_4); + if (status < 0) + break; + + if ((flags & AT_SYMLINK_FOLLOW) != 0) + status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, REGULAR); + else + status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, SYMLINK); + if (status < 0) + break; + + status = translate_path2(tracee, newdirfd, newpath, SYSARG_4, SYMLINK); + break; + + case PR_mount: + status 
= get_sysarg_path(tracee, path, SYSARG_1); + if (status < 0) + break; + + /* The following check covers only 90% of the cases. */ + if (path[0] == '/' || path[0] == '.') { + status = translate_path2(tracee, AT_FDCWD, path, SYSARG_1, REGULAR); + if (status < 0) + break; + } + + status = translate_sysarg(tracee, SYSARG_2, REGULAR); + break; + + case PR_openat: + dirfd = peek_reg(tracee, CURRENT, SYSARG_1); + flags = peek_reg(tracee, CURRENT, SYSARG_3); + + status = get_sysarg_path(tracee, path, SYSARG_2); + if (status < 0) + break; + + if ( ((flags & O_NOFOLLOW) != 0) + || ((flags & O_EXCL) != 0 && (flags & O_CREAT) != 0)) + status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); + else + status = translate_path2(tracee, dirfd, path, SYSARG_2, REGULAR); + break; + + case PR_readlinkat: + case PR_unlinkat: + case PR_mkdirat: + dirfd = peek_reg(tracee, CURRENT, SYSARG_1); + + status = get_sysarg_path(tracee, path, SYSARG_2); + if (status < 0) + break; + + status = translate_path2(tracee, dirfd, path, SYSARG_2, SYMLINK); + break; + + case PR_link: + case PR_rename: + status = translate_sysarg(tracee, SYSARG_1, SYMLINK); + if (status < 0) + break; + + status = translate_sysarg(tracee, SYSARG_2, SYMLINK); + break; + + case PR_renameat: + case PR_renameat2: + olddirfd = peek_reg(tracee, CURRENT, SYSARG_1); + newdirfd = peek_reg(tracee, CURRENT, SYSARG_3); + + status = get_sysarg_path(tracee, oldpath, SYSARG_2); + if (status < 0) + break; + + status = get_sysarg_path(tracee, newpath, SYSARG_4); + if (status < 0) + break; + + status = translate_path2(tracee, olddirfd, oldpath, SYSARG_2, SYMLINK); + if (status < 0) + break; + + status = translate_path2(tracee, newdirfd, newpath, SYSARG_4, SYMLINK); + break; + + case PR_symlink: + status = translate_sysarg(tracee, SYSARG_2, SYMLINK); + break; + + case PR_symlinkat: + newdirfd = peek_reg(tracee, CURRENT, SYSARG_2); + + status = get_sysarg_path(tracee, newpath, SYSARG_3); + if (status < 0) + break; + + status = 
translate_path2(tracee, newdirfd, newpath, SYSARG_3, SYMLINK); + break; + + case PR_prctl: + /* Prevent tracees from setting dumpable flag. + * (Otherwise it could break tracee memory access) */ + if (peek_reg(tracee, CURRENT, SYSARG_1) == PR_SET_DUMPABLE) { + set_sysnum(tracee, PR_void); + status = 0; + } + break; + } + +end: + status2 = notify_extensions(tracee, SYSCALL_ENTER_END, status, 0); + if (status2 < 0) + status = status2; + + return status; +} + diff --git a/proot/proot_linux/syscall/exit.c b/proot/proot_linux/syscall/exit.c new file mode 100644 index 0000000..0491ee6 --- /dev/null +++ b/proot/proot_linux/syscall/exit.c @@ -0,0 +1,473 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include /* errno(3), E* */ +#include /* struct utsname, */ +#include /* SYS_*, */ +#include /* strlen(3), */ + +#include "syscall/syscall.h" +#include "syscall/sysnum.h" +#include "syscall/socket.h" +#include "syscall/chain.h" +#include "syscall/heap.h" +#include "syscall/rlimit.h" +#include "execve/execve.h" +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include "tracee/abi.h" +#include "path/path.h" +#include "ptrace/ptrace.h" +#include "ptrace/wait.h" +#include "extension/extension.h" +#include "arch.h" + +/** + * Translate the output arguments of the current @tracee's syscall in + * the @tracee->pid process area. This function sets the result of + * this syscall to @tracee->status if an error occured previously + * during the translation, that is, if @tracee->status is less than 0. + */ +void translate_syscall_exit(Tracee *tracee) +{ + word_t syscall_number; + word_t syscall_result; + int status; + + status = notify_extensions(tracee, SYSCALL_EXIT_START, 0, 0); + if (status < 0) { + poke_reg(tracee, SYSARG_RESULT, (word_t) status); + goto end; + } + if (status > 0) + return; + + /* Set the tracee's errno if an error occured previously during + * the translation. */ + if (tracee->status < 0) { + poke_reg(tracee, SYSARG_RESULT, (word_t) tracee->status); + goto end; + } + + /* Translate output arguments: + * - break: update the syscall result register with "status" + * - goto end: nothing else to do. + */ + syscall_number = get_sysnum(tracee, ORIGINAL); + syscall_result = peek_reg(tracee, CURRENT, SYSARG_RESULT); + switch (syscall_number) { + case PR_brk: + translate_brk_exit(tracee); + goto end; + + case PR_getcwd: { + char path[PATH_MAX]; + size_t new_size; + size_t size; + word_t output; + + size = (size_t) peek_reg(tracee, ORIGINAL, SYSARG_2); + if (size == 0) { + status = -EINVAL; + break; + } + + /* Ensure cwd still exists. 
*/ + status = translate_path(tracee, path, AT_FDCWD, ".", false); + if (status < 0) + break; + + new_size = strlen(tracee->fs->cwd) + 1; + if (size < new_size) { + status = -ERANGE; + break; + } + + /* Overwrite the path. */ + output = peek_reg(tracee, ORIGINAL, SYSARG_1); + status = write_data(tracee, output, tracee->fs->cwd, new_size); + if (status < 0) + break; + + /* The value of "status" is used to update the returned value + * in translate_syscall_exit(). */ + status = new_size; + break; + } + + case PR_accept: + case PR_accept4: + /* Nothing special to do if no sockaddr was specified. */ + if (peek_reg(tracee, ORIGINAL, SYSARG_2) == 0) + goto end; + /* Fall through. */ + case PR_getsockname: + case PR_getpeername: { + word_t sock_addr; + word_t size_addr; + word_t max_size; + + /* Error reported by the kernel. */ + if ((int) syscall_result < 0) + goto end; + + sock_addr = peek_reg(tracee, ORIGINAL, SYSARG_2); + size_addr = peek_reg(tracee, MODIFIED, SYSARG_3); + max_size = peek_reg(tracee, MODIFIED, SYSARG_6); + + status = translate_socketcall_exit(tracee, sock_addr, size_addr, max_size); + if (status < 0) + break; + + /* Don't overwrite the syscall result. */ + goto end; + } + +#define SYSARG_ADDR(n) (args_addr + ((n) - 1) * sizeof_word(tracee)) + +#define POKE_WORD(addr, value) \ + poke_word(tracee, addr, value); \ + if (errno != 0) { \ + status = -errno; \ + break; \ + } + +#define PEEK_WORD(addr) \ + peek_word(tracee, addr); \ + if (errno != 0) { \ + status = -errno; \ + break; \ + } + + case PR_socketcall: { + word_t args_addr; + word_t sock_addr; + word_t size_addr; + word_t max_size; + + args_addr = peek_reg(tracee, ORIGINAL, SYSARG_2); + + switch (peek_reg(tracee, ORIGINAL, SYSARG_1)) { + case SYS_ACCEPT: + case SYS_ACCEPT4: + /* Nothing special to do if no sockaddr was specified. */ + sock_addr = PEEK_WORD(SYSARG_ADDR(2)); + if (sock_addr == 0) + goto end; + /* Fall through. 
*/ + case SYS_GETSOCKNAME: + case SYS_GETPEERNAME: + /* Handle these cases below. */ + status = 1; + break; + + case SYS_BIND: + case SYS_CONNECT: + /* Restore the initial parameters: this memory was + * overwritten at the enter stage. Remember: POKE_WORD + * puts -errno in status and breaks if an error + * occured. */ + POKE_WORD(SYSARG_ADDR(2), peek_reg(tracee, MODIFIED, SYSARG_5)); + POKE_WORD(SYSARG_ADDR(3), peek_reg(tracee, MODIFIED, SYSARG_6)); + + status = 0; + break; + + default: + status = 0; + break; + } + + /* Error reported by the kernel or there's nothing else to do. */ + if ((int) syscall_result < 0 || status == 0) + goto end; + + /* An error occured in SYS_BIND or SYS_CONNECT. */ + if (status < 0) + break; + + /* Remember: PEEK_WORD puts -errno in status and breaks if an + * error occured. */ + sock_addr = PEEK_WORD(SYSARG_ADDR(2)); + size_addr = PEEK_WORD(SYSARG_ADDR(3)); + max_size = peek_reg(tracee, MODIFIED, SYSARG_6); + + status = translate_socketcall_exit(tracee, sock_addr, size_addr, max_size); + if (status < 0) + break; + + /* Don't overwrite the syscall result. */ + goto end; + } + +#undef SYSARG_ADDR +#undef PEEK_WORD +#undef POKE_WORD + + case PR_fchdir: + case PR_chdir: + /* These syscalls are fully emulated, see enter.c for details + * (like errors). */ + status = 0; + break; + + case PR_rename: + case PR_renameat: + case PR_renameat2: { + char old_path[PATH_MAX]; + char new_path[PATH_MAX]; + ssize_t old_length; + ssize_t new_length; + Comparison comparison; + Reg old_reg; + Reg new_reg; + char *tmp; + + /* Error reported by the kernel. */ + if ((int) syscall_result < 0) + goto end; + + if (syscall_number == PR_rename) { + old_reg = SYSARG_1; + new_reg = SYSARG_2; + } + else { + old_reg = SYSARG_2; + new_reg = SYSARG_4; + } + + /* Get the old path, then convert it to the same + * "point-of-view" as tracee->fs->cwd (guest). 
*/ + status = read_path(tracee, old_path, peek_reg(tracee, MODIFIED, old_reg)); + if (status < 0) + break; + + status = detranslate_path(tracee, old_path, NULL); + if (status < 0) + break; + old_length = (status > 0 ? status - 1 : (ssize_t) strlen(old_path)); + + /* Nothing special to do if the moved path is not the + * current working directory. */ + comparison = compare_paths(old_path, tracee->fs->cwd); + if (comparison != PATH1_IS_PREFIX && comparison != PATHS_ARE_EQUAL) { + status = 0; + break; + } + + /* Get the new path, then convert it to the same + * "point-of-view" as tracee->fs->cwd (guest). */ + status = read_path(tracee, new_path, peek_reg(tracee, MODIFIED, new_reg)); + if (status < 0) + break; + + status = detranslate_path(tracee, new_path, NULL); + if (status < 0) + break; + new_length = (status > 0 ? status - 1 : (ssize_t) strlen(new_path)); + + /* Sanity check. */ + if (strlen(tracee->fs->cwd) >= PATH_MAX) { + status = 0; + break; + } + strcpy(old_path, tracee->fs->cwd); + + /* Update the virtual current working directory. */ + substitute_path_prefix(old_path, old_length, new_path, new_length); + + tmp = talloc_strdup(tracee->fs, old_path); + if (tmp == NULL) { + status = -ENOMEM; + break; + } + + TALLOC_FREE(tracee->fs->cwd); + tracee->fs->cwd = tmp; + + status = 0; + break; + } + + case PR_readlink: + case PR_readlinkat: { + char referee[PATH_MAX]; + char referer[PATH_MAX]; + size_t old_size; + size_t new_size; + size_t max_size; + word_t input; + word_t output; + + /* Error reported by the kernel. 
*/ + if ((int) syscall_result < 0) + goto end; + + old_size = syscall_result; + + if (syscall_number == PR_readlink) { + output = peek_reg(tracee, ORIGINAL, SYSARG_2); + max_size = peek_reg(tracee, ORIGINAL, SYSARG_3); + input = peek_reg(tracee, MODIFIED, SYSARG_1); + } + else { + output = peek_reg(tracee, ORIGINAL, SYSARG_3); + max_size = peek_reg(tracee, ORIGINAL, SYSARG_4); + input = peek_reg(tracee, MODIFIED, SYSARG_2); + } + + if (max_size > PATH_MAX) + max_size = PATH_MAX; + + if (max_size == 0) { + status = -EINVAL; + break; + } + + /* The kernel does NOT put the NULL terminating byte for + * readlink(2). */ + status = read_data(tracee, referee, output, old_size); + if (status < 0) + break; + referee[old_size] = '\0'; + + /* Not optimal but safe (path is fully translated). */ + status = read_path(tracee, referer, input); + if (status < 0) + break; + + if (status >= PATH_MAX) { + status = -ENAMETOOLONG; + break; + } + + status = detranslate_path(tracee, referee, referer); + if (status < 0) + break; + + /* The original path doesn't require any transformation, i.e + * it is a symetric binding. */ + if (status == 0) + goto end; + + /* Overwrite the path. Note: the output buffer might be + * initialized with zeros but it was updated with the kernel + * result, and then with the detranslated result. This later + * might be shorter than the former, so it's safier to add a + * NULL terminating byte when possible. This problem was + * exposed by IDA Demo 6.3. */ + if ((size_t) status < max_size) { + new_size = status - 1; + status = write_data(tracee, output, referee, status); + } + else { + new_size = max_size; + status = write_data(tracee, output, referee, max_size); + } + if (status < 0) + break; + + /* The value of "status" is used to update the returned value + * in translate_syscall_exit(). 
*/ + status = new_size; + break; + } + +#if defined(ARCH_X86_64) + case PR_uname: { + struct utsname utsname; + word_t address; + size_t size; + + if (get_abi(tracee) != ABI_2) + goto end; + + /* Error reported by the kernel. */ + if ((int) syscall_result < 0) + goto end; + + address = peek_reg(tracee, ORIGINAL, SYSARG_1); + + status = read_data(tracee, &utsname, address, sizeof(utsname)); + if (status < 0) + break; + + /* Some 32-bit programs like package managers can be + * confused when the kernel reports "x86_64". */ + size = sizeof(utsname.machine); + strncpy(utsname.machine, "i686", size); + utsname.machine[size - 1] = '\0'; + + status = write_data(tracee, address, &utsname, sizeof(utsname)); + if (status < 0) + break; + + status = 0; + break; + } +#endif + + case PR_execve: + translate_execve_exit(tracee); + goto end; + + case PR_ptrace: + status = translate_ptrace_exit(tracee); + break; + + case PR_wait4: + case PR_waitpid: { + bool set_result = true; + if (tracee->as_ptracer.waits_in != WAITS_IN_PROOT) + goto end; + + status = translate_wait_exit(tracee, &set_result); + if (!set_result) + goto end; + break; + } + + case PR_setrlimit: + case PR_prlimit64: + /* Error reported by the kernel. */ + if ((int) syscall_result < 0) + goto end; + + status = translate_setrlimit_exit(tracee, syscall_number == PR_prlimit64); + if (status < 0) + break; + + /* Don't overwrite the syscall result. */ + goto end; + + default: + goto end; + } + + poke_reg(tracee, SYSARG_RESULT, (word_t) status); + +end: + status = notify_extensions(tracee, SYSCALL_EXIT_END, 0, 0); + if (status < 0) + poke_reg(tracee, SYSARG_RESULT, (word_t) status); +} diff --git a/proot/proot_linux/syscall/heap.c b/proot/proot_linux/syscall/heap.c new file mode 100644 index 0000000..adbb290 --- /dev/null +++ b/proot/proot_linux/syscall/heap.c @@ -0,0 +1,213 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* PROT_*, MAP_*, */ +#include /* assert(3), */ +#include /* strerror(3), */ +#include /* sysconf(3), */ +#include /* MIN(), MAX(), */ + +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include "syscall/sysnum.h" +#include "execve/execve.h" +#include "cli/note.h" + +#include "compat.h" + +#define DEBUG_BRK(...) /* fprintf(stderr, __VA_ARGS__) */ + +/* The size of the heap can be zero, unlike the size of a memory + * mapping. As a consequence, the first page of the "heap" memory + * mapping is discarded in order to emulate an empty heap. */ +static word_t heap_offset = 0; + +/** + * Put @tracee's heap to a reliable location. By default the Linux + * kernel puts it near loader's BSS, but this default location is not + * reliable since the kernel might put another memory mapping right + * after it (ie. continuously). In this case, @tracee's heap can't + * grow anymore and some programs like Bash will abort. This issue + * can be reproduced when using a Ubuntu 12.04 x86_64 rootfs on RHEL 5 + * x86_64. 
+ */
+void translate_brk_enter(Tracee *tracee)
+{
+	word_t new_brk_address;
+	size_t old_heap_size;
+	size_t new_heap_size;
+
+	/* Heap emulation was turned off for this tracee: let the
+	 * original brk(2) go through untouched.  */
+	if (tracee->heap->disabled)
+		return;
+
+	/* Lazily initialize the size of the discarded first page; fall
+	 * back to 0x1000 if sysconf(3) doesn't report a usable value.  */
+	if (heap_offset == 0) {
+		heap_offset = sysconf(_SC_PAGE_SIZE);
+		if ((int) heap_offset <= 0)
+			heap_offset = 0x1000;
+	}
+
+	new_brk_address = peek_reg(tracee, CURRENT, SYSARG_1);
+	DEBUG_BRK("brk(0x%lx)\n", new_brk_address);
+
+	/* Allocate a new mapping for the emulated heap.  */
+	if (tracee->heap->base == 0) {
+		Sysnum sysnum;
+		Mapping *mappings;
+		Mapping *bss;
+
+		/* From PRoot's point-of-view this is the first time this
+		 * tracee calls brk(2), although an address was specified.
+		 * This is not supposed to happen the first time.  It is
+		 * likely because this tracee is the very first child of PRoot
+		 * but the first execve(2) didn't happen yet (so this is not
+		 * its first call to brk(2)).  For instance, the installation
+		 * of seccomp filters is made after this very first process is
+		 * traced, and might call malloc(3) before the first
+		 * execve(2).  */
+		if (new_brk_address != 0) {
+			if (tracee->verbose > 0)
+				note(tracee, WARNING, INTERNAL,
+					"process %d is doing suspicious brk()",	tracee->pid);
+			return;
+		}
+
+		/* Put the heap as close to the BSS as possible since
+		 * some programs -- like dump-emacs -- assume the gap
+		 * between the end of the BSS and the start of the
+		 * heap is relatively small (ie. < 1MB) even if ASLR
+		 * is enabled.  Note that bss->addr + bss->length is
+		 * naturally aligned to a page boundary according to
+		 * add_mapping() in execve/enter.c, ie. no need to
+		 * align new_brk_address again.  Now, the gap between
+		 * the BSS and the heap is only "heap_offset" bytes
+		 * long.  To emulate ADDR_NO_RANDOMIZE personality,
+		 * this gap should be removed (not yet supported).  */
+		mappings = tracee->load_info->mappings;
+		/* The last recorded mapping is taken as the BSS.  */
+		bss = &mappings[talloc_array_length(mappings) - 1];
+		new_brk_address = bss->addr + bss->length;
+
+		/* I don't understand yet why mmap(2) fails (EFAULT)
+		 * on architectures that also have mmap2(2).  Maybe
+		 * this former implies MAP_FIXED in such cases.  */
+		sysnum = detranslate_sysnum(get_abi(tracee), PR_mmap2) != SYSCALL_AVOIDER
+			? PR_mmap2
+			: PR_mmap;
+
+		/* Turn this brk(2) into an anonymous private mapping of
+		 * "heap_offset" bytes placed right after the BSS.  */
+		set_sysnum(tracee, sysnum);
+		poke_reg(tracee, SYSARG_1 /* address */, new_brk_address);
+		poke_reg(tracee, SYSARG_2 /* length  */, heap_offset);
+		poke_reg(tracee, SYSARG_3 /* prot    */, PROT_READ | PROT_WRITE);
+		poke_reg(tracee, SYSARG_4 /* flags   */, MAP_PRIVATE | MAP_ANONYMOUS);
+		poke_reg(tracee, SYSARG_5 /* fd      */, -1);
+		poke_reg(tracee, SYSARG_6 /* offset  */, 0);
+
+		return;
+	}
+
+	/* The size of the heap can't be negative: cancel the syscall,
+	 * the result is then set at the exit stage.  */
+	if (new_brk_address < tracee->heap->base) {
+		set_sysnum(tracee, PR_void);
+		return;
+	}
+
+	new_heap_size = new_brk_address - tracee->heap->base;
+	old_heap_size = tracee->heap->size;
+
+	/* Actually resizing.  The mapping starts "heap_offset" bytes
+	 * before the emulated heap base, hence the adjustments.  */
+	set_sysnum(tracee, PR_mremap);
+	poke_reg(tracee, SYSARG_1 /* old_address */, tracee->heap->base - heap_offset);
+	poke_reg(tracee, SYSARG_2 /* old_size    */, old_heap_size + heap_offset);
+	poke_reg(tracee, SYSARG_3 /* new_size    */, new_heap_size + heap_offset);
+	poke_reg(tracee, SYSARG_4 /* flags       */, 0);
+	poke_reg(tracee, SYSARG_5 /* new_address */, 0);
+
+	return;
+}
+
+/**
+ * c.f. function above.
+ */
+void translate_brk_exit(Tracee *tracee)
+{
+	word_t result;
+	word_t sysnum;
+	int tracee_errno;
+
+	/* Heap emulation is off for this tracee: keep the kernel's
+	 * result as is.  */
+	if (tracee->heap->disabled)
+		return;
+
+	assert(heap_offset > 0);
+
+	/* Dispatch on the syscall that was actually performed in
+	 * place of brk(2), as chosen at the enter stage.  */
+	sysnum = get_sysnum(tracee, MODIFIED);
+	result = peek_reg(tracee, CURRENT, SYSARG_RESULT);
+	tracee_errno = (int) result;
+
+	switch (sysnum) {
+	case PR_void:
+		/* The request was cancelled: report the current break.  */
+		poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size);
+		break;
+
+	case PR_mmap:
+	case PR_mmap2:
+		/* On error, mmap(2) returns -errno (the last 4k is
+		 * reserved for this), whereas brk(2) returns the
+		 * previous value.  */
+		if (tracee_errno < 0 && tracee_errno > -4096) {
+			poke_reg(tracee, SYSARG_RESULT, 0);
+			break;
+		}
+
+		/* The first "heap_offset" bytes of the new mapping are
+		 * discarded, so the emulated heap starts empty.  */
+		tracee->heap->base = result + heap_offset;
+		tracee->heap->size = 0;
+
+		poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size);
+		break;
+
+	case PR_mremap:
+		/* On error, mremap(2) returns -errno (the last 4k is
+		 * reserved for this), whereas brk(2) returns the previous
+		 * value.  A mapping that moved is handled like an error.  */
+		if (   (tracee_errno < 0 && tracee_errno > -4096)
+		    || (tracee->heap->base != result + heap_offset)) {
+			poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size);
+			break;
+		}
+
+		/* The new size is the one requested at the enter stage,
+		 * minus the discarded leading bytes.  */
+		tracee->heap->size = peek_reg(tracee, MODIFIED, SYSARG_3) - heap_offset;
+
+		poke_reg(tracee, SYSARG_RESULT, tracee->heap->base + tracee->heap->size);
+		break;
+
+	case PR_brk:
+		/* Is it confirmed that this suspicious call to brk(2)
+		 * is actually legit?  */
+		if (result == peek_reg(tracee, ORIGINAL, SYSARG_1))
+			tracee->heap->disabled = true;
+		break;
+
+	default:
+		assert(0);
+	}
+
+	DEBUG_BRK("brk() = 0x%lx\n", peek_reg(tracee, CURRENT, SYSARG_RESULT));
+}
diff --git a/proot/proot_linux/syscall/heap.h b/proot/proot_linux/syscall/heap.h
new file mode 100644
index 0000000..8097834
--- /dev/null
+++ b/proot/proot_linux/syscall/heap.h
@@ -0,0 +1,31 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef HEAP_H +#define HEAP_H + +#include "tracee/tracee.h" + +extern void translate_brk_enter(Tracee *tracee); +extern void translate_brk_exit(Tracee *tracee); + +#endif /* HEAP_H */ diff --git a/proot/proot_linux/syscall/rlimit.c b/proot/proot_linux/syscall/rlimit.c new file mode 100644 index 0000000..90eba1d --- /dev/null +++ b/proot/proot_linux/syscall/rlimit.c @@ -0,0 +1,117 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* bool, */ +#include /* prlimit(2), */ +#include /* prlimit(2), */ + +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" +#include "tracee/abi.h" +#include "cli/note.h" + +/** + * Set PRoot's stack soft limit to @tracee's one if this latter is + * greater. This allows to workaround a Linux kernel bug that + * prevents a tracer to access a tracee's stack beyond its last mapped + * page, as it might by the case under PRoot. This function returns + * -errno if an error occurred, otherwise 0. + * + * Details: when a tracer tries to access a tracee's stack beyond its + * last mapped page, the Linux kernel should be able to increase + * tracee's stack up to its soft limit. Unfortunately the Linux + * kernel checks the limit of the tracer instead the limit of the + * tracee. This bug was exposed using UMEQ under PRoot. + * + * Ref.: https://bugzilla.kernel.org/show_bug.cgi?id=91791 + * + * Three strategies were possible: + * + * - set PRoot's stack soft limit to the hard limit; this might make + * the system collapse if PRoot starts to recurses indefinitely. + * + * - as it's done here; this appears to be a good compromise between + * the strategy above and the one below. + * + * - as it's done here + reduce PRoot's stack soft limit as soon as + * it's possible; this would be overly complicated. + */ +int translate_setrlimit_exit(const Tracee *tracee, bool is_prlimit) +{ + struct rlimit proot_stack; + word_t resource; + word_t address; + word_t tracee_stack_limit; + Reg sysarg; + int status; + + sysarg = (is_prlimit ? SYSARG_2 : SYSARG_1); + + resource = peek_reg(tracee, ORIGINAL, sysarg); + address = peek_reg(tracee, ORIGINAL, sysarg + 1); + + /* Not the resource we're looking for? 
*/ + if (resource != RLIMIT_STACK) + return 0; + + /* Retrieve new tracee's stack limit. */ + if (is_prlimit) { + /* Not the prlimit usage we're looking for? */ + if (address == 0) + return 0; + + tracee_stack_limit = peek_uint64(tracee, address); + } + else { + tracee_stack_limit = peek_word(tracee, address); + + /* Convert this special value from 32-bit to 64-bit, + * if needed. */ + if (is_32on64_mode(tracee) && tracee_stack_limit == (uint32_t) -1) + tracee_stack_limit = RLIM_INFINITY; + } + if (errno != 0) + return -errno; + + /* Get current PRoot's stack limit. */ + status = prlimit(0, RLIMIT_STACK, NULL, &proot_stack); + if (status < 0) { + VERBOSE(tracee, 1, "can't get stack limit."); + return 0; /* Not fatal. */ + } + + /* No need to increase current PRoot's stack limit? */ + if (proot_stack.rlim_cur >= tracee_stack_limit) + return 0; + + proot_stack.rlim_cur = tracee_stack_limit; + + /* Increase current PRoot's stack limit. */ + status = prlimit(0, RLIMIT_STACK, &proot_stack, NULL); + if (status < 0) + VERBOSE(tracee, 1, "can't set stack limit."); + return 0; /* Not fatal. */ + + VERBOSE(tracee, 1, "stack soft limit increased to %ld bytes", proot_stack.rlim_cur); + return 0; +} diff --git a/proot/proot_linux/syscall/rlimit.h b/proot/proot_linux/syscall/rlimit.h new file mode 100644 index 0000000..7792023 --- /dev/null +++ b/proot/proot_linux/syscall/rlimit.h @@ -0,0 +1,31 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef RLIMIT_H +#define RLIMIT_H + +#include +#include "tracee/tracee.h" + +extern int translate_setrlimit_exit(const Tracee *tracee, bool is_prlimit); + +#endif /* RLIMIT_H */ diff --git a/proot/proot_linux/syscall/seccomp.c b/proot/proot_linux/syscall/seccomp.c new file mode 100644 index 0000000..792c79e --- /dev/null +++ b/proot/proot_linux/syscall/seccomp.c @@ -0,0 +1,518 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include "build.h" +#include "arch.h" + +#if defined(HAVE_SECCOMP_FILTER) + +#include /* prctl(2), PR_* */ +#include /* struct sock_*, */ +#include /* SECCOMP_MODE_FILTER, */ +#include /* struct sock_*, */ +#include /* AUDIT_, */ +#include /* LIST_FOREACH, */ +#include /* size_t, */ +#include /* talloc_*, */ +#include /* E*, */ +#include /* memcpy(3), */ +#include /* offsetof(3), */ +#include /* uint*_t, UINT*_MAX, */ +#include /* assert(3), */ + +#include "syscall/seccomp.h" +#include "tracee/tracee.h" +#include "syscall/syscall.h" +#include "syscall/sysnum.h" +#include "extension/extension.h" +#include "cli/note.h" + +#include "compat.h" +#include "attribute.h" + +#define DEBUG_FILTER(...) /* fprintf(stderr, __VA_ARGS__) */ + +/** + * Allocate an empty @program->filter. This function returns -errno + * if an error occurred, otherwise 0. + */ +static int new_program_filter(struct sock_fprog *program) +{ + program->filter = talloc_array(NULL, struct sock_filter, 0); + if (program->filter == NULL) + return -ENOMEM; + + program->len = 0; + return 0; +} + +/** + * Append to @program->filter the given @statements (@nb_statements + * items). This function returns -errno if an error occurred, + * otherwise 0. + */ +static int add_statements(struct sock_fprog *program, size_t nb_statements, + struct sock_filter *statements) +{ + size_t length; + void *tmp; + size_t i; + + length = talloc_array_length(program->filter); + tmp = talloc_realloc(NULL, program->filter, struct sock_filter, length + nb_statements); + if (tmp == NULL) + return -ENOMEM; + program->filter = tmp; + + for (i = 0; i < nb_statements; i++, length++) + memcpy(&program->filter[length], &statements[i], sizeof(struct sock_filter)); + + return 0; +} + +/** + * Append to @program->filter the statements required to notify PRoot + * about the given @syscall made by a tracee, with the given @flag. + * This function returns -errno if an error occurred, otherwise 0. 
+ */
+static int add_trace_syscall(struct sock_fprog *program, word_t syscall, int flag)
+{
+	int status;
+
+	/* Sanity check: the value is used as a BPF immediate, so it must fit in 32 bits. */
+	if (syscall > UINT32_MAX)
+		return -ERANGE;
+
+	#define LENGTH_TRACE_SYSCALL 2 /* statements emitted per traced syscall; used by the section sanity checks */
+	struct sock_filter statements[LENGTH_TRACE_SYSCALL] = {
+		/* Compare the accumulator with the expected syscall:
+		 * skip the next statement if not equal. */
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, syscall, 0, 1),
+
+		/* Notify the tracer; @flag is added to the SECCOMP_RET_TRACE return value. */
+		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE + flag)
+	};
+
+	DEBUG_FILTER("FILTER: trace if syscall == %ld\n", syscall);
+
+	status = add_statements(program, LENGTH_TRACE_SYSCALL, statements);
+	if (status < 0)
+		return status;
+
+	return 0;
+}
+
+/**
+ * Append to @program->filter the statements that allow anything (if
+ * unfiltered). Note that @nb_traced_syscalls is used to make a
+ * sanity check. This function returns -errno if an error occurred
+ * (-ERANGE if the sanity check fails), otherwise 0.
+ */
+static int end_arch_section(struct sock_fprog *program, size_t nb_traced_syscalls)
+{
+	int status;
+
+	#define LENGTH_END_SECTION 1 /* the single "allow" statement closing a section */
+	struct sock_filter statements[LENGTH_END_SECTION] = {
+		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)
+	};
+
+	DEBUG_FILTER("FILTER: allow\n");
+
+	status = add_statements(program, LENGTH_END_SECTION, statements);
+	if (status < 0)
+		return status;
+
+	/* Sanity check, see start_arch_section(): statements appended since program->len was recorded must match the announced count. */
+	if (   talloc_array_length(program->filter) - program->len
+	    != LENGTH_END_SECTION + nb_traced_syscalls * LENGTH_TRACE_SYSCALL)
+		return -ERANGE;
+
+	return 0;
+}
+
+/**
+ * Append to @program->filter the statements that check the current
+ * architecture against @arch. Note that @nb_traced_syscalls is used
+ * to make a sanity check. This function returns -errno if an error
+ * occurred, otherwise 0.
+ */
+static int start_arch_section(struct sock_fprog *program, uint32_t arch, size_t nb_traced_syscalls)
+{
+	const size_t arch_offset = offsetof(struct seccomp_data, arch);
+	const size_t syscall_offset = offsetof(struct seccomp_data, nr);
+	const size_t section_length = LENGTH_END_SECTION +
+					nb_traced_syscalls * LENGTH_TRACE_SYSCALL;
+	int status;
+
+	/* Sanity checks: every value below is used as a 32-bit BPF immediate (section_length gets + 1). */
+	if (   arch_offset > UINT32_MAX
+	    || syscall_offset > UINT32_MAX
+	    || section_length > UINT32_MAX - 1)
+		return -ERANGE;
+
+	#define LENGTH_START_SECTION 4 /* number of statements in the section prologue below */
+	struct sock_filter statements[LENGTH_START_SECTION] = {
+		/* Load the current architecture into the
+		 * accumulator. */
+		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, arch_offset),
+
+		/* Compare the accumulator with the expected
+		 * architecture: skip the following statement if
+		 * equal. */
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch, 1, 0),
+
+		/* This is not the expected architecture, so jump unconditionally to
+		 * the end of this section (+ 1 also skips the syscall load below). */
+		BPF_STMT(BPF_JMP + BPF_JA + BPF_K, section_length + 1),
+
+		/* This is the expected architecture, so load the
+		 * current syscall into the accumulator. */
+		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_offset)
+	};
+
+	DEBUG_FILTER("FILTER: if arch == %ld, up to %zdth statement\n",
+		arch, nb_traced_syscalls);
+
+	status = add_statements(program, LENGTH_START_SECTION, statements);
+	if (status < 0)
+		return status;
+
+	/* See the sanity check in end_arch_section(): len marks where this section's body starts. */
+	program->len = talloc_array_length(program->filter);
+
+	return 0;
+}
+
+/**
+ * Append to @program->filter the statements that forbid anything (if
+ * unfiltered) and update @program->len. This function returns -errno
+ * if an error occurred, otherwise 0.
+ */ +static int finalize_program_filter(struct sock_fprog *program) +{ + int status; + + #define LENGTH_FINALIZE 1 + struct sock_filter statements[LENGTH_FINALIZE] = { + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL) + }; + + DEBUG_FILTER("FILTER: kill\n"); + + status = add_statements(program, LENGTH_FINALIZE, statements); + if (status < 0) + return status; + + program->len = talloc_array_length(program->filter); + + return 0; +} + +/** + * Free @program->filter and set @program->len to 0. + */ +static void free_program_filter(struct sock_fprog *program) +{ + TALLOC_FREE(program->filter); + program->len = 0; +} + +/** + * Convert the given @sysnums into BPF filters according to the + * following pseudo-code, then enabled them for the given @tracee and + * all of its future children: + * + * for each handled architectures + * for each filtered syscall + * trace + * allow + * kill + * + * This function returns -errno if an error occurred, otherwise 0. + */ +static int set_seccomp_filters(const FilteredSysnum *sysnums) +{ + SeccompArch seccomp_archs[] = SECCOMP_ARCHS; + size_t nb_archs = sizeof(seccomp_archs) / sizeof(SeccompArch); + + struct sock_fprog program = { .len = 0, .filter = NULL }; + size_t nb_traced_syscalls; + size_t i, j, k; + int status; + + status = new_program_filter(&program); + if (status < 0) + goto end; + + /* For each handled architectures */ + for (i = 0; i < nb_archs; i++) { + word_t syscall; + + nb_traced_syscalls = 0; + + /* Pre-compute the number of traced syscalls for this architecture. 
*/ + for (j = 0; j < seccomp_archs[i].nb_abis; j++) { + for (k = 0; sysnums[k].value != PR_void; k++) { + syscall = detranslate_sysnum(seccomp_archs[i].abis[j], sysnums[k].value); + if (syscall != SYSCALL_AVOIDER) + nb_traced_syscalls++; + } + } + + /* Filter: if handled architecture */ + status = start_arch_section(&program, seccomp_archs[i].value, nb_traced_syscalls); + if (status < 0) + goto end; + + for (j = 0; j < seccomp_archs[i].nb_abis; j++) { + for (k = 0; sysnums[k].value != PR_void; k++) { + /* Get the architecture specific syscall number. */ + syscall = detranslate_sysnum(seccomp_archs[i].abis[j], sysnums[k].value); + if (syscall == SYSCALL_AVOIDER) + continue; + + /* Filter: trace if handled syscall */ + status = add_trace_syscall(&program, syscall, sysnums[k].flags); + if (status < 0) + goto end; + } + } + + /* Filter: allow untraced syscalls for this architecture */ + status = end_arch_section(&program, nb_traced_syscalls); + if (status < 0) + goto end; + } + + status = finalize_program_filter(&program); + if (status < 0) + goto end; + + status = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if (status < 0) + goto end; + + /* To output this BPF program for debug purpose: + * + * write(2, program.filter, program.len * sizeof(struct sock_filter)); + */ + + status = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &program); + if (status < 0) + goto end; + + status = 0; +end: + free_program_filter(&program); + return status; +} + +/* List of sysnums handled by PRoot. 
*/ +static FilteredSysnum proot_sysnums[] = { + { PR_accept, FILTER_SYSEXIT }, + { PR_accept4, FILTER_SYSEXIT }, + { PR_access, 0 }, + { PR_acct, 0 }, + { PR_bind, 0 }, + { PR_brk, FILTER_SYSEXIT }, + { PR_chdir, FILTER_SYSEXIT }, + { PR_chmod, 0 }, + { PR_chown, 0 }, + { PR_chown32, 0 }, + { PR_chroot, 0 }, + { PR_connect, 0 }, + { PR_creat, 0 }, + { PR_execve, FILTER_SYSEXIT }, + { PR_faccessat, 0 }, + { PR_fchdir, FILTER_SYSEXIT }, + { PR_fchmodat, 0 }, + { PR_fchownat, 0 }, + { PR_fstatat64, 0 }, + { PR_futimesat, 0 }, + { PR_getcwd, FILTER_SYSEXIT }, + { PR_getpeername, FILTER_SYSEXIT }, + { PR_getsockname, FILTER_SYSEXIT }, + { PR_getxattr, 0 }, + { PR_inotify_add_watch, 0 }, + { PR_lchown, 0 }, + { PR_lchown32, 0 }, + { PR_lgetxattr, 0 }, + { PR_link, 0 }, + { PR_linkat, 0 }, + { PR_listxattr, 0 }, + { PR_llistxattr, 0 }, + { PR_lremovexattr, 0 }, + { PR_lsetxattr, 0 }, + { PR_lstat, 0 }, + { PR_lstat64, 0 }, + { PR_mkdir, 0 }, + { PR_mkdirat, 0 }, + { PR_mknod, 0 }, + { PR_mknodat, 0 }, + { PR_mount, 0 }, + { PR_name_to_handle_at, 0 }, + { PR_newfstatat, 0 }, + { PR_oldlstat, 0 }, + { PR_oldstat, 0 }, + { PR_open, 0 }, + { PR_openat, 0 }, + { PR_pivot_root, 0 }, + { PR_prctl, 0 }, + { PR_prlimit64, FILTER_SYSEXIT }, + { PR_ptrace, FILTER_SYSEXIT }, + { PR_readlink, FILTER_SYSEXIT }, + { PR_readlinkat, FILTER_SYSEXIT }, + { PR_removexattr, 0 }, + { PR_rename, FILTER_SYSEXIT }, + { PR_renameat, FILTER_SYSEXIT }, + { PR_renameat2, FILTER_SYSEXIT }, + { PR_rmdir, 0 }, + { PR_setrlimit, FILTER_SYSEXIT }, + { PR_setxattr, 0 }, + { PR_socketcall, FILTER_SYSEXIT }, + { PR_stat, 0 }, + { PR_statx, 0 }, + { PR_faccessat2, 0 }, + { PR_stat64, 0 }, + { PR_statfs, 0 }, + { PR_statfs64, 0 }, + { PR_swapoff, 0 }, + { PR_swapon, 0 }, + { PR_symlink, 0 }, + { PR_symlinkat, 0 }, + { PR_truncate, 0 }, + { PR_truncate64, 0 }, + { PR_umount, 0 }, + { PR_umount2, 0 }, + { PR_uname, FILTER_SYSEXIT }, + { PR_unlink, 0 }, + { PR_unlinkat, 0 }, + { PR_uselib, 0 }, + { PR_utime, 0 }, 
+ { PR_utimensat, 0 }, + { PR_utimensat_time64, 0 }, + { PR_utimes, 0 }, + { PR_wait4, FILTER_SYSEXIT }, + { PR_waitpid, FILTER_SYSEXIT }, + FILTERED_SYSNUM_END, +}; + +/** + * Add the @new_sysnums to the list of filtered @sysnums, using the + * given Talloc @context. This function returns -errno if an error + * occurred, otherwise 0. + */ +static int merge_filtered_sysnums(TALLOC_CTX *context, FilteredSysnum **sysnums, + const FilteredSysnum *new_sysnums) +{ + size_t i, j; + + assert(sysnums != NULL); + + if (*sysnums == NULL) { + /* Start with no sysnums but the terminator. */ + *sysnums = talloc_array(context, FilteredSysnum, 1); + if (*sysnums == NULL) + return -ENOMEM; + + (*sysnums)[0].value = PR_void; + } + + for (i = 0; new_sysnums[i].value != PR_void; i++) { + /* Search for the given sysnum. */ + for (j = 0; (*sysnums)[j].value != PR_void + && (*sysnums)[j].value != new_sysnums[i].value; j++) + ; + + if ((*sysnums)[j].value == PR_void) { + /* No such sysnum, allocate a new entry. */ + (*sysnums) = talloc_realloc(context, (*sysnums), FilteredSysnum, j + 2); + if ((*sysnums) == NULL) + return -ENOMEM; + + (*sysnums)[j] = new_sysnums[i]; + + /* The last item is the terminator. */ + (*sysnums)[j + 1].value = PR_void; + } + else + /* The sysnum is already filtered, merge the + * flags. */ + (*sysnums)[j].flags |= new_sysnums[i].flags; + } + + return 0; +} + +/** + * Tell the kernel to trace only syscalls handled by PRoot and its + * extensions. This filter will be enabled for the given @tracee and + * all of its future children. This function returns -errno if an + * error occurred, otherwise 0. + */ +int enable_syscall_filtering(const Tracee *tracee) +{ + FilteredSysnum *filtered_sysnums = NULL; + Extension *extension; + int status; + + assert(tracee != NULL && tracee->ctx != NULL); + + /* Add the sysnums required by PRoot to the list of filtered + * sysnums. TODO: only if path translation is required. 
*/ + status = merge_filtered_sysnums(tracee->ctx, &filtered_sysnums, proot_sysnums); + if (status < 0) + return status; + + /* Merge the sysnums required by the extensions to the list + * of filtered sysnums. */ + if (tracee->extensions != NULL) { + LIST_FOREACH(extension, tracee->extensions, link) { + if (extension->filtered_sysnums == NULL) + continue; + + status = merge_filtered_sysnums(tracee->ctx, &filtered_sysnums, + extension->filtered_sysnums); + if (status < 0) + return status; + } + } + + status = set_seccomp_filters(filtered_sysnums); + if (status < 0) + return status; + + return 0; +} + +#else + +#include "tracee/tracee.h" +#include "attribute.h" + +int enable_syscall_filtering(const Tracee *tracee UNUSED) +{ + return 0; +} + +#endif /* defined(HAVE_SECCOMP_FILTER) */ diff --git a/proot/proot_linux/syscall/seccomp.h b/proot/proot_linux/syscall/seccomp.h new file mode 100644 index 0000000..3d5ba40 --- /dev/null +++ b/proot/proot_linux/syscall/seccomp.h @@ -0,0 +1,48 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#ifndef SECCOMP_H
+#define SECCOMP_H
+
+#include "syscall/sysnum.h"
+#include "tracee/tracee.h"
+#include "attribute.h"
+#include "arch.h"
+
+typedef struct {
+	Sysnum value; /* neutral (PR_*) syscall number; PR_void terminates a list */
+	word_t flags; /* bitwise OR of FILTER_* flags */
+} FilteredSysnum;
+
+typedef struct {
+	unsigned int value; /* architecture value compared against seccomp_data.arch */
+	size_t nb_abis; /* number of valid entries in abis[] */
+	Abi abis[NB_MAX_ABIS]; /* ABIs whose syscall numbers are filtered for this architecture */
+} SeccompArch;
+
+#define FILTERED_SYSNUM_END { PR_void, 0 } /* list terminator */
+
+#define FILTER_SYSEXIT 0x1 /* NOTE(review): presumably "also stop at syscall exit" -- confirm in the event handler */
+
+extern int enable_syscall_filtering(const Tracee *tracee);
+
+#endif /* SECCOMP_H */
diff --git a/proot/proot_linux/syscall/socket.c b/proot/proot_linux/syscall/socket.c
new file mode 100644
index 0000000..8cb1413
--- /dev/null
+++ b/proot/proot_linux/syscall/socket.c
@@ -0,0 +1,217 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#include /* offsetof(3), */ +#include /* bzero(3), */ +#include /* strncpy(3), strlen(3), */ +#include /* assert(3), */ +#include /* E*, */ +#include /* struct sockaddr_un, AF_UNIX, */ +#include /* struct sockaddr_un, */ +#include /* MIN(), MAX(), */ + +#include "syscall/socket.h" +#include "tracee/tracee.h" +#include "tracee/mem.h" +#include "path/binding.h" +#include "path/temp.h" +#include "path/path.h" +#include "arch.h" + +#include "compat.h" + +/* The sockaddr_un structure has exactly the same layout on all + * architectures. */ +static const off_t offsetof_path = offsetof(struct sockaddr_un, sun_path); +extern struct sockaddr_un sockaddr_un__; +static const size_t sizeof_path = sizeof(sockaddr_un__.sun_path); + +/** + * Copy in @sockaddr the struct sockaddr_un stored in the @tracee + * memory at the given @address. Also, its pathname is copied to the + * null-terminated @path. Only @size bytes are read from the @tracee + * memory (should be <= @max_size <= sizeof(struct sockaddr_un)). + * This function returns -errno if an error occurred, 0 if the + * structure was not found (not a sockaddr_un or @size > @max_size), + * otherwise 1. + */ +static int read_sockaddr_un(Tracee *tracee, struct sockaddr_un *sockaddr, word_t max_size, + char path[PATH_MAX], word_t address, int size) +{ + int status; + + assert(max_size <= sizeof(struct sockaddr_un)); + + /* Nothing to do if the sockaddr has an unexpected size. */ + if (size <= offsetof_path || (word_t) size > max_size) + return 0; + + bzero(sockaddr, sizeof(struct sockaddr_un)); + status = read_data(tracee, sockaddr, address, size); + if (status < 0) + return status; + + /* Nothing to do if it's not a named Unix domain socket. */ + if ((sockaddr->sun_family != AF_UNIX) + || sockaddr->sun_path[0] == '\0') + return 0; + + /* Be careful: sun_path doesn't have to be null-terminated. 
*/ + assert(sizeof_path < PATH_MAX - 1); + strncpy(path, sockaddr->sun_path, sizeof_path); + path[sizeof_path] = '\0'; + + return 1; +} + +/** + * Translate the pathname of the struct sockaddr_un currently stored + * in the @tracee memory at the given @address. See the documentation + * of read_sockaddr_un() for the meaning of the @size parameter. + * Also, the new address of the translated sockaddr_un is put in the + * @address parameter. This function returns -errno if an error + * occurred, otherwise 0. + */ +int translate_socketcall_enter(Tracee *tracee, word_t *address, int size) +{ + struct sockaddr_un sockaddr; + char user_path[PATH_MAX]; + char host_path[PATH_MAX]; + int status; + + if (*address == 0) + return 0; + + status = read_sockaddr_un(tracee, &sockaddr, sizeof(sockaddr), user_path, *address, size); + if (status <= 0) + return status; + + status = translate_path(tracee, host_path, AT_FDCWD, user_path, true); + if (status < 0) + return status; + + /* Be careful: sun_path doesn't have to be null-terminated. */ + if (strlen(host_path) > sizeof_path) { + const char *shorter_host_dir; + const char *shorter_host_path; + Binding *binding; + + /* Ensure the guest path of this new binding is + * canonicalized, as it is always assumed. */ + strcpy(user_path, host_path); + status = detranslate_path(tracee, user_path, NULL); + if (status < 0) + return -EINVAL; + + /* The translated path is too long to fit the sun_path + * array, so let's bind it to a shorter path. */ + shorter_host_dir = create_temp_directory(tracee->ctx, "proot"); + if (shorter_host_dir == NULL) + return -EINVAL; + + shorter_host_path = talloc_asprintf(tracee->ctx, "%s/s", shorter_host_dir); + if (strlen(shorter_host_path) > sizeof_path) + return -EINVAL; + + /* Bing the guest path to a shorter host path. 
*/ + binding = insort_binding3(tracee, tracee->ctx, shorter_host_path, user_path); + if (binding == NULL) + return -EINVAL; + + /* This temporary file (shorter_host_path) will be removed once the + * binding is destroyed. */ + talloc_reparent(tracee->ctx, binding, shorter_host_dir); + talloc_reparent(tracee->ctx, binding, shorter_host_path); + + /* Let's use this shorter path now. */ + strcpy(host_path, shorter_host_path); + } + strncpy(sockaddr.sun_path, host_path, sizeof_path); + + /* Push the updated sockaddr to a newly allocated space. */ + *address = alloc_mem(tracee, sizeof(sockaddr)); + if (*address == 0) + return -EFAULT; + + status = write_data(tracee, *address, &sockaddr, sizeof(sockaddr)); + if (status < 0) + return status; + + return 1; +} + +/** + * Detranslate the pathname of the struct sockaddr_un currently stored + * in the @tracee memory at the given @sock_addr. See the + * documentation of read_sockaddr_un() for the meaning of the + * @size_addr and @max_size parameters. This function returns -errno + * if an error occurred, otherwise 0. + */ +int translate_socketcall_exit(Tracee *tracee, word_t sock_addr, word_t size_addr, word_t max_size) +{ + struct sockaddr_un sockaddr; + bool is_truncated = false; + char path[PATH_MAX]; + int status; + int size; + + if (sock_addr == 0) + return 0; + + size = peek_int32(tracee, size_addr); + if (errno != 0) + return -errno; + + max_size = MIN(max_size, sizeof(sockaddr)); + status = read_sockaddr_un(tracee, &sockaddr, max_size, path, sock_addr, size); + if (status <= 0) + return status; + + status = detranslate_path(tracee, path, NULL); + if (status < 0) + return status; + + /* Be careful: sun_path doesn't have to be null-terminated. */ + size = offsetof_path + strlen(path) + 1; + if (size < 0 || (word_t) size > max_size) { + size = max_size; + is_truncated = true; + } + strncpy(sockaddr.sun_path, path, sizeof_path); + + /* Overwrite the sockaddr and socklen parameters. 
*/ + status = write_data(tracee, sock_addr, &sockaddr, size); + if (status < 0) + return status; + + /* If sockaddr is truncated (because the buffer provided is + * too small), addrlen will return a value greater than was + * supplied to the call. See man 2 accept. */ + if (is_truncated) + size = max_size + 1; + + poke_int32(tracee, size_addr, size); + if (errno != 0) + return -errno; + + return 0; +} diff --git a/proot/proot_linux/syscall/socket.h b/proot/proot_linux/syscall/socket.h new file mode 100644 index 0000000..8c16cb1 --- /dev/null +++ b/proot/proot_linux/syscall/socket.h @@ -0,0 +1,32 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef SOCKET_H +#define SOCKET_H + +#include "arch.h" /* word_t */ +#include "tracee/tracee.h" + +int translate_socketcall_enter(Tracee *tracee, word_t *sock_addr, int size); +int translate_socketcall_exit(Tracee *tracee, word_t sock_addr, word_t size_addr, word_t max_size); + +#endif /* SOCKET_H */ diff --git a/proot/proot_linux/syscall/syscall.c b/proot/proot_linux/syscall/syscall.c new file mode 100644 index 0000000..8991a40 --- /dev/null +++ b/proot/proot_linux/syscall/syscall.c @@ -0,0 +1,181 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* assert(3), */ +#include /* PATH_MAX, */ +#include /* strlen(3), */ +#include /* errno(3), E* */ + +#include "syscall/syscall.h" +#include "syscall/chain.h" +#include "extension/extension.h" +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "tracee/mem.h" + +/** + * Copy in @path a C string (PATH_MAX bytes max.) from the @tracee's + * memory address space pointed to by the @reg argument of the + * current syscall. This function returns -errno if an error occured, + * otherwise it returns the size in bytes put into the @path. 
+ */ +int get_sysarg_path(const Tracee *tracee, char path[PATH_MAX], Reg reg) +{ + int size; + word_t src; + + src = peek_reg(tracee, CURRENT, reg); + + /* Check if the parameter is not NULL. Technically we should + * not return an -EFAULT for this special value since it is + * allowed for some syscall, utimensat(2) for instance. */ + if (src == 0) { + path[0] = '\0'; + return 0; + } + + /* Get the path from the tracee's memory space. */ + size = read_path(tracee, path, src); + if (size < 0) + return size; + + path[size] = '\0'; + return size; +} + +/** + * Copy @size bytes of the data pointed to by @tracer_ptr into a + * @tracee's memory block and make the @reg argument of the current + * syscall points to this new block. This function returns -errno if + * an error occured, otherwise 0. + */ +static int set_sysarg_data(Tracee *tracee, const void *tracer_ptr, word_t size, Reg reg) +{ + word_t tracee_ptr; + int status; + + /* Allocate space into the tracee's memory to host the new data. */ + tracee_ptr = alloc_mem(tracee, size); + if (tracee_ptr == 0) + return -EFAULT; + + /* Copy the new data into the previously allocated space. */ + status = write_data(tracee, tracee_ptr, tracer_ptr, size); + if (status < 0) + return status; + + /* Make this argument point to the new data. */ + poke_reg(tracee, reg, tracee_ptr); + + return 0; +} + +/** + * Copy @path to a @tracee's memory block and make the @reg argument + * of the current syscall points to this new block. This function + * returns -errno if an error occured, otherwise 0. 
+ */ +int set_sysarg_path(Tracee *tracee, const char path[PATH_MAX], Reg reg) +{ + return set_sysarg_data(tracee, path, strlen(path) + 1, reg); +} + +void translate_syscall(Tracee *tracee) +{ + const bool is_enter_stage = IS_IN_SYSENTER(tracee); + int status; + + assert(tracee->exe != NULL); + + status = fetch_regs(tracee); + if (status < 0) + return; + + if (is_enter_stage) { + /* Never restore original register values at the end + * of this stage. */ + tracee->restore_original_regs = false; + + print_current_regs(tracee, 3, "sysenter start"); + + /* Translate the syscall only if it was actually + * requested by the tracee, it is not a syscall + * chained by PRoot. */ + if (tracee->chain.syscalls == NULL) { + save_current_regs(tracee, ORIGINAL); + status = translate_syscall_enter(tracee); + save_current_regs(tracee, MODIFIED); + } + else { + status = notify_extensions(tracee, SYSCALL_CHAINED_ENTER, 0, 0); + tracee->restart_how = PTRACE_SYSCALL; + } + + /* Remember the tracee status for the "exit" stage and + * avoid the actual syscall if an error was reported + * by the translation/extension. */ + if (status < 0) { + set_sysnum(tracee, PR_void); + poke_reg(tracee, SYSARG_RESULT, (word_t) status); + tracee->status = status; + } + else + tracee->status = 1; + + /* Restore tracee's stack pointer now if it won't hit + * the sysexit stage (i.e. when seccomp is enabled and + * there's nothing else to do). */ + if (tracee->restart_how == PTRACE_CONT) { + tracee->status = 0; + poke_reg(tracee, STACK_POINTER, peek_reg(tracee, ORIGINAL, STACK_POINTER)); + } + } + else { + /* By default, restore original register values at the + * end of this stage. */ + tracee->restore_original_regs = true; + + print_current_regs(tracee, 5, "sysexit start"); + + /* Translate the syscall only if it was actually + * requested by the tracee, it is not a syscall + * chained by PRoot. 
*/ + if (tracee->chain.syscalls == NULL) + translate_syscall_exit(tracee); + else + (void) notify_extensions(tracee, SYSCALL_CHAINED_EXIT, 0, 0); + + /* Reset the tracee's status. */ + tracee->status = 0; + + /* Insert the next chained syscall, if any. */ + if (tracee->chain.syscalls != NULL) + chain_next_syscall(tracee); + } + + (void) push_regs(tracee); + + if (is_enter_stage) + print_current_regs(tracee, 5, "sysenter end" ); + else + print_current_regs(tracee, 4, "sysexit end"); +} diff --git a/proot/proot_linux/syscall/syscall.h b/proot/proot_linux/syscall/syscall.h new file mode 100644 index 0000000..f99bd94 --- /dev/null +++ b/proot/proot_linux/syscall/syscall.h @@ -0,0 +1,38 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef SYSCALL_H +#define SYSCALL_H + +#include /* PATH_MAX, */ + +#include "tracee/tracee.h" +#include "tracee/reg.h" + +extern int get_sysarg_path(const Tracee *tracee, char path[PATH_MAX], Reg reg); +extern int set_sysarg_path(Tracee *tracee, const char path[PATH_MAX], Reg reg); + +extern void translate_syscall(Tracee *tracee); +extern int translate_syscall_enter(Tracee *tracee); +extern void translate_syscall_exit(Tracee *tracee); + +#endif /* SYSCALL_H */ diff --git a/proot/proot_linux/syscall/sysnum.c b/proot/proot_linux/syscall/sysnum.c new file mode 100644 index 0000000..b5440a6 --- /dev/null +++ b/proot/proot_linux/syscall/sysnum.c @@ -0,0 +1,161 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#include <assert.h>
+
+#include "syscall/sysnum.h"
+#include "tracee/tracee.h"
+#include "tracee/abi.h"
+#include "tracee/reg.h"
+#include "arch.h"
+#include "cli/note.h"
+
+#include SYSNUMS_HEADER1
+
+#ifdef SYSNUMS_HEADER2
+#include SYSNUMS_HEADER2
+#endif
+
+#ifdef SYSNUMS_HEADER3
+#include SYSNUMS_HEADER3
+#endif
+
+typedef struct {
+	const Sysnum *table;	/* neutral values, indexed by (arch number - offset) */
+	word_t offset;		/* arch number that maps to table[0] */
+	word_t length;		/* number of entries in table */
+} Sysnums;
+
+/**
+ * Point @sysnums at the sysnum table of @abi; an unknown @abi trips assert().
+ */
+static void get_sysnums(Abi abi, Sysnums *sysnums)
+{
+	switch (abi) {
+	case ABI_DEFAULT:
+		sysnums->table  = SYSNUMS_ABI1;
+		sysnums->length = sizeof(SYSNUMS_ABI1) / sizeof(Sysnum);
+		sysnums->offset = 0;
+		return;
+#ifdef SYSNUMS_ABI2
+	case ABI_2:
+		sysnums->table  = SYSNUMS_ABI2;
+		sysnums->length = sizeof(SYSNUMS_ABI2) / sizeof(Sysnum);
+		sysnums->offset = 0;
+		return;
+#endif
+#ifdef SYSNUMS_ABI3
+	case ABI_3:
+		sysnums->table  = SYSNUMS_ABI3;
+		sysnums->length = sizeof(SYSNUMS_ABI3) / sizeof(Sysnum);
+		sysnums->offset = 0x40000000; /* x32 */
+		return;
+#endif
+	default:
+		assert(0);
+	}
+}
+
+/**
+ * Return the neutral value of @sysnum for @abi; PR_void if out of the table.
+ */
+static Sysnum translate_sysnum(Abi abi, word_t sysnum)
+{
+	Sysnums sysnums = { 0 };	/* stays empty if assert() is compiled out */
+	word_t index;
+
+	get_sysnums(abi, &sysnums);
+
+	/* Below the first number covered by this ABI's table. */
+	if (sysnum < sysnums.offset)
+		return PR_void;
+
+	/* table[] holds exactly .length entries, so .length itself is
+	 * already one past the end: reject it too ('>=', not '>'). */
+	index = sysnum - sysnums.offset;
+	if (index >= sysnums.length)
+		return PR_void;
+
+	return sysnums.table[index];
+}
+
+/**
+ * Return the architecture value of @sysnum for @abi; SYSCALL_AVOIDER if none.
+ */
+word_t detranslate_sysnum(Abi abi, Sysnum sysnum)
+{
+	Sysnums sysnums = { 0 };	/* stays empty if assert() is compiled out */
+	size_t i;
+
+	/* Very special case: PR_void is never looked up in the table. */
+	if (sysnum == PR_void)
+		return SYSCALL_AVOIDER;
+
+	get_sysnums(abi, &sysnums);
+
+	for (i = 0; i < sysnums.length; i++) {
+		if (sysnums.table[i] != sysnum)
+			continue;
+
+		return i + sysnums.offset;
+	}
+
+	return SYSCALL_AVOIDER;
+}
+
+/**
+ * Return the neutral value of the @tracee's current syscall number.
+ */
+Sysnum get_sysnum(const Tracee *tracee, RegVersion version)
+{
+	return translate_sysnum(get_abi(tracee), peek_reg(tracee, version, SYSARG_NUM));
+}
+
+/**
+ * Overwrite the @tracee's current syscall number with @sysnum. Note:
+ * this neutral value is automatically converted into the architecture
+ * value.
+ */
+void set_sysnum(Tracee *tracee, Sysnum sysnum)
+{
+	poke_reg(tracee, SYSARG_NUM, detranslate_sysnum(get_abi(tracee), sysnum));
+}
+
+/**
+ * Return the name of @sysnum; "void" for PR_void, "" if >= PR_NB_SYSNUM.
+ */
+const char *stringify_sysnum(Sysnum sysnum)
+{
+	#define SYSNUM(item) [ PR_ ## item ] = #item,
+	static const char *names[] = {
+		#include "syscall/sysnums.list"
+	};
+	#undef SYSNUM
+
+	if (sysnum == 0)
+		return "void";
+
+	if (sysnum >= PR_NB_SYSNUM)
+		return "";
+
+	return names[sysnum];
+}
diff --git a/proot/proot_linux/syscall/sysnum.h b/proot/proot_linux/syscall/sysnum.h
new file mode 100644
index 0000000..8d56ab3
--- /dev/null
+++ b/proot/proot_linux/syscall/sysnum.h
@@ -0,0 +1,45 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#ifndef SYSNUM_H
+#define SYSNUM_H
+
+#include /* NOTE(review): the header name is missing on this line; confirm against upstream and restore it */
+
+#include "tracee/tracee.h"
+#include "tracee/abi.h"
+#include "tracee/reg.h"
+
+#define SYSNUM(item) PR_ ## item, /* turns each entry of sysnums.list into a PR_<name> enumerator */
+typedef enum {
+	PR_void = 0, /* explicitly 0 */
+	#include "syscall/sysnums.list"
+	PR_NB_SYSNUM /* last enumerator, placed after every entry of sysnums.list */
+} Sysnum;
+#undef SYSNUM
+
+extern Sysnum get_sysnum(const Tracee *tracee, RegVersion version); /* neutral number of @tracee's current syscall */
+extern void set_sysnum(Tracee *tracee, Sysnum sysnum); /* store @sysnum, converted to the arch number, in @tracee */
+extern word_t detranslate_sysnum(Abi abi, Sysnum sysnum); /* neutral number -> arch number for @abi */
+extern const char *stringify_sysnum(Sysnum sysnum); /* human readable name of @sysnum */
+
+#endif /* SYSNUM_H */
diff --git a/proot/proot_linux/syscall/sysnums-arm.h b/proot/proot_linux/syscall/sysnums-arm.h
new file mode 100644
index 0000000..dfd512f
--- /dev/null
+++ b/proot/proot_linux/syscall/sysnums-arm.h
@@ -0,0 +1,344 @@
+#include "syscall/sysnum.h"
+
+static const Sysnum sysnums_arm[] = { /* index: ARM syscall number; indices not listed are zero-initialized, i.e. PR_void */
+	[ 0 ] = PR_restart_syscall,
+	[ 1 ] = PR_exit,
+	[ 2 ] = PR_fork,
+	[ 3 ] = PR_read,
+	[ 4 ] = PR_write,
+	[ 5 ] = PR_open,
+	[ 6 ] = PR_close,
+	[ 8 ] = PR_creat,
+	[ 9 ] = PR_link,
+	[ 10 ] = PR_unlink,
+	[ 11 ] = PR_execve,
+	[ 12 ] = PR_chdir,
+	[ 14 ] = PR_mknod,
+	[ 15 ] = PR_chmod,
+	[ 16 ] = PR_lchown,
+	[ 19 ] = PR_lseek,
+	[ 20 ] = PR_getpid,
+	[ 21 ] = PR_mount,
+	[ 23 ] = PR_setuid,
+	[ 24 ] = PR_getuid,
+	[ 26 ] = PR_ptrace,
+	[ 29 ] = PR_pause,
+	[ 33 ] = PR_access,
+	[ 34 ] = PR_nice,
+	[ 36 ] = PR_sync,
+	[ 37 ] = PR_kill,
+	[ 38 ] = PR_rename,
+	[ 39 ] = PR_mkdir,
+	[ 40 ] = PR_rmdir,
+	[ 41 ] = PR_dup,
+	[ 42 ] = PR_pipe,
+	[ 43 ] = PR_times,
+	[ 45 ] = PR_brk,
+	[ 46 ] = PR_setgid,
+	[ 47 ] = PR_getgid,
+	[ 49 ] = PR_geteuid,
+	[ 50 ] = PR_getegid,
+	[ 51 ] = PR_acct,
+	[ 52 ] = PR_umount2,
+	[ 54 ] = PR_ioctl,
+	[ 55 ] = PR_fcntl,
+	[ 57 ] = PR_setpgid,
+	[ 60 ] = PR_umask,
+	[ 61 ] = PR_chroot,
+	[ 62 ] = PR_ustat,
+	[ 63 ] = PR_dup2,
+	[ 64 ] = PR_getppid,
+	[ 65 ] = PR_getpgrp,
+	[ 66 ] = PR_setsid,
+	[ 67 ] = PR_sigaction,
+	[ 70 ] = PR_setreuid,
+	[ 71 ] = PR_setregid,
+	[ 72 ] = PR_sigsuspend,
+	[ 73 ] = PR_sigpending,
+	[ 74 ] = PR_sethostname,
+	[ 75 ] = PR_setrlimit,
+	[ 77 ] = PR_getrusage,
+	[ 78 ] = PR_gettimeofday,
+	[ 79 ] = PR_settimeofday,
+	[ 80 ] = PR_getgroups,
+	[ 81 ] = PR_setgroups,
+	[ 83 ] = PR_symlink,
+	[ 85 ] = PR_readlink,
+	[ 86 ] = PR_uselib,
+	[ 87 ] = PR_swapon,
+	[ 88 ] = PR_reboot,
+	[ 91 ] = PR_munmap,
+	[ 92 ] = PR_truncate,
+	[ 93 ] = PR_ftruncate,
+	[ 94 ] = PR_fchmod,
+	[ 95 ] = PR_fchown,
+	[ 96 ] = PR_getpriority,
+	[ 97 ] = PR_setpriority,
+	[ 99 ] = PR_statfs,
+	[ 100 ] = PR_fstatfs,
+	[ 103 ] = PR_syslog,
+	[ 104 ] = PR_setitimer,
+	[ 105 ] = PR_getitimer,
+	[ 106 ] = PR_stat,
+	[ 107 ] = PR_lstat,
+	[ 108 ] = PR_fstat,
+	[ 111 ] = PR_vhangup,
+	[ 114 ] = PR_wait4,
+	[ 115 ] = PR_swapoff,
+	[ 116 ] = PR_sysinfo,
+	[ 118 ] = PR_fsync,
+	[ 119 ] = PR_sigreturn,
+	[ 120 ] = PR_clone,
+	[ 121 ] = PR_setdomainname,
+	[ 122 ] = PR_uname,
+	[ 124 ] = PR_adjtimex,
+	[ 125 ] = PR_mprotect,
+	[ 126 ] = PR_sigprocmask,
+	[ 128 ] = PR_init_module,
+	[ 129 ] = PR_delete_module,
+	[ 131 ] = PR_quotactl,
+	[ 132 ] = PR_getpgid,
+	[ 133 ] = PR_fchdir,
+	[ 134 ] = PR_bdflush,
+	[ 135 ] = PR_sysfs,
+	[ 136 ] = PR_personality,
+	[ 138 ] = PR_setfsuid,
+	[ 139 ] = PR_setfsgid,
+	[ 140 ] = PR__llseek,
+	[ 141 ] = PR_getdents,
+	[ 142 ] = PR__newselect,
+	[ 143 ] = PR_flock,
+	[ 144 ] = PR_msync,
+	[ 145 ] = PR_readv,
+	[ 146 ] = PR_writev,
+	[ 147 ] = PR_getsid,
+	[ 148 ] = PR_fdatasync,
+	[ 149 ] = PR__sysctl,
+	[ 150 ] = PR_mlock,
+	[ 151 ] = PR_munlock,
+	[ 152 ] = PR_mlockall,
+	[ 153 ] = PR_munlockall,
+	[ 154 ] = PR_sched_setparam,
+	[ 155 ] = PR_sched_getparam,
+	[ 156 ] = PR_sched_setscheduler,
+	[ 157 ] = PR_sched_getscheduler,
+	[ 158 ] = PR_sched_yield,
+	[ 159 ] = PR_sched_get_priority_max,
+	[ 160 ] = PR_sched_get_priority_min,
+	[ 161 ] = PR_sched_rr_get_interval,
+	[ 162 ] = PR_nanosleep,
+	[ 163 ] = PR_mremap,
+	[ 164 ] = PR_setresuid,
+	[ 165 ] = PR_getresuid,
+	[ 168 ] = PR_poll,
+	[ 169 ] = PR_nfsservctl,
+	[ 170 ] = PR_setresgid,
+	[ 171 ] = PR_getresgid,
+	[ 172 ] = PR_prctl,
+	[ 173 ] = PR_rt_sigreturn,
+	[ 174 ] = PR_rt_sigaction,
+	[ 175 ] = PR_rt_sigprocmask,
+	[ 176 ] = PR_rt_sigpending,
+	[ 177 ] = PR_rt_sigtimedwait,
+	[ 178 ] = PR_rt_sigqueueinfo,
+	[ 179 ] = PR_rt_sigsuspend,
+	[ 180 ] = PR_pread64,
+	[ 181 ] = PR_pwrite64,
+	[ 182 ] = PR_chown,
+	[ 183 ] = PR_getcwd,
+	[ 184 ] = PR_capget,
+	[ 185 ] = PR_capset,
+	[ 186 ] = PR_sigaltstack,
+	[ 187 ] = PR_sendfile,
+	[ 190 ] = PR_vfork,
+	[ 191 ] = PR_ugetrlimit,
+	[ 192 ] = PR_mmap2,
+	[ 193 ] = PR_truncate64,
+	[ 194 ] = PR_ftruncate64,
+	[ 195 ] = PR_stat64,
+	[ 196 ] = PR_lstat64,
+	[ 197 ] = PR_fstat64,
+	[ 198 ] = PR_lchown32,
+	[ 199 ] = PR_getuid32,
+	[ 200 ] = PR_getgid32,
+	[ 201 ] = PR_geteuid32,
+	[ 202 ] = PR_getegid32,
+	[ 203 ] = PR_setreuid32,
+	[ 204 ] = PR_setregid32,
+	[ 205 ] = PR_getgroups32,
+	[ 206 ] = PR_setgroups32,
+	[ 207 ] = PR_fchown32,
+	[ 208 ] = PR_setresuid32,
+	[ 209 ] = PR_getresuid32,
+	[ 210 ] = PR_setresgid32,
+	[ 211 ] = PR_getresgid32,
+	[ 212 ] = PR_chown32,
+	[ 213 ] = PR_setuid32,
+	[ 214 ] = PR_setgid32,
+	[ 215 ] = PR_setfsuid32,
+	[ 216 ] = PR_setfsgid32,
+	[ 217 ] = PR_getdents64,
+	[ 218 ] = PR_pivot_root,
+	[ 219 ] = PR_mincore,
+	[ 220 ] = PR_madvise,
+	[ 221 ] = PR_fcntl64,
+	[ 224 ] = PR_gettid,
+	[ 225 ] = PR_readahead,
+	[ 226 ] = PR_setxattr,
+	[ 227 ] = PR_lsetxattr,
+	[ 228 ] = PR_fsetxattr,
+	[ 229 ] = PR_getxattr,
+	[ 230 ] = PR_lgetxattr,
+	[ 231 ] = PR_fgetxattr,
+	[ 232 ] = PR_listxattr,
+	[ 233 ] = PR_llistxattr,
+	[ 234 ] = PR_flistxattr,
+	[ 235 ] = PR_removexattr,
+	[ 236 ] = PR_lremovexattr,
+	[ 237 ] = PR_fremovexattr,
+	[ 238 ] = PR_tkill,
+	[ 239 ] = PR_sendfile64,
+	[ 240 ] = PR_futex,
+	[ 241 ] = PR_sched_setaffinity,
+	[ 242 ] = PR_sched_getaffinity,
+	[ 243 ] = PR_io_setup,
+	[ 244 ] = PR_io_destroy,
+	[ 245 ] = PR_io_getevents,
+	[ 246 ] = PR_io_submit,
+	[ 247 ] = PR_io_cancel,
+	[ 248 ] = PR_exit_group,
+	[ 249 ] = PR_lookup_dcookie,
+	[ 250 ] = PR_epoll_create,
+	[ 251 ] = PR_epoll_ctl,
+	[ 252 ] = PR_epoll_wait,
+	[ 253 ] = PR_remap_file_pages,
+	[ 256 ] = PR_set_tid_address,
+	[ 257 ] = PR_timer_create,
+	[ 258 ] = PR_timer_settime,
+	[ 259 ] = PR_timer_gettime,
+	[ 260 ] = PR_timer_getoverrun,
+	[ 261 ] = PR_timer_delete,
+	[ 262 ] = PR_clock_settime,
+	[ 263 ] = PR_clock_gettime,
+	[ 264 ] = PR_clock_getres,
+	[ 265 ] = PR_clock_nanosleep,
+	[ 266 ] = PR_statfs64,
+	[ 267 ] = PR_fstatfs64,
+	[ 268 ] = PR_tgkill,
+	[ 269 ] = PR_utimes,
+	[ 270 ] = PR_arm_fadvise64_64,
+	[ 271 ] = PR_pciconfig_iobase,
+	[ 272 ] = PR_pciconfig_read,
+	[ 273 ] = PR_pciconfig_write,
+	[ 274 ] = PR_mq_open,
+	[ 275 ] = PR_mq_unlink,
+	[ 276 ] = PR_mq_timedsend,
+	[ 277 ] = PR_mq_timedreceive,
+	[ 278 ] = PR_mq_notify,
+	[ 279 ] = PR_mq_getsetattr,
+	[ 280 ] = PR_waitid,
+	[ 281 ] = PR_socket,
+	[ 282 ] = PR_bind,
+	[ 283 ] = PR_connect,
+	[ 284 ] = PR_listen,
+	[ 285 ] = PR_accept,
+	[ 286 ] = PR_getsockname,
+	[ 287 ] = PR_getpeername,
+	[ 288 ] = PR_socketpair,
+	[ 289 ] = PR_send,
+	[ 290 ] = PR_sendto,
+	[ 291 ] = PR_recv,
+	[ 292 ] = PR_recvfrom,
+	[ 293 ] = PR_shutdown,
+	[ 294 ] = PR_setsockopt,
+	[ 295 ] = PR_getsockopt,
+	[ 296 ] = PR_sendmsg,
+	[ 297 ] = PR_recvmsg,
+	[ 298 ] = PR_semop,
+	[ 299 ] = PR_semget,
+	[ 300 ] = PR_semctl,
+	[ 301 ] = PR_msgsnd,
+	[ 302 ] = PR_msgrcv,
+	[ 303 ] = PR_msgget,
+	[ 304 ] = PR_msgctl,
+	[ 305 ] = PR_shmat,
+	[ 306 ] = PR_shmdt,
+	[ 307 ] = PR_shmget,
+	[ 308 ] = PR_shmctl,
+	[ 309 ] = PR_add_key,
+	[ 310 ] = PR_request_key,
+	[ 311 ] = PR_keyctl,
+	[ 312 ] = PR_semtimedop,
+	[ 313 ] = PR_vserver,
+	[ 314 ] = PR_ioprio_set,
+	[ 315 ] = PR_ioprio_get,
+	[ 316 ] = PR_inotify_init,
+	[ 317 ] = PR_inotify_add_watch,
+	[ 318 ] = PR_inotify_rm_watch,
+	[ 319 ] = PR_mbind,
+	[ 320 ] = PR_get_mempolicy,
+	[ 321 ] = PR_set_mempolicy,
+	[ 322 ] = PR_openat,
+	[ 323 ] = PR_mkdirat,
+	[ 324 ] = PR_mknodat,
+	[ 325 ] = PR_fchownat,
+	[ 326 ] = PR_futimesat,
+	[ 327 ] = PR_fstatat64,
+	[ 328 ] = PR_unlinkat,
+	[ 329 ] = PR_renameat,
+	[ 330 ] = PR_linkat,
+	[ 331 ] = PR_symlinkat,
+	[ 332 ] = PR_readlinkat,
+	[ 333 ] = PR_fchmodat,
+	[ 334 ] = PR_faccessat,
+	[ 335 ] = PR_pselect6,
+	[ 336 ] = PR_ppoll,
+	[ 337 ] = PR_unshare,
+	[ 338 ] = PR_set_robust_list,
+	[ 339 ] = PR_get_robust_list,
+	[ 340 ] = PR_splice,
+	[ 341 ] = PR_arm_sync_file_range,
+	[ 342 ] = PR_tee,
+	[ 343 ] = PR_vmsplice,
+	[ 344 ] = PR_move_pages,
+	[ 345 ] = PR_getcpu,
+	[ 346 ] = PR_epoll_pwait,
+	[ 347 ] = PR_kexec_load,
+	[ 348 ] = PR_utimensat,
+	[ 349 ] = PR_signalfd,
+	[ 350 ] = PR_timerfd_create,
+	[ 351 ] = PR_eventfd,
+	[ 352 ] = PR_fallocate,
+	[ 353 ] = PR_timerfd_settime,
+	[ 354 ] = PR_timerfd_gettime,
+	[ 355 ] = PR_signalfd4,
+	[ 356 ] = PR_eventfd2,
+	[ 357 ] = PR_epoll_create1,
+	[ 358 ] = PR_dup3,
+	[ 359 ] = PR_pipe2,
+	[ 360 ] = PR_inotify_init1,
+	[ 361 ] = PR_preadv,
+	[ 362 ] = PR_pwritev,
+	[ 363 ] = PR_rt_tgsigqueueinfo,
+	[ 364 ] = PR_perf_event_open,
+	[ 365 ] = PR_recvmmsg,
+	[ 366 ] = PR_accept4,
+	[ 367 ] = PR_fanotify_init,
+	[ 368 ] = PR_fanotify_mark,
+	[ 369 ] = PR_prlimit64,
+	[ 370 ] = PR_name_to_handle_at,
+	[ 371 ] = PR_open_by_handle_at,
+	[ 372 ] = PR_clock_adjtime,
+	[ 373 ] = PR_syncfs,
+	[ 374 ] = PR_sendmmsg,
+	[ 375 ] = PR_setns,
+	[ 376 ] = PR_process_vm_readv,
+	[ 377 ] = PR_process_vm_writev,
+	[ 378 ] = PR_kcmp,
+	[ 379 ] = PR_finit_module,
+	[ 380 ] = PR_sched_setattr,
+	[ 381 ] = PR_sched_getattr,
+	[ 382 ] = PR_renameat2,
+	[ 397 ] = PR_statx,
+	[ 412 ] = PR_utimensat_time64,
+};
diff --git a/proot/proot_linux/syscall/sysnums-arm64.h b/proot/proot_linux/syscall/sysnums-arm64.h
new file mode 100644
index 0000000..dfb9889
--- /dev/null
+++ b/proot/proot_linux/syscall/sysnums-arm64.h
@@ -0,0 +1,267 @@
+#include "syscall/sysnum.h"
+
+static const Sysnum sysnums_arm64[] = { + [ 0 ] = PR_io_setup, + [ 1 ] = PR_io_destroy, + [ 2 ] = PR_io_submit, + [ 3 ] = PR_io_cancel, + [ 4 ] = PR_io_getevents, + [ 5 ] = PR_setxattr, + [ 6 ] = PR_lsetxattr, + [ 7 ] = PR_fsetxattr, + [ 8 ] = PR_getxattr, + [ 9 ] = PR_lgetxattr, + [ 10 ] = PR_fgetxattr, + [ 11 ] = PR_listxattr, + [ 12 ] = PR_llistxattr, + [ 13 ] = PR_flistxattr, + [ 14 ] = PR_removexattr, + [ 15 ] = PR_lremovexattr, + [ 16 ] = PR_fremovexattr, + [ 17 ] = PR_getcwd, + [ 18 ] = PR_lookup_dcookie, + [ 19 ] = PR_eventfd2, + [ 20 ] = PR_epoll_create1, + [ 21 ] = PR_epoll_ctl, + [ 22 ] = PR_epoll_pwait, + [ 23 ] = PR_dup, + [ 24 ] = PR_dup3, + [ 25 ] = PR_fcntl, + [ 26 ] = PR_inotify_init1, + [ 27 ] = PR_inotify_add_watch, + [ 28 ] = PR_inotify_rm_watch, + [ 29 ] = PR_ioctl, + [ 30 ] = PR_ioprio_set, + [ 31 ] = PR_ioprio_get, + [ 32 ] = PR_flock, + [ 33 ] = PR_mknodat, + [ 34 ] = PR_mkdirat, + [ 35 ] = PR_unlinkat, + [ 36 ] = PR_symlinkat, + [ 37 ] = PR_linkat, + [ 38 ] = PR_renameat, + [ 39 ] = PR_umount2, + [ 40 ] = PR_mount, + [ 41 ] = PR_pivot_root, + [ 42 ] = PR_nfsservctl, + [ 43 ] = PR_statfs, + [ 44 ] = PR_fstatfs, + [ 45 ] = PR_truncate, + [ 46 ] = PR_ftruncate, + [ 47 ] = PR_fallocate, + [ 48 ] = PR_faccessat, + [ 49 ] = PR_chdir, + [ 50 ] = PR_fchdir, + [ 51 ] = PR_chroot, + [ 52 ] = PR_fchmod, + [ 53 ] = PR_fchmodat, + [ 54 ] = PR_fchownat, + [ 55 ] = PR_fchown, + [ 56 ] = PR_openat, + [ 57 ] = PR_close, + [ 58 ] = PR_vhangup, + [ 59 ] = PR_pipe2, + [ 60 ] = PR_quotactl, + [ 61 ] = PR_getdents64, + [ 62 ] = PR_lseek, + [ 63 ] = PR_read, + [ 64 ] = PR_write, + [ 65 ] = PR_readv, + [ 66 ] = PR_writev, + [ 67 ] = PR_pread64, + [ 68 ] = PR_pwrite64, + [ 69 ] = PR_preadv, + [ 70 ] = PR_pwritev, + [ 71 ] = PR_sendfile, + [ 72 ] = PR_pselect6, + [ 73 ] = PR_ppoll, + [ 74 ] = PR_signalfd4, + [ 75 ] = PR_vmsplice, + [ 76 ] = PR_splice, + [ 77 ] = PR_tee, + [ 78 ] = PR_readlinkat, + [ 79 ] = PR_fstatat64, + [ 80 ] = PR_fstat, + [ 81 ] = PR_sync, + [ 
82 ] = PR_fsync, + [ 83 ] = PR_fdatasync, + [ 84 ] = PR_sync_file_range, + [ 85 ] = PR_timerfd_create, + [ 86 ] = PR_timerfd_settime, + [ 87 ] = PR_timerfd_gettime, + [ 88 ] = PR_utimensat, + [ 89 ] = PR_acct, + [ 90 ] = PR_capget, + [ 91 ] = PR_capset, + [ 92 ] = PR_personality, + [ 93 ] = PR_exit, + [ 94 ] = PR_exit_group, + [ 95 ] = PR_waitid, + [ 96 ] = PR_set_tid_address, + [ 97 ] = PR_unshare, + [ 98 ] = PR_futex, + [ 99 ] = PR_set_robust_list, + [ 100 ] = PR_get_robust_list, + [ 101 ] = PR_nanosleep, + [ 102 ] = PR_getitimer, + [ 103 ] = PR_setitimer, + [ 104 ] = PR_kexec_load, + [ 105 ] = PR_init_module, + [ 106 ] = PR_delete_module, + [ 107 ] = PR_timer_create, + [ 108 ] = PR_timer_gettime, + [ 109 ] = PR_timer_getoverrun, + [ 110 ] = PR_timer_settime, + [ 111 ] = PR_timer_delete, + [ 112 ] = PR_clock_settime, + [ 113 ] = PR_clock_gettime, + [ 114 ] = PR_clock_getres, + [ 115 ] = PR_clock_nanosleep, + [ 116 ] = PR_syslog, + [ 117 ] = PR_ptrace, + [ 118 ] = PR_sched_setparam, + [ 119 ] = PR_sched_setscheduler, + [ 120 ] = PR_sched_getscheduler, + [ 121 ] = PR_sched_getparam, + [ 122 ] = PR_sched_setaffinity, + [ 123 ] = PR_sched_getaffinity, + [ 124 ] = PR_sched_yield, + [ 125 ] = PR_sched_get_priority_max, + [ 126 ] = PR_sched_get_priority_min, + [ 127 ] = PR_sched_rr_get_interval, + [ 128 ] = PR_restart_syscall, + [ 129 ] = PR_kill, + [ 130 ] = PR_tkill, + [ 131 ] = PR_tgkill, + [ 132 ] = PR_sigaltstack, + [ 133 ] = PR_rt_sigsuspend, + [ 134 ] = PR_rt_sigaction, + [ 135 ] = PR_rt_sigprocmask, + [ 136 ] = PR_rt_sigpending, + [ 137 ] = PR_rt_sigtimedwait, + [ 138 ] = PR_rt_sigqueueinfo, + [ 139 ] = PR_rt_sigreturn, + [ 140 ] = PR_setpriority, + [ 141 ] = PR_getpriority, + [ 142 ] = PR_reboot, + [ 143 ] = PR_setregid, + [ 144 ] = PR_setgid, + [ 145 ] = PR_setreuid, + [ 146 ] = PR_setuid, + [ 147 ] = PR_setresuid, + [ 148 ] = PR_getresuid, + [ 149 ] = PR_setresgid, + [ 150 ] = PR_getresgid, + [ 151 ] = PR_setfsuid, + [ 152 ] = PR_setfsgid, + [ 153 ] = 
PR_times, + [ 154 ] = PR_setpgid, + [ 155 ] = PR_getpgid, + [ 156 ] = PR_getsid, + [ 157 ] = PR_setsid, + [ 158 ] = PR_getgroups, + [ 159 ] = PR_setgroups, + [ 160 ] = PR_uname, + [ 161 ] = PR_sethostname, + [ 162 ] = PR_setdomainname, + [ 163 ] = PR_getrlimit, + [ 164 ] = PR_setrlimit, + [ 165 ] = PR_getrusage, + [ 166 ] = PR_umask, + [ 167 ] = PR_prctl, + [ 168 ] = PR_getcpu, + [ 169 ] = PR_gettimeofday, + [ 170 ] = PR_settimeofday, + [ 171 ] = PR_adjtimex, + [ 172 ] = PR_getpid, + [ 173 ] = PR_getppid, + [ 174 ] = PR_getuid, + [ 175 ] = PR_geteuid, + [ 176 ] = PR_getgid, + [ 177 ] = PR_getegid, + [ 178 ] = PR_gettid, + [ 179 ] = PR_sysinfo, + [ 180 ] = PR_mq_open, + [ 181 ] = PR_mq_unlink, + [ 182 ] = PR_mq_timedsend, + [ 183 ] = PR_mq_timedreceive, + [ 184 ] = PR_mq_notify, + [ 185 ] = PR_mq_getsetattr, + [ 186 ] = PR_msgget, + [ 187 ] = PR_msgctl, + [ 188 ] = PR_msgrcv, + [ 189 ] = PR_msgsnd, + [ 190 ] = PR_semget, + [ 191 ] = PR_semctl, + [ 192 ] = PR_semtimedop, + [ 193 ] = PR_semop, + [ 194 ] = PR_shmget, + [ 195 ] = PR_shmctl, + [ 196 ] = PR_shmat, + [ 197 ] = PR_shmdt, + [ 198 ] = PR_socket, + [ 199 ] = PR_socketpair, + [ 200 ] = PR_bind, + [ 201 ] = PR_listen, + [ 202 ] = PR_accept, + [ 203 ] = PR_connect, + [ 204 ] = PR_getsockname, + [ 205 ] = PR_getpeername, + [ 206 ] = PR_sendto, + [ 207 ] = PR_recvfrom, + [ 208 ] = PR_setsockopt, + [ 209 ] = PR_getsockopt, + [ 210 ] = PR_shutdown, + [ 211 ] = PR_sendmsg, + [ 212 ] = PR_recvmsg, + [ 213 ] = PR_readahead, + [ 214 ] = PR_brk, + [ 215 ] = PR_munmap, + [ 216 ] = PR_mremap, + [ 217 ] = PR_add_key, + [ 218 ] = PR_request_key, + [ 219 ] = PR_keyctl, + [ 220 ] = PR_clone, + [ 221 ] = PR_execve, + [ 222 ] = PR_mmap, + [ 223 ] = PR_fadvise64, + [ 224 ] = PR_swapon, + [ 225 ] = PR_swapoff, + [ 226 ] = PR_mprotect, + [ 227 ] = PR_msync, + [ 228 ] = PR_mlock, + [ 229 ] = PR_munlock, + [ 230 ] = PR_mlockall, + [ 231 ] = PR_munlockall, + [ 232 ] = PR_mincore, + [ 233 ] = PR_madvise, + [ 234 ] = PR_remap_file_pages, 
+ [ 235 ] = PR_mbind, + [ 236 ] = PR_get_mempolicy, + [ 237 ] = PR_set_mempolicy, + [ 238 ] = PR_migrate_pages, + [ 239 ] = PR_move_pages, + [ 240 ] = PR_rt_tgsigqueueinfo, + [ 241 ] = PR_perf_event_open, + [ 242 ] = PR_accept4, + [ 243 ] = PR_recvmmsg, + [ 244 ] = PR_arch_specific_syscall, + [ 260 ] = PR_wait4, + [ 261 ] = PR_prlimit64, + [ 262 ] = PR_fanotify_init, + [ 263 ] = PR_fanotify_mark, + [ 264 ] = PR_name_to_handle_at, + [ 265 ] = PR_open_by_handle_at, + [ 266 ] = PR_clock_adjtime, + [ 267 ] = PR_syncfs, + [ 268 ] = PR_setns, + [ 269 ] = PR_sendmmsg, + [ 270 ] = PR_process_vm_readv, + [ 271 ] = PR_process_vm_writev, + [ 272 ] = PR_kcmp, + [ 273 ] = PR_finit_module, + [ 274 ] = PR_sched_setattr, + [ 275 ] = PR_sched_getattr, + [ 276 ] = PR_renameat2, + [ 291 ] = PR_statx, +}; diff --git a/proot/proot_linux/syscall/sysnums-i386.h b/proot/proot_linux/syscall/sysnums-i386.h new file mode 100644 index 0000000..3bbb70e --- /dev/null +++ b/proot/proot_linux/syscall/sysnums-i386.h @@ -0,0 +1,356 @@ +#include "syscall/sysnum.h" + +static const Sysnum sysnums_i386[] = { + [ 0 ] = PR_restart_syscall, + [ 1 ] = PR_exit, + [ 2 ] = PR_fork, + [ 3 ] = PR_read, + [ 4 ] = PR_write, + [ 5 ] = PR_open, + [ 6 ] = PR_close, + [ 7 ] = PR_waitpid, + [ 8 ] = PR_creat, + [ 9 ] = PR_link, + [ 10 ] = PR_unlink, + [ 11 ] = PR_execve, + [ 12 ] = PR_chdir, + [ 13 ] = PR_time, + [ 14 ] = PR_mknod, + [ 15 ] = PR_chmod, + [ 16 ] = PR_lchown, + [ 17 ] = PR_break, + [ 18 ] = PR_oldstat, + [ 19 ] = PR_lseek, + [ 20 ] = PR_getpid, + [ 21 ] = PR_mount, + [ 22 ] = PR_umount, + [ 23 ] = PR_setuid, + [ 24 ] = PR_getuid, + [ 25 ] = PR_stime, + [ 26 ] = PR_ptrace, + [ 27 ] = PR_alarm, + [ 28 ] = PR_oldfstat, + [ 29 ] = PR_pause, + [ 30 ] = PR_utime, + [ 31 ] = PR_stty, + [ 32 ] = PR_gtty, + [ 33 ] = PR_access, + [ 34 ] = PR_nice, + [ 35 ] = PR_ftime, + [ 36 ] = PR_sync, + [ 37 ] = PR_kill, + [ 38 ] = PR_rename, + [ 39 ] = PR_mkdir, + [ 40 ] = PR_rmdir, + [ 41 ] = PR_dup, + [ 42 ] = PR_pipe, + [ 
43 ] = PR_times, + [ 44 ] = PR_prof, + [ 45 ] = PR_brk, + [ 46 ] = PR_setgid, + [ 47 ] = PR_getgid, + [ 48 ] = PR_signal, + [ 49 ] = PR_geteuid, + [ 50 ] = PR_getegid, + [ 51 ] = PR_acct, + [ 52 ] = PR_umount2, + [ 53 ] = PR_lock, + [ 54 ] = PR_ioctl, + [ 55 ] = PR_fcntl, + [ 56 ] = PR_mpx, + [ 57 ] = PR_setpgid, + [ 58 ] = PR_ulimit, + [ 59 ] = PR_oldolduname, + [ 60 ] = PR_umask, + [ 61 ] = PR_chroot, + [ 62 ] = PR_ustat, + [ 63 ] = PR_dup2, + [ 64 ] = PR_getppid, + [ 65 ] = PR_getpgrp, + [ 66 ] = PR_setsid, + [ 67 ] = PR_sigaction, + [ 68 ] = PR_sgetmask, + [ 69 ] = PR_ssetmask, + [ 70 ] = PR_setreuid, + [ 71 ] = PR_setregid, + [ 72 ] = PR_sigsuspend, + [ 73 ] = PR_sigpending, + [ 74 ] = PR_sethostname, + [ 75 ] = PR_setrlimit, + [ 76 ] = PR_getrlimit, + [ 77 ] = PR_getrusage, + [ 78 ] = PR_gettimeofday, + [ 79 ] = PR_settimeofday, + [ 80 ] = PR_getgroups, + [ 81 ] = PR_setgroups, + [ 82 ] = PR_select, + [ 83 ] = PR_symlink, + [ 84 ] = PR_oldlstat, + [ 85 ] = PR_readlink, + [ 86 ] = PR_uselib, + [ 87 ] = PR_swapon, + [ 88 ] = PR_reboot, + [ 89 ] = PR_readdir, + [ 90 ] = PR_mmap, + [ 91 ] = PR_munmap, + [ 92 ] = PR_truncate, + [ 93 ] = PR_ftruncate, + [ 94 ] = PR_fchmod, + [ 95 ] = PR_fchown, + [ 96 ] = PR_getpriority, + [ 97 ] = PR_setpriority, + [ 98 ] = PR_profil, + [ 99 ] = PR_statfs, + [ 100 ] = PR_fstatfs, + [ 101 ] = PR_ioperm, + [ 102 ] = PR_socketcall, + [ 103 ] = PR_syslog, + [ 104 ] = PR_setitimer, + [ 105 ] = PR_getitimer, + [ 106 ] = PR_stat, + [ 107 ] = PR_lstat, + [ 108 ] = PR_fstat, + [ 109 ] = PR_olduname, + [ 110 ] = PR_iopl, + [ 111 ] = PR_vhangup, + [ 112 ] = PR_idle, + [ 113 ] = PR_vm86old, + [ 114 ] = PR_wait4, + [ 115 ] = PR_swapoff, + [ 116 ] = PR_sysinfo, + [ 117 ] = PR_ipc, + [ 118 ] = PR_fsync, + [ 119 ] = PR_sigreturn, + [ 120 ] = PR_clone, + [ 121 ] = PR_setdomainname, + [ 122 ] = PR_uname, + [ 123 ] = PR_modify_ldt, + [ 124 ] = PR_adjtimex, + [ 125 ] = PR_mprotect, + [ 126 ] = PR_sigprocmask, + [ 127 ] = PR_create_module, + [ 128 ] = 
PR_init_module, + [ 129 ] = PR_delete_module, + [ 130 ] = PR_get_kernel_syms, + [ 131 ] = PR_quotactl, + [ 132 ] = PR_getpgid, + [ 133 ] = PR_fchdir, + [ 134 ] = PR_bdflush, + [ 135 ] = PR_sysfs, + [ 136 ] = PR_personality, + [ 137 ] = PR_afs_syscall, + [ 138 ] = PR_setfsuid, + [ 139 ] = PR_setfsgid, + [ 140 ] = PR__llseek, + [ 141 ] = PR_getdents, + [ 142 ] = PR__newselect, + [ 143 ] = PR_flock, + [ 144 ] = PR_msync, + [ 145 ] = PR_readv, + [ 146 ] = PR_writev, + [ 147 ] = PR_getsid, + [ 148 ] = PR_fdatasync, + [ 149 ] = PR__sysctl, + [ 150 ] = PR_mlock, + [ 151 ] = PR_munlock, + [ 152 ] = PR_mlockall, + [ 153 ] = PR_munlockall, + [ 154 ] = PR_sched_setparam, + [ 155 ] = PR_sched_getparam, + [ 156 ] = PR_sched_setscheduler, + [ 157 ] = PR_sched_getscheduler, + [ 158 ] = PR_sched_yield, + [ 159 ] = PR_sched_get_priority_max, + [ 160 ] = PR_sched_get_priority_min, + [ 161 ] = PR_sched_rr_get_interval, + [ 162 ] = PR_nanosleep, + [ 163 ] = PR_mremap, + [ 164 ] = PR_setresuid, + [ 165 ] = PR_getresuid, + [ 166 ] = PR_vm86, + [ 167 ] = PR_query_module, + [ 168 ] = PR_poll, + [ 169 ] = PR_nfsservctl, + [ 170 ] = PR_setresgid, + [ 171 ] = PR_getresgid, + [ 172 ] = PR_prctl, + [ 173 ] = PR_rt_sigreturn, + [ 174 ] = PR_rt_sigaction, + [ 175 ] = PR_rt_sigprocmask, + [ 176 ] = PR_rt_sigpending, + [ 177 ] = PR_rt_sigtimedwait, + [ 178 ] = PR_rt_sigqueueinfo, + [ 179 ] = PR_rt_sigsuspend, + [ 180 ] = PR_pread64, + [ 181 ] = PR_pwrite64, + [ 182 ] = PR_chown, + [ 183 ] = PR_getcwd, + [ 184 ] = PR_capget, + [ 185 ] = PR_capset, + [ 186 ] = PR_sigaltstack, + [ 187 ] = PR_sendfile, + [ 188 ] = PR_getpmsg, + [ 189 ] = PR_putpmsg, + [ 190 ] = PR_vfork, + [ 191 ] = PR_ugetrlimit, + [ 192 ] = PR_mmap2, + [ 193 ] = PR_truncate64, + [ 194 ] = PR_ftruncate64, + [ 195 ] = PR_stat64, + [ 196 ] = PR_lstat64, + [ 197 ] = PR_fstat64, + [ 198 ] = PR_lchown32, + [ 199 ] = PR_getuid32, + [ 200 ] = PR_getgid32, + [ 201 ] = PR_geteuid32, + [ 202 ] = PR_getegid32, + [ 203 ] = PR_setreuid32, + [ 204 
] = PR_setregid32, + [ 205 ] = PR_getgroups32, + [ 206 ] = PR_setgroups32, + [ 207 ] = PR_fchown32, + [ 208 ] = PR_setresuid32, + [ 209 ] = PR_getresuid32, + [ 210 ] = PR_setresgid32, + [ 211 ] = PR_getresgid32, + [ 212 ] = PR_chown32, + [ 213 ] = PR_setuid32, + [ 214 ] = PR_setgid32, + [ 215 ] = PR_setfsuid32, + [ 216 ] = PR_setfsgid32, + [ 217 ] = PR_pivot_root, + [ 218 ] = PR_mincore, + [ 219 ] = PR_madvise, + [ 220 ] = PR_getdents64, + [ 221 ] = PR_fcntl64, + [ 224 ] = PR_gettid, + [ 225 ] = PR_readahead, + [ 226 ] = PR_setxattr, + [ 227 ] = PR_lsetxattr, + [ 228 ] = PR_fsetxattr, + [ 229 ] = PR_getxattr, + [ 230 ] = PR_lgetxattr, + [ 231 ] = PR_fgetxattr, + [ 232 ] = PR_listxattr, + [ 233 ] = PR_llistxattr, + [ 234 ] = PR_flistxattr, + [ 235 ] = PR_removexattr, + [ 236 ] = PR_lremovexattr, + [ 237 ] = PR_fremovexattr, + [ 238 ] = PR_tkill, + [ 239 ] = PR_sendfile64, + [ 240 ] = PR_futex, + [ 241 ] = PR_sched_setaffinity, + [ 242 ] = PR_sched_getaffinity, + [ 243 ] = PR_set_thread_area, + [ 244 ] = PR_get_thread_area, + [ 245 ] = PR_io_setup, + [ 246 ] = PR_io_destroy, + [ 247 ] = PR_io_getevents, + [ 248 ] = PR_io_submit, + [ 249 ] = PR_io_cancel, + [ 250 ] = PR_fadvise64, + [ 252 ] = PR_exit_group, + [ 253 ] = PR_lookup_dcookie, + [ 254 ] = PR_epoll_create, + [ 255 ] = PR_epoll_ctl, + [ 256 ] = PR_epoll_wait, + [ 257 ] = PR_remap_file_pages, + [ 258 ] = PR_set_tid_address, + [ 259 ] = PR_timer_create, + [ 260 ] = PR_timer_settime, + [ 261 ] = PR_timer_gettime, + [ 262 ] = PR_timer_getoverrun, + [ 263 ] = PR_timer_delete, + [ 264 ] = PR_clock_settime, + [ 265 ] = PR_clock_gettime, + [ 266 ] = PR_clock_getres, + [ 267 ] = PR_clock_nanosleep, + [ 268 ] = PR_statfs64, + [ 269 ] = PR_fstatfs64, + [ 270 ] = PR_tgkill, + [ 271 ] = PR_utimes, + [ 272 ] = PR_fadvise64_64, + [ 273 ] = PR_vserver, + [ 274 ] = PR_mbind, + [ 275 ] = PR_get_mempolicy, + [ 276 ] = PR_set_mempolicy, + [ 277 ] = PR_mq_open, + [ 278 ] = PR_mq_unlink, + [ 279 ] = PR_mq_timedsend, + [ 280 ] = 
PR_mq_timedreceive, + [ 281 ] = PR_mq_notify, + [ 282 ] = PR_mq_getsetattr, + [ 283 ] = PR_kexec_load, + [ 284 ] = PR_waitid, + [ 286 ] = PR_add_key, + [ 287 ] = PR_request_key, + [ 288 ] = PR_keyctl, + [ 289 ] = PR_ioprio_set, + [ 290 ] = PR_ioprio_get, + [ 291 ] = PR_inotify_init, + [ 292 ] = PR_inotify_add_watch, + [ 293 ] = PR_inotify_rm_watch, + [ 294 ] = PR_migrate_pages, + [ 295 ] = PR_openat, + [ 296 ] = PR_mkdirat, + [ 297 ] = PR_mknodat, + [ 298 ] = PR_fchownat, + [ 299 ] = PR_futimesat, + [ 300 ] = PR_fstatat64, + [ 301 ] = PR_unlinkat, + [ 302 ] = PR_renameat, + [ 303 ] = PR_linkat, + [ 304 ] = PR_symlinkat, + [ 305 ] = PR_readlinkat, + [ 306 ] = PR_fchmodat, + [ 307 ] = PR_faccessat, + [ 308 ] = PR_pselect6, + [ 309 ] = PR_ppoll, + [ 310 ] = PR_unshare, + [ 311 ] = PR_set_robust_list, + [ 312 ] = PR_get_robust_list, + [ 313 ] = PR_splice, + [ 314 ] = PR_sync_file_range, + [ 315 ] = PR_tee, + [ 316 ] = PR_vmsplice, + [ 317 ] = PR_move_pages, + [ 318 ] = PR_getcpu, + [ 319 ] = PR_epoll_pwait, + [ 320 ] = PR_utimensat, + [ 321 ] = PR_signalfd, + [ 322 ] = PR_timerfd_create, + [ 323 ] = PR_eventfd, + [ 324 ] = PR_fallocate, + [ 325 ] = PR_timerfd_settime, + [ 326 ] = PR_timerfd_gettime, + [ 327 ] = PR_signalfd4, + [ 328 ] = PR_eventfd2, + [ 329 ] = PR_epoll_create1, + [ 330 ] = PR_dup3, + [ 331 ] = PR_pipe2, + [ 332 ] = PR_inotify_init1, + [ 333 ] = PR_preadv, + [ 334 ] = PR_pwritev, + [ 335 ] = PR_rt_tgsigqueueinfo, + [ 336 ] = PR_perf_event_open, + [ 337 ] = PR_recvmmsg, + [ 338 ] = PR_fanotify_init, + [ 339 ] = PR_fanotify_mark, + [ 340 ] = PR_prlimit64, + [ 341 ] = PR_name_to_handle_at, + [ 342 ] = PR_open_by_handle_at, + [ 343 ] = PR_clock_adjtime, + [ 344 ] = PR_syncfs, + [ 345 ] = PR_sendmmsg, + [ 346 ] = PR_setns, + [ 347 ] = PR_process_vm_readv, + [ 348 ] = PR_process_vm_writev, + [ 349 ] = PR_kcmp, + [ 350 ] = PR_finit_module, + [ 351 ] = PR_sched_setattr, + [ 352 ] = PR_sched_getattr, + [ 353 ] = PR_renameat2, + [ 383 ] = PR_statx, + [ 412 ] = 
PR_utimensat_time64, +}; diff --git a/proot/proot_linux/syscall/sysnums-sh4.h b/proot/proot_linux/syscall/sysnums-sh4.h new file mode 100644 index 0000000..1d3758c --- /dev/null +++ b/proot/proot_linux/syscall/sysnums-sh4.h @@ -0,0 +1,347 @@ +#include "syscall/sysnum.h" + +static const Sysnum sysnums_sh4[] = { + [ 0 ] = PR_restart_syscall, + [ 1 ] = PR_exit, + [ 2 ] = PR_fork, + [ 3 ] = PR_read, + [ 4 ] = PR_write, + [ 5 ] = PR_open, + [ 6 ] = PR_close, + [ 7 ] = PR_waitpid, + [ 8 ] = PR_creat, + [ 9 ] = PR_link, + [ 10 ] = PR_unlink, + [ 11 ] = PR_execve, + [ 12 ] = PR_chdir, + [ 13 ] = PR_time, + [ 14 ] = PR_mknod, + [ 15 ] = PR_chmod, + [ 16 ] = PR_lchown, + [ 18 ] = PR_oldstat, + [ 19 ] = PR_lseek, + [ 20 ] = PR_getpid, + [ 21 ] = PR_mount, + [ 22 ] = PR_umount, + [ 23 ] = PR_setuid, + [ 24 ] = PR_getuid, + [ 25 ] = PR_stime, + [ 26 ] = PR_ptrace, + [ 27 ] = PR_alarm, + [ 28 ] = PR_oldfstat, + [ 29 ] = PR_pause, + [ 30 ] = PR_utime, + [ 33 ] = PR_access, + [ 34 ] = PR_nice, + [ 36 ] = PR_sync, + [ 37 ] = PR_kill, + [ 38 ] = PR_rename, + [ 39 ] = PR_mkdir, + [ 40 ] = PR_rmdir, + [ 41 ] = PR_dup, + [ 42 ] = PR_pipe, + [ 43 ] = PR_times, + [ 45 ] = PR_brk, + [ 46 ] = PR_setgid, + [ 47 ] = PR_getgid, + [ 48 ] = PR_signal, + [ 49 ] = PR_geteuid, + [ 50 ] = PR_getegid, + [ 51 ] = PR_acct, + [ 52 ] = PR_umount2, + [ 54 ] = PR_ioctl, + [ 55 ] = PR_fcntl, + [ 57 ] = PR_setpgid, + [ 60 ] = PR_umask, + [ 61 ] = PR_chroot, + [ 62 ] = PR_ustat, + [ 63 ] = PR_dup2, + [ 64 ] = PR_getppid, + [ 65 ] = PR_getpgrp, + [ 66 ] = PR_setsid, + [ 67 ] = PR_sigaction, + [ 68 ] = PR_sgetmask, + [ 69 ] = PR_ssetmask, + [ 70 ] = PR_setreuid, + [ 71 ] = PR_setregid, + [ 72 ] = PR_sigsuspend, + [ 73 ] = PR_sigpending, + [ 74 ] = PR_sethostname, + [ 75 ] = PR_setrlimit, + [ 76 ] = PR_getrlimit, + [ 77 ] = PR_getrusage, + [ 78 ] = PR_gettimeofday, + [ 79 ] = PR_settimeofday, + [ 80 ] = PR_getgroups, + [ 81 ] = PR_setgroups, + [ 83 ] = PR_symlink, + [ 84 ] = PR_oldlstat, + [ 85 ] = PR_readlink, 
+ [ 86 ] = PR_uselib, + [ 87 ] = PR_swapon, + [ 88 ] = PR_reboot, + [ 89 ] = PR_readdir, + [ 90 ] = PR_mmap, + [ 91 ] = PR_munmap, + [ 92 ] = PR_truncate, + [ 93 ] = PR_ftruncate, + [ 94 ] = PR_fchmod, + [ 95 ] = PR_fchown, + [ 96 ] = PR_getpriority, + [ 97 ] = PR_setpriority, + [ 99 ] = PR_statfs, + [ 100 ] = PR_fstatfs, + [ 102 ] = PR_socketcall, + [ 103 ] = PR_syslog, + [ 104 ] = PR_setitimer, + [ 105 ] = PR_getitimer, + [ 106 ] = PR_stat, + [ 107 ] = PR_lstat, + [ 108 ] = PR_fstat, + [ 109 ] = PR_olduname, + [ 111 ] = PR_vhangup, + [ 114 ] = PR_wait4, + [ 115 ] = PR_swapoff, + [ 116 ] = PR_sysinfo, + [ 117 ] = PR_ipc, + [ 118 ] = PR_fsync, + [ 119 ] = PR_sigreturn, + [ 120 ] = PR_clone, + [ 121 ] = PR_setdomainname, + [ 122 ] = PR_uname, + [ 123 ] = PR_cacheflush, + [ 124 ] = PR_adjtimex, + [ 125 ] = PR_mprotect, + [ 126 ] = PR_sigprocmask, + [ 128 ] = PR_init_module, + [ 129 ] = PR_delete_module, + [ 131 ] = PR_quotactl, + [ 132 ] = PR_getpgid, + [ 133 ] = PR_fchdir, + [ 134 ] = PR_bdflush, + [ 135 ] = PR_sysfs, + [ 136 ] = PR_personality, + [ 138 ] = PR_setfsuid, + [ 139 ] = PR_setfsgid, + [ 140 ] = PR__llseek, + [ 141 ] = PR_getdents, + [ 142 ] = PR__newselect, + [ 143 ] = PR_flock, + [ 144 ] = PR_msync, + [ 145 ] = PR_readv, + [ 146 ] = PR_writev, + [ 147 ] = PR_getsid, + [ 148 ] = PR_fdatasync, + [ 149 ] = PR__sysctl, + [ 150 ] = PR_mlock, + [ 151 ] = PR_munlock, + [ 152 ] = PR_mlockall, + [ 153 ] = PR_munlockall, + [ 154 ] = PR_sched_setparam, + [ 155 ] = PR_sched_getparam, + [ 156 ] = PR_sched_setscheduler, + [ 157 ] = PR_sched_getscheduler, + [ 158 ] = PR_sched_yield, + [ 159 ] = PR_sched_get_priority_max, + [ 160 ] = PR_sched_get_priority_min, + [ 161 ] = PR_sched_rr_get_interval, + [ 162 ] = PR_nanosleep, + [ 163 ] = PR_mremap, + [ 164 ] = PR_setresuid, + [ 165 ] = PR_getresuid, + [ 168 ] = PR_poll, + [ 169 ] = PR_nfsservctl, + [ 170 ] = PR_setresgid, + [ 171 ] = PR_getresgid, + [ 172 ] = PR_prctl, + [ 173 ] = PR_rt_sigreturn, + [ 174 ] = 
PR_rt_sigaction, + [ 175 ] = PR_rt_sigprocmask, + [ 176 ] = PR_rt_sigpending, + [ 177 ] = PR_rt_sigtimedwait, + [ 178 ] = PR_rt_sigqueueinfo, + [ 179 ] = PR_rt_sigsuspend, + [ 180 ] = PR_pread64, + [ 181 ] = PR_pwrite64, + [ 182 ] = PR_chown, + [ 183 ] = PR_getcwd, + [ 184 ] = PR_capget, + [ 185 ] = PR_capset, + [ 186 ] = PR_sigaltstack, + [ 187 ] = PR_sendfile, + [ 190 ] = PR_vfork, + [ 191 ] = PR_ugetrlimit, + [ 192 ] = PR_mmap2, + [ 193 ] = PR_truncate64, + [ 194 ] = PR_ftruncate64, + [ 195 ] = PR_stat64, + [ 196 ] = PR_lstat64, + [ 197 ] = PR_fstat64, + [ 198 ] = PR_lchown32, + [ 199 ] = PR_getuid32, + [ 200 ] = PR_getgid32, + [ 201 ] = PR_geteuid32, + [ 202 ] = PR_getegid32, + [ 203 ] = PR_setreuid32, + [ 204 ] = PR_setregid32, + [ 205 ] = PR_getgroups32, + [ 206 ] = PR_setgroups32, + [ 207 ] = PR_fchown32, + [ 208 ] = PR_setresuid32, + [ 209 ] = PR_getresuid32, + [ 210 ] = PR_setresgid32, + [ 211 ] = PR_getresgid32, + [ 212 ] = PR_chown32, + [ 213 ] = PR_setuid32, + [ 214 ] = PR_setgid32, + [ 215 ] = PR_setfsuid32, + [ 216 ] = PR_setfsgid32, + [ 217 ] = PR_pivot_root, + [ 218 ] = PR_mincore, + [ 219 ] = PR_madvise, + [ 220 ] = PR_getdents64, + [ 221 ] = PR_fcntl64, + [ 224 ] = PR_gettid, + [ 225 ] = PR_readahead, + [ 226 ] = PR_setxattr, + [ 227 ] = PR_lsetxattr, + [ 228 ] = PR_fsetxattr, + [ 229 ] = PR_getxattr, + [ 230 ] = PR_lgetxattr, + [ 231 ] = PR_fgetxattr, + [ 232 ] = PR_listxattr, + [ 233 ] = PR_llistxattr, + [ 234 ] = PR_flistxattr, + [ 235 ] = PR_removexattr, + [ 236 ] = PR_lremovexattr, + [ 237 ] = PR_fremovexattr, + [ 238 ] = PR_tkill, + [ 239 ] = PR_sendfile64, + [ 240 ] = PR_futex, + [ 241 ] = PR_sched_setaffinity, + [ 242 ] = PR_sched_getaffinity, + [ 245 ] = PR_io_setup, + [ 246 ] = PR_io_destroy, + [ 247 ] = PR_io_getevents, + [ 248 ] = PR_io_submit, + [ 249 ] = PR_io_cancel, + [ 250 ] = PR_fadvise64, + [ 252 ] = PR_exit_group, + [ 253 ] = PR_lookup_dcookie, + [ 254 ] = PR_epoll_create, + [ 255 ] = PR_epoll_ctl, + [ 256 ] = PR_epoll_wait, + 
[ 257 ] = PR_remap_file_pages, + [ 258 ] = PR_set_tid_address, + [ 259 ] = PR_timer_create, + [ 260 ] = PR_timer_settime, + [ 261 ] = PR_timer_gettime, + [ 262 ] = PR_timer_getoverrun, + [ 263 ] = PR_timer_delete, + [ 264 ] = PR_clock_settime, + [ 265 ] = PR_clock_gettime, + [ 266 ] = PR_clock_getres, + [ 267 ] = PR_clock_nanosleep, + [ 268 ] = PR_statfs64, + [ 269 ] = PR_fstatfs64, + [ 270 ] = PR_tgkill, + [ 271 ] = PR_utimes, + [ 272 ] = PR_fadvise64_64, + [ 274 ] = PR_mbind, + [ 275 ] = PR_get_mempolicy, + [ 276 ] = PR_set_mempolicy, + [ 277 ] = PR_mq_open, + [ 278 ] = PR_mq_unlink, + [ 279 ] = PR_mq_timedsend, + [ 280 ] = PR_mq_timedreceive, + [ 281 ] = PR_mq_notify, + [ 282 ] = PR_mq_getsetattr, + [ 283 ] = PR_kexec_load, + [ 284 ] = PR_waitid, + [ 285 ] = PR_add_key, + [ 286 ] = PR_request_key, + [ 287 ] = PR_keyctl, + [ 288 ] = PR_ioprio_set, + [ 289 ] = PR_ioprio_get, + [ 290 ] = PR_inotify_init, + [ 291 ] = PR_inotify_add_watch, + [ 292 ] = PR_inotify_rm_watch, + [ 294 ] = PR_migrate_pages, + [ 295 ] = PR_openat, + [ 296 ] = PR_mkdirat, + [ 297 ] = PR_mknodat, + [ 298 ] = PR_fchownat, + [ 299 ] = PR_futimesat, + [ 300 ] = PR_fstatat64, + [ 301 ] = PR_unlinkat, + [ 302 ] = PR_renameat, + [ 303 ] = PR_linkat, + [ 304 ] = PR_symlinkat, + [ 305 ] = PR_readlinkat, + [ 306 ] = PR_fchmodat, + [ 307 ] = PR_faccessat, + [ 308 ] = PR_pselect6, + [ 309 ] = PR_ppoll, + [ 310 ] = PR_unshare, + [ 311 ] = PR_set_robust_list, + [ 312 ] = PR_get_robust_list, + [ 313 ] = PR_splice, + [ 314 ] = PR_sync_file_range, + [ 315 ] = PR_tee, + [ 316 ] = PR_vmsplice, + [ 317 ] = PR_move_pages, + [ 318 ] = PR_getcpu, + [ 319 ] = PR_epoll_pwait, + [ 320 ] = PR_utimensat, + [ 321 ] = PR_signalfd, + [ 322 ] = PR_timerfd_create, + [ 323 ] = PR_eventfd, + [ 324 ] = PR_fallocate, + [ 325 ] = PR_timerfd_settime, + [ 326 ] = PR_timerfd_gettime, + [ 327 ] = PR_signalfd4, + [ 328 ] = PR_eventfd2, + [ 329 ] = PR_epoll_create1, + [ 330 ] = PR_dup3, + [ 331 ] = PR_pipe2, + [ 332 ] = 
PR_inotify_init1, + [ 333 ] = PR_preadv, + [ 334 ] = PR_pwritev, + [ 335 ] = PR_rt_tgsigqueueinfo, + [ 336 ] = PR_perf_event_open, + [ 337 ] = PR_fanotify_init, + [ 338 ] = PR_fanotify_mark, + [ 339 ] = PR_prlimit64, + [ 340 ] = PR_socket, + [ 341 ] = PR_bind, + [ 342 ] = PR_connect, + [ 343 ] = PR_listen, + [ 344 ] = PR_accept, + [ 345 ] = PR_getsockname, + [ 346 ] = PR_getpeername, + [ 347 ] = PR_socketpair, + [ 348 ] = PR_send, + [ 349 ] = PR_sendto, + [ 350 ] = PR_recv, + [ 351 ] = PR_recvfrom, + [ 352 ] = PR_shutdown, + [ 353 ] = PR_setsockopt, + [ 354 ] = PR_getsockopt, + [ 355 ] = PR_sendmsg, + [ 356 ] = PR_recvmsg, + [ 357 ] = PR_recvmmsg, + [ 358 ] = PR_accept4, + [ 359 ] = PR_name_to_handle_at, + [ 360 ] = PR_open_by_handle_at, + [ 361 ] = PR_clock_adjtime, + [ 362 ] = PR_syncfs, + [ 363 ] = PR_sendmmsg, + [ 364 ] = PR_setns, + [ 365 ] = PR_process_vm_readv, + [ 366 ] = PR_process_vm_writev, + [ 367 ] = PR_kcmp, + [ 368 ] = PR_finit_module, + [ 369 ] = PR_sched_setattr, + [ 370 ] = PR_sched_getattr, + [ 371 ] = PR_renameat2, +}; diff --git a/proot/proot_linux/syscall/sysnums-x32.h b/proot/proot_linux/syscall/sysnums-x32.h new file mode 100644 index 0000000..448c699 --- /dev/null +++ b/proot/proot_linux/syscall/sysnums-x32.h @@ -0,0 +1,312 @@ +#include "syscall/sysnum.h" + +static const Sysnum sysnums_x32[] = { + [ 0 ] = PR_read, + [ 1 ] = PR_write, + [ 2 ] = PR_open, + [ 3 ] = PR_close, + [ 4 ] = PR_stat, + [ 5 ] = PR_fstat, + [ 6 ] = PR_lstat, + [ 7 ] = PR_poll, + [ 8 ] = PR_lseek, + [ 9 ] = PR_mmap, + [ 10 ] = PR_mprotect, + [ 11 ] = PR_munmap, + [ 12 ] = PR_brk, + [ 14 ] = PR_rt_sigprocmask, + [ 17 ] = PR_pread64, + [ 18 ] = PR_pwrite64, + [ 21 ] = PR_access, + [ 22 ] = PR_pipe, + [ 23 ] = PR_select, + [ 24 ] = PR_sched_yield, + [ 25 ] = PR_mremap, + [ 26 ] = PR_msync, + [ 27 ] = PR_mincore, + [ 28 ] = PR_madvise, + [ 29 ] = PR_shmget, + [ 30 ] = PR_shmat, + [ 31 ] = PR_shmctl, + [ 32 ] = PR_dup, + [ 33 ] = PR_dup2, + [ 34 ] = PR_pause, + [ 35 ] = 
PR_nanosleep, + [ 36 ] = PR_getitimer, + [ 37 ] = PR_alarm, + [ 38 ] = PR_setitimer, + [ 39 ] = PR_getpid, + [ 40 ] = PR_sendfile, + [ 41 ] = PR_socket, + [ 42 ] = PR_connect, + [ 43 ] = PR_accept, + [ 44 ] = PR_sendto, + [ 48 ] = PR_shutdown, + [ 49 ] = PR_bind, + [ 50 ] = PR_listen, + [ 51 ] = PR_getsockname, + [ 52 ] = PR_getpeername, + [ 53 ] = PR_socketpair, + [ 56 ] = PR_clone, + [ 57 ] = PR_fork, + [ 58 ] = PR_vfork, + [ 60 ] = PR_exit, + [ 61 ] = PR_wait4, + [ 62 ] = PR_kill, + [ 63 ] = PR_uname, + [ 64 ] = PR_semget, + [ 65 ] = PR_semop, + [ 66 ] = PR_semctl, + [ 67 ] = PR_shmdt, + [ 68 ] = PR_msgget, + [ 69 ] = PR_msgsnd, + [ 70 ] = PR_msgrcv, + [ 71 ] = PR_msgctl, + [ 72 ] = PR_fcntl, + [ 73 ] = PR_flock, + [ 74 ] = PR_fsync, + [ 75 ] = PR_fdatasync, + [ 76 ] = PR_truncate, + [ 77 ] = PR_ftruncate, + [ 78 ] = PR_getdents, + [ 79 ] = PR_getcwd, + [ 80 ] = PR_chdir, + [ 81 ] = PR_fchdir, + [ 82 ] = PR_rename, + [ 83 ] = PR_mkdir, + [ 84 ] = PR_rmdir, + [ 85 ] = PR_creat, + [ 86 ] = PR_link, + [ 87 ] = PR_unlink, + [ 88 ] = PR_symlink, + [ 89 ] = PR_readlink, + [ 90 ] = PR_chmod, + [ 91 ] = PR_fchmod, + [ 92 ] = PR_chown, + [ 93 ] = PR_fchown, + [ 94 ] = PR_lchown, + [ 95 ] = PR_umask, + [ 96 ] = PR_gettimeofday, + [ 97 ] = PR_getrlimit, + [ 98 ] = PR_getrusage, + [ 99 ] = PR_sysinfo, + [ 100 ] = PR_times, + [ 102 ] = PR_getuid, + [ 103 ] = PR_syslog, + [ 104 ] = PR_getgid, + [ 105 ] = PR_setuid, + [ 106 ] = PR_setgid, + [ 107 ] = PR_geteuid, + [ 108 ] = PR_getegid, + [ 109 ] = PR_setpgid, + [ 110 ] = PR_getppid, + [ 111 ] = PR_getpgrp, + [ 112 ] = PR_setsid, + [ 113 ] = PR_setreuid, + [ 114 ] = PR_setregid, + [ 115 ] = PR_getgroups, + [ 116 ] = PR_setgroups, + [ 117 ] = PR_setresuid, + [ 118 ] = PR_getresuid, + [ 119 ] = PR_setresgid, + [ 120 ] = PR_getresgid, + [ 121 ] = PR_getpgid, + [ 122 ] = PR_setfsuid, + [ 123 ] = PR_setfsgid, + [ 124 ] = PR_getsid, + [ 125 ] = PR_capget, + [ 126 ] = PR_capset, + [ 130 ] = PR_rt_sigsuspend, + [ 132 ] = PR_utime, + [ 
133 ] = PR_mknod, + [ 135 ] = PR_personality, + [ 136 ] = PR_ustat, + [ 137 ] = PR_statfs, + [ 138 ] = PR_fstatfs, + [ 139 ] = PR_sysfs, + [ 140 ] = PR_getpriority, + [ 141 ] = PR_setpriority, + [ 142 ] = PR_sched_setparam, + [ 143 ] = PR_sched_getparam, + [ 144 ] = PR_sched_setscheduler, + [ 145 ] = PR_sched_getscheduler, + [ 146 ] = PR_sched_get_priority_max, + [ 147 ] = PR_sched_get_priority_min, + [ 148 ] = PR_sched_rr_get_interval, + [ 149 ] = PR_mlock, + [ 150 ] = PR_munlock, + [ 151 ] = PR_mlockall, + [ 152 ] = PR_munlockall, + [ 153 ] = PR_vhangup, + [ 154 ] = PR_modify_ldt, + [ 155 ] = PR_pivot_root, + [ 157 ] = PR_prctl, + [ 158 ] = PR_arch_prctl, + [ 159 ] = PR_adjtimex, + [ 160 ] = PR_setrlimit, + [ 161 ] = PR_chroot, + [ 162 ] = PR_sync, + [ 163 ] = PR_acct, + [ 164 ] = PR_settimeofday, + [ 165 ] = PR_mount, + [ 166 ] = PR_umount2, + [ 167 ] = PR_swapon, + [ 168 ] = PR_swapoff, + [ 169 ] = PR_reboot, + [ 170 ] = PR_sethostname, + [ 171 ] = PR_setdomainname, + [ 172 ] = PR_iopl, + [ 173 ] = PR_ioperm, + [ 175 ] = PR_init_module, + [ 176 ] = PR_delete_module, + [ 179 ] = PR_quotactl, + [ 181 ] = PR_getpmsg, + [ 182 ] = PR_putpmsg, + [ 183 ] = PR_afs_syscall, + [ 184 ] = PR_tuxcall, + [ 185 ] = PR_security, + [ 186 ] = PR_gettid, + [ 187 ] = PR_readahead, + [ 188 ] = PR_setxattr, + [ 189 ] = PR_lsetxattr, + [ 190 ] = PR_fsetxattr, + [ 191 ] = PR_getxattr, + [ 192 ] = PR_lgetxattr, + [ 193 ] = PR_fgetxattr, + [ 194 ] = PR_listxattr, + [ 195 ] = PR_llistxattr, + [ 196 ] = PR_flistxattr, + [ 197 ] = PR_removexattr, + [ 198 ] = PR_lremovexattr, + [ 199 ] = PR_fremovexattr, + [ 200 ] = PR_tkill, + [ 201 ] = PR_time, + [ 202 ] = PR_futex, + [ 203 ] = PR_sched_setaffinity, + [ 204 ] = PR_sched_getaffinity, + [ 206 ] = PR_io_setup, + [ 207 ] = PR_io_destroy, + [ 208 ] = PR_io_getevents, + [ 209 ] = PR_io_submit, + [ 210 ] = PR_io_cancel, + [ 212 ] = PR_lookup_dcookie, + [ 213 ] = PR_epoll_create, + [ 216 ] = PR_remap_file_pages, + [ 217 ] = PR_getdents64, + [ 218 
] = PR_set_tid_address, + [ 219 ] = PR_restart_syscall, + [ 220 ] = PR_semtimedop, + [ 221 ] = PR_fadvise64, + [ 223 ] = PR_timer_settime, + [ 224 ] = PR_timer_gettime, + [ 225 ] = PR_timer_getoverrun, + [ 226 ] = PR_timer_delete, + [ 227 ] = PR_clock_settime, + [ 228 ] = PR_clock_gettime, + [ 229 ] = PR_clock_getres, + [ 230 ] = PR_clock_nanosleep, + [ 231 ] = PR_exit_group, + [ 232 ] = PR_epoll_wait, + [ 233 ] = PR_epoll_ctl, + [ 234 ] = PR_tgkill, + [ 235 ] = PR_utimes, + [ 237 ] = PR_mbind, + [ 238 ] = PR_set_mempolicy, + [ 239 ] = PR_get_mempolicy, + [ 240 ] = PR_mq_open, + [ 241 ] = PR_mq_unlink, + [ 242 ] = PR_mq_timedsend, + [ 243 ] = PR_mq_timedreceive, + [ 245 ] = PR_mq_getsetattr, + [ 248 ] = PR_add_key, + [ 249 ] = PR_request_key, + [ 250 ] = PR_keyctl, + [ 251 ] = PR_ioprio_set, + [ 252 ] = PR_ioprio_get, + [ 253 ] = PR_inotify_init, + [ 254 ] = PR_inotify_add_watch, + [ 255 ] = PR_inotify_rm_watch, + [ 256 ] = PR_migrate_pages, + [ 257 ] = PR_openat, + [ 258 ] = PR_mkdirat, + [ 259 ] = PR_mknodat, + [ 260 ] = PR_fchownat, + [ 261 ] = PR_futimesat, + [ 262 ] = PR_newfstatat, + [ 263 ] = PR_unlinkat, + [ 264 ] = PR_renameat, + [ 265 ] = PR_linkat, + [ 266 ] = PR_symlinkat, + [ 267 ] = PR_readlinkat, + [ 268 ] = PR_fchmodat, + [ 269 ] = PR_faccessat, + [ 270 ] = PR_pselect6, + [ 271 ] = PR_ppoll, + [ 272 ] = PR_unshare, + [ 275 ] = PR_splice, + [ 276 ] = PR_tee, + [ 277 ] = PR_sync_file_range, + [ 280 ] = PR_utimensat, + [ 281 ] = PR_epoll_pwait, + [ 282 ] = PR_signalfd, + [ 283 ] = PR_timerfd_create, + [ 284 ] = PR_eventfd, + [ 285 ] = PR_fallocate, + [ 286 ] = PR_timerfd_settime, + [ 287 ] = PR_timerfd_gettime, + [ 288 ] = PR_accept4, + [ 289 ] = PR_signalfd4, + [ 290 ] = PR_eventfd2, + [ 291 ] = PR_epoll_create1, + [ 292 ] = PR_dup3, + [ 293 ] = PR_pipe2, + [ 294 ] = PR_inotify_init1, + [ 298 ] = PR_perf_event_open, + [ 300 ] = PR_fanotify_init, + [ 301 ] = PR_fanotify_mark, + [ 302 ] = PR_prlimit64, + [ 303 ] = PR_name_to_handle_at, + [ 304 ] = 
PR_open_by_handle_at, + [ 305 ] = PR_clock_adjtime, + [ 306 ] = PR_syncfs, + [ 308 ] = PR_setns, + [ 309 ] = PR_getcpu, + [ 312 ] = PR_kcmp, + [ 313 ] = PR_finit_module, + [ 314 ] = PR_sched_setattr, + [ 315 ] = PR_sched_getattr, + [ 316 ] = PR_renameat2, + [ 332 ] = PR_statx, + [ 439 ] = PR_faccessat2, + [ 512 ] = PR_rt_sigaction, + [ 513 ] = PR_rt_sigreturn, + [ 514 ] = PR_ioctl, + [ 515 ] = PR_readv, + [ 516 ] = PR_writev, + [ 517 ] = PR_recvfrom, + [ 518 ] = PR_sendmsg, + [ 519 ] = PR_recvmsg, + [ 520 ] = PR_execve, + [ 521 ] = PR_ptrace, + [ 522 ] = PR_rt_sigpending, + [ 523 ] = PR_rt_sigtimedwait, + [ 524 ] = PR_rt_sigqueueinfo, + [ 525 ] = PR_sigaltstack, + [ 526 ] = PR_timer_create, + [ 527 ] = PR_mq_notify, + [ 528 ] = PR_kexec_load, + [ 529 ] = PR_waitid, + [ 530 ] = PR_set_robust_list, + [ 531 ] = PR_get_robust_list, + [ 532 ] = PR_vmsplice, + [ 533 ] = PR_move_pages, + [ 534 ] = PR_preadv, + [ 535 ] = PR_pwritev, + [ 536 ] = PR_rt_tgsigqueueinfo, + [ 537 ] = PR_recvmmsg, + [ 538 ] = PR_sendmmsg, + [ 539 ] = PR_process_vm_readv, + [ 540 ] = PR_process_vm_writev, + [ 541 ] = PR_setsockopt, + [ 542 ] = PR_getsockopt, +}; diff --git a/proot/proot_linux/syscall/sysnums-x86_64.h b/proot/proot_linux/syscall/sysnums-x86_64.h new file mode 100644 index 0000000..92229f8 --- /dev/null +++ b/proot/proot_linux/syscall/sysnums-x86_64.h @@ -0,0 +1,323 @@ +#include "syscall/sysnum.h" + +static const Sysnum sysnums_x86_64[] = { + [ 0 ] = PR_read, + [ 1 ] = PR_write, + [ 2 ] = PR_open, + [ 3 ] = PR_close, + [ 4 ] = PR_stat, + [ 5 ] = PR_fstat, + [ 6 ] = PR_lstat, + [ 7 ] = PR_poll, + [ 8 ] = PR_lseek, + [ 9 ] = PR_mmap, + [ 10 ] = PR_mprotect, + [ 11 ] = PR_munmap, + [ 12 ] = PR_brk, + [ 13 ] = PR_rt_sigaction, + [ 14 ] = PR_rt_sigprocmask, + [ 15 ] = PR_rt_sigreturn, + [ 16 ] = PR_ioctl, + [ 17 ] = PR_pread64, + [ 18 ] = PR_pwrite64, + [ 19 ] = PR_readv, + [ 20 ] = PR_writev, + [ 21 ] = PR_access, + [ 22 ] = PR_pipe, + [ 23 ] = PR_select, + [ 24 ] = PR_sched_yield, + [ 
25 ] = PR_mremap, + [ 26 ] = PR_msync, + [ 27 ] = PR_mincore, + [ 28 ] = PR_madvise, + [ 29 ] = PR_shmget, + [ 30 ] = PR_shmat, + [ 31 ] = PR_shmctl, + [ 32 ] = PR_dup, + [ 33 ] = PR_dup2, + [ 34 ] = PR_pause, + [ 35 ] = PR_nanosleep, + [ 36 ] = PR_getitimer, + [ 37 ] = PR_alarm, + [ 38 ] = PR_setitimer, + [ 39 ] = PR_getpid, + [ 40 ] = PR_sendfile, + [ 41 ] = PR_socket, + [ 42 ] = PR_connect, + [ 43 ] = PR_accept, + [ 44 ] = PR_sendto, + [ 45 ] = PR_recvfrom, + [ 46 ] = PR_sendmsg, + [ 47 ] = PR_recvmsg, + [ 48 ] = PR_shutdown, + [ 49 ] = PR_bind, + [ 50 ] = PR_listen, + [ 51 ] = PR_getsockname, + [ 52 ] = PR_getpeername, + [ 53 ] = PR_socketpair, + [ 54 ] = PR_setsockopt, + [ 55 ] = PR_getsockopt, + [ 56 ] = PR_clone, + [ 57 ] = PR_fork, + [ 58 ] = PR_vfork, + [ 59 ] = PR_execve, + [ 60 ] = PR_exit, + [ 61 ] = PR_wait4, + [ 62 ] = PR_kill, + [ 63 ] = PR_uname, + [ 64 ] = PR_semget, + [ 65 ] = PR_semop, + [ 66 ] = PR_semctl, + [ 67 ] = PR_shmdt, + [ 68 ] = PR_msgget, + [ 69 ] = PR_msgsnd, + [ 70 ] = PR_msgrcv, + [ 71 ] = PR_msgctl, + [ 72 ] = PR_fcntl, + [ 73 ] = PR_flock, + [ 74 ] = PR_fsync, + [ 75 ] = PR_fdatasync, + [ 76 ] = PR_truncate, + [ 77 ] = PR_ftruncate, + [ 78 ] = PR_getdents, + [ 79 ] = PR_getcwd, + [ 80 ] = PR_chdir, + [ 81 ] = PR_fchdir, + [ 82 ] = PR_rename, + [ 83 ] = PR_mkdir, + [ 84 ] = PR_rmdir, + [ 85 ] = PR_creat, + [ 86 ] = PR_link, + [ 87 ] = PR_unlink, + [ 88 ] = PR_symlink, + [ 89 ] = PR_readlink, + [ 90 ] = PR_chmod, + [ 91 ] = PR_fchmod, + [ 92 ] = PR_chown, + [ 93 ] = PR_fchown, + [ 94 ] = PR_lchown, + [ 95 ] = PR_umask, + [ 96 ] = PR_gettimeofday, + [ 97 ] = PR_getrlimit, + [ 98 ] = PR_getrusage, + [ 99 ] = PR_sysinfo, + [ 100 ] = PR_times, + [ 101 ] = PR_ptrace, + [ 102 ] = PR_getuid, + [ 103 ] = PR_syslog, + [ 104 ] = PR_getgid, + [ 105 ] = PR_setuid, + [ 106 ] = PR_setgid, + [ 107 ] = PR_geteuid, + [ 108 ] = PR_getegid, + [ 109 ] = PR_setpgid, + [ 110 ] = PR_getppid, + [ 111 ] = PR_getpgrp, + [ 112 ] = PR_setsid, + [ 113 ] = 
PR_setreuid, + [ 114 ] = PR_setregid, + [ 115 ] = PR_getgroups, + [ 116 ] = PR_setgroups, + [ 117 ] = PR_setresuid, + [ 118 ] = PR_getresuid, + [ 119 ] = PR_setresgid, + [ 120 ] = PR_getresgid, + [ 121 ] = PR_getpgid, + [ 122 ] = PR_setfsuid, + [ 123 ] = PR_setfsgid, + [ 124 ] = PR_getsid, + [ 125 ] = PR_capget, + [ 126 ] = PR_capset, + [ 127 ] = PR_rt_sigpending, + [ 128 ] = PR_rt_sigtimedwait, + [ 129 ] = PR_rt_sigqueueinfo, + [ 130 ] = PR_rt_sigsuspend, + [ 131 ] = PR_sigaltstack, + [ 132 ] = PR_utime, + [ 133 ] = PR_mknod, + [ 134 ] = PR_uselib, + [ 135 ] = PR_personality, + [ 136 ] = PR_ustat, + [ 137 ] = PR_statfs, + [ 138 ] = PR_fstatfs, + [ 139 ] = PR_sysfs, + [ 140 ] = PR_getpriority, + [ 141 ] = PR_setpriority, + [ 142 ] = PR_sched_setparam, + [ 143 ] = PR_sched_getparam, + [ 144 ] = PR_sched_setscheduler, + [ 145 ] = PR_sched_getscheduler, + [ 146 ] = PR_sched_get_priority_max, + [ 147 ] = PR_sched_get_priority_min, + [ 148 ] = PR_sched_rr_get_interval, + [ 149 ] = PR_mlock, + [ 150 ] = PR_munlock, + [ 151 ] = PR_mlockall, + [ 152 ] = PR_munlockall, + [ 153 ] = PR_vhangup, + [ 154 ] = PR_modify_ldt, + [ 155 ] = PR_pivot_root, + [ 156 ] = PR__sysctl, + [ 157 ] = PR_prctl, + [ 158 ] = PR_arch_prctl, + [ 159 ] = PR_adjtimex, + [ 160 ] = PR_setrlimit, + [ 161 ] = PR_chroot, + [ 162 ] = PR_sync, + [ 163 ] = PR_acct, + [ 164 ] = PR_settimeofday, + [ 165 ] = PR_mount, + [ 166 ] = PR_umount2, + [ 167 ] = PR_swapon, + [ 168 ] = PR_swapoff, + [ 169 ] = PR_reboot, + [ 170 ] = PR_sethostname, + [ 171 ] = PR_setdomainname, + [ 172 ] = PR_iopl, + [ 173 ] = PR_ioperm, + [ 174 ] = PR_create_module, + [ 175 ] = PR_init_module, + [ 176 ] = PR_delete_module, + [ 177 ] = PR_get_kernel_syms, + [ 178 ] = PR_query_module, + [ 179 ] = PR_quotactl, + [ 180 ] = PR_nfsservctl, + [ 181 ] = PR_getpmsg, + [ 182 ] = PR_putpmsg, + [ 183 ] = PR_afs_syscall, + [ 184 ] = PR_tuxcall, + [ 185 ] = PR_security, + [ 186 ] = PR_gettid, + [ 187 ] = PR_readahead, + [ 188 ] = PR_setxattr, + [ 189 
] = PR_lsetxattr, + [ 190 ] = PR_fsetxattr, + [ 191 ] = PR_getxattr, + [ 192 ] = PR_lgetxattr, + [ 193 ] = PR_fgetxattr, + [ 194 ] = PR_listxattr, + [ 195 ] = PR_llistxattr, + [ 196 ] = PR_flistxattr, + [ 197 ] = PR_removexattr, + [ 198 ] = PR_lremovexattr, + [ 199 ] = PR_fremovexattr, + [ 200 ] = PR_tkill, + [ 201 ] = PR_time, + [ 202 ] = PR_futex, + [ 203 ] = PR_sched_setaffinity, + [ 204 ] = PR_sched_getaffinity, + [ 205 ] = PR_set_thread_area, + [ 206 ] = PR_io_setup, + [ 207 ] = PR_io_destroy, + [ 208 ] = PR_io_getevents, + [ 209 ] = PR_io_submit, + [ 210 ] = PR_io_cancel, + [ 211 ] = PR_get_thread_area, + [ 212 ] = PR_lookup_dcookie, + [ 213 ] = PR_epoll_create, + [ 214 ] = PR_epoll_ctl_old, + [ 215 ] = PR_epoll_wait_old, + [ 216 ] = PR_remap_file_pages, + [ 217 ] = PR_getdents64, + [ 218 ] = PR_set_tid_address, + [ 219 ] = PR_restart_syscall, + [ 220 ] = PR_semtimedop, + [ 221 ] = PR_fadvise64, + [ 222 ] = PR_timer_create, + [ 223 ] = PR_timer_settime, + [ 224 ] = PR_timer_gettime, + [ 225 ] = PR_timer_getoverrun, + [ 226 ] = PR_timer_delete, + [ 227 ] = PR_clock_settime, + [ 228 ] = PR_clock_gettime, + [ 229 ] = PR_clock_getres, + [ 230 ] = PR_clock_nanosleep, + [ 231 ] = PR_exit_group, + [ 232 ] = PR_epoll_wait, + [ 233 ] = PR_epoll_ctl, + [ 234 ] = PR_tgkill, + [ 235 ] = PR_utimes, + [ 236 ] = PR_vserver, + [ 237 ] = PR_mbind, + [ 238 ] = PR_set_mempolicy, + [ 239 ] = PR_get_mempolicy, + [ 240 ] = PR_mq_open, + [ 241 ] = PR_mq_unlink, + [ 242 ] = PR_mq_timedsend, + [ 243 ] = PR_mq_timedreceive, + [ 244 ] = PR_mq_notify, + [ 245 ] = PR_mq_getsetattr, + [ 246 ] = PR_kexec_load, + [ 247 ] = PR_waitid, + [ 248 ] = PR_add_key, + [ 249 ] = PR_request_key, + [ 250 ] = PR_keyctl, + [ 251 ] = PR_ioprio_set, + [ 252 ] = PR_ioprio_get, + [ 253 ] = PR_inotify_init, + [ 254 ] = PR_inotify_add_watch, + [ 255 ] = PR_inotify_rm_watch, + [ 256 ] = PR_migrate_pages, + [ 257 ] = PR_openat, + [ 258 ] = PR_mkdirat, + [ 259 ] = PR_mknodat, + [ 260 ] = PR_fchownat, + [ 261 ] = 
PR_futimesat, + [ 262 ] = PR_newfstatat, + [ 263 ] = PR_unlinkat, + [ 264 ] = PR_renameat, + [ 265 ] = PR_linkat, + [ 266 ] = PR_symlinkat, + [ 267 ] = PR_readlinkat, + [ 268 ] = PR_fchmodat, + [ 269 ] = PR_faccessat, + [ 270 ] = PR_pselect6, + [ 271 ] = PR_ppoll, + [ 272 ] = PR_unshare, + [ 273 ] = PR_set_robust_list, + [ 274 ] = PR_get_robust_list, + [ 275 ] = PR_splice, + [ 276 ] = PR_tee, + [ 277 ] = PR_sync_file_range, + [ 278 ] = PR_vmsplice, + [ 279 ] = PR_move_pages, + [ 280 ] = PR_utimensat, + [ 281 ] = PR_epoll_pwait, + [ 282 ] = PR_signalfd, + [ 283 ] = PR_timerfd_create, + [ 284 ] = PR_eventfd, + [ 285 ] = PR_fallocate, + [ 286 ] = PR_timerfd_settime, + [ 287 ] = PR_timerfd_gettime, + [ 288 ] = PR_accept4, + [ 289 ] = PR_signalfd4, + [ 290 ] = PR_eventfd2, + [ 291 ] = PR_epoll_create1, + [ 292 ] = PR_dup3, + [ 293 ] = PR_pipe2, + [ 294 ] = PR_inotify_init1, + [ 295 ] = PR_preadv, + [ 296 ] = PR_pwritev, + [ 297 ] = PR_rt_tgsigqueueinfo, + [ 298 ] = PR_perf_event_open, + [ 299 ] = PR_recvmmsg, + [ 300 ] = PR_fanotify_init, + [ 301 ] = PR_fanotify_mark, + [ 302 ] = PR_prlimit64, + [ 303 ] = PR_name_to_handle_at, + [ 304 ] = PR_open_by_handle_at, + [ 305 ] = PR_clock_adjtime, + [ 306 ] = PR_syncfs, + [ 307 ] = PR_sendmmsg, + [ 308 ] = PR_setns, + [ 309 ] = PR_getcpu, + [ 310 ] = PR_process_vm_readv, + [ 311 ] = PR_process_vm_writev, + [ 312 ] = PR_kcmp, + [ 313 ] = PR_finit_module, + [ 314 ] = PR_sched_setattr, + [ 315 ] = PR_sched_getattr, + [ 316 ] = PR_renameat2, + [ 332 ] = PR_statx, + [ 439 ] = PR_faccessat2, +}; diff --git a/proot/proot_linux/syscall/sysnums.list b/proot/proot_linux/syscall/sysnums.list new file mode 100644 index 0000000..1d6e666 --- /dev/null +++ b/proot/proot_linux/syscall/sysnums.list @@ -0,0 +1,433 @@ +SYSNUM(ARM_BASE) +SYSNUM(ARM_breakpoint) +SYSNUM(ARM_cacheflush) +SYSNUM(ARM_set_tls) +SYSNUM(ARM_usr26) +SYSNUM(ARM_usr32) +SYSNUM(X32_SYSCALL_BIT) +SYSNUM(_llseek) +SYSNUM(_newselect) +SYSNUM(_sysctl) +SYSNUM(accept) 
+SYSNUM(accept4) +SYSNUM(access) +SYSNUM(acct) +SYSNUM(add_key) +SYSNUM(adjtimex) +SYSNUM(afs_syscall) +SYSNUM(alarm) +SYSNUM(arch_prctl) +SYSNUM(arch_specific_syscall) +SYSNUM(arm_fadvise64_64) +SYSNUM(arm_sync_file_range) +SYSNUM(bdflush) +SYSNUM(bind) +SYSNUM(break) +SYSNUM(brk) +SYSNUM(cacheflush) +SYSNUM(capget) +SYSNUM(capset) +SYSNUM(chdir) +SYSNUM(chmod) +SYSNUM(chown) +SYSNUM(chown32) +SYSNUM(chroot) +SYSNUM(clock_adjtime) +SYSNUM(clock_getres) +SYSNUM(clock_gettime) +SYSNUM(clock_nanosleep) +SYSNUM(clock_settime) +SYSNUM(clone) +SYSNUM(close) +SYSNUM(connect) +SYSNUM(creat) +SYSNUM(create_module) +SYSNUM(delete_module) +SYSNUM(dup) +SYSNUM(dup2) +SYSNUM(dup3) +SYSNUM(epoll_create) +SYSNUM(epoll_create1) +SYSNUM(epoll_ctl) +SYSNUM(epoll_ctl_old) +SYSNUM(epoll_pwait) +SYSNUM(epoll_wait) +SYSNUM(epoll_wait_old) +SYSNUM(eventfd) +SYSNUM(eventfd2) +SYSNUM(execve) +SYSNUM(exit) +SYSNUM(exit_group) +SYSNUM(faccessat) +SYSNUM(faccessat2) +SYSNUM(fadvise64) +SYSNUM(fadvise64_64) +SYSNUM(fallocate) +SYSNUM(fanotify_init) +SYSNUM(fanotify_mark) +SYSNUM(fchdir) +SYSNUM(fchmod) +SYSNUM(fchmodat) +SYSNUM(fchown) +SYSNUM(fchown32) +SYSNUM(fchownat) +SYSNUM(fcntl) +SYSNUM(fcntl64) +SYSNUM(fdatasync) +SYSNUM(fgetxattr) +SYSNUM(finit_module) +SYSNUM(flistxattr) +SYSNUM(flock) +SYSNUM(fork) +SYSNUM(fremovexattr) +SYSNUM(fsetxattr) +SYSNUM(fstat) +SYSNUM(fstat64) +SYSNUM(fstatat64) +SYSNUM(fstatfs) +SYSNUM(fstatfs64) +SYSNUM(fsync) +SYSNUM(ftime) +SYSNUM(ftruncate) +SYSNUM(ftruncate64) +SYSNUM(futex) +SYSNUM(futimesat) +SYSNUM(get_kernel_syms) +SYSNUM(get_mempolicy) +SYSNUM(get_robust_list) +SYSNUM(get_thread_area) +SYSNUM(getcpu) +SYSNUM(getcwd) +SYSNUM(getdents) +SYSNUM(getdents64) +SYSNUM(getegid) +SYSNUM(getegid32) +SYSNUM(geteuid) +SYSNUM(geteuid32) +SYSNUM(getgid) +SYSNUM(getgid32) +SYSNUM(getgroups) +SYSNUM(getgroups32) +SYSNUM(getitimer) +SYSNUM(getpeername) +SYSNUM(getpgid) +SYSNUM(getpgrp) +SYSNUM(getpid) +SYSNUM(getpmsg) +SYSNUM(getppid) +SYSNUM(getpriority) 
+SYSNUM(getresgid) +SYSNUM(getresgid32) +SYSNUM(getresuid) +SYSNUM(getresuid32) +SYSNUM(getrlimit) +SYSNUM(getrusage) +SYSNUM(getsid) +SYSNUM(getsockname) +SYSNUM(getsockopt) +SYSNUM(gettid) +SYSNUM(gettimeofday) +SYSNUM(getuid) +SYSNUM(getuid32) +SYSNUM(getxattr) +SYSNUM(gtty) +SYSNUM(idle) +SYSNUM(init_module) +SYSNUM(inotify_add_watch) +SYSNUM(inotify_init) +SYSNUM(inotify_init1) +SYSNUM(inotify_rm_watch) +SYSNUM(io_cancel) +SYSNUM(io_destroy) +SYSNUM(io_getevents) +SYSNUM(io_setup) +SYSNUM(io_submit) +SYSNUM(ioctl) +SYSNUM(ioperm) +SYSNUM(iopl) +SYSNUM(ioprio_get) +SYSNUM(ioprio_set) +SYSNUM(ipc) +SYSNUM(kcmp) +SYSNUM(kexec_load) +SYSNUM(keyctl) +SYSNUM(kill) +SYSNUM(lchown) +SYSNUM(lchown32) +SYSNUM(lgetxattr) +SYSNUM(link) +SYSNUM(linkat) +SYSNUM(listen) +SYSNUM(listxattr) +SYSNUM(llistxattr) +SYSNUM(lock) +SYSNUM(lookup_dcookie) +SYSNUM(lremovexattr) +SYSNUM(lseek) +SYSNUM(lsetxattr) +SYSNUM(lstat) +SYSNUM(lstat64) +SYSNUM(madvise) +SYSNUM(mbind) +SYSNUM(migrate_pages) +SYSNUM(mincore) +SYSNUM(mkdir) +SYSNUM(mkdirat) +SYSNUM(mknod) +SYSNUM(mknodat) +SYSNUM(mlock) +SYSNUM(mlockall) +SYSNUM(mmap) +SYSNUM(mmap2) +SYSNUM(modify_ldt) +SYSNUM(mount) +SYSNUM(move_pages) +SYSNUM(mprotect) +SYSNUM(mpx) +SYSNUM(mq_getsetattr) +SYSNUM(mq_notify) +SYSNUM(mq_open) +SYSNUM(mq_timedreceive) +SYSNUM(mq_timedsend) +SYSNUM(mq_unlink) +SYSNUM(mremap) +SYSNUM(msgctl) +SYSNUM(msgget) +SYSNUM(msgrcv) +SYSNUM(msgsnd) +SYSNUM(msync) +SYSNUM(munlock) +SYSNUM(munlockall) +SYSNUM(munmap) +SYSNUM(name_to_handle_at) +SYSNUM(nanosleep) +SYSNUM(newfstatat) +SYSNUM(nfsservctl) +SYSNUM(nice) +SYSNUM(oldfstat) +SYSNUM(oldlstat) +SYSNUM(oldolduname) +SYSNUM(oldstat) +SYSNUM(olduname) +SYSNUM(open) +SYSNUM(open_by_handle_at) +SYSNUM(openat) +SYSNUM(pause) +SYSNUM(pciconfig_iobase) +SYSNUM(pciconfig_read) +SYSNUM(pciconfig_write) +SYSNUM(perf_event_open) +SYSNUM(personality) +SYSNUM(pipe) +SYSNUM(pipe2) +SYSNUM(pivot_root) +SYSNUM(poll) +SYSNUM(ppoll) +SYSNUM(prctl) +SYSNUM(pread64) 
+SYSNUM(preadv) +SYSNUM(prlimit64) +SYSNUM(process_vm_readv) +SYSNUM(process_vm_writev) +SYSNUM(prof) +SYSNUM(profil) +SYSNUM(pselect6) +SYSNUM(ptrace) +SYSNUM(putpmsg) +SYSNUM(pwrite64) +SYSNUM(pwritev) +SYSNUM(query_module) +SYSNUM(quotactl) +SYSNUM(read) +SYSNUM(readahead) +SYSNUM(readdir) +SYSNUM(readlink) +SYSNUM(readlinkat) +SYSNUM(readv) +SYSNUM(reboot) +SYSNUM(recv) +SYSNUM(recvfrom) +SYSNUM(recvmmsg) +SYSNUM(recvmsg) +SYSNUM(remap_file_pages) +SYSNUM(removexattr) +SYSNUM(rename) +SYSNUM(renameat) +SYSNUM(renameat2) +SYSNUM(request_key) +SYSNUM(restart_syscall) +SYSNUM(rmdir) +SYSNUM(rt_sigaction) +SYSNUM(rt_sigpending) +SYSNUM(rt_sigprocmask) +SYSNUM(rt_sigqueueinfo) +SYSNUM(rt_sigreturn) +SYSNUM(rt_sigsuspend) +SYSNUM(rt_sigtimedwait) +SYSNUM(rt_tgsigqueueinfo) +SYSNUM(sched_get_priority_max) +SYSNUM(sched_get_priority_min) +SYSNUM(sched_getaffinity) +SYSNUM(sched_getattr) +SYSNUM(sched_getparam) +SYSNUM(sched_getscheduler) +SYSNUM(sched_rr_get_interval) +SYSNUM(sched_setaffinity) +SYSNUM(sched_setattr) +SYSNUM(sched_setparam) +SYSNUM(sched_setscheduler) +SYSNUM(sched_yield) +SYSNUM(security) +SYSNUM(select) +SYSNUM(semctl) +SYSNUM(semget) +SYSNUM(semop) +SYSNUM(semtimedop) +SYSNUM(send) +SYSNUM(sendfile) +SYSNUM(sendfile64) +SYSNUM(sendmmsg) +SYSNUM(sendmsg) +SYSNUM(sendto) +SYSNUM(set_mempolicy) +SYSNUM(set_robust_list) +SYSNUM(set_thread_area) +SYSNUM(set_tid_address) +SYSNUM(setdomainname) +SYSNUM(setfsgid) +SYSNUM(setfsgid32) +SYSNUM(setfsuid) +SYSNUM(setfsuid32) +SYSNUM(setgid) +SYSNUM(setgid32) +SYSNUM(setgroups) +SYSNUM(setgroups32) +SYSNUM(sethostname) +SYSNUM(setitimer) +SYSNUM(setns) +SYSNUM(setpgid) +SYSNUM(setpriority) +SYSNUM(setregid) +SYSNUM(setregid32) +SYSNUM(setresgid) +SYSNUM(setresgid32) +SYSNUM(setresuid) +SYSNUM(setresuid32) +SYSNUM(setreuid) +SYSNUM(setreuid32) +SYSNUM(setrlimit) +SYSNUM(setsid) +SYSNUM(setsockopt) +SYSNUM(settimeofday) +SYSNUM(setuid) +SYSNUM(setuid32) +SYSNUM(setxattr) +SYSNUM(sgetmask) +SYSNUM(shmat) 
+SYSNUM(shmctl) +SYSNUM(shmdt) +SYSNUM(shmget) +SYSNUM(shutdown) +SYSNUM(sigaction) +SYSNUM(sigaltstack) +SYSNUM(signal) +SYSNUM(signalfd) +SYSNUM(signalfd4) +SYSNUM(sigpending) +SYSNUM(sigprocmask) +SYSNUM(sigreturn) +SYSNUM(sigsuspend) +SYSNUM(socket) +SYSNUM(socketcall) +SYSNUM(socketpair) +SYSNUM(splice) +SYSNUM(ssetmask) +SYSNUM(stat) +SYSNUM(stat64) +SYSNUM(statfs) +SYSNUM(statfs64) +SYSNUM(stime) +SYSNUM(stty) +SYSNUM(swapoff) +SYSNUM(swapon) +SYSNUM(symlink) +SYSNUM(symlinkat) +SYSNUM(sync) +SYSNUM(sync_file_range) +SYSNUM(sync_file_range2) +SYSNUM(syncfs) +SYSNUM(sysfs) +SYSNUM(sysinfo) +SYSNUM(syslog) +SYSNUM(tee) +SYSNUM(tgkill) +SYSNUM(time) +SYSNUM(timer_create) +SYSNUM(timer_delete) +SYSNUM(timer_getoverrun) +SYSNUM(timer_gettime) +SYSNUM(timer_settime) +SYSNUM(timerfd_create) +SYSNUM(timerfd_gettime) +SYSNUM(timerfd_settime) +SYSNUM(times) +SYSNUM(tkill) +SYSNUM(truncate) +SYSNUM(truncate64) +SYSNUM(tuxcall) +SYSNUM(ugetrlimit) +SYSNUM(ulimit) +SYSNUM(umask) +SYSNUM(umount) +SYSNUM(umount2) +SYSNUM(uname) +SYSNUM(unlink) +SYSNUM(unlinkat) +SYSNUM(unshare) +SYSNUM(uselib) +SYSNUM(ustat) +SYSNUM(utime) +SYSNUM(utimensat) +SYSNUM(utimes) +SYSNUM(vfork) +SYSNUM(vhangup) +SYSNUM(vm86) +SYSNUM(vm86old) +SYSNUM(vmsplice) +SYSNUM(vserver) +SYSNUM(wait4) +SYSNUM(waitid) +SYSNUM(waitpid) +SYSNUM(write) +SYSNUM(writev) +SYSNUM(x32_execve) +SYSNUM(x32_get_robust_list) +SYSNUM(x32_ioctl) +SYSNUM(x32_kexec_load) +SYSNUM(x32_move_pages) +SYSNUM(x32_mq_notify) +SYSNUM(x32_preadv) +SYSNUM(x32_process_vm_readv) +SYSNUM(x32_process_vm_writev) +SYSNUM(x32_ptrace) +SYSNUM(x32_pwritev) +SYSNUM(x32_readv) +SYSNUM(x32_recvfrom) +SYSNUM(x32_recvmmsg) +SYSNUM(x32_recvmsg) +SYSNUM(x32_rt_sigaction) +SYSNUM(x32_rt_sigpending) +SYSNUM(x32_rt_sigqueueinfo) +SYSNUM(x32_rt_sigreturn) +SYSNUM(x32_rt_sigtimedwait) +SYSNUM(x32_rt_tgsigqueueinfo) +SYSNUM(x32_sendmmsg) +SYSNUM(x32_sendmsg) +SYSNUM(x32_set_robust_list) +SYSNUM(x32_sigaltstack) +SYSNUM(x32_timer_create) 
+SYSNUM(x32_vmsplice) +SYSNUM(x32_waitid) +SYSNUM(x32_writev) +SYSNUM(statx) +SYSNUM(utimensat_time64) diff --git a/proot/proot_linux/tracee/abi.h b/proot/proot_linux/tracee/abi.h new file mode 100644 index 0000000..e33606d --- /dev/null +++ b/proot/proot_linux/tracee/abi.h @@ -0,0 +1,131 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef TRACEE_ABI_H +#define TRACEE_ABI_H + +#include +#include /* offsetof(), */ + +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "arch.h" + +#include "attribute.h" + +typedef enum { + ABI_DEFAULT = 0, + ABI_2, /* x86_32 on x86_64. */ + ABI_3, /* x32 on x86_64. */ + NB_MAX_ABIS, +} Abi; + +/** + * Return the ABI currently used by the given @tracee. + */ +#if defined(ARCH_X86_64) +static inline Abi get_abi(const Tracee *tracee) +{ + /* The ABI can be changed by a syscall ("execve" typically), + * however the change is only effective once the syscall has + * *fully* returned, hence the use of _regs[ORIGINAL]. */ + switch (tracee->_regs[ORIGINAL].cs) { + case 0x23: + return ABI_2; + + case 0x33: + if (tracee->_regs[ORIGINAL].ds == 0x2B) + return ABI_3; + /* Fall through. 
*/ + default: + return ABI_DEFAULT; + } +} + +/** + * Return true if @tracee is a 32-bit process running on a 64-bit + * kernel. + */ +static inline bool is_32on64_mode(const Tracee *tracee) +{ + /* Unlike the ABI, 32-bit/64-bit mode change is effective + * immediately, hence _regs[CURRENT].cs. */ + switch (tracee->_regs[CURRENT].cs) { + case 0x23: + return true; + + case 0x33: + if (tracee->_regs[CURRENT].ds == 0x2B) + return true; + /* Fall through. */ + default: + return false; + } +} +#else +static inline Abi get_abi(const Tracee *tracee UNUSED) +{ + return ABI_DEFAULT; +} + +static inline bool is_32on64_mode(const Tracee *tracee UNUSED) +{ + return false; +} +#endif + +/** + * Return the size of a word according to the ABI currently used by + * the given @tracee. + */ +static inline size_t sizeof_word(const Tracee *tracee) +{ + return (is_32on64_mode(tracee) + ? sizeof(word_t) / 2 + : sizeof(word_t)); +} + +#include + +/** + * Return the offset of the 'uid' field in a 'stat' structure + * according to the ABI currently used by the given @tracee. + */ +static inline off_t offsetof_stat_uid(const Tracee *tracee) +{ + return (is_32on64_mode(tracee) + ? OFFSETOF_STAT_UID_32 + : offsetof(struct stat, st_uid)); +} + +/** + * Return the offset of the 'gid' field in a 'stat' structure + * according to the ABI currently used by the given @tracee. + */ +static inline off_t offsetof_stat_gid(const Tracee *tracee) +{ + return (is_32on64_mode(tracee) + ? OFFSETOF_STAT_GID_32 + : offsetof(struct stat, st_gid)); +} + +#endif /* TRACEE_ABI_H */ diff --git a/proot/proot_linux/tracee/event.c b/proot/proot_linux/tracee/event.c new file mode 100644 index 0000000..ce59f07 --- /dev/null +++ b/proot/proot_linux/tracee/event.c @@ -0,0 +1,872 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include +#include /* CLONE_*, */ +#include /* pid_t, */ +#include /* ptrace(1), PTRACE_*, */ +#include /* waitpid(2), */ +#include /* waitpid(2), */ +#include /* uname(2), */ +#include /* fork(2), chdir(2), getpid(2), */ +#include /* strcmp(3), */ +#include /* errno(3), */ +#include /* bool, true, false, */ +#include /* assert(3), */ +#include /* atexit(3), getenv(3), */ +#include /* talloc_*, */ +#include /* PRI*, */ +#include /* KERNEL_VERSION, */ + +#include "tracee/event.h" +#include "cli/note.h" +#include "path/path.h" +#include "path/binding.h" +#include "syscall/syscall.h" +#include "syscall/seccomp.h" +#include "ptrace/wait.h" +#include "extension/extension.h" +#include "execve/elf.h" + +#include "attribute.h" +#include "compat.h" + + +/** + * Start @tracee->exe with the given @argv[]. This function + * returns -errno if an error occurred, otherwise 0. + */ +int launch_process(Tracee *tracee, char *const argv[]) +{ + char *const default_argv[] = { "-sh", NULL }; + long status; + pid_t pid; + + /* Warn about open file descriptors. They won't be + * translated until they are closed. 
*/ + list_open_fd(tracee); + + pid = fork(); + switch(pid) { + case -1: + note(tracee, ERROR, SYSTEM, "fork()"); + return -errno; + + case 0: /* child */ + /* Declare myself as ptraceable before executing the + * requested program. */ + status = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + if (status < 0) { + note(tracee, ERROR, SYSTEM, "ptrace(TRACEME)"); + return -errno; + } + + /* Synchronize with the tracer's event loop. Without + * this trick the tracer only sees the "return" from + * the next execve(2) so PRoot wouldn't handle the + * interpreter/runner. I also verified that strace + * does the same thing. */ + kill(getpid(), SIGSTOP); + + /* Improve performance by using seccomp mode 2, unless + * this support is explicitly disabled. */ + if (getenv("PROOT_NO_SECCOMP") == NULL) + (void) enable_syscall_filtering(tracee); + + /* Now process is ptraced, so the current rootfs is already the + * guest rootfs. Note: Valgrind can't handle execve(2) on + * "foreign" binaries (ENOEXEC) but can handle execvp(3) on such + * binaries. */ + execvp(tracee->exe, argv[0] != NULL ? argv : default_argv); + return -errno; + + default: /* parent */ + /* We know the pid of the first tracee now. */ + tracee->pid = pid; + return 0; + } + + /* Never reached. */ + return -ENOSYS; +} + +/* Send the KILL signal to all tracees when PRoot has received a fatal + * signal. */ +static void kill_all_tracees2(int signum, siginfo_t *siginfo UNUSED, void *ucontext UNUSED) +{ + note(NULL, WARNING, INTERNAL, "signal %d received from process %d", + signum, siginfo->si_pid); + kill_all_tracees(); + + /* Exit immediately for system signals (segmentation fault, + * illegal instruction, ...), otherwise exit cleanly through + * the event loop. */ + if (signum != SIGQUIT) + _exit(EXIT_FAILURE); +} + +/** + * Helper for print_talloc_hierarchy(). 
+ */ +static void print_talloc_chunk(const void *ptr, int depth, int max_depth UNUSED, + int is_ref, void *data UNUSED) +{ + const char *name; + size_t count; + size_t size; + + name = talloc_get_name(ptr); + size = talloc_get_size(ptr); + count = talloc_reference_count(ptr); + + if (depth == 0) + return; + + while (depth-- > 1) + fprintf(stderr, "\t"); + + fprintf(stderr, "%-16s ", name); + + if (is_ref) + fprintf(stderr, "-> %-8p", ptr); + else { + fprintf(stderr, "%-8p %zd bytes %zd ref'", ptr, size, count); + + if (name[0] == '$') { + fprintf(stderr, "\t(\"%s\")", (char *)ptr); + } + if (name[0] == '@') { + char **argv; + int i; + + fprintf(stderr, "\t("); + for (i = 0, argv = (char **)ptr; argv[i] != NULL; i++) + fprintf(stderr, "\"%s\", ", argv[i]); + fprintf(stderr, ")"); + } + else if (strcmp(name, "Tracee") == 0) { + fprintf(stderr, "\t(pid = %d, parent = %p)", + ((Tracee *)ptr)->pid, ((Tracee *)ptr)->parent); + } + else if (strcmp(name, "Bindings") == 0) { + Tracee *tracee; + + tracee = TRACEE(ptr); + + if (ptr == tracee->fs->bindings.pending) + fprintf(stderr, "\t(pending)"); + else if (ptr == tracee->fs->bindings.guest) + fprintf(stderr, "\t(guest)"); + else if (ptr == tracee->fs->bindings.host) + fprintf(stderr, "\t(host)"); + } + else if (strcmp(name, "Binding") == 0) { + Binding *binding = (Binding *)ptr; + fprintf(stderr, "\t(%s:%s)", binding->host.path, binding->guest.path); + } + } + + fprintf(stderr, "\n"); +} + +/* Print on stderr the complete talloc hierarchy. 
*/ +static void print_talloc_hierarchy(int signum, siginfo_t *siginfo UNUSED, void *ucontext UNUSED) +{ + switch (signum) { + case SIGUSR1: + talloc_report_depth_cb(NULL, 0, 100, print_talloc_chunk, NULL); + break; + + case SIGUSR2: + talloc_report_depth_file(NULL, 0, 100, stderr); + break; + + default: + break; + } +} + +static int last_exit_status = -1; + +/** + * Check if kernel >= 4.8 + */ +static bool is_kernel_4_8(void) +{ + static int version_48 = -1; + int major = 0; + int minor = 0; + + if (version_48 != -1) + return version_48; + + version_48 = false; + + struct utsname utsname; + + if (uname(&utsname) < 0) + return false; + + sscanf(utsname.release, "%d.%d", &major, &minor); + + if ((major == 4 && minor >= 8) || major > 4) + version_48 = true; + + return version_48; +} + +/** + * Check if this instance of PRoot can *technically* handle @tracee. + */ +static void check_architecture(Tracee *tracee) +{ + struct utsname utsname; + ElfHeader elf_header; + char path[PATH_MAX]; + int status; + + if (tracee->exe == NULL) + return; + + status = translate_path(tracee, path, AT_FDCWD, tracee->exe, false); + if (status < 0) + return; + + status = open_elf(path, &elf_header); + if (status < 0) + return; + close(status); + + if (!IS_CLASS64(elf_header) || sizeof(word_t) == sizeof(uint64_t)) + return; + + note(tracee, ERROR, USER, + "'%s' is a 64-bit program whereas this version of " + "%s handles 32-bit programs only", path, tracee->tool_name); + + status = uname(&utsname); + if (status < 0) + return; + + if (strcmp(utsname.machine, "x86_64") != 0) + return; + + note(tracee, INFO, USER, + "A 64-bit version that supports 32-bit binaries is required"); +} + +/** + * Wait then handle any event from any tracee. This function returns + * the exit status of the last terminated program. + */ +int event_loop() +{ + struct sigaction signal_action; + long status; + int signum; + + /* Kill all tracees when exiting. 
*/ + status = atexit(kill_all_tracees); + if (status != 0) + note(NULL, WARNING, INTERNAL, "atexit() failed"); + + /* All signals are blocked when the signal handler is called. + * SIGINFO is used to know which process has signaled us and + * RESTART is used to restart waitpid(2) seamlessly. */ + bzero(&signal_action, sizeof(signal_action)); + signal_action.sa_flags = SA_SIGINFO | SA_RESTART; + status = sigfillset(&signal_action.sa_mask); + if (status < 0) + note(NULL, WARNING, SYSTEM, "sigfillset()"); + + /* Handle all signals. */ + for (signum = 0; signum < SIGRTMAX; signum++) { + switch (signum) { + case SIGQUIT: + case SIGILL: + case SIGABRT: + case SIGFPE: + case SIGSEGV: + /* Kill all tracees on abnormal termination + * signals. This ensures no process is left + * untraced. */ + signal_action.sa_sigaction = kill_all_tracees2; + break; + + case SIGUSR1: + case SIGUSR2: + /* Print on stderr the complete talloc + * hierarchy, useful for debug purpose. */ + signal_action.sa_sigaction = print_talloc_hierarchy; + break; + + case SIGCHLD: + case SIGCONT: + case SIGSTOP: + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + /* The default action is OK for these signals, + * they are related to tty and job control. */ + continue; + + default: + /* Ignore all other signals, including + * terminating ones (^C for instance). */ + signal_action.sa_sigaction = (void *)SIG_IGN; + break; + } + + status = sigaction(signum, &signal_action, NULL); + if (status < 0 && errno != EINVAL) + note(NULL, WARNING, SYSTEM, "sigaction(%d)", signum); + } + + while (1) { + int tracee_status; + Tracee *tracee; + int signal; + pid_t pid; + + /* This is the only safe place to free tracees. */ + free_terminated_tracees(); + + /* Wait for the next tracee's stop. */ + pid = waitpid(-1, &tracee_status, __WALL); + if (pid < 0) { + if (errno != ECHILD) { + note(NULL, ERROR, SYSTEM, "waitpid()"); + return EXIT_FAILURE; + } + break; + } + + /* Get information about this tracee. 
*/ + tracee = get_tracee(NULL, pid, true); + assert(tracee != NULL); + + tracee->running = false; + + VERBOSE(tracee, 6, "vpid %" PRIu64 ": got event %x", + tracee->vpid, tracee_status); + + status = notify_extensions(tracee, NEW_STATUS, tracee_status, 0); + if (status != 0) + continue; + + if (tracee->as_ptracee.ptracer != NULL) { + bool keep_stopped = handle_ptracee_event(tracee, tracee_status); + if (keep_stopped) + continue; + } + + signal = handle_tracee_event(tracee, tracee_status); + (void) restart_tracee(tracee, signal); + } + + return last_exit_status; +} + +/** + * For kernels >= 4.8.0 + * Handle the current event (@tracee_status) of the given @tracee. + * This function returns the "computed" signal that should be used to + * restart the given @tracee. + */ +static int handle_tracee_event_kernel_4_8(Tracee *tracee, int tracee_status) +{ + static bool seccomp_detected = false; + static bool seccomp_enabled = false; /* added for 4.8.0 */ + long status; + int signal; + + /* Don't overwrite restart_how if it is explicitly set + * elsewhere, i.e in the ptrace emulation when single + * stepping. */ + if (tracee->restart_how == 0) { + /* When seccomp is enabled, all events are restarted in + * non-stop mode, but this default choice could be overwritten + * later if necessary. The check against "sysexit_pending" + * ensures PTRACE_SYSCALL (used to hit the exit stage under + * seccomp) is not cleared due to an event that would happen + * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit + * stage of execve(2). */ + if (tracee->seccomp == ENABLED && !tracee->sysexit_pending) + tracee->restart_how = PTRACE_CONT; + else + tracee->restart_how = PTRACE_SYSCALL; + } + + /* Not a signal-stop by default. 
*/ + signal = 0; + + if (WIFEXITED(tracee_status)) { + last_exit_status = WEXITSTATUS(tracee_status); + VERBOSE(tracee, 1, + "vpid %" PRIu64 ": exited with status %d", + tracee->vpid, last_exit_status); + terminate_tracee(tracee); + } + else if (WIFSIGNALED(tracee_status)) { + check_architecture(tracee); + VERBOSE(tracee, 1, + "vpid %" PRIu64 ": terminated with signal %d", + tracee->vpid, WTERMSIG(tracee_status)); + terminate_tracee(tracee); + } + else if (WIFSTOPPED(tracee_status)) { + /* Don't use WSTOPSIG() to extract the signal + * since it clears the PTRACE_EVENT_* bits. */ + signal = (tracee_status & 0xfff00) >> 8; + + switch (signal) { + static bool deliver_sigtrap = false; + + case SIGTRAP: { + const unsigned long default_ptrace_options = ( + PTRACE_O_TRACESYSGOOD | + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACEVFORKDONE | + PTRACE_O_TRACEEXEC | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + + /* Distinguish some events from others and + * automatically trace each new process with + * the same options. + * + * Note that only the first bare SIGTRAP is + * related to the tracing loop, others SIGTRAP + * carry tracing information because of + * TRACE*FORK/CLONE/EXEC. */ + if (deliver_sigtrap) + break; /* Deliver this signal as-is. */ + + deliver_sigtrap = true; + + /* Try to enable seccomp mode 2... */ + status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + default_ptrace_options | PTRACE_O_TRACESECCOMP); + if (status < 0) { + seccomp_enabled = false; + /* ... otherwise use default options only. */ + status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + default_ptrace_options); + if (status < 0) { + note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); + exit(EXIT_FAILURE); + } + } + else { + if (getenv("PROOT_NO_SECCOMP") == NULL) + seccomp_enabled = true; + } + } + /* Fall through. 
*/ + case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: + case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: + + if (!seccomp_detected && seccomp_enabled) { + VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); + tracee->seccomp = ENABLED; + seccomp_detected = true; + } + + if (signal == (SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8) || + signal == (SIGTRAP | PTRACE_EVENT_SECCOMP << 8)) { + + unsigned long flags = 0; + signal = 0; + + /* Use the common ptrace flow if seccomp was + * explicitly disabled for this tracee. */ + if (tracee->seccomp != ENABLED) + break; + + status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); + if (status < 0) + break; + + if ((flags & FILTER_SYSEXIT) == 0) { + tracee->restart_how = PTRACE_CONT; + translate_syscall(tracee); + + if (tracee->seccomp == DISABLING) + tracee->restart_how = PTRACE_SYSCALL; + break; + } + } + + /* Fall through. */ + case SIGTRAP | 0x80: + + signal = 0; + + /* This tracee got signaled then freed during the + sysenter stage but the kernel reports the sysexit + stage; just discard this spurious tracee/event. */ + + if (tracee->exe == NULL) { + tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ + return 0; + } + + switch (tracee->seccomp) { + case ENABLED: + if (IS_IN_SYSENTER(tracee)) { + /* sysenter: ensure the sysexit + * stage will be hit under seccomp. */ + tracee->restart_how = PTRACE_SYSCALL; + tracee->sysexit_pending = true; + } + else { + /* sysexit: the next sysenter + * will be notified by seccomp. */ + tracee->restart_how = PTRACE_CONT; + tracee->sysexit_pending = false; + } + /* Fall through. */ + case DISABLED: + translate_syscall(tracee); + + /* This syscall has disabled seccomp. */ + if (tracee->seccomp == DISABLING) { + tracee->restart_how = PTRACE_SYSCALL; + tracee->seccomp = DISABLED; + } + + break; + + case DISABLING: + /* Seccomp was disabled by the + * previous syscall, but its sysenter + * stage was already handled. 
*/ + tracee->seccomp = DISABLED; + if (IS_IN_SYSENTER(tracee)) + tracee->status = 1; + break; + } + break; + + case SIGTRAP | PTRACE_EVENT_VFORK << 8: + signal = 0; + (void) new_child(tracee, CLONE_VFORK); + break; + + case SIGTRAP | PTRACE_EVENT_FORK << 8: + case SIGTRAP | PTRACE_EVENT_CLONE << 8: + signal = 0; + (void) new_child(tracee, 0); + break; + + case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: + case SIGTRAP | PTRACE_EVENT_EXEC << 8: + case SIGTRAP | PTRACE_EVENT_EXIT << 8: + signal = 0; + break; + + case SIGSTOP: + /* Stop this tracee until PRoot has received + * the EVENT_*FORK|CLONE notification. */ + if (tracee->exe == NULL) { + tracee->sigstop = SIGSTOP_PENDING; + signal = -1; + } + + /* For each tracee, the first SIGSTOP + * is only used to notify the tracer. */ + if (tracee->sigstop == SIGSTOP_IGNORED) { + tracee->sigstop = SIGSTOP_ALLOWED; + signal = 0; + } + break; + + default: + /* Deliver this signal as-is. */ + break; + } + } + + /* Clear the pending event, if any. */ + tracee->as_ptracee.event4.proot.pending = false; + + return signal; +} + + +/** + * For kernels < 4.8.0 + * Handle the current event (@tracee_status) of the given @tracee. + * This function returns the "computed" signal that should be used to + * restart the given @tracee. + */ +int handle_tracee_event(Tracee *tracee, int tracee_status) +{ + static bool seccomp_detected = false; + long status; + int signal; + + if (is_kernel_4_8()) + return handle_tracee_event_kernel_4_8(tracee, tracee_status); + /* Don't overwrite restart_how if it is explicitly set + * elsewhere, i.e in the ptrace emulation when single + * stepping. */ + if (tracee->restart_how == 0) { + /* When seccomp is enabled, all events are restarted in + * non-stop mode, but this default choice could be overwritten + * later if necessary. 
The check against "sysexit_pending" + * ensures PTRACE_SYSCALL (used to hit the exit stage under + * seccomp) is not cleared due to an event that would happen + * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit + * stage of execve(2). */ + if (tracee->seccomp == ENABLED && !tracee->sysexit_pending) + tracee->restart_how = PTRACE_CONT; + else + tracee->restart_how = PTRACE_SYSCALL; + } + + /* Not a signal-stop by default. */ + signal = 0; + + if (WIFEXITED(tracee_status)) { + last_exit_status = WEXITSTATUS(tracee_status); + VERBOSE(tracee, 1, + "vpid %" PRIu64 ": exited with status %d", + tracee->vpid, last_exit_status); + terminate_tracee(tracee); + } + else if (WIFSIGNALED(tracee_status)) { + check_architecture(tracee); + VERBOSE(tracee, 1, + "vpid %" PRIu64 ": terminated with signal %d", + tracee->vpid, WTERMSIG(tracee_status)); + terminate_tracee(tracee); + } + else if (WIFSTOPPED(tracee_status)) { + /* Don't use WSTOPSIG() to extract the signal + * since it clears the PTRACE_EVENT_* bits. */ + signal = (tracee_status & 0xfff00) >> 8; + + switch (signal) { + static bool deliver_sigtrap = false; + + case SIGTRAP: { + const unsigned long default_ptrace_options = ( + PTRACE_O_TRACESYSGOOD | + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACEVFORKDONE | + PTRACE_O_TRACEEXEC | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + + /* Distinguish some events from others and + * automatically trace each new process with + * the same options. + * + * Note that only the first bare SIGTRAP is + * related to the tracing loop, others SIGTRAP + * carry tracing information because of + * TRACE*FORK/CLONE/EXEC. */ + if (deliver_sigtrap) + break; /* Deliver this signal as-is. */ + + deliver_sigtrap = true; + + /* Try to enable seccomp mode 2... */ + status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + default_ptrace_options | PTRACE_O_TRACESECCOMP); + if (status < 0) { + /* ... otherwise use default options only. 
*/ + status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + default_ptrace_options); + if (status < 0) { + note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); + exit(EXIT_FAILURE); + } + } + } + + /* Fall through. */ + case SIGTRAP | 0x80: + signal = 0; + + /* This tracee got signaled then freed during the + sysenter stage but the kernel reports the sysexit + stage; just discard this spurious tracee/event. */ + if (tracee->exe == NULL) { + tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ + return 0; + } + + switch (tracee->seccomp) { + case ENABLED: + if (IS_IN_SYSENTER(tracee)) { + /* sysenter: ensure the sysexit + * stage will be hit under seccomp. */ + tracee->restart_how = PTRACE_SYSCALL; + tracee->sysexit_pending = true; + } + else { + /* sysexit: the next sysenter + * will be notified by seccomp. */ + tracee->restart_how = PTRACE_CONT; + tracee->sysexit_pending = false; + } + /* Fall through. */ + case DISABLED: + translate_syscall(tracee); + + /* This syscall has disabled seccomp. */ + if (tracee->seccomp == DISABLING) { + tracee->restart_how = PTRACE_SYSCALL; + tracee->seccomp = DISABLED; + } + + break; + + case DISABLING: + /* Seccomp was disabled by the + * previous syscall, but its sysenter + * stage was already handled. */ + tracee->seccomp = DISABLED; + if (IS_IN_SYSENTER(tracee)) + tracee->status = 1; + break; + } + break; + + case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: + case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: { + unsigned long flags = 0; + + signal = 0; + + if (!seccomp_detected) { + VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); + tracee->seccomp = ENABLED; + seccomp_detected = true; + } + + /* Use the common ptrace flow if seccomp was + * explicitely disabled for this tracee. */ + if (tracee->seccomp != ENABLED) + break; + + status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); + if (status < 0) + break; + + /* Use the common ptrace flow when + * sysexit has to be handled. 
*/ + if ((flags & FILTER_SYSEXIT) != 0) { + tracee->restart_how = PTRACE_SYSCALL; + break; + } + + /* Otherwise, handle the sysenter + * stage right now. */ + tracee->restart_how = PTRACE_CONT; + translate_syscall(tracee); + + /* This syscall has disabled seccomp, so move + * the ptrace flow back to the common path to + * ensure its sysexit will be handled. */ + if (tracee->seccomp == DISABLING) + tracee->restart_how = PTRACE_SYSCALL; + break; + } + + case SIGTRAP | PTRACE_EVENT_VFORK << 8: + signal = 0; + (void) new_child(tracee, CLONE_VFORK); + break; + + case SIGTRAP | PTRACE_EVENT_FORK << 8: + case SIGTRAP | PTRACE_EVENT_CLONE << 8: + signal = 0; + (void) new_child(tracee, 0); + break; + + case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: + case SIGTRAP | PTRACE_EVENT_EXEC << 8: + case SIGTRAP | PTRACE_EVENT_EXIT << 8: + signal = 0; + break; + + case SIGSTOP: + /* Stop this tracee until PRoot has received + * the EVENT_*FORK|CLONE notification. */ + if (tracee->exe == NULL) { + tracee->sigstop = SIGSTOP_PENDING; + signal = -1; + } + + /* For each tracee, the first SIGSTOP + * is only used to notify the tracer. */ + if (tracee->sigstop == SIGSTOP_IGNORED) { + tracee->sigstop = SIGSTOP_ALLOWED; + signal = 0; + } + break; + + default: + /* Deliver this signal as-is. */ + break; + } + } + + /* Clear the pending event, if any. */ + tracee->as_ptracee.event4.proot.pending = false; + + return signal; +} + + +/** + * Restart the given @tracee with the specified @signal. This + * function returns false if the tracee was not restarted (error or + * put in the "waiting for ptracee" state), otherwise true. + */ +bool restart_tracee(Tracee *tracee, int signal) +{ + int status; + + /* Put in the "stopped"/"waiting for ptracee" state?. */ + if (tracee->as_ptracer.wait_pid != 0 || signal == -1) + return false; + + /* Restart the tracee and stop it at the next instruction, or + * at the next entry or exit of a system call. 
*/ + status = ptrace(tracee->restart_how, tracee->pid, NULL, signal); + if (status < 0) + return false; /* The process likely died in a syscall. */ + + VERBOSE(tracee, 6, "vpid %" PRIu64 ": restarted using %d, signal %d", + tracee->vpid, tracee->restart_how, signal); + + tracee->restart_how = 0; + tracee->running = true; + + return true; +} diff --git a/proot/proot_linux/tracee/event.h b/proot/proot_linux/tracee/event.h new file mode 100644 index 0000000..ff00cfb --- /dev/null +++ b/proot/proot_linux/tracee/event.h @@ -0,0 +1,35 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#ifndef TRACEE_EVENT_H +#define TRACEE_EVENT_H + +#include + +#include "tracee/tracee.h" + +extern int launch_process(Tracee *tracee, char *const argv[]); +extern int event_loop(); +extern int handle_tracee_event(Tracee *tracee, int tracee_status); +extern bool restart_tracee(Tracee *tracee, int signal); + +#endif /* TRACEE_EVENT_H */ diff --git a/proot/proot_linux/tracee/mem.c b/proot/proot_linux/tracee/mem.c new file mode 100644 index 0000000..368daec --- /dev/null +++ b/proot/proot_linux/tracee/mem.c @@ -0,0 +1,548 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#include /* ptrace(2), PTRACE_*, */
+#include /* pid_t, size_t, */
+#include /* NULL, */
+#include /* offsetof(), */
+#include /* struct user*, */
+#include /* errno, */
+#include /* assert(3), */
+#include /* waitpid(2), */
+#include /* memcpy(3), */
+#include /* uint*_t, */
+#include /* process_vm_*, struct iovec, */
+#include /* sysconf(3), */
+#include /* mmap(2), munmap(2), MAP_*, */
+
+#include "tracee/mem.h"
+#include "tracee/abi.h"
+#include "syscall/heap.h"
+#include "arch.h" /* word_t, NO_MISALIGNED_ACCESS */
+#include "build.h" /* HAVE_PROCESS_VM, */
+#include "cli/note.h"
+
+/**
+ * Load the word at the given @address, potentially *not* aligned.
+ *
+ * When NO_MISALIGNED_ACCESS is defined, an address that is not a
+ * multiple of sizeof(word_t) is read through memcpy(3) instead of a
+ * direct dereference; otherwise the word is always dereferenced
+ * directly.
+ */
+static inline word_t load_word(const void *address)
+{
+#ifdef NO_MISALIGNED_ACCESS
+	/* Aligned address: a plain dereference is fine. */
+	if (((word_t)address) % sizeof(word_t) == 0)
+		return *(word_t *)address;
+	else {
+		/* Misaligned address: copy byte-wise into an aligned
+		 * temporary rather than dereferencing it. */
+		word_t value;
+		memcpy(&value, address, sizeof(word_t));
+		return value;
+	}
+#else
+	return *(word_t *)address;
+#endif
+}
+
+/**
+ * Store the word with the given @value to the given @address,
+ * potentially *not* aligned.
+ *
+ * This is the write counterpart of load_word(): with
+ * NO_MISALIGNED_ACCESS defined, a misaligned @address is written
+ * through memcpy(3) instead of a direct store.
+ */
+static inline void store_word(void *address, word_t value)
+{
+#ifdef NO_MISALIGNED_ACCESS
+	/* Aligned address: a plain store is fine. */
+	if (((word_t)address) % sizeof(word_t) == 0)
+		*((word_t *)address) = value;
+	else
+		/* Misaligned address: copy byte-wise. */
+		memcpy(address, &value, sizeof(word_t));
+#else
+	*((word_t *)address) = value;
+#endif
+}
+
+/**
+ * Copy @size bytes from the buffer @src_tracer to the address
+ * @dest_tracee within the memory space of the @tracee process. It
+ * returns -errno if an error occurred, otherwise 0.
+ */ +int write_data(const Tracee *tracee, word_t dest_tracee, const void *src_tracer, word_t size) +{ + word_t *src = (word_t *)src_tracer; + word_t *dest = (word_t *)dest_tracee; + + long status; + word_t word, i, j; + word_t nb_trailing_bytes; + word_t nb_full_words; + + uint8_t *last_dest_word; + uint8_t *last_src_word; + +#if defined(HAVE_PROCESS_VM) + struct iovec local; + struct iovec remote; + + local.iov_base = src; + local.iov_len = size; + + remote.iov_base = dest; + remote.iov_len = size; + + status = process_vm_writev(tracee->pid, &local, 1, &remote, 1, 0); + if ((size_t) status == size) + return 0; + /* Fallback to ptrace if something went wrong. */ + +#endif /* HAVE_PROCESS_VM */ + + nb_trailing_bytes = size % sizeof(word_t); + nb_full_words = (size - nb_trailing_bytes) / sizeof(word_t); + + /* Copy one word by one word, except for the last one. */ + for (i = 0; i < nb_full_words; i++) { + status = ptrace(PTRACE_POKEDATA, tracee->pid, dest + i, load_word(&src[i])); + if (status < 0) { + note(tracee, WARNING, SYSTEM, "ptrace(POKEDATA)"); + return -EFAULT; + } + } + + if (nb_trailing_bytes == 0) + return 0; + + /* Copy the bytes in the last word carefully since we have to + * overwrite only the relevant ones. */ + + word = ptrace(PTRACE_PEEKDATA, tracee->pid, dest + i, NULL); + if (errno != 0) { + note(tracee, WARNING, SYSTEM, "ptrace(PEEKDATA)"); + return -EFAULT; + } + + last_dest_word = (uint8_t *)&word; + last_src_word = (uint8_t *)&src[i]; + + for (j = 0; j < nb_trailing_bytes; j++) + last_dest_word[j] = last_src_word[j]; + + status = ptrace(PTRACE_POKEDATA, tracee->pid, dest + i, word); + if (status < 0) { + note(tracee, WARNING, SYSTEM, "ptrace(POKEDATA)"); + return -EFAULT; + } + + return 0; +} + +/** + * Gather the @src_tracer_count buffers pointed to by @src_tracer to + * the address @dest_tracee within the memory space of the @tracee + * process. This function returns -errno if an error occured, + * otherwise 0. 
+ */ +int writev_data(const Tracee *tracee, word_t dest_tracee, const struct iovec *src_tracer, int src_tracer_count) +{ + size_t size; + int status; + int i; + +#if defined(HAVE_PROCESS_VM) + struct iovec remote; + + for (i = 0, size = 0; i < src_tracer_count; i++) + size += src_tracer[i].iov_len; + + remote.iov_base = (word_t *)dest_tracee; + remote.iov_len = size; + + status = process_vm_writev(tracee->pid, src_tracer, src_tracer_count, &remote, 1, 0); + if ((size_t) status == size) + return 0; + /* Fallback to iterative-write if something went wrong. */ + +#endif /* HAVE_PROCESS_VM */ + + for (i = 0, size = 0; i < src_tracer_count; i++) { + status = write_data(tracee, dest_tracee + size, + src_tracer[i].iov_base, src_tracer[i].iov_len); + if (status < 0) + return status; + + size += src_tracer[i].iov_len; + } + + return 0; +} + +/** + * Copy @size bytes to the buffer @dest_tracer from the address + * @src_tracee within the memory space of the @tracee process. It + * returns -errno if an error occured, otherwise 0. + */ +int read_data(const Tracee *tracee, void *dest_tracer, word_t src_tracee, word_t size) +{ + word_t *src = (word_t *)src_tracee; + word_t *dest = (word_t *)dest_tracer; + + word_t nb_trailing_bytes; + word_t nb_full_words; + word_t word, i, j; + + uint8_t *last_src_word; + uint8_t *last_dest_word; + +#if defined(HAVE_PROCESS_VM) + long status; + struct iovec local; + struct iovec remote; + + local.iov_base = dest; + local.iov_len = size; + + remote.iov_base = src; + remote.iov_len = size; + + status = process_vm_readv(tracee->pid, &local, 1, &remote, 1, 0); + if ((size_t) status == size) + return 0; + /* Fallback to ptrace if something went wrong. */ + +#endif /* HAVE_PROCESS_VM */ + + nb_trailing_bytes = size % sizeof(word_t); + nb_full_words = (size - nb_trailing_bytes) / sizeof(word_t); + + /* Copy one word by one word, except for the last one. 
*/ + for (i = 0; i < nb_full_words; i++) { + word = ptrace(PTRACE_PEEKDATA, tracee->pid, src + i, NULL); + if (errno != 0) { + note(tracee, WARNING, SYSTEM, "ptrace(PEEKDATA)"); + return -EFAULT; + } + store_word(&dest[i], word); + } + + if (nb_trailing_bytes == 0) + return 0; + + /* Copy the bytes from the last word carefully since we have + * to not overwrite the bytes lying beyond @dest_tracer. */ + + word = ptrace(PTRACE_PEEKDATA, tracee->pid, src + i, NULL); + if (errno != 0) { + note(tracee, WARNING, SYSTEM, "ptrace(PEEKDATA)"); + return -EFAULT; + } + + last_dest_word = (uint8_t *)&dest[i]; + last_src_word = (uint8_t *)&word; + + for (j = 0; j < nb_trailing_bytes; j++) + last_dest_word[j] = last_src_word[j]; + + return 0; +} + +/** + * Copy to @dest_tracer at most @max_size bytes from the string + * pointed to by @src_tracee within the memory space of the @tracee + * process. This function returns -errno on error, otherwise + * it returns the number in bytes of the string, including the + * end-of-string terminator. + */ +int read_string(const Tracee *tracee, char *dest_tracer, word_t src_tracee, word_t max_size) +{ + word_t *src = (word_t *)src_tracee; + word_t *dest = (word_t *)dest_tracer; + + word_t nb_trailing_bytes; + word_t nb_full_words; + word_t word, i, j; + + uint8_t *src_word; + uint8_t *dest_word; + +#if defined(HAVE_PROCESS_VM) + /* [process_vm] system calls do not check the memory regions + * in the remote process until just before doing the + * read/write. Consequently, a partial read/write [1] may + * result if one of the remote_iov elements points to an + * invalid memory region in the remote process. No further + * reads/writes will be attempted beyond that point. Keep + * this in mind when attempting to read data of unknown length + * (such as C strings that are null-terminated) from a remote + * process, by avoiding spanning memory pages (typically 4KiB) + * in a single remote iovec element. 
(Instead, split the + * remote read into two remote_iov elements and have them + * merge back into a single write local_iov entry. The first + * read entry goes up to the page boundary, while the second + * starts on the next page boundary.). + * + * [1] Partial transfers apply at the granularity of iovec + * elements. These system calls won't perform a partial + * transfer that splits a single iovec element. + * + * -- man 2 process_vm_readv + */ + long status; + size_t size; + size_t offset; + struct iovec local; + struct iovec remote; + + static size_t chunk_size = 0; + static uintptr_t chunk_mask; + + /* A chunk shall not cross a page boundary. */ + if (chunk_size == 0) { + chunk_size = sysconf(_SC_PAGE_SIZE); + chunk_size = (chunk_size > 0 && chunk_size < 1024 ? chunk_size : 1024); + chunk_mask = ~(chunk_size - 1); + } + + /* Read the string by chunk. */ + offset = 0; + do { + uintptr_t current_chunk = (src_tracee + offset) & chunk_mask; + uintptr_t next_chunk = current_chunk + chunk_size; + + /* Compute the number of bytes available up to the + * next chunk or up to max_size. */ + size = next_chunk - (src_tracee + offset); + size = (size < max_size - offset ? size : max_size - offset); + + local.iov_base = (uint8_t *)dest + offset; + local.iov_len = size; + + remote.iov_base = (uint8_t *)src + offset; + remote.iov_len = size; + + status = process_vm_readv(tracee->pid, &local, 1, &remote, 1, 0); + if ((size_t) status != size) + goto fallback; + + status = strnlen(local.iov_base, size); + if ((size_t) status < size) { + size = offset + status + 1; + assert(size <= max_size); + return size; + } + + offset += size; + } while (offset < max_size); + assert(offset == max_size); + + /* Fallback to ptrace if something went wrong. */ +fallback: +#endif /* HAVE_PROCESS_VM */ + + nb_trailing_bytes = max_size % sizeof(word_t); + nb_full_words = (max_size - nb_trailing_bytes) / sizeof(word_t); + + /* Copy one word by one word, except for the last one. 
*/ + for (i = 0; i < nb_full_words; i++) { + word = ptrace(PTRACE_PEEKDATA, tracee->pid, src + i, NULL); + if (errno != 0) + return -EFAULT; + + store_word(&dest[i], word); + + /* Stop once an end-of-string is detected. */ + src_word = (uint8_t *)&word; + for (j = 0; j < sizeof(word_t); j++) + if (src_word[j] == '\0') + return i * sizeof(word_t) + j + 1; + } + + /* Copy the bytes from the last word carefully since we have + * to not overwrite the bytes lying beyond @dest_tracer. */ + + word = ptrace(PTRACE_PEEKDATA, tracee->pid, src + i, NULL); + if (errno != 0) + return -EFAULT; + + dest_word = (uint8_t *)&dest[i]; + src_word = (uint8_t *)&word; + + for (j = 0; j < nb_trailing_bytes; j++) { + dest_word[j] = src_word[j]; + if (src_word[j] == '\0') + break; + } + + return i * sizeof(word_t) + j + 1; +} + +/** + * Return the value of the word at the given @address in the @tracee's + * memory space. The caller must test errno to check if an error + * occured. + */ +word_t peek_word(const Tracee *tracee, word_t address) +{ + word_t result = 0; + +#if defined(HAVE_PROCESS_VM) + int status; + struct iovec local; + struct iovec remote; + + local.iov_base = &result; + local.iov_len = sizeof_word(tracee); + + remote.iov_base = (void *)address; + remote.iov_len = sizeof_word(tracee); + + errno = 0; + status = process_vm_readv(tracee->pid, &local, 1, &remote, 1, 0); + if (status > 0) + return result; + /* Fallback to ptrace if something went wrong. */ +#endif + errno = 0; + result = (word_t) ptrace(PTRACE_PEEKDATA, tracee->pid, address, NULL); + + /* From ptrace(2) manual: "Unfortunately, under Linux, + * different variations of this fault will return EIO or + * EFAULT more or less arbitrarily." */ + if (errno == EIO) + errno = EFAULT; + + /* Use only the 32 LSB when running a 32-bit process on a + * 64-bit kernel. 
*/ + if (is_32on64_mode(tracee)) + result &= 0xFFFFFFFF; + + return result; +} + +/** + * Set the word at the given @address in the @tracee's memory space to + * the given @value. The caller must test errno to check if an error + * occured. + */ +void poke_word(const Tracee *tracee, word_t address, word_t value) +{ + word_t tmp; + +#if defined(HAVE_PROCESS_VM) + int status; + struct iovec local; + struct iovec remote; + + /* Note: &value points to the 32 LSB on 64-bit little-endian + * architecture. */ + local.iov_base = &value; + local.iov_len = sizeof_word(tracee); + + remote.iov_base = (void *)address; + remote.iov_len = sizeof_word(tracee); + + errno = 0; + status = process_vm_writev(tracee->pid, &local, 1, &remote, 1, 0); + if (status > 0) + return; + /* Fallback to ptrace if something went wrong. */ +#endif + /* Don't overwrite the 32 MSB when running a 32-bit process on + * a 64-bit kernel. */ + if (is_32on64_mode(tracee)) { + errno = 0; + tmp = (word_t) ptrace(PTRACE_PEEKDATA, tracee->pid, address, NULL); + if (errno != 0) + return; + + value |= (tmp & 0xFFFFFFFF00000000ULL); + } + + errno = 0; + (void) ptrace(PTRACE_POKEDATA, tracee->pid, address, value); + + /* From ptrace(2) manual: "Unfortunately, under Linux, + * different variations of this fault will return EIO or + * EFAULT more or less arbitrarily." */ + if (errno == EIO) + errno = EFAULT; + + return; +} + +/** + * Allocate @size bytes in the @tracee's memory space. This function + * returns the address of the allocated memory in the @tracee's memory + * space, otherwise 0 if an error occured. + */ +word_t alloc_mem(Tracee *tracee, ssize_t size) +{ + word_t stack_pointer; + + /* This function should be called in sysenter only since the + * stack pointer is systematically restored at the end of + * sysexit (except for execve, but in this case the stack + * pointer should be handled with care since it is used by the + * process to retrieve argc, argv, envp, and auxv). 
*/ + assert(IS_IN_SYSENTER(tracee)); + + /* Get the current value of the stack pointer from the tracee's + * USER area. */ + stack_pointer = peek_reg(tracee, CURRENT, STACK_POINTER); + + /* Some ABIs specify an amount of bytes after the stack + * pointer that shall not be used by anything but the compiler + * (for optimization purpose). */ + if (stack_pointer == peek_reg(tracee, ORIGINAL, STACK_POINTER)) + size += RED_ZONE_SIZE; + + /* Align the stack */ + size = ((size - 1) / STACK_ALIGNMENT + 1) * STACK_ALIGNMENT; + + /* Sanity check. */ + if ( (size > 0 && stack_pointer <= (word_t) size) + || (size < 0 && stack_pointer >= ULONG_MAX + size)) { + note(tracee, WARNING, INTERNAL, "integer under/overflow detected in %s", + __FUNCTION__); + return 0; + } + + /* Remember the stack grows downward. */ + stack_pointer -= size; + + /* Set the new value of the stack pointer in the tracee's USER + * area. */ + poke_reg(tracee, STACK_POINTER, stack_pointer); + return stack_pointer; +} + +/** + * Clear @size bytes at the given @address in the @tracee's memory + * space. This function returns -errno if an error occured, otherwise + * 0. + */ +int clear_mem(const Tracee *tracee, word_t address, size_t size) +{ + int status; + void *zeros; + + zeros = mmap(NULL, size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (zeros == MAP_FAILED) + return -errno; + + status = write_data(tracee, address, zeros, size); + munmap(zeros, size); + return status; +} diff --git a/proot/proot_linux/tracee/mem.h b/proot/proot_linux/tracee/mem.h new file mode 100644 index 0000000..6049594 --- /dev/null +++ b/proot/proot_linux/tracee/mem.h @@ -0,0 +1,112 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#ifndef TRACEE_MEM_H +#define TRACEE_MEM_H + +#include /* PATH_MAX, */ +#include /* pid_t, size_t, */ +#include /* pid_t, size_t, */ +#include /* struct iovec, */ +#include /* ENAMETOOLONG, */ + +#include "arch.h" /* word_t, */ +#include "tracee/tracee.h" + +extern int write_data(const Tracee *tracee, word_t dest_tracee, const void *src_tracer, word_t size); +extern int writev_data(const Tracee *tracee, word_t dest_tracee, const struct iovec *src_tracer, int src_tracer_count); +extern int read_data(const Tracee *tracee, void *dest_tracer, word_t src_tracee, word_t size); +extern int read_string(const Tracee *tracee, char *dest_tracer, word_t src_tracee, word_t max_size); +extern word_t peek_word(const Tracee *tracee, word_t address); +extern void poke_word(const Tracee *tracee, word_t address, word_t value); +extern word_t alloc_mem(Tracee *tracee, ssize_t size); +extern int clear_mem(const Tracee *tracee, word_t address, size_t size); + +/** + * Copy to @dest_tracer at most PATH_MAX bytes -- including the + * end-of-string terminator -- from the string pointed to by + * @src_tracee within the memory space of the @tracee process. 
This + * function returns -errno on error, otherwise it returns the number + * in bytes of the string, including the end-of-string terminator. + */ +static inline int read_path(const Tracee *tracee, char dest_tracer[PATH_MAX], word_t src_tracee) +{ + int status; + + status = read_string(tracee, dest_tracer, src_tracee, PATH_MAX); + if (status < 0) + return status; + if (status >= PATH_MAX) + return -ENAMETOOLONG; + + return status; +} + +/** + * Generate a function that returns the value of the @type at the + * given @address in the @tracee's memory space. The caller must test + * errno to check if an error occured. + */ +#define GENERATE_peek(type) \ +static inline type ## _t peek_ ## type(const Tracee *tracee, word_t address) \ +{ \ + type ## _t result; \ + errno = -read_data(tracee, &result, address, sizeof(type ## _t)); \ + return result; \ +} + +GENERATE_peek(uint8); +GENERATE_peek(uint16); +GENERATE_peek(uint32); +GENERATE_peek(uint64); + +GENERATE_peek(int8); +GENERATE_peek(int16); +GENERATE_peek(int32); +GENERATE_peek(int64); + +#undef GENERATE_peek + +/** + * Generate a function that set the @type at the given @address in the + * @tracee's memory space to the given @value. The caller must test + * errno to check if an error occured. + */ +#define GENERATE_poke(type) \ +static inline void poke_ ## type(const Tracee *tracee, word_t address, type ## _t value) \ +{ \ + errno = -write_data(tracee, address, &value, sizeof(type ## _t)); \ +} + +GENERATE_poke(uint8); +GENERATE_poke(uint16); +GENERATE_poke(uint32); +GENERATE_poke(uint64); + +GENERATE_poke(int8); +GENERATE_poke(int16); +GENERATE_poke(int32); +GENERATE_poke(int64); + +#undef GENERATE_poke + +#endif /* TRACEE_MEM_H */ diff --git a/proot/proot_linux/tracee/reg.c b/proot/proot_linux/tracee/reg.c new file mode 100644 index 0000000..a0c1784 --- /dev/null +++ b/proot/proot_linux/tracee/reg.c @@ -0,0 +1,342 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. 
+ * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + */ + +#include /* off_t */ +#include /* struct user*, */ +#include /* ptrace(2), PTRACE*, */ +#include /* assert(3), */ +#include /* errno(3), */ +#include /* offsetof(), */ +#include /* *int*_t, */ +#include /* PRI*, */ +#include /* ULONG_MAX, */ +#include /* memcpy(3), */ +#include /* struct iovec, */ + +#include "arch.h" + +#if defined(ARCH_ARM64) +#include /* NT_PRSTATUS */ +#endif + +#include "syscall/sysnum.h" +#include "tracee/reg.h" +#include "tracee/abi.h" +#include "cli/note.h" +#include "compat.h" + +/** + * Compute the offset of the register @reg_name in the USER area. + */ +#define USER_REGS_OFFSET(reg_name) \ + (offsetof(struct user, regs) \ + + offsetof(struct user_regs_struct, reg_name)) + +#define REG(tracee, version, index) \ + (*(word_t*) (((uint8_t *) &tracee->_regs[version]) + reg_offset[index])) + +/* Specify the ABI registers (syscall argument passing, stack pointer). + * See sysdeps/unix/sysv/linux/${ARCH}/syscall.S from the GNU C Library. 
*/ +#if defined(ARCH_X86_64) + + static off_t reg_offset[] = { + [SYSARG_NUM] = USER_REGS_OFFSET(orig_rax), + [SYSARG_1] = USER_REGS_OFFSET(rdi), + [SYSARG_2] = USER_REGS_OFFSET(rsi), + [SYSARG_3] = USER_REGS_OFFSET(rdx), + [SYSARG_4] = USER_REGS_OFFSET(r10), + [SYSARG_5] = USER_REGS_OFFSET(r8), + [SYSARG_6] = USER_REGS_OFFSET(r9), + [SYSARG_RESULT] = USER_REGS_OFFSET(rax), + [STACK_POINTER] = USER_REGS_OFFSET(rsp), + [INSTR_POINTER] = USER_REGS_OFFSET(rip), + [RTLD_FINI] = USER_REGS_OFFSET(rdx), + [STATE_FLAGS] = USER_REGS_OFFSET(eflags), + [USERARG_1] = USER_REGS_OFFSET(rdi), + }; + + static off_t reg_offset_x86[] = { + [SYSARG_NUM] = USER_REGS_OFFSET(orig_rax), + [SYSARG_1] = USER_REGS_OFFSET(rbx), + [SYSARG_2] = USER_REGS_OFFSET(rcx), + [SYSARG_3] = USER_REGS_OFFSET(rdx), + [SYSARG_4] = USER_REGS_OFFSET(rsi), + [SYSARG_5] = USER_REGS_OFFSET(rdi), + [SYSARG_6] = USER_REGS_OFFSET(rbp), + [SYSARG_RESULT] = USER_REGS_OFFSET(rax), + [STACK_POINTER] = USER_REGS_OFFSET(rsp), + [INSTR_POINTER] = USER_REGS_OFFSET(rip), + [RTLD_FINI] = USER_REGS_OFFSET(rdx), + [STATE_FLAGS] = USER_REGS_OFFSET(eflags), + [USERARG_1] = USER_REGS_OFFSET(rax), + }; + + #undef REG + #define REG(tracee, version, index) \ + (*(word_t*) (tracee->_regs[version].cs == 0x23 \ + ? 
(((uint8_t *) &tracee->_regs[version]) + reg_offset_x86[index]) \ + : (((uint8_t *) &tracee->_regs[version]) + reg_offset[index]))) + +#elif defined(ARCH_ARM_EABI) + + static off_t reg_offset[] = { + [SYSARG_NUM] = USER_REGS_OFFSET(uregs[7]), + [SYSARG_1] = USER_REGS_OFFSET(uregs[0]), + [SYSARG_2] = USER_REGS_OFFSET(uregs[1]), + [SYSARG_3] = USER_REGS_OFFSET(uregs[2]), + [SYSARG_4] = USER_REGS_OFFSET(uregs[3]), + [SYSARG_5] = USER_REGS_OFFSET(uregs[4]), + [SYSARG_6] = USER_REGS_OFFSET(uregs[5]), + [SYSARG_RESULT] = USER_REGS_OFFSET(uregs[0]), + [STACK_POINTER] = USER_REGS_OFFSET(uregs[13]), + [INSTR_POINTER] = USER_REGS_OFFSET(uregs[15]), + [USERARG_1] = USER_REGS_OFFSET(uregs[0]), + }; + +#elif defined(ARCH_ARM64) + + #undef USER_REGS_OFFSET + #define USER_REGS_OFFSET(reg_name) offsetof(struct user_regs_struct, reg_name) + + static off_t reg_offset[] = { + [SYSARG_NUM] = USER_REGS_OFFSET(regs[8]), + [SYSARG_1] = USER_REGS_OFFSET(regs[0]), + [SYSARG_2] = USER_REGS_OFFSET(regs[1]), + [SYSARG_3] = USER_REGS_OFFSET(regs[2]), + [SYSARG_4] = USER_REGS_OFFSET(regs[3]), + [SYSARG_5] = USER_REGS_OFFSET(regs[4]), + [SYSARG_6] = USER_REGS_OFFSET(regs[5]), + [SYSARG_RESULT] = USER_REGS_OFFSET(regs[0]), + [STACK_POINTER] = USER_REGS_OFFSET(sp), + [INSTR_POINTER] = USER_REGS_OFFSET(pc), + [USERARG_1] = USER_REGS_OFFSET(regs[0]), + }; + +#elif defined(ARCH_X86) + + static off_t reg_offset[] = { + [SYSARG_NUM] = USER_REGS_OFFSET(orig_eax), + [SYSARG_1] = USER_REGS_OFFSET(ebx), + [SYSARG_2] = USER_REGS_OFFSET(ecx), + [SYSARG_3] = USER_REGS_OFFSET(edx), + [SYSARG_4] = USER_REGS_OFFSET(esi), + [SYSARG_5] = USER_REGS_OFFSET(edi), + [SYSARG_6] = USER_REGS_OFFSET(ebp), + [SYSARG_RESULT] = USER_REGS_OFFSET(eax), + [STACK_POINTER] = USER_REGS_OFFSET(esp), + [INSTR_POINTER] = USER_REGS_OFFSET(eip), + [RTLD_FINI] = USER_REGS_OFFSET(edx), + [STATE_FLAGS] = USER_REGS_OFFSET(eflags), + [USERARG_1] = USER_REGS_OFFSET(eax), + }; + +#elif defined(ARCH_SH4) + + static off_t reg_offset[] = { + 
[SYSARG_NUM] = USER_REGS_OFFSET(regs[3]), + [SYSARG_1] = USER_REGS_OFFSET(regs[4]), + [SYSARG_2] = USER_REGS_OFFSET(regs[5]), + [SYSARG_3] = USER_REGS_OFFSET(regs[6]), + [SYSARG_4] = USER_REGS_OFFSET(regs[7]), + [SYSARG_5] = USER_REGS_OFFSET(regs[0]), + [SYSARG_6] = USER_REGS_OFFSET(regs[1]), + [SYSARG_RESULT] = USER_REGS_OFFSET(regs[0]), + [STACK_POINTER] = USER_REGS_OFFSET(regs[15]), + [INSTR_POINTER] = USER_REGS_OFFSET(pc), + [RTLD_FINI] = USER_REGS_OFFSET(r4), + }; + +#else + + #error "Unsupported architecture" + +#endif + +/** + * Return the *cached* value of the given @tracees' @reg. + */ +word_t peek_reg(const Tracee *tracee, RegVersion version, Reg reg) +{ + word_t result; + + assert(version < NB_REG_VERSION); + + result = REG(tracee, version, reg); + + /* Use only the 32 least significant bits (LSB) when running + * 32-bit processes on a 64-bit kernel. */ + if (is_32on64_mode(tracee)) + result &= 0xFFFFFFFF; + + return result; +} + +/** + * Set the *cached* value of the given @tracees' @reg. + */ +void poke_reg(Tracee *tracee, Reg reg, word_t value) +{ + if (peek_reg(tracee, CURRENT, reg) == value) + return; + + REG(tracee, CURRENT, reg) = value; + tracee->_regs_were_changed = true; +} + +/** + * Print the value of the current @tracee's registers according + * to the @verbose_level. Note: @message is mixed to the output. 
+ */ +void print_current_regs(Tracee *tracee, int verbose_level, const char *message) +{ + if (tracee->verbose < verbose_level) + return; + + note(tracee, INFO, INTERNAL, + "vpid %" PRIu64 ": %s: %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx) = 0x%lx [0x%lx, %d]", + tracee->vpid, message, + stringify_sysnum(get_sysnum(tracee, CURRENT)), + peek_reg(tracee, CURRENT, SYSARG_1), peek_reg(tracee, CURRENT, SYSARG_2), + peek_reg(tracee, CURRENT, SYSARG_3), peek_reg(tracee, CURRENT, SYSARG_4), + peek_reg(tracee, CURRENT, SYSARG_5), peek_reg(tracee, CURRENT, SYSARG_6), + peek_reg(tracee, CURRENT, SYSARG_RESULT), + peek_reg(tracee, CURRENT, STACK_POINTER), + get_abi(tracee)); +} + +/** + * Save the @tracee's current register bank into the @version register + * bank. + */ +void save_current_regs(Tracee *tracee, RegVersion version) +{ + /* Optimization: don't restore original register values if + * they were never changed. */ + if (version == ORIGINAL) + tracee->_regs_were_changed = false; + + memcpy(&tracee->_regs[version], &tracee->_regs[CURRENT], sizeof(tracee->_regs[CURRENT])); +} + +/** + * Copy all @tracee's general purpose registers into a dedicated + * cache. This function returns -errno if an error occured, 0 + * otherwise. + */ +int fetch_regs(Tracee *tracee) +{ + int status; + +#if defined(ARCH_ARM64) + struct iovec regs; + + regs.iov_base = &tracee->_regs[CURRENT]; + regs.iov_len = sizeof(tracee->_regs[CURRENT]); + + status = ptrace(PTRACE_GETREGSET, tracee->pid, NT_PRSTATUS, ®s); +#else + status = ptrace(PTRACE_GETREGS, tracee->pid, NULL, &tracee->_regs[CURRENT]); +#endif + if (status < 0) + return status; + + return 0; +} + +/** + * Copy the cached values of all @tracee's general purpose registers + * back to the process, if necessary. This function returns -errno if + * an error occured, 0 otherwise. 
+ */ +int push_regs(Tracee *tracee) +{ + int status; + + if (tracee->_regs_were_changed) { + /* At the very end of a syscall, with regard to the + * entry, only the result register can be modified by + * PRoot. */ + if (tracee->restore_original_regs) { + /* Restore the sysarg register only if it is + * not the same as the result register. Note: + * it's never the case on x86 architectures, + * so don't make this check, otherwise it + * would introduce useless complexity because + * of the multiple ABI support. */ +#if defined(ARCH_X86) || defined(ARCH_X86_64) +# define RESTORE(sysarg) (REG(tracee, CURRENT, sysarg) = REG(tracee, ORIGINAL, sysarg)) +#else +# define RESTORE(sysarg) (void) (reg_offset[SYSARG_RESULT] != reg_offset[sysarg] && \ + (REG(tracee, CURRENT, sysarg) = REG(tracee, ORIGINAL, sysarg))) +#endif + + RESTORE(SYSARG_NUM); + RESTORE(SYSARG_1); + RESTORE(SYSARG_2); + RESTORE(SYSARG_3); + RESTORE(SYSARG_4); + RESTORE(SYSARG_5); + RESTORE(SYSARG_6); + RESTORE(STACK_POINTER); + } + +#if defined(ARCH_ARM64) + struct iovec regs; + word_t current_sysnum = REG(tracee, CURRENT, SYSARG_NUM); + + /* Update syscall number if needed. On arm64, a new + * subcommand has been added to PTRACE_{S,G}ETREGSET + * to allow write/read of current sycall number. */ + if (current_sysnum != REG(tracee, ORIGINAL, SYSARG_NUM)) { + regs.iov_base = ¤t_sysnum; + regs.iov_len = sizeof(current_sysnum); + status = ptrace(PTRACE_SETREGSET, tracee->pid, NT_ARM_SYSTEM_CALL, ®s); + if (status < 0) + note(tracee, WARNING, SYSTEM, "can't set the syscall number"); + } + + /* Update other registers. */ + regs.iov_base = &tracee->_regs[CURRENT]; + regs.iov_len = sizeof(tracee->_regs[CURRENT]); + + status = ptrace(PTRACE_SETREGSET, tracee->pid, NT_PRSTATUS, ®s); +#else +# if defined(ARCH_ARM_EABI) + /* On ARM, a special ptrace request is required to + * change effectively the syscall number during a + * ptrace-stop. 
*/ + word_t current_sysnum = REG(tracee, CURRENT, SYSARG_NUM); + if (current_sysnum != REG(tracee, ORIGINAL, SYSARG_NUM)) { + status = ptrace(PTRACE_SET_SYSCALL, tracee->pid, 0, current_sysnum); + if (status < 0) + note(tracee, WARNING, SYSTEM, "can't set the syscall number"); + } +# endif + + status = ptrace(PTRACE_SETREGS, tracee->pid, NULL, &tracee->_regs[CURRENT]); +#endif + if (status < 0) + return status; + } + + return 0; +} diff --git a/proot/proot_linux/tracee/reg.h b/proot/proot_linux/tracee/reg.h new file mode 100644 index 0000000..1656b67 --- /dev/null +++ b/proot/proot_linux/tracee/reg.h @@ -0,0 +1,54 @@ +/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*- + * + * This file is part of PRoot. + * + * Copyright (C) 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */
+
+#ifndef TRACEE_REG_H
+#define TRACEE_REG_H
+
+#include "tracee/tracee.h"
+#include "arch.h"
+
+typedef enum { /* Indexes into the per-architecture reg_offset[] tables of reg.c.  */
+	SYSARG_NUM = 0, /* Syscall number (e.g. orig_rax on x86_64).  */
+	SYSARG_1, /* Syscall arguments, from the first...  */
+	SYSARG_2,
+	SYSARG_3,
+	SYSARG_4,
+	SYSARG_5,
+	SYSARG_6, /* ... to the sixth.  */
+	SYSARG_RESULT, /* Syscall result (e.g. rax on x86_64).  */
+	STACK_POINTER,
+	INSTR_POINTER,
+	RTLD_FINI, /* Not mapped by the ARM and ARM64 tables.  */
+	STATE_FLAGS, /* Mapped to eflags; x86 and x86_64 tables only.  */
+	USERARG_1, /* Not mapped by the SH4 table.  */
+} Reg;
+
+extern int fetch_regs(Tracee *tracee); /* Process -> register cache.  */
+extern int push_regs(Tracee *tracee); /* Register cache -> process, if changed.  */
+
+extern word_t peek_reg(const Tracee *tracee, RegVersion version, Reg reg); /* Read a cached register.  */
+extern void poke_reg(Tracee *tracee, Reg reg, word_t value); /* Write a cached register (CURRENT bank).  */
+
+extern void print_current_regs(Tracee *tracee, int verbose_level, const char *message);
+extern void save_current_regs(Tracee *tracee, RegVersion version); /* Copy CURRENT bank into @version.  */
+
+#endif /* TRACEE_REG_H */
diff --git a/proot/proot_linux/tracee/tracee.c b/proot/proot_linux/tracee/tracee.c
new file mode 100644
index 0000000..65286dd
--- /dev/null
+++ b/proot/proot_linux/tracee/tracee.c
@@ -0,0 +1,631 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#include /* CLONE_*, */ +#include /* pid_t, size_t, */ +#include /* NULL, */ +#include /* assert(3), */ +#include /* bzero(3), */ +#include /* bool, true, false, */ +#include /* LIST_*, */ +#include /* talloc_*, */ +#include /* kill(2), SIGKILL, */ +#include /* ptrace(2), PTRACE_*, */ +#include /* E*, */ +#include /* PRI*, */ + +#include "tracee/tracee.h" +#include "tracee/reg.h" +#include "path/binding.h" +#include "syscall/sysnum.h" +#include "tracee/event.h" +#include "ptrace/ptrace.h" +#include "ptrace/wait.h" +#include "extension/extension.h" +#include "cli/note.h" + +#include "compat.h" + +#ifndef __W_STOPCODE +#define __W_STOPCODE(sig) ((sig) <<8 | 0x7f) +#endif + +typedef LIST_HEAD(tracees, tracee) Tracees; +static Tracees tracees; + + +/** + * Remove @zombie from its parent's list of zombies. Note: this is a + * talloc destructor. + */ +static int remove_zombie(Tracee *zombie) +{ + LIST_REMOVE(zombie, link); + return 0; +} + +/** + * Perform some specific treatments against @pointer according to its + * type, before it gets unlinked from @tracee_->life_context. + */ +static void clean_life_span_object(const void *pointer, int depth UNUSED, + int max_depth UNUSED, int is_ref UNUSED, void *tracee_) +{ + Binding *binding; + Tracee *tracee; + + tracee = talloc_get_type_abort(tracee_, Tracee); + + /* So far, only bindings need a special treatment. */ + binding = talloc_get_type(pointer, Binding); + if (binding != NULL) + remove_binding_from_all_lists(tracee, binding); +} + +/** + * Remove @tracee from the list of tracees and update all of its + * children & ptracees, and its ptracer. Note: this is a talloc + * destructor. + */ +static int remove_tracee(Tracee *tracee) +{ + Tracee *relative; + Tracee *ptracer; + int event; + + LIST_REMOVE(tracee, link); + + /* Clean objects that are linked to this tracee's life + * span. 
*/ + talloc_report_depth_cb(tracee->life_context, 0, 100, clean_life_span_object, tracee); + + /* This could be optimize by using a dedicated list of + * children and ptracees. */ + LIST_FOREACH(relative, &tracees, link) { + /* Its children are now orphan. */ + if (relative->parent == tracee) + relative->parent = NULL; + + /* Its tracees are now free. */ + if (relative->as_ptracee.ptracer == tracee) { + /* Release the pending event, if any. */ + relative->as_ptracee.ptracer = NULL; + + if (relative->as_ptracee.event4.proot.pending) { + event = handle_tracee_event(relative, + relative->as_ptracee.event4.proot.value); + (void) restart_tracee(relative, event); + } + else if (relative->as_ptracee.event4.ptracer.pending) { + event = relative->as_ptracee.event4.proot.value; + (void) restart_tracee(relative, event); + } + + bzero(&relative->as_ptracee, sizeof(relative->as_ptracee)); + } + } + + /* Nothing else to do if it's not a ptracee. */ + ptracer = tracee->as_ptracee.ptracer; + if (ptracer == NULL) + return 0; + + /* Zombify this ptracee until its ptracer is notified about + * its death. */ + event = tracee->as_ptracee.event4.ptracer.value; + if (tracee->as_ptracee.event4.ptracer.pending + && (WIFEXITED(event) || WIFSIGNALED(event))) { + Tracee *zombie; + + zombie = new_dummy_tracee(ptracer); + if (zombie != NULL) { + LIST_INSERT_HEAD(&PTRACER.zombies, zombie, link); + talloc_set_destructor(zombie, remove_zombie); + + zombie->parent = tracee->parent; + zombie->clone = tracee->clone; + zombie->pid = tracee->pid; + + detach_from_ptracer(tracee); + attach_to_ptracer(zombie, ptracer); + + zombie->as_ptracee.event4.ptracer.pending = true; + zombie->as_ptracee.event4.ptracer.value = event; + zombie->as_ptracee.is_zombie = true; + + return 0; + } + /* Fallback to the common path. */ + } + + detach_from_ptracer(tracee); + + /* Wake its ptracer if there's nothing else to wait for. 
*/ + if (PTRACER.nb_ptracees == 0 && PTRACER.wait_pid != 0) { + /* Update the return value of ptracer's wait(2). */ + poke_reg(ptracer, SYSARG_RESULT, -ECHILD); + + /* Don't forget to write its register cache back. */ + (void) push_regs(ptracer); + + PTRACER.wait_pid = 0; + (void) restart_tracee(ptracer, 0); + } + + return 0; +} + +/** + * Allocate a new entry for a dummy tracee (no pid, no destructor, not + * in the list of tracees, ...). The new allocated memory is attached + * to the given @context. This function returns NULL if an error + * occurred (ENOMEM), otherwise it returns the newly allocated + * structure. + */ +Tracee *new_dummy_tracee(TALLOC_CTX *context) +{ + Tracee *tracee; + + tracee = talloc_zero(context, Tracee); + if (tracee == NULL) + return NULL; + + /* Allocate a memory collector. */ + tracee->ctx = talloc_new(tracee); + if (tracee->ctx == NULL) + goto no_mem; + + /* By default new tracees have an empty file-system + * name-space and heap. */ + tracee->fs = talloc_zero(tracee, FileSystemNameSpace); + tracee->heap = talloc_zero(tracee, Heap); + if (tracee->fs == NULL || tracee->heap == NULL) + goto no_mem; + + return tracee; + +no_mem: + TALLOC_FREE(tracee); + return NULL; +} + +static uint64_t next_vpid = 1; + +/** + * Allocate a new entry for the tracee @pid, then set its destructor + * and add it to the list of tracees. This function returns NULL if + * an error occurred (ENOMEM), otherwise it returns the newly + * allocated structure. + */ +static Tracee *new_tracee(pid_t pid) +{ + Tracee *tracee; + + tracee = new_dummy_tracee(NULL); + if (tracee == NULL) + return NULL; + + talloc_set_destructor(tracee, remove_tracee); + + tracee->pid = pid; + tracee->vpid = next_vpid++; + + LIST_INSERT_HEAD(&tracees, tracee, link); + + tracee->life_context = talloc_new(tracee); + + return tracee; +} + +/** + * Return the first [stopped?] 
tracee with the given
+ * @pid (-1 for any) which has the given @ptracer, and which has a
+ * pending event for its ptracer if @only_with_pevent is true. See
+ * wait(2) manual for the meaning of @wait_options. This function
+ * returns NULL if there's no such ptracee.
+ */
+Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped,
+		bool only_with_pevent, word_t wait_options)
+{
+	Tracee *ptracee;
+
+	/* NOTE(review): PTRACER and PTRACEE are macros defined outside
+	 * this chunk; presumably they expand to ptracer->as_ptracer and
+	 * ptracee->as_ptracee -- confirm. */
+
+	/* Return zombies first. Zombies are matched on @pid and
+	 * @wait_options only: neither @only_stopped nor
+	 * @only_with_pevent is consulted for them. */
+	LIST_FOREACH(ptracee, &PTRACER.zombies, link) {
+		/* Not the ptracee you're looking for? */
+		if (pid != ptracee->pid && pid != -1)
+			continue;
+
+		/* Not the expected kind of cloned process? */
+		if (!EXPECTED_WAIT_CLONE(wait_options, ptracee))
+			continue;
+
+		return ptracee;
+	}
+
+	/* Then walk the global list of tracees. */
+	LIST_FOREACH(ptracee, &tracees, link) {
+		/* Discard tracees that don't have this ptracer. */
+		if (PTRACEE.ptracer != ptracer)
+			continue;
+
+		/* Not the ptracee you're looking for? */
+		if (pid != ptracee->pid && pid != -1)
+			continue;
+
+		/* Not the expected kind of cloned process? */
+		if (!EXPECTED_WAIT_CLONE(wait_options, ptracee))
+			continue;
+
+		/* No need to do more checks if its stopped state
+		 * doesn't matter. Be careful when using such
+		 * maybe-running tracee. */
+		if (!only_stopped)
+			return ptracee;
+
+		/* Is this tracee in the stopped state? */
+		if (ptracee->running)
+			continue;
+
+		/* Has a pending event for its ptracer? A stopped
+		 * tracee is also returned when the caller doesn't
+		 * require a pending event. */
+		if (PTRACEE.event4.ptracer.pending || !only_with_pevent)
+			return ptracee;
+
+		/* No need to go further if the specific tracee isn't
+		 * in the expected state: a search by exact @pid
+		 * stops here, a search with -1 moves on to the next
+		 * candidate. */
+		if (pid == ptracee->pid)
+			return NULL;
+	}
+
+	return NULL;
+}
+
+/**
+ * Wrapper for get_ptracee(), this ensures only a stopped tracee is
+ * returned (or NULL): @only_stopped is forced to true, the other
+ * parameters are passed through unchanged.
+ */
+Tracee *get_stopped_ptracee(const Tracee *ptracer, pid_t pid,
+			bool only_with_pevent, word_t wait_options)
+{
+	return get_ptracee(ptracer, pid, true, only_with_pevent, wait_options);
+}
+
+/**
+ * Wrapper for get_ptracee(), this tells whether @ptracer has at least
+ * one ptracee matching @pid and @wait_options. Contrary to
+ * get_stopped_ptracee(), the state of the ptracee doesn't matter
+ * here: running ptracees and ptracees without any pending event are
+ * taken into account too.
+ */
+bool has_ptracees(const Tracee *ptracer, pid_t pid, word_t wait_options)
+{
+	return (get_ptracee(ptracer, pid, false, false, wait_options) != NULL);
+}
+
+/**
+ * Return the entry related to the tracee @pid. If no entry were
+ * found, a new one is created if @create is true, otherwise NULL is
+ * returned. Unless @pid designates @current_tracee (which may be
+ * NULL), the temporary memory collector (ctx) of the entry found is
+ * flushed and re-allocated before the entry is returned.
+ */
+Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create)
+{
+	Tracee *tracee;
+
+	/* Don't reset the memory collector if the searched tracee is
+	 * the current one: there's likely pointers to the
+	 * sub-allocated data in the caller. */
+	if (current_tracee != NULL && current_tracee->pid == pid)
+		return (Tracee *)current_tracee;
+
+	LIST_FOREACH(tracee, &tracees, link) {
+		if (tracee->pid == pid) {
+			/* Flush then allocate a new memory collector.
+			 * NOTE(review): the result of talloc_new()
+			 * isn't checked here. */
+			TALLOC_FREE(tracee->ctx);
+			tracee->ctx = talloc_new(tracee);
+
+			return tracee;
+		}
+	}
+
+	/* Unknown pid: new_tracee() is defined outside this chunk. */
+	return (create ? new_tracee(pid) : NULL);
+}
+
+/**
+ * Mark tracee as terminated and optionally take action. The tracee
+ * itself isn't freed here, see free_terminated_tracees().
+ */
+void terminate_tracee(Tracee *tracee)
+{
+	tracee->terminated = true;
+
+	/* Case where the terminated tracee is marked
+	   to kill all tracees on exit.
+	*/
+	if (tracee->killall_on_exit) {
+		VERBOSE(tracee, 1, "terminating all tracees on exit");
+		kill_all_tracees();
+	}
+}
+
+/**
+ * Free all tracees marked as terminated.
+ */
+void free_terminated_tracees()
+{
+	Tracee *next;
+
+	/* Items can't be deleted when using LIST_FOREACH: the link to
+	 * the next item is read before the current one is freed. */
+	next = tracees.lh_first;
+	while (next != NULL) {
+		Tracee *tracee = next;
+		next = tracee->link.le_next;
+
+		/* NOTE(review): presumably a talloc destructor unlinks
+		 * the tracee from the list -- confirm in
+		 * new_tracee(). */
+		if (tracee->terminated)
+			TALLOC_FREE(tracee);
+	}
+}
+
+/**
+ * Make new @parent's child inherit from it. Depending on
+ * @clone_flags, some information are copied or shared. This function
+ * returns -errno if an error occured, otherwise 0.
+ */ +int new_child(Tracee *parent, word_t clone_flags) +{ + int ptrace_options; + unsigned long pid; + Tracee *child; + int status; + + /* If the tracee calls clone(2) with the CLONE_VFORK flag, + * PTRACE_EVENT_VFORK will be delivered instead [...]; + * otherwise if the tracee calls clone(2) with the exit signal + * set to SIGCHLD, PTRACE_EVENT_FORK will be delivered [...] + * + * -- ptrace(2) man-page + * + * That means we have to check if it's actually a clone(2) in + * order to get the right flags. + */ + status = fetch_regs(parent); + if (status >= 0 && get_sysnum(parent, CURRENT) == PR_clone) + clone_flags = peek_reg(parent, CURRENT, SYSARG_1); + + /* Get the pid of the parent's new child. */ + status = ptrace(PTRACE_GETEVENTMSG, parent->pid, NULL, &pid); + if (status < 0 || pid == 0) { + note(parent, WARNING, SYSTEM, "ptrace(GETEVENTMSG)"); + return status; + } + + child = get_tracee(parent, (pid_t) pid, true); + if (child == NULL) { + note(parent, WARNING, SYSTEM, "running out of memory"); + return -ENOMEM; + } + + /* Sanity checks. */ + assert(child != NULL + && child->exe == NULL + && child->fs->cwd == NULL + && child->fs->bindings.pending == NULL + && child->fs->bindings.guest == NULL + && child->fs->bindings.host == NULL + && child->qemu == NULL + && child->glue == NULL + && child->parent == NULL + && child->as_ptracee.ptracer == NULL); + + child->verbose = parent->verbose; + child->seccomp = parent->seccomp; + child->sysexit_pending = parent->sysexit_pending; + child->restart_how = parent->restart_how; + + /* If CLONE_VM is set, the calling process and the child + * process run in the same memory space [...] any memory + * mapping or unmapping performed with mmap(2) or munmap(2) by + * the child or calling process also affects the other + * process. + * + * If CLONE_VM is not set, the child process runs in a + * separate copy of the memory space of the calling process at + * the time of clone(). 
Memory writes or file + * mappings/unmappings performed by one of the processes do + * not affect the other, as with fork(2). + * + * -- clone(2) man-page + */ + TALLOC_FREE(child->heap); + child->heap = ((clone_flags & CLONE_VM) != 0) + ? talloc_reference(child, parent->heap) + : talloc_memdup(child, parent->heap, sizeof(Heap)); + if (child->heap == NULL) + return -ENOMEM; + + /* If CLONE_PARENT is set, then the parent of the new child + * (as returned by getppid(2)) will be the same as that of the + * calling process. + * + * If CLONE_PARENT is not set, then (as with fork(2)) the + * child's parent is the calling process. + * + * -- clone(2) man-page + */ + if ((clone_flags & CLONE_PARENT) != 0) + child->parent = parent->parent; + else + child->parent = parent; + + /* Remember if this child belongs to the same thread group as + * its parent. This is currently useful for ptrace emulation + * only but it deserves to be extended to support execve(2) + * specificity (ie. when a thread calls execve(2), its pid + * gets replaced by the pid of its thread group leader). */ + child->clone = ((clone_flags & CLONE_THREAD) != 0); + + /* Depending on how the new process is created, it may be + * automatically traced by the parent's tracer. */ + ptrace_options = ( clone_flags == 0 ? PTRACE_O_TRACEFORK + : (clone_flags & 0xFF) == SIGCHLD ? PTRACE_O_TRACEFORK + : (clone_flags & CLONE_VFORK) != 0 ? PTRACE_O_TRACEVFORK + : PTRACE_O_TRACECLONE); + if (parent->as_ptracee.ptracer != NULL + && ( (ptrace_options & parent->as_ptracee.options) != 0 + || (clone_flags & CLONE_PTRACE) != 0)) { + attach_to_ptracer(child, parent->as_ptracee.ptracer); + + /* All these flags are inheritable, no matter why this + * child is being traced. 
*/ + child->as_ptracee.options |= (parent->as_ptracee.options + & ( PTRACE_O_TRACECLONE + | PTRACE_O_TRACEEXEC + | PTRACE_O_TRACEEXIT + | PTRACE_O_TRACEFORK + | PTRACE_O_TRACESYSGOOD + | PTRACE_O_TRACEVFORK + | PTRACE_O_TRACEVFORKDONE)); + } + + /* If CLONE_FS is set, the parent and the child process share + * the same file system information. This includes the root + * of the file system, the current working directory, and the + * umask. Any call to chroot(2), chdir(2), or umask(2) + * performed by the parent process or the child process also + * affects the other process. + * + * If CLONE_FS is not set, the child process works on a copy + * of the file system information of the parent process at the + * time of the clone() call. Calls to chroot(2), chdir(2), + * umask(2) performed later by one of the processes do not + * affect the other process. + * + * -- clone(2) man-page + */ + TALLOC_FREE(child->fs); + if ((clone_flags & CLONE_FS) != 0) { + /* File-system name-space is shared. */ + child->fs = talloc_reference(child, parent->fs); + } + else { + /* File-system name-space is copied. */ + child->fs = talloc_zero(child, FileSystemNameSpace); + if (child->fs == NULL) + return -ENOMEM; + + child->fs->cwd = talloc_strdup(child->fs, parent->fs->cwd); + if (child->fs->cwd == NULL) + return -ENOMEM; + talloc_set_name_const(child->fs->cwd, "$cwd"); + + /* Bindings are shared across file-system name-spaces since a + * "mount --bind" made by a process affects all other processes + * under Linux. Actually they are copied when a sub + * reconfiguration occured (nested proot or chroot(2)). */ + child->fs->bindings.guest = talloc_reference(child->fs, parent->fs->bindings.guest); + child->fs->bindings.host = talloc_reference(child->fs, parent->fs->bindings.host); + } + + /* The path to the executable is unshared only once the child + * process does a call to execve(2). 
*/ + child->exe = talloc_reference(child, parent->exe); + + child->qemu = talloc_reference(child, parent->qemu); + child->glue = talloc_reference(child, parent->glue); + + child->host_ldso_paths = talloc_reference(child, parent->host_ldso_paths); + child->guest_ldso_paths = talloc_reference(child, parent->guest_ldso_paths); + + child->tool_name = parent->tool_name; + + // inherit_extensions(child, parent, clone_flags); + + /* Restart the child tracee if it was already alive but + * stopped until that moment. */ + if (child->sigstop == SIGSTOP_PENDING) { + bool keep_stopped = false; + + child->sigstop = SIGSTOP_ALLOWED; + + /* Notify its ptracer if it is ready to be traced. */ + if (child->as_ptracee.ptracer != NULL) { + /* Sanity check. */ + assert(!child->as_ptracee.tracing_started); + + keep_stopped = handle_ptracee_event(child, __W_STOPCODE(SIGSTOP)); + + /* Note that this event was already handled by + * PRoot since child->as_ptracee.ptracer was + * NULL up to now. */ + child->as_ptracee.event4.proot.pending = false; + child->as_ptracee.event4.proot.value = 0; + } + + if (!keep_stopped) + (void) restart_tracee(child, 0); + } + + VERBOSE(child, 1, "vpid %" PRIu64 ": pid %d", child->vpid, child->pid); + + return 0; +} + +/** + * Helper for swap_config(). + */ +static void reparent_config(Tracee *new_parent, Tracee *old_parent) +{ + new_parent->verbose = old_parent->verbose; + +#define REPARENT(field) do { \ + talloc_reparent(old_parent, new_parent, old_parent->field); \ + new_parent->field = old_parent->field; \ + } while(0); + + REPARENT(fs); + REPARENT(exe); + REPARENT(qemu); + REPARENT(glue); + REPARENT(extensions); + +#undef REPARENT +} + +/** + * Swap configuration (pointers and parentality) between @tracee1 and @tracee2. 
+ */
+int swap_config(Tracee *tracee1, Tracee *tracee2)
+{
+	Tracee *tmp;
+
+	/* Scratch tracee used as the third slot of the swap. It is
+	 * allocated in the temporary context of @tracee1 and isn't
+	 * freed explicitly here. */
+	tmp = talloc_zero(tracee1->ctx, Tracee);
+	if (tmp == NULL)
+		return -ENOMEM;
+
+	/* Classic three-way swap: tmp <- tracee1, tracee1 <- tracee2,
+	 * tracee2 <- tmp. */
+	reparent_config(tmp,     tracee1);
+	reparent_config(tracee1, tracee2);
+	reparent_config(tracee2, tmp);
+
+	/* Returns 0 on success, -ENOMEM if the scratch tracee can't be
+	 * allocated (nothing is swapped in that case). */
+	return 0;
+}
+
+/* Send the KILL signal to all tracees. The result of kill(2) is
+ * ignored for each of them. */
+void kill_all_tracees()
+{
+	Tracee *tracee;
+
+	LIST_FOREACH(tracee, &tracees, link)
+		kill(tracee->pid, SIGKILL);
+}
diff --git a/proot/proot_linux/tracee/tracee.h b/proot/proot_linux/tracee/tracee.h
new file mode 100644
index 0000000..3e71e7d
--- /dev/null
+++ b/proot/proot_linux/tracee/tracee.h
@@ -0,0 +1,291 @@
+/* -*- c-set-style: "K&R"; c-basic-offset: 8 -*-
+ *
+ * This file is part of PRoot.
+ *
+ * Copyright (C) 2015 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */ + +#ifndef TRACEE_H +#define TRACEE_H + +#include /* pid_t, size_t, */ +#include /* struct user*, */ +#include /* bool, */ +#include /* LIST_*, */ +#include /* enum __ptrace_request */ +#include /* talloc_*, */ +#include /* *int*_t, */ +#include /* __WAIT_* */ +#include "arch.h" /* word_t, user_regs_struct, */ +#include "compat.h" + +#if defined(__GLIBC__) +#define PTRACE_REQUEST_TYPE enum __ptrace_request +#else +#define PTRACE_REQUEST_TYPE int +#endif + +typedef enum { + CURRENT = 0, + ORIGINAL = 1, + MODIFIED = 2, + NB_REG_VERSION +} RegVersion; + +struct bindings; +struct load_info; +struct extensions; +struct chained_syscalls; + +/* Information related to a file-system name-space. */ +typedef struct { + struct { + /* List of bindings as specified by the user but not canonicalized yet. */ + struct bindings *pending; + + /* List of bindings canonicalized and sorted in the "guest" order. */ + struct bindings *guest; + + /* List of bindings canonicalized and sorted in the "host" order. */ + struct bindings *host; + } bindings; + + /* Current working directory, à la /proc/self/pwd. */ + char *cwd; +} FileSystemNameSpace; + +/* Virtual heap, emulated with a regular memory mapping. */ +typedef struct { + word_t base; + size_t size; + bool disabled; +} Heap; + +/* Information related to a tracee process. */ +typedef struct tracee { + /********************************************************************** + * Private resources * + **********************************************************************/ + + /* Link for the list of all tracees. */ + LIST_ENTRY(tracee) link; + + /* Process identifier. */ + pid_t pid; + + /* Unique tracee identifier. */ + uint64_t vpid; + + /* Is it currently running or not? */ + bool running; + + /* Is this tracee ready to be freed? TODO: move to a list + * dedicated to terminated tracees instead. */ + bool terminated; + + /* Whether termination of this tracee implies an immediate kill + * of all tracees. 
*/ + bool killall_on_exit; + + /* Parent of this tracee, NULL if none. */ + struct tracee *parent; + + /* Is it a "clone", i.e has the same parent as its creator. */ + bool clone; + + /* Support for ptrace emulation (tracer side). */ + struct { + size_t nb_ptracees; + LIST_HEAD(zombies, tracee) zombies; + + pid_t wait_pid; + word_t wait_options; + + enum { + DOESNT_WAIT = 0, + WAITS_IN_KERNEL, + WAITS_IN_PROOT + } waits_in; + } as_ptracer; + + /* Support for ptrace emulation (tracee side). */ + struct { + struct tracee *ptracer; + + struct { + #define STRUCT_EVENT struct { int value; bool pending; } + + STRUCT_EVENT proot; + STRUCT_EVENT ptracer; + } event4; + + bool tracing_started; + bool ignore_loader_syscalls; + bool ignore_syscalls; + word_t options; + bool is_zombie; + } as_ptracee; + + /* Current status: + * 0: enter syscall + * 1: exit syscall no error + * -errno: exit syscall with error. */ + int status; + +#define IS_IN_SYSENTER(tracee) ((tracee)->status == 0) +#define IS_IN_SYSEXIT(tracee) (!IS_IN_SYSENTER(tracee)) +#define IS_IN_SYSEXIT2(tracee, sysnum) (IS_IN_SYSEXIT(tracee) \ + && get_sysnum((tracee), ORIGINAL) == sysnum) + + /* How this tracee is restarted. */ + PTRACE_REQUEST_TYPE restart_how; + + /* Value of the tracee's general purpose registers. */ + struct user_regs_struct _regs[NB_REG_VERSION]; + bool _regs_were_changed; + bool restore_original_regs; + + /* State for the special handling of SIGSTOP. */ + enum { + SIGSTOP_IGNORED = 0, /* Ignore SIGSTOP (once the parent is known). */ + SIGSTOP_ALLOWED, /* Allow SIGSTOP (once the parent is known). */ + SIGSTOP_PENDING, /* Block SIGSTOP until the parent is unknown. */ + } sigstop; + + /* Context used to collect all the temporary dynamic memory + * allocations. */ + TALLOC_CTX *ctx; + + /* Context used to collect all dynamic memory allocations that + * should be released once this tracee is freed. 
*/ + TALLOC_CTX *life_context; + + /* Note: I could rename "ctx" in "event_span" and + * "life_context" in "life_span". */ + + /* Specify the type of the final component during the + * initialization of a binding. This variable is first + * defined in bind_path() then used in build_glue(). */ + mode_t glue_type; + + /* During a sub-reconfiguration, the new setup is relatively + * to @tracee's file-system name-space. Also, @paths holds + * its $PATH environment variable in order to emulate the + * execvp(3) behavior. */ + struct { + struct tracee *tracee; + const char *paths; + } reconf; + + /* Unrequested syscalls inserted by PRoot after an actual + * syscall. */ + struct { + struct chained_syscalls *syscalls; + bool force_final_result; + word_t final_result; + } chain; + + /* Load info generated during execve sysenter and used during + * execve sysexit. */ + struct load_info *load_info; + + /* Disable mixed-execution (native host) check */ + bool mixed_mode; + + /********************************************************************** + * Private but inherited resources * + **********************************************************************/ + + /* Verbose level. */ + int verbose; + + /* State of the seccomp acceleration for this tracee. */ + enum { DISABLED = 0, DISABLING, ENABLED } seccomp; + + /* Ensure the sysexit stage is always hit under seccomp. */ + bool sysexit_pending; + + + /********************************************************************** + * Shared or private resources, depending on the CLONE_FS/VM flags. * + **********************************************************************/ + + /* Information related to a file-system name-space. */ + FileSystemNameSpace *fs; + + /* Virtual heap, emulated with a regular memory mapping. */ + Heap *heap; + + + /********************************************************************** + * Shared resources until the tracee makes a call to execve(). 
* + **********************************************************************/ + + /* Path to the executable, à la /proc/self/exe. */ + char *exe; + char *new_exe; + + + /********************************************************************** + * Shared or private resources, depending on the (re-)configuration * + **********************************************************************/ + + /* Runner command-line. */ + char **qemu; + + /* Path to glue between the guest rootfs and the host rootfs. */ + const char *glue; + + /* List of extensions enabled for this tracee. */ + struct extensions *extensions; + + + /********************************************************************** + * Shared but read-only resources * + **********************************************************************/ + + /* For the mixed-mode, the guest LD_LIBRARY_PATH is saved + * during the "guest -> host" transition, in order to be + * restored during the "host -> guest" transition (only if the + * host LD_LIBRARY_PATH hasn't changed). */ + const char *host_ldso_paths; + const char *guest_ldso_paths; + + /* For diagnostic purpose. 
*/ + const char *tool_name; + +} Tracee; + +#define HOST_ROOTFS "/host-rootfs" + +#define TRACEE(a) talloc_get_type_abort(talloc_parent(talloc_parent(a)), Tracee) + +extern Tracee *get_tracee(const Tracee *tracee, pid_t pid, bool create); +extern Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, + bool only_with_pevent, word_t wait_options); +extern Tracee *get_stopped_ptracee(const Tracee *ptracer, pid_t pid, + bool only_with_pevent, word_t wait_options); +extern bool has_ptracees(const Tracee *ptracer, pid_t pid, word_t wait_options); +extern int new_child(Tracee *parent, word_t clone_flags); +extern Tracee *new_dummy_tracee(TALLOC_CTX *context); +extern void terminate_tracee(Tracee *tracee); +extern void free_terminated_tracees(); +extern int swap_config(Tracee *tracee1, Tracee *tracee2); +extern void kill_all_tracees(); + +#endif /* TRACEE_H */ diff --git a/proot/proot_test.go b/proot/proot_test.go new file mode 100644 index 0000000..37400cc --- /dev/null +++ b/proot/proot_test.go @@ -0,0 +1,7 @@ +package proot + +import "testing" + +func TestProot(t *testing.T) { + NewProc() +} \ No newline at end of file -- 2.51.0 From 40d558ece1bd5850a49ece06cf4dd7a513606f64 Mon Sep 17 00:00:00 2001 From: Matheus Sampaio Queiroga Date: Wed, 9 Jul 2025 00:15:26 -0300 Subject: [PATCH 2/5] Update to syscall proot --- .../arch.h | 0 .../attribute.h | 0 .../build.h | 0 .../cli/cli.c | 0 .../cli/cli.h | 0 .../cli/note.c | 0 .../cli/note.h | 0 .../cli/proot.c | 0 .../cli/proot.h | 0 .../compat.h | 0 .../execve/aoxp.c | 0 .../execve/aoxp.h | 0 .../execve/auxv.c | 0 .../execve/auxv.h | 0 .../execve/elf.c | 0 .../execve/elf.h | 0 .../execve/enter.c | 0 .../execve/execve.h | 0 .../execve/exit.c | 0 .../execve/ldso.c | 0 .../execve/ldso.h | 0 .../execve/shebang.c | 0 .../execve/shebang.h | 0 .../extension/care/archive.c | 0 .../extension/care/archive.h | 0 .../extension/care/care.c | 0 .../extension/care/care.h | 0 .../extension/care/extract.c | 0 
.../extension/care/extract.h | 0 .../extension/care/final.c | 0 .../extension/care/final.h | 0 .../extension/extension.c | 0 .../extension/extension.h | 0 .../extension/extension/care/archive.h | 0 .../extension/extension/care/care.h | 0 .../extension/extension/care/extract.h | 0 .../extension/extension/care/final.h | 0 .../extension/extension/extension.h | 0 .../extension/extension/portmap/portmap.h | 0 .../extension/extension/python/proot.i | 0 .../extension/python/python_extension.py | 0 .../extension/fake_id0/fake_id0.c | 0 .../extension/kompat/kompat.c | 0 .../extension/link2symlink/link2symlink.c | 0 .../extension/portmap/map.c | 0 .../extension/portmap/portmap.c | 0 .../extension/portmap/portmap.h | 0 .../extension/python/proot.i | 0 .../extension/python/python.c | 0 .../extension/python/python_extension.py | 0 .../loader/assembly-arm.h | 0 .../loader/assembly-arm64.h | 0 .../loader/assembly-x86.h | 0 .../loader/assembly-x86_64.h | 0 .../loader/assembly.S | 0 .../loader/loader | Bin .../loader/loader-m32 | Bin .../loader/loader.c | 0 .../loader/script.h | 0 .../path/binding.c | 0 .../path/binding.h | 0 .../path/canon.c | 0 .../path/canon.h | 0 .../path/glue.c | 0 .../path/glue.h | 0 .../path/path.c | 0 .../path/path.h | 0 .../path/proc.c | 0 .../path/proc.h | 0 .../path/temp.c | 0 .../path/temp.h | 0 .../proot.go | 0 .../ptrace/ptrace.c | 0 .../ptrace/ptrace.h | 0 .../ptrace/user.c | 0 .../ptrace/user.h | 0 .../ptrace/wait.c | 0 .../ptrace/wait.h | 0 .../syscall/chain.c | 0 .../syscall/chain.h | 0 .../syscall/enter.c | 0 .../syscall/exit.c | 0 .../syscall/heap.c | 0 .../syscall/heap.h | 0 .../syscall/rlimit.c | 0 .../syscall/rlimit.h | 0 .../syscall/seccomp.c | 0 .../syscall/seccomp.h | 0 .../syscall/socket.c | 0 .../syscall/socket.h | 0 .../syscall/syscall.c | 0 .../syscall/syscall.h | 0 .../syscall/sysnum.c | 0 .../syscall/sysnum.h | 0 .../syscall/sysnums-arm.h | 0 .../syscall/sysnums-arm64.h | 0 .../syscall/sysnums-i386.h | 0 .../syscall/sysnums-sh4.h | 0 
.../syscall/sysnums-x32.h | 0 .../syscall/sysnums-x86_64.h | 0 .../syscall/sysnums.list | 0 .../tracee/abi.h | 0 .../tracee/event.c | 0 .../tracee/event.h | 0 .../tracee/mem.c | 0 .../tracee/mem.h | 0 .../tracee/reg.c | 0 .../tracee/reg.h | 0 .../tracee/tracee.c | 184 ++-- .../tracee/tracee.h | 0 proot/elf_linux.go | 141 +++ proot/proot.go | 65 +- proot/proot_linux.go | 855 +++++++++++++++++- proot/proot_test.go | 21 +- 114 files changed, 1152 insertions(+), 114 deletions(-) rename proot/{proot_linux => binding_proot_linux}/arch.h (100%) rename proot/{proot_linux => binding_proot_linux}/attribute.h (100%) rename proot/{proot_linux => binding_proot_linux}/build.h (100%) rename proot/{proot_linux => binding_proot_linux}/cli/cli.c (100%) rename proot/{proot_linux => binding_proot_linux}/cli/cli.h (100%) rename proot/{proot_linux => binding_proot_linux}/cli/note.c (100%) rename proot/{proot_linux => binding_proot_linux}/cli/note.h (100%) rename proot/{proot_linux => binding_proot_linux}/cli/proot.c (100%) rename proot/{proot_linux => binding_proot_linux}/cli/proot.h (100%) rename proot/{proot_linux => binding_proot_linux}/compat.h (100%) rename proot/{proot_linux => binding_proot_linux}/execve/aoxp.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/aoxp.h (100%) rename proot/{proot_linux => binding_proot_linux}/execve/auxv.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/auxv.h (100%) rename proot/{proot_linux => binding_proot_linux}/execve/elf.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/elf.h (100%) rename proot/{proot_linux => binding_proot_linux}/execve/enter.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/execve.h (100%) rename proot/{proot_linux => binding_proot_linux}/execve/exit.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/ldso.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/ldso.h (100%) rename proot/{proot_linux => 
binding_proot_linux}/execve/shebang.c (100%) rename proot/{proot_linux => binding_proot_linux}/execve/shebang.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/archive.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/archive.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/care.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/care.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/extract.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/extract.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/final.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/care/final.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/care/archive.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/care/care.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/care/extract.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/care/final.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/extension.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/portmap/portmap.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/python/proot.i (100%) rename proot/{proot_linux => binding_proot_linux}/extension/extension/python/python_extension.py (100%) rename proot/{proot_linux => binding_proot_linux}/extension/fake_id0/fake_id0.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/kompat/kompat.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/link2symlink/link2symlink.c (100%) rename proot/{proot_linux => 
binding_proot_linux}/extension/portmap/map.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/portmap/portmap.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/portmap/portmap.h (100%) rename proot/{proot_linux => binding_proot_linux}/extension/python/proot.i (100%) rename proot/{proot_linux => binding_proot_linux}/extension/python/python.c (100%) rename proot/{proot_linux => binding_proot_linux}/extension/python/python_extension.py (100%) rename proot/{proot_linux => binding_proot_linux}/loader/assembly-arm.h (100%) rename proot/{proot_linux => binding_proot_linux}/loader/assembly-arm64.h (100%) rename proot/{proot_linux => binding_proot_linux}/loader/assembly-x86.h (100%) rename proot/{proot_linux => binding_proot_linux}/loader/assembly-x86_64.h (100%) rename proot/{proot_linux => binding_proot_linux}/loader/assembly.S (100%) rename proot/{proot_linux => binding_proot_linux}/loader/loader (100%) rename proot/{proot_linux => binding_proot_linux}/loader/loader-m32 (100%) rename proot/{proot_linux => binding_proot_linux}/loader/loader.c (100%) rename proot/{proot_linux => binding_proot_linux}/loader/script.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/binding.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/binding.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/canon.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/canon.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/glue.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/glue.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/path.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/path.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/proc.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/proc.h (100%) rename proot/{proot_linux => binding_proot_linux}/path/temp.c (100%) rename proot/{proot_linux => binding_proot_linux}/path/temp.h 
(100%) rename proot/{proot_linux => binding_proot_linux}/proot.go (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/ptrace.c (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/ptrace.h (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/user.c (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/user.h (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/wait.c (100%) rename proot/{proot_linux => binding_proot_linux}/ptrace/wait.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/chain.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/chain.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/enter.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/exit.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/heap.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/heap.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/rlimit.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/rlimit.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/seccomp.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/seccomp.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/socket.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/socket.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/syscall.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/syscall.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnum.c (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnum.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-arm.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-arm64.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-i386.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-sh4.h 
(100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-x32.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums-x86_64.h (100%) rename proot/{proot_linux => binding_proot_linux}/syscall/sysnums.list (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/abi.h (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/event.c (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/event.h (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/mem.c (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/mem.h (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/reg.c (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/reg.h (100%) rename proot/{proot_linux => binding_proot_linux}/tracee/tracee.c (78%) rename proot/{proot_linux => binding_proot_linux}/tracee/tracee.h (100%) create mode 100644 proot/elf_linux.go diff --git a/proot/proot_linux/arch.h b/proot/binding_proot_linux/arch.h similarity index 100% rename from proot/proot_linux/arch.h rename to proot/binding_proot_linux/arch.h diff --git a/proot/proot_linux/attribute.h b/proot/binding_proot_linux/attribute.h similarity index 100% rename from proot/proot_linux/attribute.h rename to proot/binding_proot_linux/attribute.h diff --git a/proot/proot_linux/build.h b/proot/binding_proot_linux/build.h similarity index 100% rename from proot/proot_linux/build.h rename to proot/binding_proot_linux/build.h diff --git a/proot/proot_linux/cli/cli.c b/proot/binding_proot_linux/cli/cli.c similarity index 100% rename from proot/proot_linux/cli/cli.c rename to proot/binding_proot_linux/cli/cli.c diff --git a/proot/proot_linux/cli/cli.h b/proot/binding_proot_linux/cli/cli.h similarity index 100% rename from proot/proot_linux/cli/cli.h rename to proot/binding_proot_linux/cli/cli.h diff --git a/proot/proot_linux/cli/note.c b/proot/binding_proot_linux/cli/note.c similarity index 100% rename from proot/proot_linux/cli/note.c rename to 
proot/binding_proot_linux/cli/note.c diff --git a/proot/proot_linux/cli/note.h b/proot/binding_proot_linux/cli/note.h similarity index 100% rename from proot/proot_linux/cli/note.h rename to proot/binding_proot_linux/cli/note.h diff --git a/proot/proot_linux/cli/proot.c b/proot/binding_proot_linux/cli/proot.c similarity index 100% rename from proot/proot_linux/cli/proot.c rename to proot/binding_proot_linux/cli/proot.c diff --git a/proot/proot_linux/cli/proot.h b/proot/binding_proot_linux/cli/proot.h similarity index 100% rename from proot/proot_linux/cli/proot.h rename to proot/binding_proot_linux/cli/proot.h diff --git a/proot/proot_linux/compat.h b/proot/binding_proot_linux/compat.h similarity index 100% rename from proot/proot_linux/compat.h rename to proot/binding_proot_linux/compat.h diff --git a/proot/proot_linux/execve/aoxp.c b/proot/binding_proot_linux/execve/aoxp.c similarity index 100% rename from proot/proot_linux/execve/aoxp.c rename to proot/binding_proot_linux/execve/aoxp.c diff --git a/proot/proot_linux/execve/aoxp.h b/proot/binding_proot_linux/execve/aoxp.h similarity index 100% rename from proot/proot_linux/execve/aoxp.h rename to proot/binding_proot_linux/execve/aoxp.h diff --git a/proot/proot_linux/execve/auxv.c b/proot/binding_proot_linux/execve/auxv.c similarity index 100% rename from proot/proot_linux/execve/auxv.c rename to proot/binding_proot_linux/execve/auxv.c diff --git a/proot/proot_linux/execve/auxv.h b/proot/binding_proot_linux/execve/auxv.h similarity index 100% rename from proot/proot_linux/execve/auxv.h rename to proot/binding_proot_linux/execve/auxv.h diff --git a/proot/proot_linux/execve/elf.c b/proot/binding_proot_linux/execve/elf.c similarity index 100% rename from proot/proot_linux/execve/elf.c rename to proot/binding_proot_linux/execve/elf.c diff --git a/proot/proot_linux/execve/elf.h b/proot/binding_proot_linux/execve/elf.h similarity index 100% rename from proot/proot_linux/execve/elf.h rename to 
proot/binding_proot_linux/execve/elf.h diff --git a/proot/proot_linux/execve/enter.c b/proot/binding_proot_linux/execve/enter.c similarity index 100% rename from proot/proot_linux/execve/enter.c rename to proot/binding_proot_linux/execve/enter.c diff --git a/proot/proot_linux/execve/execve.h b/proot/binding_proot_linux/execve/execve.h similarity index 100% rename from proot/proot_linux/execve/execve.h rename to proot/binding_proot_linux/execve/execve.h diff --git a/proot/proot_linux/execve/exit.c b/proot/binding_proot_linux/execve/exit.c similarity index 100% rename from proot/proot_linux/execve/exit.c rename to proot/binding_proot_linux/execve/exit.c diff --git a/proot/proot_linux/execve/ldso.c b/proot/binding_proot_linux/execve/ldso.c similarity index 100% rename from proot/proot_linux/execve/ldso.c rename to proot/binding_proot_linux/execve/ldso.c diff --git a/proot/proot_linux/execve/ldso.h b/proot/binding_proot_linux/execve/ldso.h similarity index 100% rename from proot/proot_linux/execve/ldso.h rename to proot/binding_proot_linux/execve/ldso.h diff --git a/proot/proot_linux/execve/shebang.c b/proot/binding_proot_linux/execve/shebang.c similarity index 100% rename from proot/proot_linux/execve/shebang.c rename to proot/binding_proot_linux/execve/shebang.c diff --git a/proot/proot_linux/execve/shebang.h b/proot/binding_proot_linux/execve/shebang.h similarity index 100% rename from proot/proot_linux/execve/shebang.h rename to proot/binding_proot_linux/execve/shebang.h diff --git a/proot/proot_linux/extension/care/archive.c b/proot/binding_proot_linux/extension/care/archive.c similarity index 100% rename from proot/proot_linux/extension/care/archive.c rename to proot/binding_proot_linux/extension/care/archive.c diff --git a/proot/proot_linux/extension/care/archive.h b/proot/binding_proot_linux/extension/care/archive.h similarity index 100% rename from proot/proot_linux/extension/care/archive.h rename to proot/binding_proot_linux/extension/care/archive.h diff 
--git a/proot/proot_linux/extension/care/care.c b/proot/binding_proot_linux/extension/care/care.c similarity index 100% rename from proot/proot_linux/extension/care/care.c rename to proot/binding_proot_linux/extension/care/care.c diff --git a/proot/proot_linux/extension/care/care.h b/proot/binding_proot_linux/extension/care/care.h similarity index 100% rename from proot/proot_linux/extension/care/care.h rename to proot/binding_proot_linux/extension/care/care.h diff --git a/proot/proot_linux/extension/care/extract.c b/proot/binding_proot_linux/extension/care/extract.c similarity index 100% rename from proot/proot_linux/extension/care/extract.c rename to proot/binding_proot_linux/extension/care/extract.c diff --git a/proot/proot_linux/extension/care/extract.h b/proot/binding_proot_linux/extension/care/extract.h similarity index 100% rename from proot/proot_linux/extension/care/extract.h rename to proot/binding_proot_linux/extension/care/extract.h diff --git a/proot/proot_linux/extension/care/final.c b/proot/binding_proot_linux/extension/care/final.c similarity index 100% rename from proot/proot_linux/extension/care/final.c rename to proot/binding_proot_linux/extension/care/final.c diff --git a/proot/proot_linux/extension/care/final.h b/proot/binding_proot_linux/extension/care/final.h similarity index 100% rename from proot/proot_linux/extension/care/final.h rename to proot/binding_proot_linux/extension/care/final.h diff --git a/proot/proot_linux/extension/extension.c b/proot/binding_proot_linux/extension/extension.c similarity index 100% rename from proot/proot_linux/extension/extension.c rename to proot/binding_proot_linux/extension/extension.c diff --git a/proot/proot_linux/extension/extension.h b/proot/binding_proot_linux/extension/extension.h similarity index 100% rename from proot/proot_linux/extension/extension.h rename to proot/binding_proot_linux/extension/extension.h diff --git a/proot/proot_linux/extension/extension/care/archive.h 
b/proot/binding_proot_linux/extension/extension/care/archive.h similarity index 100% rename from proot/proot_linux/extension/extension/care/archive.h rename to proot/binding_proot_linux/extension/extension/care/archive.h diff --git a/proot/proot_linux/extension/extension/care/care.h b/proot/binding_proot_linux/extension/extension/care/care.h similarity index 100% rename from proot/proot_linux/extension/extension/care/care.h rename to proot/binding_proot_linux/extension/extension/care/care.h diff --git a/proot/proot_linux/extension/extension/care/extract.h b/proot/binding_proot_linux/extension/extension/care/extract.h similarity index 100% rename from proot/proot_linux/extension/extension/care/extract.h rename to proot/binding_proot_linux/extension/extension/care/extract.h diff --git a/proot/proot_linux/extension/extension/care/final.h b/proot/binding_proot_linux/extension/extension/care/final.h similarity index 100% rename from proot/proot_linux/extension/extension/care/final.h rename to proot/binding_proot_linux/extension/extension/care/final.h diff --git a/proot/proot_linux/extension/extension/extension.h b/proot/binding_proot_linux/extension/extension/extension.h similarity index 100% rename from proot/proot_linux/extension/extension/extension.h rename to proot/binding_proot_linux/extension/extension/extension.h diff --git a/proot/proot_linux/extension/extension/portmap/portmap.h b/proot/binding_proot_linux/extension/extension/portmap/portmap.h similarity index 100% rename from proot/proot_linux/extension/extension/portmap/portmap.h rename to proot/binding_proot_linux/extension/extension/portmap/portmap.h diff --git a/proot/proot_linux/extension/extension/python/proot.i b/proot/binding_proot_linux/extension/extension/python/proot.i similarity index 100% rename from proot/proot_linux/extension/extension/python/proot.i rename to proot/binding_proot_linux/extension/extension/python/proot.i diff --git 
a/proot/proot_linux/extension/extension/python/python_extension.py b/proot/binding_proot_linux/extension/extension/python/python_extension.py similarity index 100% rename from proot/proot_linux/extension/extension/python/python_extension.py rename to proot/binding_proot_linux/extension/extension/python/python_extension.py diff --git a/proot/proot_linux/extension/fake_id0/fake_id0.c b/proot/binding_proot_linux/extension/fake_id0/fake_id0.c similarity index 100% rename from proot/proot_linux/extension/fake_id0/fake_id0.c rename to proot/binding_proot_linux/extension/fake_id0/fake_id0.c diff --git a/proot/proot_linux/extension/kompat/kompat.c b/proot/binding_proot_linux/extension/kompat/kompat.c similarity index 100% rename from proot/proot_linux/extension/kompat/kompat.c rename to proot/binding_proot_linux/extension/kompat/kompat.c diff --git a/proot/proot_linux/extension/link2symlink/link2symlink.c b/proot/binding_proot_linux/extension/link2symlink/link2symlink.c similarity index 100% rename from proot/proot_linux/extension/link2symlink/link2symlink.c rename to proot/binding_proot_linux/extension/link2symlink/link2symlink.c diff --git a/proot/proot_linux/extension/portmap/map.c b/proot/binding_proot_linux/extension/portmap/map.c similarity index 100% rename from proot/proot_linux/extension/portmap/map.c rename to proot/binding_proot_linux/extension/portmap/map.c diff --git a/proot/proot_linux/extension/portmap/portmap.c b/proot/binding_proot_linux/extension/portmap/portmap.c similarity index 100% rename from proot/proot_linux/extension/portmap/portmap.c rename to proot/binding_proot_linux/extension/portmap/portmap.c diff --git a/proot/proot_linux/extension/portmap/portmap.h b/proot/binding_proot_linux/extension/portmap/portmap.h similarity index 100% rename from proot/proot_linux/extension/portmap/portmap.h rename to proot/binding_proot_linux/extension/portmap/portmap.h diff --git a/proot/proot_linux/extension/python/proot.i 
b/proot/binding_proot_linux/extension/python/proot.i similarity index 100% rename from proot/proot_linux/extension/python/proot.i rename to proot/binding_proot_linux/extension/python/proot.i diff --git a/proot/proot_linux/extension/python/python.c b/proot/binding_proot_linux/extension/python/python.c similarity index 100% rename from proot/proot_linux/extension/python/python.c rename to proot/binding_proot_linux/extension/python/python.c diff --git a/proot/proot_linux/extension/python/python_extension.py b/proot/binding_proot_linux/extension/python/python_extension.py similarity index 100% rename from proot/proot_linux/extension/python/python_extension.py rename to proot/binding_proot_linux/extension/python/python_extension.py diff --git a/proot/proot_linux/loader/assembly-arm.h b/proot/binding_proot_linux/loader/assembly-arm.h similarity index 100% rename from proot/proot_linux/loader/assembly-arm.h rename to proot/binding_proot_linux/loader/assembly-arm.h diff --git a/proot/proot_linux/loader/assembly-arm64.h b/proot/binding_proot_linux/loader/assembly-arm64.h similarity index 100% rename from proot/proot_linux/loader/assembly-arm64.h rename to proot/binding_proot_linux/loader/assembly-arm64.h diff --git a/proot/proot_linux/loader/assembly-x86.h b/proot/binding_proot_linux/loader/assembly-x86.h similarity index 100% rename from proot/proot_linux/loader/assembly-x86.h rename to proot/binding_proot_linux/loader/assembly-x86.h diff --git a/proot/proot_linux/loader/assembly-x86_64.h b/proot/binding_proot_linux/loader/assembly-x86_64.h similarity index 100% rename from proot/proot_linux/loader/assembly-x86_64.h rename to proot/binding_proot_linux/loader/assembly-x86_64.h diff --git a/proot/proot_linux/loader/assembly.S b/proot/binding_proot_linux/loader/assembly.S similarity index 100% rename from proot/proot_linux/loader/assembly.S rename to proot/binding_proot_linux/loader/assembly.S diff --git a/proot/proot_linux/loader/loader 
b/proot/binding_proot_linux/loader/loader similarity index 100% rename from proot/proot_linux/loader/loader rename to proot/binding_proot_linux/loader/loader diff --git a/proot/proot_linux/loader/loader-m32 b/proot/binding_proot_linux/loader/loader-m32 similarity index 100% rename from proot/proot_linux/loader/loader-m32 rename to proot/binding_proot_linux/loader/loader-m32 diff --git a/proot/proot_linux/loader/loader.c b/proot/binding_proot_linux/loader/loader.c similarity index 100% rename from proot/proot_linux/loader/loader.c rename to proot/binding_proot_linux/loader/loader.c diff --git a/proot/proot_linux/loader/script.h b/proot/binding_proot_linux/loader/script.h similarity index 100% rename from proot/proot_linux/loader/script.h rename to proot/binding_proot_linux/loader/script.h diff --git a/proot/proot_linux/path/binding.c b/proot/binding_proot_linux/path/binding.c similarity index 100% rename from proot/proot_linux/path/binding.c rename to proot/binding_proot_linux/path/binding.c diff --git a/proot/proot_linux/path/binding.h b/proot/binding_proot_linux/path/binding.h similarity index 100% rename from proot/proot_linux/path/binding.h rename to proot/binding_proot_linux/path/binding.h diff --git a/proot/proot_linux/path/canon.c b/proot/binding_proot_linux/path/canon.c similarity index 100% rename from proot/proot_linux/path/canon.c rename to proot/binding_proot_linux/path/canon.c diff --git a/proot/proot_linux/path/canon.h b/proot/binding_proot_linux/path/canon.h similarity index 100% rename from proot/proot_linux/path/canon.h rename to proot/binding_proot_linux/path/canon.h diff --git a/proot/proot_linux/path/glue.c b/proot/binding_proot_linux/path/glue.c similarity index 100% rename from proot/proot_linux/path/glue.c rename to proot/binding_proot_linux/path/glue.c diff --git a/proot/proot_linux/path/glue.h b/proot/binding_proot_linux/path/glue.h similarity index 100% rename from proot/proot_linux/path/glue.h rename to 
proot/binding_proot_linux/path/glue.h diff --git a/proot/proot_linux/path/path.c b/proot/binding_proot_linux/path/path.c similarity index 100% rename from proot/proot_linux/path/path.c rename to proot/binding_proot_linux/path/path.c diff --git a/proot/proot_linux/path/path.h b/proot/binding_proot_linux/path/path.h similarity index 100% rename from proot/proot_linux/path/path.h rename to proot/binding_proot_linux/path/path.h diff --git a/proot/proot_linux/path/proc.c b/proot/binding_proot_linux/path/proc.c similarity index 100% rename from proot/proot_linux/path/proc.c rename to proot/binding_proot_linux/path/proc.c diff --git a/proot/proot_linux/path/proc.h b/proot/binding_proot_linux/path/proc.h similarity index 100% rename from proot/proot_linux/path/proc.h rename to proot/binding_proot_linux/path/proc.h diff --git a/proot/proot_linux/path/temp.c b/proot/binding_proot_linux/path/temp.c similarity index 100% rename from proot/proot_linux/path/temp.c rename to proot/binding_proot_linux/path/temp.c diff --git a/proot/proot_linux/path/temp.h b/proot/binding_proot_linux/path/temp.h similarity index 100% rename from proot/proot_linux/path/temp.h rename to proot/binding_proot_linux/path/temp.h diff --git a/proot/proot_linux/proot.go b/proot/binding_proot_linux/proot.go similarity index 100% rename from proot/proot_linux/proot.go rename to proot/binding_proot_linux/proot.go diff --git a/proot/proot_linux/ptrace/ptrace.c b/proot/binding_proot_linux/ptrace/ptrace.c similarity index 100% rename from proot/proot_linux/ptrace/ptrace.c rename to proot/binding_proot_linux/ptrace/ptrace.c diff --git a/proot/proot_linux/ptrace/ptrace.h b/proot/binding_proot_linux/ptrace/ptrace.h similarity index 100% rename from proot/proot_linux/ptrace/ptrace.h rename to proot/binding_proot_linux/ptrace/ptrace.h diff --git a/proot/proot_linux/ptrace/user.c b/proot/binding_proot_linux/ptrace/user.c similarity index 100% rename from proot/proot_linux/ptrace/user.c rename to 
proot/binding_proot_linux/ptrace/user.c diff --git a/proot/proot_linux/ptrace/user.h b/proot/binding_proot_linux/ptrace/user.h similarity index 100% rename from proot/proot_linux/ptrace/user.h rename to proot/binding_proot_linux/ptrace/user.h diff --git a/proot/proot_linux/ptrace/wait.c b/proot/binding_proot_linux/ptrace/wait.c similarity index 100% rename from proot/proot_linux/ptrace/wait.c rename to proot/binding_proot_linux/ptrace/wait.c diff --git a/proot/proot_linux/ptrace/wait.h b/proot/binding_proot_linux/ptrace/wait.h similarity index 100% rename from proot/proot_linux/ptrace/wait.h rename to proot/binding_proot_linux/ptrace/wait.h diff --git a/proot/proot_linux/syscall/chain.c b/proot/binding_proot_linux/syscall/chain.c similarity index 100% rename from proot/proot_linux/syscall/chain.c rename to proot/binding_proot_linux/syscall/chain.c diff --git a/proot/proot_linux/syscall/chain.h b/proot/binding_proot_linux/syscall/chain.h similarity index 100% rename from proot/proot_linux/syscall/chain.h rename to proot/binding_proot_linux/syscall/chain.h diff --git a/proot/proot_linux/syscall/enter.c b/proot/binding_proot_linux/syscall/enter.c similarity index 100% rename from proot/proot_linux/syscall/enter.c rename to proot/binding_proot_linux/syscall/enter.c diff --git a/proot/proot_linux/syscall/exit.c b/proot/binding_proot_linux/syscall/exit.c similarity index 100% rename from proot/proot_linux/syscall/exit.c rename to proot/binding_proot_linux/syscall/exit.c diff --git a/proot/proot_linux/syscall/heap.c b/proot/binding_proot_linux/syscall/heap.c similarity index 100% rename from proot/proot_linux/syscall/heap.c rename to proot/binding_proot_linux/syscall/heap.c diff --git a/proot/proot_linux/syscall/heap.h b/proot/binding_proot_linux/syscall/heap.h similarity index 100% rename from proot/proot_linux/syscall/heap.h rename to proot/binding_proot_linux/syscall/heap.h diff --git a/proot/proot_linux/syscall/rlimit.c b/proot/binding_proot_linux/syscall/rlimit.c 
similarity index 100% rename from proot/proot_linux/syscall/rlimit.c rename to proot/binding_proot_linux/syscall/rlimit.c diff --git a/proot/proot_linux/syscall/rlimit.h b/proot/binding_proot_linux/syscall/rlimit.h similarity index 100% rename from proot/proot_linux/syscall/rlimit.h rename to proot/binding_proot_linux/syscall/rlimit.h diff --git a/proot/proot_linux/syscall/seccomp.c b/proot/binding_proot_linux/syscall/seccomp.c similarity index 100% rename from proot/proot_linux/syscall/seccomp.c rename to proot/binding_proot_linux/syscall/seccomp.c diff --git a/proot/proot_linux/syscall/seccomp.h b/proot/binding_proot_linux/syscall/seccomp.h similarity index 100% rename from proot/proot_linux/syscall/seccomp.h rename to proot/binding_proot_linux/syscall/seccomp.h diff --git a/proot/proot_linux/syscall/socket.c b/proot/binding_proot_linux/syscall/socket.c similarity index 100% rename from proot/proot_linux/syscall/socket.c rename to proot/binding_proot_linux/syscall/socket.c diff --git a/proot/proot_linux/syscall/socket.h b/proot/binding_proot_linux/syscall/socket.h similarity index 100% rename from proot/proot_linux/syscall/socket.h rename to proot/binding_proot_linux/syscall/socket.h diff --git a/proot/proot_linux/syscall/syscall.c b/proot/binding_proot_linux/syscall/syscall.c similarity index 100% rename from proot/proot_linux/syscall/syscall.c rename to proot/binding_proot_linux/syscall/syscall.c diff --git a/proot/proot_linux/syscall/syscall.h b/proot/binding_proot_linux/syscall/syscall.h similarity index 100% rename from proot/proot_linux/syscall/syscall.h rename to proot/binding_proot_linux/syscall/syscall.h diff --git a/proot/proot_linux/syscall/sysnum.c b/proot/binding_proot_linux/syscall/sysnum.c similarity index 100% rename from proot/proot_linux/syscall/sysnum.c rename to proot/binding_proot_linux/syscall/sysnum.c diff --git a/proot/proot_linux/syscall/sysnum.h b/proot/binding_proot_linux/syscall/sysnum.h similarity index 100% rename from 
proot/proot_linux/syscall/sysnum.h rename to proot/binding_proot_linux/syscall/sysnum.h diff --git a/proot/proot_linux/syscall/sysnums-arm.h b/proot/binding_proot_linux/syscall/sysnums-arm.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-arm.h rename to proot/binding_proot_linux/syscall/sysnums-arm.h diff --git a/proot/proot_linux/syscall/sysnums-arm64.h b/proot/binding_proot_linux/syscall/sysnums-arm64.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-arm64.h rename to proot/binding_proot_linux/syscall/sysnums-arm64.h diff --git a/proot/proot_linux/syscall/sysnums-i386.h b/proot/binding_proot_linux/syscall/sysnums-i386.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-i386.h rename to proot/binding_proot_linux/syscall/sysnums-i386.h diff --git a/proot/proot_linux/syscall/sysnums-sh4.h b/proot/binding_proot_linux/syscall/sysnums-sh4.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-sh4.h rename to proot/binding_proot_linux/syscall/sysnums-sh4.h diff --git a/proot/proot_linux/syscall/sysnums-x32.h b/proot/binding_proot_linux/syscall/sysnums-x32.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-x32.h rename to proot/binding_proot_linux/syscall/sysnums-x32.h diff --git a/proot/proot_linux/syscall/sysnums-x86_64.h b/proot/binding_proot_linux/syscall/sysnums-x86_64.h similarity index 100% rename from proot/proot_linux/syscall/sysnums-x86_64.h rename to proot/binding_proot_linux/syscall/sysnums-x86_64.h diff --git a/proot/proot_linux/syscall/sysnums.list b/proot/binding_proot_linux/syscall/sysnums.list similarity index 100% rename from proot/proot_linux/syscall/sysnums.list rename to proot/binding_proot_linux/syscall/sysnums.list diff --git a/proot/proot_linux/tracee/abi.h b/proot/binding_proot_linux/tracee/abi.h similarity index 100% rename from proot/proot_linux/tracee/abi.h rename to proot/binding_proot_linux/tracee/abi.h diff --git a/proot/proot_linux/tracee/event.c 
b/proot/binding_proot_linux/tracee/event.c similarity index 100% rename from proot/proot_linux/tracee/event.c rename to proot/binding_proot_linux/tracee/event.c diff --git a/proot/proot_linux/tracee/event.h b/proot/binding_proot_linux/tracee/event.h similarity index 100% rename from proot/proot_linux/tracee/event.h rename to proot/binding_proot_linux/tracee/event.h diff --git a/proot/proot_linux/tracee/mem.c b/proot/binding_proot_linux/tracee/mem.c similarity index 100% rename from proot/proot_linux/tracee/mem.c rename to proot/binding_proot_linux/tracee/mem.c diff --git a/proot/proot_linux/tracee/mem.h b/proot/binding_proot_linux/tracee/mem.h similarity index 100% rename from proot/proot_linux/tracee/mem.h rename to proot/binding_proot_linux/tracee/mem.h diff --git a/proot/proot_linux/tracee/reg.c b/proot/binding_proot_linux/tracee/reg.c similarity index 100% rename from proot/proot_linux/tracee/reg.c rename to proot/binding_proot_linux/tracee/reg.c diff --git a/proot/proot_linux/tracee/reg.h b/proot/binding_proot_linux/tracee/reg.h similarity index 100% rename from proot/proot_linux/tracee/reg.h rename to proot/binding_proot_linux/tracee/reg.h diff --git a/proot/proot_linux/tracee/tracee.c b/proot/binding_proot_linux/tracee/tracee.c similarity index 78% rename from proot/proot_linux/tracee/tracee.c rename to proot/binding_proot_linux/tracee/tracee.c index 65286dd..1791186 100644 --- a/proot/proot_linux/tracee/tracee.c +++ b/proot/binding_proot_linux/tracee/tracee.c @@ -20,18 +20,18 @@ * 02110-1301 USA. 
*/ -#include /* CLONE_*, */ -#include /* pid_t, size_t, */ -#include /* NULL, */ -#include /* assert(3), */ -#include /* bzero(3), */ -#include /* bool, true, false, */ -#include /* LIST_*, */ -#include /* talloc_*, */ -#include /* kill(2), SIGKILL, */ +#include /* CLONE_*, */ +#include /* pid_t, size_t, */ +#include /* NULL, */ +#include /* assert(3), */ +#include /* bzero(3), */ +#include /* bool, true, false, */ +#include /* LIST_*, */ +#include /* talloc_*, */ +#include /* kill(2), SIGKILL, */ #include /* ptrace(2), PTRACE_*, */ -#include /* E*, */ -#include /* PRI*, */ +#include /* E*, */ +#include /* PRI*, */ #include "tracee/tracee.h" #include "tracee/reg.h" @@ -46,13 +46,12 @@ #include "compat.h" #ifndef __W_STOPCODE -#define __W_STOPCODE(sig) ((sig) <<8 | 0x7f) +#define __W_STOPCODE(sig) ((sig) << 8 | 0x7f) #endif typedef LIST_HEAD(tracees, tracee) Tracees; static Tracees tracees; - /** * Remove @zombie from its parent's list of zombies. Note: this is a * talloc destructor. @@ -68,7 +67,7 @@ static int remove_zombie(Tracee *zombie) * type, before it gets unlinked from @tracee_->life_context. */ static void clean_life_span_object(const void *pointer, int depth UNUSED, - int max_depth UNUSED, int is_ref UNUSED, void *tracee_) + int max_depth UNUSED, int is_ref UNUSED, void *tracee_) { Binding *binding; Tracee *tracee; @@ -100,24 +99,28 @@ static int remove_tracee(Tracee *tracee) /* This could be optimize by using a dedicated list of * children and ptracees. */ - LIST_FOREACH(relative, &tracees, link) { + LIST_FOREACH(relative, &tracees, link) + { /* Its children are now orphan. */ if (relative->parent == tracee) relative->parent = NULL; /* Its tracees are now free. */ - if (relative->as_ptracee.ptracer == tracee) { + if (relative->as_ptracee.ptracer == tracee) + { /* Release the pending event, if any. 
*/ relative->as_ptracee.ptracer = NULL; - if (relative->as_ptracee.event4.proot.pending) { + if (relative->as_ptracee.event4.proot.pending) + { event = handle_tracee_event(relative, - relative->as_ptracee.event4.proot.value); - (void) restart_tracee(relative, event); + relative->as_ptracee.event4.proot.value); + (void)restart_tracee(relative, event); } - else if (relative->as_ptracee.event4.ptracer.pending) { + else if (relative->as_ptracee.event4.ptracer.pending) + { event = relative->as_ptracee.event4.proot.value; - (void) restart_tracee(relative, event); + (void)restart_tracee(relative, event); } bzero(&relative->as_ptracee, sizeof(relative->as_ptracee)); @@ -132,12 +135,13 @@ static int remove_tracee(Tracee *tracee) /* Zombify this ptracee until its ptracer is notified about * its death. */ event = tracee->as_ptracee.event4.ptracer.value; - if (tracee->as_ptracee.event4.ptracer.pending - && (WIFEXITED(event) || WIFSIGNALED(event))) { + if (tracee->as_ptracee.event4.ptracer.pending && (WIFEXITED(event) || WIFSIGNALED(event))) + { Tracee *zombie; zombie = new_dummy_tracee(ptracer); - if (zombie != NULL) { + if (zombie != NULL) + { LIST_INSERT_HEAD(&PTRACER.zombies, zombie, link); talloc_set_destructor(zombie, remove_zombie); @@ -160,15 +164,16 @@ static int remove_tracee(Tracee *tracee) detach_from_ptracer(tracee); /* Wake its ptracer if there's nothing else to wait for. */ - if (PTRACER.nb_ptracees == 0 && PTRACER.wait_pid != 0) { + if (PTRACER.nb_ptracees == 0 && PTRACER.wait_pid != 0) + { /* Update the return value of ptracer's wait(2). */ poke_reg(ptracer, SYSARG_RESULT, -ECHILD); /* Don't forget to write its register cache back. 
*/ - (void) push_regs(ptracer); + (void)push_regs(ptracer); PTRACER.wait_pid = 0; - (void) restart_tracee(ptracer, 0); + (void)restart_tracee(ptracer, 0); } return 0; @@ -229,9 +234,15 @@ static Tracee *new_tracee(pid_t pid) tracee->pid = pid; tracee->vpid = next_vpid++; - LIST_INSERT_HEAD(&tracees, tracee, link); + do + { + if (((tracee)->link.le_next = (&tracees)->lh_first) != ((void *)0)) + (&tracees)->lh_first->link.le_prev = &(tracee)->link.le_next; + (&tracees)->lh_first = (tracee); + (tracee)->link.le_prev = &(&tracees)->lh_first; + } while (0) - tracee->life_context = talloc_new(tracee); + tracee->life_context = talloc_new(tracee); return tracee; } @@ -244,12 +255,13 @@ static Tracee *new_tracee(pid_t pid) * returns NULL if there's no such ptracee. */ Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, - bool only_with_pevent, word_t wait_options) + bool only_with_pevent, word_t wait_options) { Tracee *ptracee; /* Return zombies first. */ - LIST_FOREACH(ptracee, &PTRACER.zombies, link) { + LIST_FOREACH(ptracee, &PTRACER.zombies, link) + { /* Not the ptracee you're looking for? */ if (pid != ptracee->pid && pid != -1) continue; @@ -261,7 +273,8 @@ Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, return ptracee; } - LIST_FOREACH(ptracee, &tracees, link) { + LIST_FOREACH(ptracee, &tracees, link) + { /* Discard tracees that don't have this ptracer. */ if (PTRACEE.ptracer != ptracer) continue; @@ -302,7 +315,7 @@ Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, * returned (or NULL). 
*/ Tracee *get_stopped_ptracee(const Tracee *ptracer, pid_t pid, - bool only_with_pevent, word_t wait_options) + bool only_with_pevent, word_t wait_options) { return get_ptracee(ptracer, pid, true, only_with_pevent, wait_options); } @@ -331,8 +344,10 @@ Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create) if (current_tracee != NULL && current_tracee->pid == pid) return (Tracee *)current_tracee; - LIST_FOREACH(tracee, &tracees, link) { - if (tracee->pid == pid) { + for ((tracee) = ((&tracees)->lh_first); (tracee); (tracee) = ((tracee)->link.le_next)) + { + if (tracee->pid == pid) + { /* Flush then allocate a new memory collector. */ TALLOC_FREE(tracee->ctx); tracee->ctx = talloc_new(tracee); @@ -349,15 +364,16 @@ Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create) */ void terminate_tracee(Tracee *tracee) { - tracee->terminated = true; + tracee->terminated = true; - /* Case where the terminated tracee is marked - to kill all tracees on exit. - */ - if (tracee->killall_on_exit) { - VERBOSE(tracee, 1, "terminating all tracees on exit"); - kill_all_tracees(); - } + /* Case where the terminated tracee is marked + to kill all tracees on exit. + */ + if (tracee->killall_on_exit) + { + VERBOSE(tracee, 1, "terminating all tracees on exit"); + kill_all_tracees(); + } } /** @@ -369,7 +385,8 @@ void free_terminated_tracees() /* Items can't be deleted when using LIST_FOREACH. */ next = tracees.lh_first; - while (next != NULL) { + while (next != NULL) + { Tracee *tracee = next; next = tracee->link.le_next; @@ -406,28 +423,21 @@ int new_child(Tracee *parent, word_t clone_flags) /* Get the pid of the parent's new child. 
*/ status = ptrace(PTRACE_GETEVENTMSG, parent->pid, NULL, &pid); - if (status < 0 || pid == 0) { + if (status < 0 || pid == 0) + { note(parent, WARNING, SYSTEM, "ptrace(GETEVENTMSG)"); return status; } - child = get_tracee(parent, (pid_t) pid, true); - if (child == NULL) { + child = get_tracee(parent, (pid_t)pid, true); + if (child == NULL) + { note(parent, WARNING, SYSTEM, "running out of memory"); return -ENOMEM; } /* Sanity checks. */ - assert(child != NULL - && child->exe == NULL - && child->fs->cwd == NULL - && child->fs->bindings.pending == NULL - && child->fs->bindings.guest == NULL - && child->fs->bindings.host == NULL - && child->qemu == NULL - && child->glue == NULL - && child->parent == NULL - && child->as_ptracee.ptracer == NULL); + assert(child != NULL && child->exe == NULL && child->fs->cwd == NULL && child->fs->bindings.pending == NULL && child->fs->bindings.guest == NULL && child->fs->bindings.host == NULL && child->qemu == NULL && child->glue == NULL && child->parent == NULL && child->as_ptracee.ptracer == NULL); child->verbose = parent->verbose; child->seccomp = parent->seccomp; @@ -450,8 +460,8 @@ int new_child(Tracee *parent, word_t clone_flags) */ TALLOC_FREE(child->heap); child->heap = ((clone_flags & CLONE_VM) != 0) - ? talloc_reference(child, parent->heap) - : talloc_memdup(child, parent->heap, sizeof(Heap)); + ? talloc_reference(child, parent->heap) + : talloc_memdup(child, parent->heap, sizeof(Heap)); if (child->heap == NULL) return -ENOMEM; @@ -478,25 +488,17 @@ int new_child(Tracee *parent, word_t clone_flags) /* Depending on how the new process is created, it may be * automatically traced by the parent's tracer. */ - ptrace_options = ( clone_flags == 0 ? PTRACE_O_TRACEFORK - : (clone_flags & 0xFF) == SIGCHLD ? PTRACE_O_TRACEFORK - : (clone_flags & CLONE_VFORK) != 0 ? 
PTRACE_O_TRACEVFORK - : PTRACE_O_TRACECLONE); - if (parent->as_ptracee.ptracer != NULL - && ( (ptrace_options & parent->as_ptracee.options) != 0 - || (clone_flags & CLONE_PTRACE) != 0)) { + ptrace_options = (clone_flags == 0 ? PTRACE_O_TRACEFORK + : (clone_flags & 0xFF) == SIGCHLD ? PTRACE_O_TRACEFORK + : (clone_flags & CLONE_VFORK) != 0 ? PTRACE_O_TRACEVFORK + : PTRACE_O_TRACECLONE); + if (parent->as_ptracee.ptracer != NULL && ((ptrace_options & parent->as_ptracee.options) != 0 || (clone_flags & CLONE_PTRACE) != 0)) + { attach_to_ptracer(child, parent->as_ptracee.ptracer); /* All these flags are inheritable, no matter why this * child is being traced. */ - child->as_ptracee.options |= (parent->as_ptracee.options - & ( PTRACE_O_TRACECLONE - | PTRACE_O_TRACEEXEC - | PTRACE_O_TRACEEXIT - | PTRACE_O_TRACEFORK - | PTRACE_O_TRACESYSGOOD - | PTRACE_O_TRACEVFORK - | PTRACE_O_TRACEVFORKDONE)); + child->as_ptracee.options |= (parent->as_ptracee.options & (PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC | PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK | PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE)); } /* If CLONE_FS is set, the parent and the child process share @@ -515,11 +517,13 @@ int new_child(Tracee *parent, word_t clone_flags) * -- clone(2) man-page */ TALLOC_FREE(child->fs); - if ((clone_flags & CLONE_FS) != 0) { + if ((clone_flags & CLONE_FS) != 0) + { /* File-system name-space is shared. */ child->fs = talloc_reference(child, parent->fs); } - else { + else + { /* File-system name-space is copied. */ child->fs = talloc_zero(child, FileSystemNameSpace); if (child->fs == NULL) @@ -535,7 +539,7 @@ int new_child(Tracee *parent, word_t clone_flags) * under Linux. Actually they are copied when a sub * reconfiguration occured (nested proot or chroot(2)). 
*/ child->fs->bindings.guest = talloc_reference(child->fs, parent->fs->bindings.guest); - child->fs->bindings.host = talloc_reference(child->fs, parent->fs->bindings.host); + child->fs->bindings.host = talloc_reference(child->fs, parent->fs->bindings.host); } /* The path to the executable is unshared only once the child @@ -545,7 +549,7 @@ int new_child(Tracee *parent, word_t clone_flags) child->qemu = talloc_reference(child, parent->qemu); child->glue = talloc_reference(child, parent->glue); - child->host_ldso_paths = talloc_reference(child, parent->host_ldso_paths); + child->host_ldso_paths = talloc_reference(child, parent->host_ldso_paths); child->guest_ldso_paths = talloc_reference(child, parent->guest_ldso_paths); child->tool_name = parent->tool_name; @@ -554,13 +558,15 @@ int new_child(Tracee *parent, word_t clone_flags) /* Restart the child tracee if it was already alive but * stopped until that moment. */ - if (child->sigstop == SIGSTOP_PENDING) { + if (child->sigstop == SIGSTOP_PENDING) + { bool keep_stopped = false; child->sigstop = SIGSTOP_ALLOWED; /* Notify its ptracer if it is ready to be traced. */ - if (child->as_ptracee.ptracer != NULL) { + if (child->as_ptracee.ptracer != NULL) + { /* Sanity check. */ assert(!child->as_ptracee.tracing_started); @@ -570,11 +576,11 @@ int new_child(Tracee *parent, word_t clone_flags) * PRoot since child->as_ptracee.ptracer was * NULL up to now. 
*/ child->as_ptracee.event4.proot.pending = false; - child->as_ptracee.event4.proot.value = 0; + child->as_ptracee.event4.proot.value = 0; } if (!keep_stopped) - (void) restart_tracee(child, 0); + (void)restart_tracee(child, 0); } VERBOSE(child, 1, "vpid %" PRIu64 ": pid %d", child->vpid, child->pid); @@ -589,10 +595,12 @@ static void reparent_config(Tracee *new_parent, Tracee *old_parent) { new_parent->verbose = old_parent->verbose; -#define REPARENT(field) do { \ - talloc_reparent(old_parent, new_parent, old_parent->field); \ - new_parent->field = old_parent->field; \ - } while(0); +#define REPARENT(field) \ + do \ + { \ + talloc_reparent(old_parent, new_parent, old_parent->field); \ + new_parent->field = old_parent->field; \ + } while (0); REPARENT(fs); REPARENT(exe); @@ -614,7 +622,7 @@ int swap_config(Tracee *tracee1, Tracee *tracee2) if (tmp == NULL) return -ENOMEM; - reparent_config(tmp, tracee1); + reparent_config(tmp, tracee1); reparent_config(tracee1, tracee2); reparent_config(tracee2, tmp); @@ -627,5 +635,5 @@ void kill_all_tracees() Tracee *tracee; LIST_FOREACH(tracee, &tracees, link) - kill(tracee->pid, SIGKILL); + kill(tracee->pid, SIGKILL); } diff --git a/proot/proot_linux/tracee/tracee.h b/proot/binding_proot_linux/tracee/tracee.h similarity index 100% rename from proot/proot_linux/tracee/tracee.h rename to proot/binding_proot_linux/tracee/tracee.h diff --git a/proot/elf_linux.go b/proot/elf_linux.go new file mode 100644 index 0000000..e779a19 --- /dev/null +++ b/proot/elf_linux.go @@ -0,0 +1,141 @@ +package proot + +import "unsafe" + +const EI_NIDENT = 16 + +const ( + ET_REL = 1 + ET_EXEC = 2 + ET_DYN = 3 + ET_CORE = 4 +) + +const ( + PF_X = 1 + PF_W = 2 + PF_R = 4 +) + +const ( + PT_LOAD = 1 + PT_DYNAMIC = 2 + PT_INTERP = 3 + PT_GNU_STACK = 0x6474e551 +) + +const ( + DT_STRTAB = 5 + DT_RPATH = 15 + DT_RUNPATH = 29 +) + +type ElfHeader32 struct { + EIdent [EI_NIDENT]byte + EType uint16 + EMachine uint16 + EVersion uint32 + EEntry uint32 + EPhoff 
uint32 + EShoff uint32 + EFlags uint32 + EEhsize uint16 + EPhentsize uint16 + EPhnum uint16 + EShentsize uint16 + EShnum uint16 + EShstrndx uint16 +} + +type ElfHeader64 struct { + EIdent [EI_NIDENT]byte + EType uint16 + EMachine uint16 + EVersion uint32 + EEntry uint64 + EPhoff uint64 + EShoff uint64 + EFlags uint32 + EEhsize uint16 + EPhentsize uint16 + EPhnum uint16 + EShentsize uint16 + EShnum uint16 + EShstrndx uint16 +} + +type ElfHeader struct { + Class32 ElfHeader32 + Class64 ElfHeader64 +} + +type ProgramHeader32 struct { + PType uint32 + POffset uint32 + PVaddr uint32 + PPaddr uint32 + PFilesz uint32 + PMemsz uint32 + PFlags uint32 + PAlign uint32 +} + +type ProgramHeader64 struct { + PType uint32 + PFlags uint32 + POffset uint64 + PVaddr uint64 + PPaddr uint64 + PFilesz uint64 + PMemsz uint64 + PAlign uint64 +} + +type ProgramHeader struct { + Class32 ProgramHeader32 + Class64 ProgramHeader64 +} + +type DynamicEntry32 struct { + DTag int32 + DVal uint32 +} + +type DynamicEntry64 struct { + DTag int64 + DVal uint64 +} + +type DynamicEntry struct { + Class32 DynamicEntry32 + Class64 DynamicEntry64 +} + +func ElfClass(header ElfHeader) byte { + return header.Class32.EIdent[4] +} + +func IsClass32(header ElfHeader) bool { + return ElfClass(header) == 1 +} + +func IsClass64(header ElfHeader) bool { + return ElfClass(header) == 2 +} + +func KnownPhEntSize(header ElfHeader, size int) bool { + return (IsClass32(header) && size == int(unsafe.Sizeof(ProgramHeader32{}))) || + (IsClass64(header) && size == int(unsafe.Sizeof(ProgramHeader64{}))) +} + +func IsPositionIndependent(header ElfHeader) bool { + return ElfField(header, "Type") == ET_DYN +} + +// Simples substituto — personalize conforme seu uso real +func ElfField(header ElfHeader, field string) int { + if IsClass64(header) { + return int(header.Class64.EType) + } + return int(header.Class32.EType) +} diff --git a/proot/proot.go b/proot/proot.go index 195912a..5aa8af5 100644 --- a/proot/proot.go +++ 
b/proot/proot.go @@ -1,11 +1,66 @@ // Exec proot with rootfs without root account package proot -import "os" +import ( + "io" + "os/exec" + + goexec "sirherobrine23.com.br/go-bds/exec/exec" +) type Proot struct { - Rootfs string - Qemu string + Rootfs string // Rootfs to bind / + Qemu string // qemu tool to call - Stdin, Stdout, Stderr *os.File -} \ No newline at end of file + Cmd *exec.Cmd // Golang process + EventLocked bool + Err error + + last_exit_status int + Tracees []*Tracee + vpid int + + Stdin io.Reader + Stdout, Stderr io.Writer +} + +func (proot *Proot) Wait() error { + switch { + case proot.Cmd != nil && proot.Cmd.ProcessState != nil: + if !proot.Cmd.ProcessState.Success() { + return &exec.ExitError{ProcessState: proot.Cmd.ProcessState} + } + return nil + case proot.Cmd != nil && proot.Cmd.Process != nil: + return proot.Cmd.Wait() + default: + return goexec.ErrNoProcess + } +} +func (proot *Proot) ExitCode() (int, error) { + switch { + case proot.Cmd != nil && proot.Cmd.ProcessState != nil: + return proot.Cmd.ProcessState.ExitCode(), nil + case proot.Cmd != nil && proot.Cmd.Process != nil: + state, err := proot.Cmd.Process.Wait() + if err != nil { + return -1, err + } + return state.ExitCode(), nil + default: + return -1, goexec.ErrNoProcess + } +} + +func (proot *Proot) AttachStdin(s io.Reader) error { + proot.Stdin = s + return nil +} +func (proot *Proot) AttachStdout(s io.Writer) error { + proot.Stdout = s + return nil +} +func (proot *Proot) AttachStderr(s io.Writer) error { + proot.Stderr = s + return nil +} diff --git a/proot/proot_linux.go b/proot/proot_linux.go index 0171dd9..1e62de7 100644 --- a/proot/proot_linux.go +++ b/proot/proot_linux.go @@ -1,31 +1,850 @@ -//go:build cgo && (linux || android) +//go:build linux || android package proot import ( + "fmt" + "os" "os/exec" + "runtime" "syscall" + "time" - prootsrc "sirherobrine23.com.br/go-bds/exec/proot/proot_linux" + "golang.org/x/sys/unix" + + goexec 
"sirherobrine23.com.br/go-bds/exec/exec" ) -func NewProc() (any, error) { - ptr, err := prootsrc.NewCli() +var _ = goexec.Register("proot", NewProc) + +func NewProc() (*Proot, error) { + return &Proot{ + Rootfs: "/", + Cmd: nil, + }, nil +} + +func (proot *Proot) Kill() error { return proot.Cmd.Process.Kill() } +func (proot *Proot) Signal(s os.Signal) error { return proot.Cmd.Process.Signal(s) } + +func (proot *Proot) Close() error { + + return nil +} + +func (proot *Proot) Start(options *goexec.Exec) error { + proot.Cmd = exec.Command(options.Arguments[0], options.Arguments[1:]...) + if err := proot.Cmd.Err; err != nil { + return err + } + if proot.Cmd.SysProcAttr == nil { + proot.Cmd.SysProcAttr = &syscall.SysProcAttr{} + } + proot.Cmd.SysProcAttr.Ptrace = true + proot.Cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER + + runtime.LockOSThread() // Lock thread to use PTRACE + proot.EventLocked = true + // go proot.Event() // Start background loop event + proot.Cmd.Start() + + proot.Event() // Start background loop event + return nil +} + +func (proot *Proot) Event() { + var wstatus unix.WaitStatus + var rusage unix.Rusage + for { + if proot.Cmd == nil || proot.Cmd.Process == nil { + <-time.After(time.Microsecond) // wait 1ms to check + continue + } + + wpid, err := unix.Wait4(proot.Cmd.Process.Pid, &wstatus, unix.WALL, &rusage) + switch err { + case nil: + case syscall.Errno(3), syscall.Errno(10): + unix.Kill(wpid, unix.PTRACE_CONT) + continue + default: + panic(err) + } + fmt.Printf("wpid: %d, status %08d, Exited: %5t, Signaled: %t, Stopped: %t, Continued: %t\n", + wpid, + wstatus, + wstatus.Exited(), + wstatus.Signaled(), + wstatus.Stopped(), + wstatus.Continued(), + ) + + tracee := proot.GetTracee(nil, wpid, true) + tracee.Running = false + + if tracee.AsPtracee != nil { + keep_stopped := proot.handle_ptracee_event(tracee, wstatus) + if keep_stopped { + continue + } + } + + signal := proot.handle_tracee_event(tracee, wstatus) + proot.restart_tracee(tracee, 
unix.Signal(signal)) + } +} + +func (proot *Proot) GetTracee(current_tracee *Tracee, wpid int, create bool) *Tracee { + if current_tracee != nil && current_tracee.Pid == wpid { + return current_tracee + } + + for _, tracee := range proot.Tracees { + if tracee.Pid == wpid { + return tracee + } + } + + if create { + tracee := new(Tracee) + tracee.Pid = wpid + proot.vpid++ + tracee.Vpid = uint64(proot.vpid) + proot.Tracees = append(proot.Tracees, tracee) + + return tracee + } + return nil +} + +func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus) (keep_stopped bool) { + ptracer := ptracee.AsPtracee.Ptracer + var handled_by_proot_first bool + + ptracee.AsPtracee.Event4.Proot.Value = int(event) + ptracee.AsPtracee.Event4.Proot.Pending = true + + keep_stopped = true + + if event.Stopped() { + switch unix.Signal((event & 0xfff00) >> 8) { + case unix.SIGTRAP | 0x80: + if ptracee.AsPtracee.IgnoreSyscalls || ptracee.AsPtracee.IgnoreLoaderSyscalls { + return false + } else if ptracee.AsPtracee.Options&unix.PTRACE_O_TRACESYSGOOD == 0 { + // event &= ~(0x80 << 8); + // event &= ^(0x80 << 8) + handled_by_proot_first = ptracee.Status == 0 + } + + // FORK + case unix.SIGTRAP | unix.PTRACE_EVENT_FORK<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEFORK) == 0 { + return false + } + ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + + // VFORK + case unix.SIGTRAP | unix.PTRACE_EVENT_VFORK<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEVFORK) == 0 { + return false + } + ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + // VFORKDONE + case unix.SIGTRAP | unix.PTRACE_EVENT_VFORK_DONE<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEVFORKDONE) == 0 { + return false + } + ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + // CLONE + case unix.SIGTRAP | unix.PTRACE_EVENT_CLONE<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACECLONE) == 0 { + return false + } + 
ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + // EXIT + case unix.SIGTRAP | unix.PTRACE_EVENT_EXIT<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEEXIT) == 0 { + return false + } + ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + // EXEC + case unix.SIGTRAP | unix.PTRACE_EVENT_EXEC<<8: + if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEEXEC) == 0 { + return false + } + ptracer.AsPtracee.TracingStarted = true + handled_by_proot_first = true + + case unix.SIGTRAP | unix.PTRACE_EVENT_SECCOMP<<8: + return false + default: + ptracee.AsPtracee.TracingStarted = true + } + } else if event.Exited() || event.Signaled() { + ptracee.AsPtracee.TracingStarted = true + keep_stopped = false + } + + if !ptracee.AsPtracee.TracingStarted { + return false + } + + if handled_by_proot_first { + ptracee.AsPtracee.Event4.Proot.Value = proot.handle_tracee_event(ptracee, unix.WaitStatus(ptracee.AsPtracee.Event4.Proot.Value)) + } + + ptracee.AsPtracee.Event4.Ptracer.Value = int(event) + ptracee.AsPtracee.Event4.Ptracer.Pending = true + + unix.Kill(ptracee.Pid, unix.SIGCHLD) + + //#define EXPECTED_WAIT_CLONE(wait_options,tracee) ((((wait_options) & __WALL) != 0) || ((((wait_options) & __WCLONE) != 0) && (tracee)->clone) || ((((wait_options) & __WCLONE) == 0) && !(tracee)->clone)) + // if ( (PTRACER.wait_pid == -1 || PTRACER.wait_pid == ptracee->pid) && EXPECTED_WAIT_CLONE(PTRACER.wait_options, ptracee)) + if ptracer.AsPtracer.WaitPid == -1 || ptracer.AsPtracer.WaitPid == ptracee.Pid && + (ptracer.AsPtracer.WaitOptions&unix.WALL != 0 || (ptracer.AsPtracer.WaitOptions&unix.WCLONE != 0 && ptracee.Clone) || (ptracer.AsPtracer.WaitOptions&unix.WCLONE == 0 && !ptracee.Clone)) { + status := proot.update_wait_status(ptracer, ptracee) + if status != 0 { + // poke_reg(ptracer, SYSARG_RESULT, (word_t) status); + } + /* Write ptracer's register cache back. 
*/ + // (void) push_regs(ptracer); + + ptracer.AsPtracer.WaitPid = 0 + restarted := proot.restart_tracee(ptracer, 0) + if !restarted { + keep_stopped = false + } + } + + return +} + +func (proot *Proot) update_wait_status(ptracer, ptracee *Tracee) (result int) { + + // /* Special case: the Linux kernel reports the terminating + // * event issued by a process to both its parent and its + // * tracer, except when they are the same. In this case the + // * Linux kernel reports the terminating event only once to the + // * tracing parent ... */ + // if (PTRACEE.ptracer == ptracee->parent + // && (WIFEXITED(PTRACEE.event4.ptracer.value) + // || WIFSIGNALED(PTRACEE.event4.ptracer.value))) { + // /* ... So hide this terminating event (toward its + // * tracer, ie. PRoot) and make the second one appear + // * (towards its parent, ie. the ptracer). This will + // * ensure its exit status is collected from a kernel + // * point-of-view (ie. it doesn't stay a zombie + // * forever). */ + // restart_original_syscall(ptracer); + + // /* Detach this ptracee from its ptracer, PRoot doesn't + // * have anything else to emulate. */ + // detach_from_ptracer(ptracee); + + // /* Zombies can rest in peace once the ptracer is + // * notified. 
*/ + // if (PTRACEE.is_zombie) + // TALLOC_FREE(ptracee); + + return 0 +} + +func (proot *Proot) restart_tracee(tracee *Tracee, signal unix.Signal) bool { + // tracee->as_ptracer.wait_pid != 0 || signal == -1 + if tracee.AsPtracer.WaitPid != 0 || signal == -1 { + return false + } + + err := ptrace(tracee.RestartHow, tracee.Pid, 0, uintptr(signal)) if err != nil { + return false + } + + tracee.RestartHow = 0 + tracee.Running = true + return true +} + +func is_kernel_4_8() bool { + var ust unix.Utsname + unix.Uname(&ust) + + var major, minor int + fmt.Sscanf(string(ust.Release[:]), "%d.%d", &major, &minor) + + return (major == 4 && minor >= 8) || major > 4 +} + +func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status unix.WaitStatus) int { + /* Don't overwrite restart_how if it is explicitly set + * elsewhere, i.e in the ptrace emulation when single + * stepping. */ + if tracee.RestartHow == 0 { + /* When seccomp is enabled, all events are restarted in + * non-stop mode, but this default choice could be overwritten + * later if necessary. The check against "sysexit_pending" + * ensures PTRACE_SYSCALL (used to hit the exit stage under + * seccomp) is not cleared due to an event that would happen + * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit + * stage of execve(2). */ + if tracee.Seccomp == 2 && !tracee.SysexitPending { + tracee.RestartHow = unix.PTRACE_CONT + } else { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + } + + // var seccomp_detected, seccomp_enabled bool + var + // status, + signal int + + /* Not a signal-stop by default. 
*/ + signal = 0 + + if tracee_status.Exited() { + // if (WIFEXITED(tracee_status)) { + // last_exit_status = WEXITSTATUS(tracee_status); + // VERBOSE(tracee, 1, + // "vpid %" PRIu64 ": exited with status %d", + // tracee->vpid, last_exit_status); + // terminate_tracee(tracee); + // } + proot.last_exit_status = tracee_status.ExitStatus() + } else if tracee_status.Signaled() { + // else if (WIFSIGNALED(tracee_status)) { + // check_architecture(tracee); + // VERBOSE(tracee, 1, + // "vpid %" PRIu64 ": terminated with signal %d", + // tracee->vpid, WTERMSIG(tracee_status)); + // terminate_tracee(tracee); + // } + } else if tracee_status.Stopped() { + /* Don't use WSTOPSIG() to extract the signal + * since it clears the PTRACE_EVENT_* bits. */ + signal = (int(tracee_status) & 0xfff00) >> 8 + // static bool deliver_sigtrap = false; + // switch (signal) { + // case SIGTRAP: { + // const unsigned long default_ptrace_options = ( + // PTRACE_O_TRACESYSGOOD | + // PTRACE_O_TRACEFORK | + // PTRACE_O_TRACEVFORK | + // PTRACE_O_TRACEVFORKDONE | + // PTRACE_O_TRACEEXEC | + // PTRACE_O_TRACECLONE | + // PTRACE_O_TRACEEXIT); + // /* Distinguish some events from others and + // * automatically trace each new process with + // * the same options. + // * + // * Note that only the first bare SIGTRAP is + // * related to the tracing loop, others SIGTRAP + // * carry tracing information because of + // * TRACE*FORK/CLONE/EXEC. */ + // if (deliver_sigtrap) + // break; /* Deliver this signal as-is. */ + // deliver_sigtrap = true; + // /* Try to enable seccomp mode 2... */ + // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + // default_ptrace_options | PTRACE_O_TRACESECCOMP); + // if (status < 0) { + // seccomp_enabled = false; + // /* ... otherwise use default options only. 
*/ + // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + // default_ptrace_options); + // if (status < 0) { + // note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); + // exit(EXIT_FAILURE); + // } + // } + // else { + // if (getenv("PROOT_NO_SECCOMP") == NULL) + // seccomp_enabled = true; + // } + // } + // /* Fall through. */ + // case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: + // case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: + // if (!seccomp_detected && seccomp_enabled) { + // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); + // tracee->seccomp = ENABLED; + // seccomp_detected = true; + // } + // if (signal == (SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8) || + // signal == (SIGTRAP | PTRACE_EVENT_SECCOMP << 8)) { + // unsigned long flags = 0; + // signal = 0; + // /* Use the common ptrace flow if seccomp was + // * explicitly disabled for this tracee. */ + // if (tracee->seccomp != ENABLED) + // break; + // status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); + // if (status < 0) + // break; + // if ((flags & FILTER_SYSEXIT) == 0) { + // tracee->restart_how = PTRACE_CONT; + // translate_syscall(tracee); + // if (tracee->seccomp == DISABLING) + // tracee->restart_how = PTRACE_SYSCALL; + // break; + // } + // } + // /* Fall through. */ + // case SIGTRAP | 0x80: + // signal = 0; + // /* This tracee got signaled then freed during the + // sysenter stage but the kernel reports the sysexit + // stage; just discard this spurious tracee/event. */ + // if (tracee->exe == NULL) { + // tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ + // return 0; + // } + // switch (tracee->seccomp) { + // case ENABLED: + // if (IS_IN_SYSENTER(tracee)) { + // /* sysenter: ensure the sysexit + // * stage will be hit under seccomp. */ + // tracee->restart_how = PTRACE_SYSCALL; + // tracee->sysexit_pending = true; + // } + // else { + // /* sysexit: the next sysenter + // * will be notified by seccomp. 
*/ + // tracee->restart_how = PTRACE_CONT; + // tracee->sysexit_pending = false; + // } + // /* Fall through. */ + // case DISABLED: + // translate_syscall(tracee); + // /* This syscall has disabled seccomp. */ + // if (tracee->seccomp == DISABLING) { + // tracee->restart_how = PTRACE_SYSCALL; + // tracee->seccomp = DISABLED; + // } + // break; + // case DISABLING: + // /* Seccomp was disabled by the + // * previous syscall, but its sysenter + // * stage was already handled. */ + // tracee->seccomp = DISABLED; + // if (IS_IN_SYSENTER(tracee)) + // tracee->status = 1; + // break; + // } + // break; + // case SIGTRAP | PTRACE_EVENT_VFORK << 8: + // signal = 0; + // (void) new_child(tracee, CLONE_VFORK); + // break; + // case SIGTRAP | PTRACE_EVENT_FORK << 8: + // case SIGTRAP | PTRACE_EVENT_CLONE << 8: + // signal = 0; + // (void) new_child(tracee, 0); + // break; + // case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: + // case SIGTRAP | PTRACE_EVENT_EXEC << 8: + // case SIGTRAP | PTRACE_EVENT_EXIT << 8: + // signal = 0; + // break; + // case SIGSTOP: + // /* Stop this tracee until PRoot has received + // * the EVENT_*FORK|CLONE notification. */ + // if (tracee->exe == NULL) { + // tracee->sigstop = SIGSTOP_PENDING; + // signal = -1; + // } + // /* For each tracee, the first SIGSTOP + // * is only used to notify the tracer. */ + // if (tracee->sigstop == SIGSTOP_IGNORED) { + // tracee->sigstop = SIGSTOP_ALLOWED; + // signal = 0; + // } + // break; + // default: + // /* Deliver this signal as-is. */ + // break; + // } + } + + // /* Clear the pending event, if any. 
*/ + // tracee.AsPtracee.Event4.Proot.Pending = false + + return signal +} + +func (proot *Proot) handle_tracee_event(tracee *Tracee, tracee_status unix.WaitStatus) int { + if is_kernel_4_8() { + return proot.handle_tracee_event_kernel_4_8(tracee, tracee_status) + } + + // static bool seccomp_detected = false; + // long status; + // int signal; + // /* Don't overwrite restart_how if it is explicitly set + // * elsewhere, i.e in the ptrace emulation when single + // * stepping. */ + // if (tracee->restart_how == 0) { + // /* When seccomp is enabled, all events are restarted in + // * non-stop mode, but this default choice could be overwritten + // * later if necessary. The check against "sysexit_pending" + // * ensures PTRACE_SYSCALL (used to hit the exit stage under + // * seccomp) is not cleared due to an event that would happen + // * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit + // * stage of execve(2). */ + // if (tracee->seccomp == ENABLED && !tracee->sysexit_pending) + // tracee->restart_how = PTRACE_CONT; + // else + // tracee->restart_how = PTRACE_SYSCALL; + // } + + // /* Not a signal-stop by default. */ + // signal = 0; + + // if (WIFEXITED(tracee_status)) { + // last_exit_status = WEXITSTATUS(tracee_status); + // VERBOSE(tracee, 1, + // "vpid %" PRIu64 ": exited with status %d", + // tracee->vpid, last_exit_status); + // terminate_tracee(tracee); + // } + // else if (WIFSIGNALED(tracee_status)) { + // check_architecture(tracee); + // VERBOSE(tracee, 1, + // "vpid %" PRIu64 ": terminated with signal %d", + // tracee->vpid, WTERMSIG(tracee_status)); + // terminate_tracee(tracee); + // } + // else if (WIFSTOPPED(tracee_status)) { + // /* Don't use WSTOPSIG() to extract the signal + // * since it clears the PTRACE_EVENT_* bits. 
*/ + // signal = (tracee_status & 0xfff00) >> 8; + + // switch (signal) { + // static bool deliver_sigtrap = false; + + // case SIGTRAP: { + // const unsigned long default_ptrace_options = ( + // PTRACE_O_TRACESYSGOOD | + // PTRACE_O_TRACEFORK | + // PTRACE_O_TRACEVFORK | + // PTRACE_O_TRACEVFORKDONE | + // PTRACE_O_TRACEEXEC | + // PTRACE_O_TRACECLONE | + // PTRACE_O_TRACEEXIT); + + // /* Distinguish some events from others and + // * automatically trace each new process with + // * the same options. + // * + // * Note that only the first bare SIGTRAP is + // * related to the tracing loop, others SIGTRAP + // * carry tracing information because of + // * TRACE*FORK/CLONE/EXEC. */ + // if (deliver_sigtrap) + // break; /* Deliver this signal as-is. */ + + // deliver_sigtrap = true; + + // /* Try to enable seccomp mode 2... */ + // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + // default_ptrace_options | PTRACE_O_TRACESECCOMP); + // if (status < 0) { + // /* ... otherwise use default options only. */ + // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, + // default_ptrace_options); + // if (status < 0) { + // note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); + // exit(EXIT_FAILURE); + // } + // } + // } + + // /* Fall through. */ + // case SIGTRAP | 0x80: + // signal = 0; + + // /* This tracee got signaled then freed during the + // sysenter stage but the kernel reports the sysexit + // stage; just discard this spurious tracee/event. */ + // if (tracee->exe == NULL) { + // tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ + // return 0; + // } + + // switch (tracee->seccomp) { + // case ENABLED: + // if (IS_IN_SYSENTER(tracee)) { + // /* sysenter: ensure the sysexit + // * stage will be hit under seccomp. */ + // tracee->restart_how = PTRACE_SYSCALL; + // tracee->sysexit_pending = true; + // } + // else { + // /* sysexit: the next sysenter + // * will be notified by seccomp. 
*/ + // tracee->restart_how = PTRACE_CONT; + // tracee->sysexit_pending = false; + // } + // /* Fall through. */ + // case DISABLED: + // translate_syscall(tracee); + + // /* This syscall has disabled seccomp. */ + // if (tracee->seccomp == DISABLING) { + // tracee->restart_how = PTRACE_SYSCALL; + // tracee->seccomp = DISABLED; + // } + + // break; + + // case DISABLING: + // /* Seccomp was disabled by the + // * previous syscall, but its sysenter + // * stage was already handled. */ + // tracee->seccomp = DISABLED; + // if (IS_IN_SYSENTER(tracee)) + // tracee->status = 1; + // break; + // } + // break; + + // case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: + // case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: { + // unsigned long flags = 0; + + // signal = 0; + + // if (!seccomp_detected) { + // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); + // tracee->seccomp = ENABLED; + // seccomp_detected = true; + // } + + // /* Use the common ptrace flow if seccomp was + // * explicitely disabled for this tracee. */ + // if (tracee->seccomp != ENABLED) + // break; + + // status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); + // if (status < 0) + // break; + + // /* Use the common ptrace flow when + // * sysexit has to be handled. */ + // if ((flags & FILTER_SYSEXIT) != 0) { + // tracee->restart_how = PTRACE_SYSCALL; + // break; + // } + + // /* Otherwise, handle the sysenter + // * stage right now. */ + // tracee->restart_how = PTRACE_CONT; + // translate_syscall(tracee); + + // /* This syscall has disabled seccomp, so move + // * the ptrace flow back to the common path to + // * ensure its sysexit will be handled. 
*/ + // if (tracee->seccomp == DISABLING) + // tracee->restart_how = PTRACE_SYSCALL; + // break; + // } + + // case SIGTRAP | PTRACE_EVENT_VFORK << 8: + // signal = 0; + // (void) new_child(tracee, CLONE_VFORK); + // break; + + // case SIGTRAP | PTRACE_EVENT_FORK << 8: + // case SIGTRAP | PTRACE_EVENT_CLONE << 8: + // signal = 0; + // (void) new_child(tracee, 0); + // break; + + // case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: + // case SIGTRAP | PTRACE_EVENT_EXEC << 8: + // case SIGTRAP | PTRACE_EVENT_EXIT << 8: + // signal = 0; + // break; + + // case SIGSTOP: + // /* Stop this tracee until PRoot has received + // * the EVENT_*FORK|CLONE notification. */ + // if (tracee->exe == NULL) { + // tracee->sigstop = SIGSTOP_PENDING; + // signal = -1; + // } + + // /* For each tracee, the first SIGSTOP + // * is only used to notify the tracer. */ + // if (tracee->sigstop == SIGSTOP_IGNORED) { + // tracee->sigstop = SIGSTOP_ALLOWED; + // signal = 0; + // } + // break; + + // default: + // /* Deliver this signal as-is. */ + // break; + // } + // } + + // /* Clear the pending event, if any. 
*/ + // tracee->as_ptracee.event4.proot.pending = false; + // return signal; + + return 0 +} + +func ptrace(request int, pid int, addr uintptr, data uintptr) (err error) { + _, _, e1 := unix.Syscall6(unix.SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +func (proot *Proot) getPtraceRegs(pid int) (*unix.PtraceRegs, error) { + var regs unix.PtraceRegs + if err := unix.PtraceGetRegs(pid, ®s); err != nil { return nil, err } - - cmd := exec.Command("go", "version") - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - cmd.SysProcAttr.Ptrace = true - cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER - - cmd.Start() - ptr.SetPID(cmd.Process.Pid) - - ptr.LoopEvent() - - return nil, nil + return ®s, nil } + +type Tracee struct { + // Private + Pid int + Vpid uint64 + Running bool + Terminated bool + KillallOnExit bool + Parent *Tracee + Clone bool + + AsPtracer struct { + NbPtracees int + WaitPid int + WaitOptions uint64 + WaitsIn int // enum: 0 = DOESNT_WAIT, 1 = WAITS_IN_KERNEL, 2 = WAITS_IN_PROOT + } + + AsPtracee *struct { + Ptracer *Tracee + Event4 struct { + Proot struct { + Value int + Pending bool + } + Ptracer struct { + Value int + Pending bool + } + } + TracingStarted bool + IgnoreLoaderSyscalls bool + IgnoreSyscalls bool + Options uint64 + IsZombie bool + } + + Status int + RestartHow int // syscall.PTRACE_CONT etc. 
+ Regs [NB_REG_VERSION]syscall.PtraceRegs + RegsWereChanged bool + RestoreOriginalRegs bool + + Sigstop int // enum: 0 = IGNORED, 1 = ALLOWED, 2 = PENDING + + GlueType uint32 // mode_t is often uint32 + Reconf struct { + Tracee *Tracee + Paths string + } + + Chain struct { + Syscalls *ChainedSyscalls + ForceFinalResult bool + FinalResult uint64 + } + + LoadInfo *LoadInfo + MixedMode bool + + // Inherited + Verbose int + Seccomp int + SysexitPending bool + + // Shared or private + FS *FileSystemNameSpace + Heap *Heap + + // Shared until execve + Exe string + NewExe string + + // (Re)configuration + Qemu []string + Glue string + Extensions *Extensions + + // Read-only + HostLdsoPaths string + GuestLdsoPaths string + ToolName string +} + +type Heap struct { + Base uint64 // assuming word_t = uint64 + Size uint64 // size_t + Disabled bool +} + +type FileSystemNameSpace struct { + Bindings struct { + Pending *Bindings + Guest *Bindings + Host *Bindings + } + Cwd string +} + +type RegVersion int + +const ( + CURRENT RegVersion = iota + ORIGINAL + MODIFIED + NB_REG_VERSION +) + +type Mapping struct { + Addr uint64 + Length uint64 + ClearLength uint64 + Prot uint64 + Flags uint64 + Fd uint64 + Offset uint64 +} + +type LoadInfo struct { + HostPath string + UserPath string + RawPath string + Mappings []*Mapping + ElfHeader ElfHeader + NeedsExecutableStack bool + Interp *LoadInfo +} + +type Bindings struct{} +type Extensions struct{} +type ChainedSyscalls struct{} diff --git a/proot/proot_test.go b/proot/proot_test.go index 37400cc..5755af3 100644 --- a/proot/proot_test.go +++ b/proot/proot_test.go @@ -1,7 +1,22 @@ package proot -import "testing" +import ( + "testing" + + "sirherobrine23.com.br/go-bds/exec/exec" +) func TestProot(t *testing.T) { - NewProc() -} \ No newline at end of file + cmd, _ := NewProc() + + err := cmd.Start(&exec.Exec{ + Arguments: []string{"go", "version"}, + }) + + if err != nil { + t.Error(err) + return + } + + cmd.Wait() +} -- 2.51.0 From 
68b4eeb842c6446569b86d3c764e6a1676a02110 Mon Sep 17 00:00:00 2001 From: Matheus Sampaio Queiroga Date: Thu, 10 Jul 2025 18:18:40 -0300 Subject: [PATCH 3/5] Refactor proot implementation and add main command entry point Signed-off-by: Matheus Sampaio Queiroga --- go.mod | 2 +- proot/binding_proot_linux/tracee/event.c | 2 +- proot/cmd/main.go | 32 ++ proot/proot.go | 9 +- proot/proot_linux.go | 455 +++++++++++++---------- proot/proot_test.go | 9 +- 6 files changed, 313 insertions(+), 196 deletions(-) create mode 100644 proot/cmd/main.go diff --git a/go.mod b/go.mod index 34a934c..5d1cdfe 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.24.4 require ( github.com/docker/docker v28.3.3+incompatible github.com/docker/go-connections v0.5.0 + golang.org/x/sys v0.34.0 ) require ( @@ -37,7 +38,6 @@ require ( go.opentelemetry.io/otel/trace v1.37.0 // indirect go.opentelemetry.io/proto/otlp v1.7.0 // indirect golang.org/x/net v0.41.0 // indirect - golang.org/x/sys v0.34.0 // indirect golang.org/x/time v0.8.0 // indirect google.golang.org/grpc v1.73.0 // indirect google.golang.org/protobuf v1.36.6 // indirect diff --git a/proot/binding_proot_linux/tracee/event.c b/proot/binding_proot_linux/tracee/event.c index ce59f07..22947a7 100644 --- a/proot/binding_proot_linux/tracee/event.c +++ b/proot/binding_proot_linux/tracee/event.c @@ -474,7 +474,7 @@ static int handle_tracee_event_kernel_4_8(Tracee *tracee, int tracee_status) default_ptrace_options); if (status < 0) { note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); - exit(EXIT_FAILURE); + exit(EXIT_FAILUtRE); } } else { diff --git a/proot/cmd/main.go b/proot/cmd/main.go new file mode 100644 index 0000000..f1335da --- /dev/null +++ b/proot/cmd/main.go @@ -0,0 +1,32 @@ +package main + +import ( + "fmt" + "os" + + "sirherobrine23.com.br/go-bds/exec/exec" + "sirherobrine23.com.br/go-bds/exec/proot" +) + +func main() { + cmd, _ := proot.NewProc() + + err := cmd.Start(&exec.Exec{ + Arguments: os.Args[min(1, len(os.Args)):], 
+ Stdout: os.Stdout, + Stderr: os.Stderr, + Stdin: os.Stdin, + }) + + if err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(-1) + return + } + + if err = cmd.Wait(); err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(-1) + return + } +} diff --git a/proot/proot.go b/proot/proot.go index 5aa8af5..50e8ef7 100644 --- a/proot/proot.go +++ b/proot/proot.go @@ -2,6 +2,7 @@ package proot import ( + "context" "io" "os/exec" @@ -14,12 +15,17 @@ type Proot struct { Cmd *exec.Cmd // Golang process EventLocked bool + NoSeccomp bool Err error last_exit_status int Tracees []*Tracee vpid int + doneEnd *exec.ExitError + done context.Context + donefFn context.CancelFunc + Stdin io.Reader Stdout, Stderr io.Writer } @@ -32,7 +38,8 @@ func (proot *Proot) Wait() error { } return nil case proot.Cmd != nil && proot.Cmd.Process != nil: - return proot.Cmd.Wait() + <-proot.done.Done() + return proot.doneEnd default: return goexec.ErrNoProcess } diff --git a/proot/proot_linux.go b/proot/proot_linux.go index 1e62de7..7b07470 100644 --- a/proot/proot_linux.go +++ b/proot/proot_linux.go @@ -3,18 +3,37 @@ package proot import ( + "context" "fmt" + "log" "os" "os/exec" + "reflect" "runtime" + "slices" "syscall" "time" + "unsafe" "golang.org/x/sys/unix" goexec "sirherobrine23.com.br/go-bds/exec/exec" ) +const ( + DISABLED = iota + DISABLING + ENABLED + + FILTER_SYSEXIT = 0x1 +) + +const ( + SIGSTOP_IGNORED = iota /* Ignore SIGSTOP (once the parent is known). */ + SIGSTOP_ALLOWED /* Allow SIGSTOP (once the parent is known). */ + SIGSTOP_PENDING /* Block SIGSTOP until the parent is unknown. 
*/ +) + var _ = goexec.Register("proot", NewProc) func NewProc() (*Proot, error) { @@ -28,6 +47,9 @@ func (proot *Proot) Kill() error { return proot.Cmd.Process.Kill() func (proot *Proot) Signal(s os.Signal) error { return proot.Cmd.Process.Signal(s) } func (proot *Proot) Close() error { + if proot.Cmd.Process != nil { + return proot.Cmd.Process.Signal(os.Interrupt) + } return nil } @@ -43,13 +65,56 @@ func (proot *Proot) Start(options *goexec.Exec) error { proot.Cmd.SysProcAttr.Ptrace = true proot.Cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER + // attach stdin + switch { + case options.Stdin != nil: + proot.Cmd.Stdin = options.Stdin + case proot.Stdin != nil: + proot.Cmd.Stdin = proot.Stdin + } + + // attach stdout + switch { + case options.Stdout != nil: + proot.Cmd.Stdout = options.Stdout + case proot.Stdout != nil: + proot.Cmd.Stdout = proot.Stdout + } + + // attach stderr + switch { + case options.Stderr != nil: + proot.Cmd.Stderr = options.Stderr + case proot.Stderr != nil: + proot.Cmd.Stderr = proot.Stderr + } + runtime.LockOSThread() // Lock thread to use PTRACE proot.EventLocked = true - // go proot.Event() // Start background loop event - proot.Cmd.Start() + proot.done, proot.donefFn = context.WithCancel(context.Background()) + go proot.Event() // Start background loop event - proot.Event() // Start background loop event - return nil + return proot.Cmd.Start() +} + +func mountProcessState(pid int, status unix.WaitStatus, rusage *unix.Rusage) *os.ProcessState { + newState := &os.ProcessState{} + ptr := reflect.ValueOf(newState).Elem() + ptrType := ptr.Type() + + for index := range ptr.Type().NumField() { + ptr, ptrType := ptr.Field(index), ptrType.Field(index) + switch ptrType.Name { + case "pid": + ptr.SetInt(int64(pid)) + case "status": + ptr.Set(reflect.ValueOf(syscall.WaitStatus(status))) + case "rusage": + ptr.Set(reflect.ValueOf(any(rusage).(*syscall.Rusage))) + } + } + + return newState } func (proot *Proot) Event() { @@ -62,9 +127,14 @@ func 
(proot *Proot) Event() { } wpid, err := unix.Wait4(proot.Cmd.Process.Pid, &wstatus, unix.WALL, &rusage) + fmt.Println(err) switch err { case nil: - case syscall.Errno(3), syscall.Errno(10): + case syscall.Errno(10): + proot.doneEnd = &exec.ExitError{ProcessState: mountProcessState(proot.Cmd.Process.Pid, wstatus, &rusage)} + proot.donefFn() + return + case syscall.Errno(3): unix.Kill(wpid, unix.PTRACE_CONT) continue default: @@ -79,17 +149,25 @@ func (proot *Proot) Event() { wstatus.Continued(), ) + log.Printf("Getting tracee to wpid %d\n", wpid) tracee := proot.GetTracee(nil, wpid, true) tracee.Running = false if tracee.AsPtracee != nil { + log.Printf("handle_ptracee_event to %d\n", wpid) keep_stopped := proot.handle_ptracee_event(tracee, wstatus) if keep_stopped { continue } } + log.Printf("handle_tracee_event to %d\n", wpid) signal := proot.handle_tracee_event(tracee, wstatus) + log.Printf("restarting tracee to %d with %s\n", wpid, unix.Signal(signal)) + if signal == -255 && proot.Err != nil { + fmt.Println(proot.Err) + signal = 0 + } proot.restart_tracee(tracee, unix.Signal(signal)) } } @@ -204,16 +282,14 @@ func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus) unix.Kill(ptracee.Pid, unix.SIGCHLD) - //#define EXPECTED_WAIT_CLONE(wait_options,tracee) ((((wait_options) & __WALL) != 0) || ((((wait_options) & __WCLONE) != 0) && (tracee)->clone) || ((((wait_options) & __WCLONE) == 0) && !(tracee)->clone)) - // if ( (PTRACER.wait_pid == -1 || PTRACER.wait_pid == ptracee->pid) && EXPECTED_WAIT_CLONE(PTRACER.wait_options, ptracee)) if ptracer.AsPtracer.WaitPid == -1 || ptracer.AsPtracer.WaitPid == ptracee.Pid && (ptracer.AsPtracer.WaitOptions&unix.WALL != 0 || (ptracer.AsPtracer.WaitOptions&unix.WCLONE != 0 && ptracee.Clone) || (ptracer.AsPtracer.WaitOptions&unix.WCLONE == 0 && !ptracee.Clone)) { status := proot.update_wait_status(ptracer, ptracee) if status != 0 { - // poke_reg(ptracer, SYSARG_RESULT, (word_t) status); + // 
proot.poke_reg(ptracer, SYSARG_RESULT, (word_t) status); } /* Write ptracer's register cache back. */ - // (void) push_regs(ptracer); + // proot.push_regs(ptracer); ptracer.AsPtracer.WaitPid = 0 restarted := proot.restart_tracee(ptracer, 0) @@ -226,31 +302,52 @@ func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus) } func (proot *Proot) update_wait_status(ptracer, ptracee *Tracee) (result int) { + /* Special case: the Linux kernel reports the terminating + * event issued by a process to both its parent and its + * tracer, except when they are the same. In this case the + * Linux kernel reports the terminating event only once to the + * tracing parent ... */ + if ptracee.AsPtracee.Ptracer == ptracee.Parent && + (unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Exited() || + unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Signaled()) { + /* ... So hide this terminating event (toward its + * tracer, ie. PRoot) and make the second one appear + * (towards its parent, ie. the ptracer). This will + * ensure its exit status is collected from a kernel + * point-of-view (ie. it doesn't stay a zombie + * forever). */ + // restart_original_syscall(ptracer); - // /* Special case: the Linux kernel reports the terminating - // * event issued by a process to both its parent and its - // * tracer, except when they are the same. In this case the - // * Linux kernel reports the terminating event only once to the - // * tracing parent ... */ - // if (PTRACEE.ptracer == ptracee->parent - // && (WIFEXITED(PTRACEE.event4.ptracer.value) - // || WIFSIGNALED(PTRACEE.event4.ptracer.value))) { - // /* ... So hide this terminating event (toward its - // * tracer, ie. PRoot) and make the second one appear - // * (towards its parent, ie. the ptracer). This will - // * ensure its exit status is collected from a kernel - // * point-of-view (ie. it doesn't stay a zombie - // * forever). 
*/ - // restart_original_syscall(ptracer); + /* Detach this ptracee from its ptracer, PRoot doesn't + * have anything else to emulate. */ + // detach_from_ptracer(ptracee); - // /* Detach this ptracee from its ptracer, PRoot doesn't - // * have anything else to emulate. */ + /* Zombies can rest in peace once the ptracer is notified. */ + // if (PTRACEE.is_zombie) + // TALLOC_FREE(ptracee); + + return 0 + } + + // address = peek_reg(ptracer, ORIGINAL, SYSARG_2); + // if (address != 0) { + // poke_int32(ptracer, address, PTRACEE.event4.ptracer.value); + // if (errno != 0) + // return -errno; + // } + + // PTRACEE.event4.ptracer.pending = false; + + /* Be careful; ptracee might get freed before its pid is returned. */ + // result = ptracee->pid; + + // /* Zombies can rest in peace once the ptracer is notified. */ + // if (PTRACEE.is_zombie) { // detach_from_ptracer(ptracee); + // TALLOC_FREE(ptracee); + // } - // /* Zombies can rest in peace once the ptracer is - // * notified. */ - // if (PTRACEE.is_zombie) - // TALLOC_FREE(ptracee); + // return result; return 0 } @@ -281,6 +378,15 @@ func is_kernel_4_8() bool { return (major == 4 && minor >= 8) || major > 4 } +func (proot *Proot) terminate_tracee(tracee *Tracee) { + tracee.Terminated = true + if tracee.KillallOnExit { + for _, tracee := range slices.Backward(proot.Tracees) { + unix.Kill(tracee.Pid, unix.SIGKILL) + } + } +} + func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status unix.WaitStatus) int { /* Don't overwrite restart_how if it is explicitly set * elsewhere, i.e in the ptrace emulation when single @@ -300,181 +406,146 @@ func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status } } - // var seccomp_detected, seccomp_enabled bool - var - // status, - signal int + var signal int + var seccomp_detected, seccomp_enabled bool /* Not a signal-stop by default. 
*/ signal = 0 if tracee_status.Exited() { - // if (WIFEXITED(tracee_status)) { - // last_exit_status = WEXITSTATUS(tracee_status); - // VERBOSE(tracee, 1, - // "vpid %" PRIu64 ": exited with status %d", - // tracee->vpid, last_exit_status); - // terminate_tracee(tracee); - // } proot.last_exit_status = tracee_status.ExitStatus() + proot.terminate_tracee(tracee) } else if tracee_status.Signaled() { - // else if (WIFSIGNALED(tracee_status)) { // check_architecture(tracee); - // VERBOSE(tracee, 1, - // "vpid %" PRIu64 ": terminated with signal %d", - // tracee->vpid, WTERMSIG(tracee_status)); - // terminate_tracee(tracee); - // } + proot.terminate_tracee(tracee) } else if tracee_status.Stopped() { - /* Don't use WSTOPSIG() to extract the signal - * since it clears the PTRACE_EVENT_* bits. */ + /* Don't use WSTOPSIG() to extract the signal since it clears the PTRACE_EVENT_* bits. */ signal = (int(tracee_status) & 0xfff00) >> 8 - // static bool deliver_sigtrap = false; - // switch (signal) { - // case SIGTRAP: { - // const unsigned long default_ptrace_options = ( - // PTRACE_O_TRACESYSGOOD | - // PTRACE_O_TRACEFORK | - // PTRACE_O_TRACEVFORK | - // PTRACE_O_TRACEVFORKDONE | - // PTRACE_O_TRACEEXEC | - // PTRACE_O_TRACECLONE | - // PTRACE_O_TRACEEXIT); - // /* Distinguish some events from others and - // * automatically trace each new process with - // * the same options. - // * - // * Note that only the first bare SIGTRAP is - // * related to the tracing loop, others SIGTRAP - // * carry tracing information because of - // * TRACE*FORK/CLONE/EXEC. */ - // if (deliver_sigtrap) - // break; /* Deliver this signal as-is. */ - // deliver_sigtrap = true; - // /* Try to enable seccomp mode 2... */ - // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, - // default_ptrace_options | PTRACE_O_TRACESECCOMP); - // if (status < 0) { - // seccomp_enabled = false; - // /* ... otherwise use default options only. 
*/ - // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, - // default_ptrace_options); - // if (status < 0) { - // note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); - // exit(EXIT_FAILURE); - // } - // } - // else { - // if (getenv("PROOT_NO_SECCOMP") == NULL) - // seccomp_enabled = true; - // } - // } - // /* Fall through. */ - // case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: - // case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: - // if (!seccomp_detected && seccomp_enabled) { - // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); - // tracee->seccomp = ENABLED; - // seccomp_detected = true; - // } - // if (signal == (SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8) || - // signal == (SIGTRAP | PTRACE_EVENT_SECCOMP << 8)) { - // unsigned long flags = 0; - // signal = 0; - // /* Use the common ptrace flow if seccomp was - // * explicitly disabled for this tracee. */ - // if (tracee->seccomp != ENABLED) - // break; - // status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); - // if (status < 0) - // break; - // if ((flags & FILTER_SYSEXIT) == 0) { - // tracee->restart_how = PTRACE_CONT; - // translate_syscall(tracee); - // if (tracee->seccomp == DISABLING) - // tracee->restart_how = PTRACE_SYSCALL; - // break; - // } - // } - // /* Fall through. */ - // case SIGTRAP | 0x80: - // signal = 0; - // /* This tracee got signaled then freed during the - // sysenter stage but the kernel reports the sysexit - // stage; just discard this spurious tracee/event. */ - // if (tracee->exe == NULL) { - // tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ - // return 0; - // } - // switch (tracee->seccomp) { - // case ENABLED: - // if (IS_IN_SYSENTER(tracee)) { - // /* sysenter: ensure the sysexit - // * stage will be hit under seccomp. */ - // tracee->restart_how = PTRACE_SYSCALL; - // tracee->sysexit_pending = true; - // } - // else { - // /* sysexit: the next sysenter - // * will be notified by seccomp. 
*/ - // tracee->restart_how = PTRACE_CONT; - // tracee->sysexit_pending = false; - // } - // /* Fall through. */ - // case DISABLED: - // translate_syscall(tracee); - // /* This syscall has disabled seccomp. */ - // if (tracee->seccomp == DISABLING) { - // tracee->restart_how = PTRACE_SYSCALL; - // tracee->seccomp = DISABLED; - // } - // break; - // case DISABLING: - // /* Seccomp was disabled by the - // * previous syscall, but its sysenter - // * stage was already handled. */ - // tracee->seccomp = DISABLED; - // if (IS_IN_SYSENTER(tracee)) - // tracee->status = 1; - // break; - // } - // break; - // case SIGTRAP | PTRACE_EVENT_VFORK << 8: - // signal = 0; - // (void) new_child(tracee, CLONE_VFORK); - // break; - // case SIGTRAP | PTRACE_EVENT_FORK << 8: - // case SIGTRAP | PTRACE_EVENT_CLONE << 8: - // signal = 0; - // (void) new_child(tracee, 0); - // break; - // case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: - // case SIGTRAP | PTRACE_EVENT_EXEC << 8: - // case SIGTRAP | PTRACE_EVENT_EXIT << 8: - // signal = 0; - // break; - // case SIGSTOP: - // /* Stop this tracee until PRoot has received - // * the EVENT_*FORK|CLONE notification. */ - // if (tracee->exe == NULL) { - // tracee->sigstop = SIGSTOP_PENDING; - // signal = -1; - // } - // /* For each tracee, the first SIGSTOP - // * is only used to notify the tracer. */ - // if (tracee->sigstop == SIGSTOP_IGNORED) { - // tracee->sigstop = SIGSTOP_ALLOWED; - // signal = 0; - // } - // break; - // default: - // /* Deliver this signal as-is. */ - // break; - // } + var deliver_sigtrap bool + switch signal { + case int(unix.SIGTRAP): + default_ptrace_options := + unix.PTRACE_O_TRACESYSGOOD | + unix.PTRACE_O_TRACEFORK | + unix.PTRACE_O_TRACEVFORK | + unix.PTRACE_O_TRACEVFORKDONE | + unix.PTRACE_O_TRACEEXEC | + unix.PTRACE_O_TRACECLONE | + unix.PTRACE_O_TRACEEXIT + if deliver_sigtrap { + break + } + deliver_sigtrap = true + /* Try to enable seccomp mode 2... 
*/ + err := ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options|unix.PTRACE_O_TRACESECCOMP)) + if err != nil { + seccomp_enabled = false + /* ... otherwise use default options only. */ + err = ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options)) + if err != nil { + proot.Err = fmt.Errorf("ptrace(PTRACE_SETOPTIONS): %s", err) + return -255 + } + } else { + if proot.NoSeccomp { + seccomp_enabled = true + } + } + fallthrough + case int(unix.SIGTRAP | unix.PTRACE_EVENT_SECCOMP<<8): + if !seccomp_detected && seccomp_enabled { + // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); + tracee.Seccomp = ENABLED + seccomp_detected = true + } + if signal == int(unix.SIGTRAP|unix.PTRACE_EVENT_SECCOMP<<8) { + signal = 0 + flags := 0 + /* Use the common ptrace flow if seccomp was + * explicitly disabled for this tracee. */ + if tracee.Seccomp != ENABLED { + break + } + err := ptrace(unix.PTRACE_GETEVENTMSG, tracee.Pid, 0, uintptr(unsafe.Pointer(&flags))) + if err != nil { + break + } + if (flags & FILTER_SYSEXIT) == 0 { + tracee.RestartHow = unix.PTRACE_CONT + // translate_syscall(tracee); + if tracee.Seccomp == DISABLING { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + break + } + } + fallthrough + case int(unix.SIGTRAP | 0x80): + signal = 0 + /* This tracee got signaled then freed during the + sysenter stage but the kernel reports the sysexit + stage; just discard this spurious tracee/event. */ + if tracee.Exe == "" { + tracee.RestartHow = unix.PTRACE_CONT /* SYSCALL OR CONT */ + return 0 + } + switch tracee.Seccomp { + case ENABLED: + if tracee.AsPtracee.Ptracer.Status == 0 { + /* sysenter: ensure the sysexit + * stage will be hit under seccomp. */ + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.SysexitPending = true + } else { + /* sysexit: the next sysenter + * will be notified by seccomp. 
*/ + tracee.RestartHow = unix.PTRACE_CONT + tracee.SysexitPending = false + } + fallthrough + case DISABLED: + // translate_syscall(tracee); + /* This syscall has disabled seccomp. */ + if tracee.Seccomp == DISABLING { + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.Seccomp = DISABLED + } + case DISABLING: + /* Seccomp was disabled by the + * previous syscall, but its sysenter + * stage was already handled. */ + tracee.Seccomp = DISABLED + if tracee.AsPtracee.Ptracer.Status == 0 { + tracee.Status = 1 + } + } + case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK<<8): + signal = 0 + // proot.new_child(tracee, CLONE_VFORK); + case int(unix.SIGTRAP | unix.PTRACE_EVENT_FORK<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_CLONE<<8): + signal = 0 + // proot.new_child(tracee, 0); + case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK_DONE<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_EXEC<<8), + int(unix.SIGTRAP | unix.PTRACE_EVENT_EXIT<<8): + signal = 0 + case int(unix.SIGSTOP): + /* Stop this tracee until PRoot has received + * the EVENT_*FORK|CLONE notification. */ + if tracee.Exe == "" { + tracee.Sigstop = SIGSTOP_PENDING + signal = -1 + } + /* For each tracee, the first SIGSTOP + * is only used to notify the tracer. */ + if tracee.Sigstop == SIGSTOP_IGNORED { + tracee.Sigstop = SIGSTOP_ALLOWED + signal = 0 + } + } } // /* Clear the pending event, if any. 
*/ - // tracee.AsPtracee.Event4.Proot.Pending = false + tracee.AsPtracee.Event4.Proot.Pending = false return signal } diff --git a/proot/proot_test.go b/proot/proot_test.go index 5755af3..8f7a1d1 100644 --- a/proot/proot_test.go +++ b/proot/proot_test.go @@ -1,6 +1,7 @@ package proot import ( + "os" "testing" "sirherobrine23.com.br/go-bds/exec/exec" @@ -11,6 +12,9 @@ func TestProot(t *testing.T) { err := cmd.Start(&exec.Exec{ Arguments: []string{"go", "version"}, + Stdout: os.Stdout, + Stderr: os.Stderr, + Stdin: os.Stdin, }) if err != nil { @@ -18,5 +22,8 @@ func TestProot(t *testing.T) { return } - cmd.Wait() + if err = cmd.Wait(); err != nil { + t.Error(err) + return + } } -- 2.51.0 From 31bcff271904229122a18f46723720b82b790099 Mon Sep 17 00:00:00 2001 From: Matheus Sampaio Queiroga Date: Wed, 30 Jul 2025 14:30:22 -0300 Subject: [PATCH 4/5] update proot Signed-off-by: Matheus Sampaio Queiroga --- .gitignore | 3 +- .vscode/c_cpp_properties.json | 31 + proot/binding_proot_linux/.check_process_vm.c | 8 + .../.check_seccomp_filter.c | 31 + proot/binding_proot_linux/GNUmakefile | 308 +++++++ proot/binding_proot_linux/build.h | 6 +- proot/binding_proot_linux/cli/cli.c | 7 +- proot/binding_proot_linux/loader/loader | Bin 15880 -> 5672 bytes proot/binding_proot_linux/loader/loader-m32 | Bin 18544 -> 9852 bytes proot/binding_proot_linux/proot.go | 8 +- proot/binding_proot_linux/tracee/event.c | 29 +- proot/binding_proot_linux/tracee/event.h | 2 +- proot/binding_proot_linux/tracee/tracee.c | 46 +- proot/binding_proot_linux/tracee/tracee.h | 14 +- proot/cmd/main.go | 32 - proot/proot.go | 5 - proot/proot_linux.go | 834 +----------------- 17 files changed, 446 insertions(+), 918 deletions(-) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 proot/binding_proot_linux/.check_process_vm.c create mode 100644 proot/binding_proot_linux/.check_seccomp_filter.c create mode 100644 proot/binding_proot_linux/GNUmakefile delete mode 100644 proot/cmd/main.go diff --git 
a/.gitignore b/.gitignore index 2edb906..ee9bb94 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.o -*.d \ No newline at end of file +*.d +*.res \ No newline at end of file diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..6f23cc0 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,31 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [ + "__USE_GNU" + ], + "compilerPath": "/usr/bin/clang", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "linux-clang-x64", + "compilerArgs": [ + "-g", + "-Wall", + "-Wextra", + "-O2", + "-D_FILE_OFFSET_BITS=64", + "-D_GNU_SOURCE", + "-Iproot/binding_proot_linux", + "-Iproot/binding_proot_linux", + "-Iproot/binding_proot_linux/../lib/uthash/include", + "-Wl,-z,noexecstack", + "-ltalloc" + ] + } + ], + "version": 4 +} \ No newline at end of file diff --git a/proot/binding_proot_linux/.check_process_vm.c b/proot/binding_proot_linux/.check_process_vm.c new file mode 100644 index 0000000..e7f6de2 --- /dev/null +++ b/proot/binding_proot_linux/.check_process_vm.c @@ -0,0 +1,8 @@ +#include +#include + +int main(void) +{ + return process_vm_readv(0, NULL, 0, NULL, 0, 0) + + process_vm_writev(0, NULL, 0, NULL, 0, 0); +} diff --git a/proot/binding_proot_linux/.check_seccomp_filter.c b/proot/binding_proot_linux/.check_seccomp_filter.c new file mode 100644 index 0000000..cd79ded --- /dev/null +++ b/proot/binding_proot_linux/.check_seccomp_filter.c @@ -0,0 +1,31 @@ +#include /* prctl(2), PR_* */ +#include /* SECCOMP_MODE_FILTER, */ +#include /* struct sock_*, */ +#include /* AUDIT_ARCH_*, */ +#include /* offsetof(3), */ + +int main(void) +{ + const size_t arch_offset = offsetof(struct seccomp_data, arch); + const size_t syscall_offset = offsetof(struct seccomp_data, nr); + struct sock_fprog program; + + #define ARCH_NR AUDIT_ARCH_X86_64 + + struct sock_filter filter[] = { + 
BPF_STMT(BPF_LD + BPF_W + BPF_ABS, arch_offset), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 0, 1), + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_offset), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE) + }; + + program.filter = filter; + program.len = sizeof(filter) / sizeof(struct sock_filter); + + (void) prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + (void) prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &program); + + return 1; +} + diff --git a/proot/binding_proot_linux/GNUmakefile b/proot/binding_proot_linux/GNUmakefile new file mode 100644 index 0000000..e0fe5a2 --- /dev/null +++ b/proot/binding_proot_linux/GNUmakefile @@ -0,0 +1,308 @@ +# If you want to build outside of the source tree, use the -f option: +# make -f ${SOMEWHERE}/proot/src/GNUmakefile + +# the VPATH variable must point to the actual makefile directory +VPATH := $(dir $(lastword $(MAKEFILE_LIST))) +SRC = $(dir $(firstword $(MAKEFILE_LIST))) + +GIT = git +RM = rm +INSTALL = install +CC = $(CROSS_COMPILE)gcc +LD = $(CC) +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +PYTHON = python3 + +HAS_SWIG := $(shell swig -version 2>/dev/null) +PYTHON_MAJOR_VERSION = $(shell ${PYTHON} -c "import sys; print(sys.version_info.major)" 2>/dev/null) +PYTHON_EMBED = $(shell ${PYTHON} -c "import sys; print('--embed' if sys.hexversion > 0x03080000 else '')" 2>/dev/null) +HAS_PYTHON_CONFIG := $(shell ${PYTHON}-config --ldflags ${PYTHON_EMBED} 2>/dev/null) + +CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -I. 
-I$(VPATH) -I$(VPATH)/../lib/uthash/include +CFLAGS += -g -Wall -Wextra -O2 -Wdeprecated-declarations -Wunused-parameter +CFLAGS += $(shell pkg-config --cflags talloc) +LDFLAGS += -Wl,-z,noexecstack +LDFLAGS += $(shell pkg-config --libs talloc) + +CARE_LDFLAGS = $(shell pkg-config --libs libarchive) + +OBJECTS += \ + cli/cli.o \ + cli/proot.o \ + cli/note.o \ + execve/enter.o \ + execve/exit.o \ + execve/shebang.o \ + execve/elf.o \ + execve/ldso.o \ + execve/auxv.o \ + execve/aoxp.o \ + path/binding.o \ + path/glue.o \ + path/canon.o \ + path/path.o \ + path/proc.o \ + path/temp.o \ + syscall/seccomp.o \ + syscall/syscall.o \ + syscall/chain.o \ + syscall/enter.o \ + syscall/exit.o \ + syscall/sysnum.o \ + syscall/socket.o \ + syscall/heap.o \ + syscall/rlimit.o \ + tracee/tracee.o \ + tracee/mem.o \ + tracee/reg.o \ + tracee/event.o \ + ptrace/ptrace.o \ + ptrace/user.o \ + ptrace/wait.o \ + extension/extension.o \ + extension/kompat/kompat.o \ + extension/fake_id0/fake_id0.o \ + extension/link2symlink/link2symlink.o \ + extension/portmap/portmap.o \ + extension/portmap/map.o \ + loader/loader-wrapped.o + +define define_from_arch.h +$2$1 := $(shell $(CC) $1 -E -dM -DNO_LIBC_HEADER $(SRC)/arch.h | grep -w $2 | cut -f 3 -d ' ') +endef + +$(eval $(call define_from_arch.h,,HAS_LOADER_32BIT)) + +ifdef HAS_LOADER_32BIT + OBJECTS += loader/loader-m32-wrapped.o +endif + +ifneq ($(and $(HAS_SWIG),$(HAS_PYTHON_CONFIG)),) + OBJECTS += extension/python/python.o \ + extension/python/proot_wrap.o \ + extension/python/python_extension.o \ + extension/python/proot.o +endif + +CARE_OBJECTS = \ + cli/care.o \ + cli/care-manual.o \ + extension/care/care.o \ + extension/care/final.o \ + extension/care/extract.o \ + extension/care/archive.o + +.DEFAULT_GOAL = proot +all: proot + +###################################################################### +# Beautified output + +quiet_GEN = @echo " GEN $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $@"; $(GEN) +quiet_CC = @echo " CC $(CFLAGS) 
$(CPPFLAGS) $(LDFLAGS) $@"; $(CC) +quiet_LD = @echo " LD $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $@"; $(LD) +quiet_INSTALL = @echo " INSTALL $?"; $(INSTALL) + +V = 0 +ifeq ($(V), 0) + quiet = quiet_ + Q = @ + silently = >/dev/null 2>&1 +else + quiet = + Q = + silently = +endif + +###################################################################### +# Auto-configuration + +GIT_VERSION := $(shell git describe --tags `git rev-list --tags --max-count=1`) + +GIT_COMMIT := $(shell git rev-list --all --max-count=1 | cut -c 1-8) + +VERSION = $(GIT_VERSION)-$(GIT_COMMIT) + +CHECK_VERSION = if [ ! -z "$(VERSION)" ]; \ + then /bin/echo -e "\#undef VERSION\n\#define VERSION \"$(VERSION)\""; \ + fi; + +ifneq ($(and $(HAS_SWIG),$(HAS_PYTHON_CONFIG)),) + CHECK_PYTHON_EXTENSION = /bin/echo -e "\#define HAVE_PYTHON_EXTENSION" +endif + +CHECK_FEATURES = process_vm seccomp_filter +CHECK_PROGRAMS = $(foreach feature,$(CHECK_FEATURES),.check_$(feature)) +CHECK_OBJECTS = $(foreach feature,$(CHECK_FEATURES),.check_$(feature).o) +CHECK_RESULTS = $(foreach feature,$(CHECK_FEATURES),.check_$(feature).res) + +.SILENT .IGNORE .INTERMEDIATE: $(CHECK_OBJECTS) $(CHECK_PROGRAMS) + +.check_%.o: .check_%.c + -$(COMPILE:echo=false) $(silently) + +.check_%: .check_%.o + -$(LINK:echo=false) $(silently) + +.check_%.res: .check_% + $(Q)if [ -e $< ]; then echo "#define HAVE_$(shell echo $* | tr a-z A-Z)" > $@; else echo "" > $@; fi + +build.h: $(CHECK_RESULTS) + $($(quiet)GEN) + $(Q)echo "/* This file is auto-generated, edit at your own risk. 
*/" > $@ + $(Q)echo "#ifndef BUILD_H" >> $@ + $(Q)echo "#define BUILD_H" >> $@ + $(Q)sh -c '$(CHECK_VERSION)' >> $@ + $(Q)sh -c '$(CHECK_PYTHON_EXTENSION)' >> $@ + $(Q)cat $^ >> $@ + $(Q)echo "#endif /* BUILD_H */" >> $@ + +BUILD_ID_NONE := $(shell if ld --build-id=none --version >/dev/null 2>&1; then echo ',--build-id=none'; fi) + +###################################################################### +# Build rules + +COMPILE = $($(quiet)CC) $(CPPFLAGS) $(CFLAGS) -MD -c $(SRC)$< -o $@ +LINK = $($(quiet)LD) -o $@ $^ $(LDFLAGS) + +OBJIFY = $($(quiet)GEN) \ + $(OBJCOPY) \ + --input-target binary \ + --output-target `env LC_ALL=C $(OBJDUMP) -f cli/cli.o | \ + grep 'file format' | awk '{print $$4}'` \ + --binary-architecture `env LC_ALL=C $(OBJDUMP) -f cli/cli.o | \ + grep architecture | cut -f 1 -d , | awk '{print $$2}'` \ + $< $@ + +proot: $(OBJECTS) + $(LINK) + +care: $(OBJECTS) $(CARE_OBJECTS) + $(LINK) $(CARE_LDFLAGS) + +# Special case to compute which files depend on the auto-generated +# file "build.h". 
+USE_BUILD_H := $(patsubst $(SRC)%.c,%.o,$(shell grep -E -sl 'include[[:space:]]+"build.h"' $(patsubst %.o,$(SRC)%.c,$(OBJECTS) $(CARE_OBJECTS)))) +$(USE_BUILD_H): build.h + +%.o: %.c + @mkdir -p $(dir $@) + $(COMPILE) + +.INTERMEDIATE: manual +manual: $(VPATH)/../doc/care/manual.rst + $(Q)cp $< $@ + +cli/care-manual.o: manual cli/cli.o + $(OBJIFY) + +cli/%-licenses.o: licenses cli/cli.o + $(OBJIFY) + +###################################################################### +# Python extension + +define build_python_extension +CPPFLAGS += $(shell ${PYTHON}-config --includes) +LDFLAGS += $(shell ${PYTHON}-config --ldflags ${PYTHON_EMBED}) +SWIG = swig +quiet_SWIG = @echo " SWIG $$@"; swig +SWIG_OPT = -python + +ifeq ($(PYTHON_MAJOR_VERSION), 3) + SWIG_OPT += -py3 +endif + +.INTERMEDIATE:python_extension.py +python_extension.py: extension/python/python_extension.py + $$(Q)cp $$< $$@ +extension/python/python_extension.o: python_extension.py cli/cli.o + $$(OBJIFY) + +.SECONDARY: proot_wrap.c proot.py +proot_wrap.c proot.py: extension/python/proot.i + $$($$(quiet)SWIG) $$(SWIG_OPT) -outcurrentdir -I$$(VPATH) $$(VPATH)/extension/python/proot.i + +extension/python/proot.o: proot.py cli/cli.o + $$(OBJIFY) + +extension/python/proot_wrap.o: proot_wrap.c + $$($$(quiet)CC) $$(CPPFLAGS) $$(CFLAGS) -MD -c $$< -o $$@ + +endef + +ifneq ($(and $(HAS_SWIG),$(HAS_PYTHON_CONFIG)),) +$(eval $(build_python_extension)) +endif + +###################################################################### +# Build rules for the loader + +define build_loader +LOADER$1_OBJECTS = loader/loader$1.o loader/assembly$1.o + +$(eval $(call define_from_arch.h,$1,LOADER_ARCH_CFLAGS)) +$(eval $(call define_from_arch.h,$1,LOADER_ADDRESS)) + +LOADER_CFLAGS$1 += -fPIC -ffreestanding $(LOADER_ARCH_CFLAGS$1) +LOADER_LDFLAGS$1 += -static -nostdlib -Wl$(BUILD_ID_NONE),-Ttext=$(LOADER_ADDRESS$1),-z,noexecstack + +loader/loader$1.o: loader/loader.c + @mkdir -p $$(dir $$@) + $$(COMPILE) $1 $$(LOADER_CFLAGS$1) + 
+loader/assembly$1.o: loader/assembly.S + @mkdir -p $$(dir $$@) + $$(COMPILE) $1 $$(LOADER_CFLAGS$1) + +loader/loader$1: $$(LOADER$1_OBJECTS) + $$($$(quiet)LD) $1 -o $$@ $$^ $$(LOADER_LDFLAGS$1) + +.INTERMEDIATE: loader$1.elf +loader$1.elf: loader/loader$1 + $$(Q)cp $$< $$@ + $$(Q)$(STRIP) $$@ + +loader/loader$1-wrapped.o: loader$1.elf cli/cli.o + $$(OBJIFY) + +endef + +$(eval $(build_loader)) + +ifdef HAS_LOADER_32BIT +$(eval $(call build_loader,-m32)) +endif + +###################################################################### +# Dependencies + +.DELETE_ON_ERROR: +$(OBJECTS) $(CARE_OBJECTS) $(LOADER_OBJECTS) $(LOADER-m32_OBJECTS): $(firstword $(MAKEFILE_LIST)) + +DEPS = $(OBJECTS:.o=.d) $(CARE_OBJECTS:.o=.d) $(LOADER_OBJECTS:.o=.d) $(LOADER-m32_OBJECTS:.o=.d) $(CHECK_OBJECTS:.o=.d) +-include $(DEPS) + +###################################################################### +# PHONY targets + +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin + +.PHONY: clean distclean install install-care uninstall +clean distclean: + -$(RM) -f $(CHECK_OBJECTS) $(CHECK_PROGRAMS) $(CHECK_RESULTS) $(OBJECTS) $(CARE_OBJECTS) $(LOADER_OBJECTS) $(LOADER-m32_OBJECTS) proot care loader/loader loader/loader-m32 cli/care-manual.o $(DEPS) build.h licenses proot.py proot_wrap.c + +install: proot + $($(quiet)INSTALL) -D $< $(DESTDIR)$(BINDIR)/$< + +install-care: care + $($(quiet)INSTALL) -D $< $(DESTDIR)$(BINDIR)/$< + +uninstall: + -$(RM) -f $(DESTDIR)$(BINDIR)/proot + +uninstall-care: + -$(RM) -f $(DESTDIR)$(BINDIR)/care diff --git a/proot/binding_proot_linux/build.h b/proot/binding_proot_linux/build.h index 5cf8056..a474383 100644 --- a/proot/binding_proot_linux/build.h +++ b/proot/binding_proot_linux/build.h @@ -2,7 +2,7 @@ #ifndef BUILD_H #define BUILD_H #undef VERSION -#define VERSION "v5.4.0-60485d26" -// #define HAVE_PROCESS_VM -// #define HAVE_SECCOMP_FILTER +#define VERSION "-46d69dae" +#define HAVE_PROCESS_VM +#define HAVE_SECCOMP_FILTER #endif /* BUILD_H */ diff --git 
a/proot/binding_proot_linux/cli/cli.c b/proot/binding_proot_linux/cli/cli.c index 9385047..7235f41 100644 --- a/proot/binding_proot_linux/cli/cli.c +++ b/proot/binding_proot_linux/cli/cli.c @@ -468,6 +468,11 @@ int NO_main(int argc, char *const argv[]) if (tracee == NULL) goto error; tracee->pid = getpid(); + tracee->RootConfig = talloc_zero(tracee, tracee_root); + if (tracee->RootConfig == NULL) { + note(tracee, ERROR, INTERNAL, "talloc_zero() failed: %s", strerror(errno)); + goto error; + } /* Pre-configure the first tracee. */ status = parse_config(tracee, argc, argv); @@ -482,7 +487,7 @@ int NO_main(int argc, char *const argv[]) } /* Start tracing the first tracee and all its children. */ - exit(event_loop()); + exit(event_loop(tracee)); error: TALLOC_FREE(tracee); diff --git a/proot/binding_proot_linux/loader/loader b/proot/binding_proot_linux/loader/loader index 8cd9b0d5dca7a1ef4b8055f8bcb66da8bd3c9e30..a924d4ced1f8a16ad47b7ccdefdacdf589f84587 100755 GIT binary patch literal 5672 zcmeHL%Wl&^6dfmR2?Y9}Qb8(J4FaiM*iKwhDx_9O5iLj+Pjy?zj;D@AGOor>n{Hr( zgv6?B`3gRPU*IE`pc zBIF(xR_2vLa2sR-&KF5lYA1e{2>Ci34jmv6YqM!lA;DaDqz`kcYNc8NSi?|Z1@q^h z6aQ~~Pr)~ZbDRaT%Q}m`&=eggI#6_==s?kdq60+-iVhSVC^}Gdpy)u+f&aM!;$2#Y zV-k*?M|nqQwuRm}TEEx7G zxdxt+^r`pvbQX0sA($kZES4@qvQ*w#D3hkD18S9XBGa_Wk;I#%THUIar(xnb?bh^s z^;E0A)27PHi_5cDKv9Qdl2pj5G-=S5xvlx_SzXXsT;kd&*)}A8RcOcQkS`8b&yo-;pQ5+NaH94S{k9S!=Karr&vQch z0sYs}ry#FwK_Slx?I-j%M3NbKZ5L{St7SAIoSI9C*EwGK6&^!c61>6m#)>37xwCSCKytSjIRD1M#m3<35RsQKDgjc17L$-nLNq_<%!>y1 zRHFl~BW4YIbh~!Na1L$p)UD8#RY0u)Gyzt-v36Y#$33z?+)C9I-96~K#q{&N@4d+| zN{^@Q>FFPHbMC$0&-eHJ-uvF0d6WBAL~c+xXHHYtwJe327+Yowj#W4s#4Be4V;&1g zT`?hI?9BN1cq170+&IM&Ix|dUqZk@dAsWz#-;_2xNjS4@M>hYJT?ypWoY`$bw)Q=< z^}l0R2Rq&N@l7T>&2OTb)yY;MTY+o^vK7cyAX|ZK1+o>$Rv=q}Yz49v$X4M0tOE5$ z^1Z5CR+Nrc{Ylk7(Rb~t`kK*K@g07=YF%XDvK)M$U27!&Xe2K>l|JyV1ZQt0g z*0g9_BmC6(DgUf~O7?(?A(H&qFSZT)ANnc2Y8%_X4k zu!4Q?Ow~aOYHVNYc05G3E5`Pin;g{+Tz|yve`q#r{Ij+~51PrtMq(Jbl(XL78QZG@ 
z#y}ld<8P@QcXyYIjY=uz%ZP*h4V;#%)-@a};drSyPKpcz9h=diL~xoEsk>G?KCO z8v3_wN4V4fNHLu{m&p1imw#?xC$Z<-Hj&C82LCJ^k$cX!Uq!-ry8Y}-``M(j+Oo%% zg)-%Cl9yQWh*}+pBo9aC9LEMEPPzPBufZ9N4Ai+J$sQqg9!ZWF$+sdE zZ}=1MW34yNIehcL^t>AfR*grJ8^&r9f5^RSc}?P1XWIN958pH}X=>HkqX9GM&*Mho za3E4~+$^-uhs6r{^Qz8(-HTzvn1AbI@EPIn8p%HxiBWER_QuG{aWLM9RQxRdt;8?5 zi8YRL$pojPZDPODNLkMj7j|`2aa6B8AIDC8a8;T`8kyRE`+6Cb@6n8kvDinEiX;BS zOq45<95x1OxiM$V80Z@}663jd{b{7)@ZGo6R2-9+C9U4@FF$M~M~%e4oiPR`EnaqZ zG+<8i=P7a%Do&U)&kM>d3F@)@j8So-7sKcNt$&Y`WAHDHWg6Rfp*QR3+~`RR?jGfl<1FJTk}Vyx_2$eiq)-c`|;1^ADxd)N#rQlCT4}FY( zN1(-3tIFAmMd%IAbi7llj1rJ=HtvPC6}%1T4C z*F;-GMzd)P=9SH1SHTf|Wnj$tzXFg=jL8VZ&S?9gCK#8>c`qWZ8S2UcHTBO4QoHGV zbKg3sDYFW%hj8)&43pfhU6_0^3`(f*7GiOC0+ZWaESW)AT>@ugu~pRiGD0 z+O5#Phb)wIKO4#+#^}{f}+I45e(mwVV?oD2fe7R zmGM89te*lNL>IK0dnKzHUKgQzDaHqQ(<7b-asTAWh_em+IJ#zlMJI}02~q3DapMm{ zvMq-e$X9b8rWDvR0qU7HpALGHwFpq`8OQkBBF#7j$k+xjzLhMRaOh}8Q&cD z9ppy-Fg^7YAY&U?soTIx-3C_bHn38+ft9)qkouC0)PLke-kvA+?-;Yo!!_P<1=6X>D}!;!q}AXter zdmXxbjdlWPNnnhpj%Zs;0|g~>4ZB7=Oi!VoSbs9(7C-LnZ*R6EF4tNrcTEu zo&$@D!c){z;-K+!zMOf`7{pFJjINghJPYD!0@VPoffzx@oo{VMDtF?j$bxv!OJxF4 zDh{e=)7HV=NpjVM&T7#4E|IeV+)u#$6?)DlfO#q&dg!_dPz$1xzIMPuLkIOxU87;XGOd5 z5WT2_7t$GN12SpNeZTM@$6BbpjNBN2w@Q$b;X{ ze8p~}0wR5VjOFF=%W{LhLGIhZb#>N7vuGUwq&)d@M*6`*%1(w%@~M)^@B_;QSvUCB`)+~b zX(?`fp~Jh#hh3YFbwNHOjFc=-S4^!5hJ2LP%yec5TC2*ZJ-R=geDVFb04tyT;xhdrMOhiwL)u0_*x-ug7asD2x2+9{f|Vs8hc1LQTf)LK~Z-P9?o|N3SSN4=Tv|Z|*|~ zb=Q4QVUvUT!70HhwBX80QB8Z-CW3tyRgMiB5&`&B^y`9R8W)ASa&b_o`dgs5n5Hk$ zd2Iu5uY+;TKu}lHKbx5w-7L%cSD9tiWDAe^|FMuOhy^eZS ziu_vW#6{5t{g9aAK2CxM#1zCYLSiznpE=OV3k57fUZ}$4rDBSeRuR%VE;rnk4#Lm+ zG`lpJP}2%Hv(xhMy%1a8Wy2D7K=j$8fX*xJ>OT z6G~U~3TA;Br+TCt$VsW#%<<7>< zJssU+Ozrw0Gm>M9U^*0oFnq7SV7u>+I=n52ZAa25W9b)VSGYQo>QQ z5y3V#$D^#P`Oavgm2b2!+F}P~U0uzaS#yio(bFySf~XBEmYFirkC~Jv*45l8({Qq- zIMKMh(C*$YX2zOZqOHiNxs~k9XcrbqS+=kUcxzLvyL)1-SYc0F8}dVeTJ4e%qlnnuialfVH}!Ny=f^u@o1(Fv zjo8L$*@F4)9VXUz=X~sEk2w!=Nz9MOT3C2K)h8OWr|<*^FCQpq7>BAQ*0I?HVc?@C 
zJ*=IkvAMinoBOVpueIMxITAdUnZ_REO39G+Apbz&>$MM*y&A#2+Uo-Fb#dH7@VMt4 z58ykV4<-E2^Qaear}teiqzq4g1&?+4fdJ8z(jjfG$#2)peok~h->m`dhEvNmQn?+0 z87F0U-;F=sh@>yvps9Ms;kT1-&^r4$Uk$fUbAYG$D+<>43K9pBdDqLlfsT7ldLTPV z*>6Dh?|ESNo@cL@G`$DCkR8Mt_y&BAp+9O06=lt>P&}%>rBWR2FEvcBi=PWjd%gR; zm|pM>(=_UR4HFe0qiJ*d_*$*6pR4iBRDmZ{z$etFREeKb-yr-30;uuTL)u&F9u0U8 z(wBIzCU+~%C)L;KOnaD?UZ?Hjhp7;U`KugHts(7IKB9oVS9@E-^tSenrv4eCzoUI$ z0DoURDI|VU93Xr^ylCMU1@^km+T*%ZOzm+x5e-&F8!Tyq_Ms=~<()PFqtCTE! zj>9e8aw~=B#NUzrZ}nX+9F<+JJubDE4{3W``v~uIJ#XRXT}KH2t?f3=hxR+yb8gi3 zoO_oC>F@IF@u((w@a!kN-}8cnU+^Fc`iPA0xL=5I7s|BTNm4S;iI>RFOYRTdu#=JM znDx4nGKJYmaY~37yJp&uc1nCfNA(M_U$;5?^`GmaR|qk9Nkh+pE3leG`(0`FA-^#t(1~ zjwxdbL_5GeDaJf9C@tp#jV?fqeL`PZ&ujGmbCd1MlfF4PZ_?@gXMk=bArRm@tpLQ= z@n>9sFP}cq7;dYLjT_@}`}x%vZEZH286LW=c*h&x(q%Sp#B9c_d6Pq#(LR%fyL-%N zxV^hKjJx*cXw2Ng!m%FdAROJ)*cPJ)P`IV1OFrJhtpQ)--ehFPqIm?*xyH()NIxJ{Ttn9&BUDK+9` zoY&;nS8yErVcUKVc^pb0qKo&sX6n_X|xpBL%7%?GW|Ms zjxPO7kjW$2?Y8}ajD$0F@3??{!Ik6%zc*wUWI)yjolG9tar(&%tt8_Ko~gSPI>gId zXK5)}GZu0-#)7F>j-N-t|2q4+1(v}E+s>IBJ7*rU?H#!@KWp1ZG7gMm=jiRdfc+QO zSQ^$1+|mAOt;k=-5~cEL`(Zd}o6`M`^qlQ1vibDW*H=GkFm}qezu4Aurp)~FE?_Tz On1k$P$IzZL?f(-eux+vc diff --git a/proot/binding_proot_linux/loader/loader-m32 b/proot/binding_proot_linux/loader/loader-m32 index b0be46a51c1288e088af5336cee293fda7c9b21f..6930fe349ba6990620ea9bdfc3e3f7c5d32431b9 100755 GIT binary patch literal 9852 zcmeHN&u<$=6n7jgYz2jPK4mof^ohQ9{-<$8v?CdwY zd)fK?_3bx=5Tq?bSxU3dhqkvJs zC}0$L$`#oC;MUHUza8ByFCE3@OGlrUFCPuc^4EJG-f4gI%Y>On#*o8Pu9mrdqkvJs zC}0#Y3K#{90!9I&fKk9GU=%P4{67`o|LvFH{O^AA?Opu47e8|!|HIFu7?3twt(Rp5 z71w2b&8k~<*{C(vYHN+Uth}#!@=YgBiE86I>X(x7Jd$U7WS0Rm)IdA!AKQ;;vVe0W z5;C)h*(J1gDTCb+gTQy? 
z$d5eNhwh#q^S;Seqj`#rk=0?dfhJV$z7q~Nn!xeMR~xSKRp@n*I$^iJ;jQ0TmsM9* z-$C`>yKlA7?uAN4vE%nV-=*pGyL~4l%N_(yPldJIy2{C7 zt?qQ)G_!8(!Q;kTu5FI~dHC!k8FN^~!@$q_6>Oc(uQEst$6e?JctuY5BA))29?SVb zo8{LzpP7%Q3yuz>t}__4*GZ!$wcDL2YGY@@nA)o6#14Yrn^s_UD>@v;P6sg#C$XyIqRzpujhkgRSn94b6qR!?=&n*CPjeJO$DPeSrO@|htyId6on#PpYj&ol9u+N7H` zPu|lsO*rN^;2gExmP9vnf;gYzb8Z?s?`}wdARPw~6*^ClNKgnSye0DX!Sr;Jn-@uUrn`fnqPUp= zJ#9vqa%-$wmAJLz&Q7t`JWw;)afgSCqF5DI*Tr3G2L;)%y0hZ4wM;E;@9#VJ-sIvq zTeDUBm({20ob$cTcfRj;&bhbm?M@!6U(>)jXHLuQmQk?^mD#@vC z_&CvmaDK1;9@2kBA^!-Et%v@r<|p3>GgC4MWDv+8kU=1WKn8&f0vQA{2xJh*Ado>I zgTU7gflbMexBQA8dN(IeZWtZW8GHQ1M|d{x`*iN>_2<$kuh*ZYtdrX=^xa|P?fVm$ zq_$m18L6lGs$szYA^p+74wl+<4$>id#2$SaCjCeA_77_0Cp)!SKvi+7lMT@ug3%|S z?t3jiS$~d<9wwuc9u>nU&mK5ge}<o0~V zdAgH5J51T6&(S=mv&{{B;W{cnLxUj5F|gaKW${%qgL;)?X8Q(sUh$RkyM4mc+3 zuB9}rdVepby7Xf;U$I-hjpW})Sm)|{pI$5lDSs)*`{tLu& zsnV^-vb>DYhX~HmYiI_DWqN}rc|3XI<~Nh?4AhCf5A~bhK$dqM$8Jm>OV*!Bp6NSx zt>(@6%;dI_)Y>!2`qS8tI0wmh`;POII;$D!y^?~e{&aHf85q>mzZbui^l$bZ6R%^T z7^^uJyIgZ7ukWNQoCC?@eMk98jn$lqUAc4^8hhP4=?o21kezy`);Qz^H98B8EBZUY zlGS)Vb4gM%^Tehtn>QriO>La%PbFp+ryiMElIofncze(N-wbU#VQDS46TbiEN1gA%H5a{cB%SU;E&1DY z`qOvP>0hEg1O71j64Xy2KaDyU^kKVy!tP1-|5+38i=P{)!XH}C_#+-ZEnA5FcsvR~ zT7o*~-_q%i@V0<0&GqzabvbhBKD}YDSk{L`={EpNP%nO-PE)R#l0hJYKn8&f0vQA{ z2xJh*Ado>IgFps>3<4PhzU~MtU$*S_z#KgJ-yEozAFK#g1S-ob=a?8n4LuQ*;yi&}f08btK}R@1paF(oPcIh!Hn>$rdvI ztQSdk1kG$qrrikoCPfSFQP6V~?a?|h?q)@2Ym~t)iq41MdB?$OxzGod41P;0ZlJ&j zjY>_@N0KGub`C2P%Zd0^hy9RopqKlcoL=R|7E|++J8WD!vvN8WC~KSiJQ>ri)WD2 z?5ClwBJGPLB;tQ>SoTHO+(w4){GJgk`z*9IWI{hF8287Da=`Nk=@%hthwx{9h*d8( zl+;Hwy~Clu`w+K7wkzw;A)>FaX)g%+Wx+#AMC-eQM6aQFSS9y&px!~9(%QoKZY8=5 z_B^VHqIpz_2=G~`Q`H>5fc){o7coZ{WcQ&?n@l3SBNE>PaV-1i(4B((=ct14I$m%a z6r~ztGf)LB)Cnc31bYjK=#A5BYV9_HU5}bK1TVL6Hpr=|#gH$&0nCwe3i2ixJb-Gv z2WAh5hlu$#nCCz|jmqTD02hIMI|gyy$QN+FUO%|iU~)Gc(!y=9G@9oyRQygRkKv3x z0>yTF$Rd(guzdWQN4X+56_9Bj{4yZ#GfYED3( z6{?XB=AS^EC1x6!Q4kl2nGNPDh|h?r05cUK;5aj*7EBPtEyOGbQx9S(DwA973`;QK 
z^%*`04P{tuYvyx)4=-nO#Lj%JCD%N=Nm0S%H7F?au%kH2Bo;@>dR?+Y8;S4PjvX}e zaq0S9h**j6a%~!N%B544OPhQ8cR=yx$?DL20yhP;C!1#zpS2o|IgzAUw7pvu$#R!L zd;?Nve2HH#pfkIqlWZ*fEBJ2yn2$qmRI>`7EzY{m7S}`{_j)a-r8*f;7@wt7lJ|*| z$-CEbj_dYJ=Jnp;)pKzL2=Y6pP)GvG(!86dtn}4RAs;z%e8Sv3<}DTEC~rEi@U7=V zb>0GAXlyOz_jCZ9$f z`7S|f@H{<;IW1FyAfR%(!7U8HY6d15b4WX@faMn*6j#k!l5He*8#R(Mi4DXC_5TE;`FuXVIG-d7n#vZ*oF%eg6SRx{_JsFd?@ph{X;f`O z3KhqzmYU0%iVezK>T7Kl^Q^}7wKU%cQu>TPl7`{gA#cFE1+}0TOR1MUC;bLOSR{+V zi?wpHe(_O&;+GR)rco#Xa>`{cctr+@-zw*#(~V(ER!cHtt1MN0KUxjui>^=3Zj8c?dM3vvr~$N#sfY*_PGs^ z=S3GneA{prR%@-H9}5EsVYFA`N(X5h4J0-2m^8M2 zhW;iSmIbiLsgnizLpFd#ve*X%xhxI>Q9bg8f;UG8zy*5LK{+3Kw^_`yO1ZMuAe+0_ zz|cEAt+tm*D<;!u|vX-l+|wt0J~C)yE7n2v4Ju7nw9;Y6sXy@h14jz}9b zBZ=59%aAqq#^TXfC7CEOc}eufZPO6ziA58Pyo3_0HxgIV2AbO`m|gMaa93Ao0qcrJ z+5%%rOwYnCYPRkKivE$$_!=$ynubQV>gxaV*s=MoFoMlv5#dx&jzR3r=2%Bh0)!LeyqyadxLAaIuYJR$^P&na z0D{fsjbiRwI^Qhb(noZC$Ie4yME@v@Z?>O*$1R`YTKOUI6n{q74mOHs^z(xF^Wv%` z{`Dh`Rc{hV(R{rpKz+>`tV#6F|HE6}|w z-WQ1MeenTtA7C1CPLqHo-+T*#rzh;Wr1*gB8gZc*FDf~GLK^ez)(&nJyPw3TaLn>+ zPMiuK{Z)yu(W`I|;xoIeh^^nkh@bISY2Y>e00s1l7^ZGmye;^8#Ph7d*HeyPDq`+_ z4oLSSHwEdF+=lp!1@Rd@C7@4<147@?ehA%J;?E*a#h({y32d(fpsR`>@Ye{c5q?=i z5a-2jD3;%d%R-M~`7VpS68v77l!{Nvlf<8tr!4-Iyh!{-dCB50$*bV;sR@362yoUo zxp489{HJ&X5kFOHzQSJX74{mhpdlBvYZ||YK624w{8&4pX*){95$!__D1E41B-Zi= zZ(prdyTh$L3y*MoksRX#`}4-z1hQ4Yy~5DtT+Pz1^ioioY;kBJ-D^k}*~_^^i&uV(@X| zmAP?ng+aO-F^FzcRK(mtUHk-xh~cz=92I*E@gtlIrM<3ExY-iHE~h7g4&Z6e5^+X< zO0fbTOC6rxC49@pPXV8gCqZ;5oJ~i2&NaRi{7+o`Ebs?h<8J{!=+e_&Hs8fpfj{T+ zzYzRU7r&UsyZAcrdtCf-@a--h>{y|EX|K`$#vma+g0m*{bkioj5E8nP>tdWgND53z z@8U>LPp9;7OTt3j&7UBm+~V`$@98h`hrY!B8Tee6e|0Z2V|0mKQ#l_^MFlnGXlhUw zqTYsDO@eCFs!D3eriw(BWK>C3l?zyC`I>cgOV@EcxeA)?q1G7Pa)WKrM6jm|*3Hpwbq5c&m`%NH zp>Ql5X~U-*o9&3SMjblb)D$y!I9wNQe-4fL9g@U|f1HmtcXh-QV-91HHjAf4`NPUI zn9-S`*KH_+^dqN9)~Qnhu&U6$r{kusE%~quSpQo^DbYAXszH-KvYP|DIRSM(z*)ty zqc>cR-4N`C0*sAcVaEM6*miVBpnXmMeokQv!y8*lbeGt4;NJsTYcJIi=os4aB-CoIt$`GnU6CbX_+ixQ5;^?S% ug~Rs4ZVsi_pid); - kill_all_tracees(); - - /* Exit 
immediately for system signals (segmentation fault, - * illegal instruction, ...), otherwise exit cleanly through - * the event loop. */ - if (signum != SIGQUIT) - _exit(EXIT_FAILURE); + note(NULL, WARNING, INTERNAL, "signal %d received from process %d", signum, siginfo->si_pid); + // kill_all_tracees(current_tracee); } /** @@ -278,16 +271,16 @@ static void check_architecture(Tracee *tracee) * Wait then handle any event from any tracee. This function returns * the exit status of the last terminated program. */ -int event_loop() +int event_loop(Tracee *current_tracee1) { struct sigaction signal_action; long status; int signum; /* Kill all tracees when exiting. */ - status = atexit(kill_all_tracees); - if (status != 0) - note(NULL, WARNING, INTERNAL, "atexit() failed"); + // status = atexit(kill_all_tracees); + // if (status != 0) + // note(NULL, WARNING, INTERNAL, "atexit() failed"); /* All signals are blocked when the signal handler is called. * SIGINFO is used to know which process has signaled us and @@ -309,7 +302,7 @@ int event_loop() /* Kill all tracees on abnormal termination * signals. This ensures no process is left * untraced. */ - signal_action.sa_sigaction = kill_all_tracees2; + // signal_action.sa_sigaction = kill_all_tracees2; break; case SIGUSR1: @@ -348,7 +341,7 @@ int event_loop() pid_t pid; /* This is the only safe place to free tracees. */ - free_terminated_tracees(); + free_terminated_tracees(current_tracee1); /* Wait for the next tracee's stop. 
*/ pid = waitpid(-1, &tracee_status, __WALL); @@ -474,7 +467,7 @@ static int handle_tracee_event_kernel_4_8(Tracee *tracee, int tracee_status) default_ptrace_options); if (status < 0) { note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); - exit(EXIT_FAILUtRE); + exit(EXIT_FAILURE); } } else { diff --git a/proot/binding_proot_linux/tracee/event.h b/proot/binding_proot_linux/tracee/event.h index ff00cfb..952bf0e 100644 --- a/proot/binding_proot_linux/tracee/event.h +++ b/proot/binding_proot_linux/tracee/event.h @@ -28,7 +28,7 @@ #include "tracee/tracee.h" extern int launch_process(Tracee *tracee, char *const argv[]); -extern int event_loop(); +extern int event_loop(Tracee *current_tracee); extern int handle_tracee_event(Tracee *tracee, int tracee_status); extern bool restart_tracee(Tracee *tracee, int signal); diff --git a/proot/binding_proot_linux/tracee/tracee.c b/proot/binding_proot_linux/tracee/tracee.c index 1791186..7836bd4 100644 --- a/proot/binding_proot_linux/tracee/tracee.c +++ b/proot/binding_proot_linux/tracee/tracee.c @@ -49,9 +49,6 @@ #define __W_STOPCODE(sig) ((sig) << 8 | 0x7f) #endif -typedef LIST_HEAD(tracees, tracee) Tracees; -static Tracees tracees; - /** * Remove @zombie from its parent's list of zombies. Note: this is a * talloc destructor. @@ -99,7 +96,7 @@ static int remove_tracee(Tracee *tracee) /* This could be optimize by using a dedicated list of * children and ptracees. */ - LIST_FOREACH(relative, &tracees, link) + LIST_FOREACH(relative, tracee->RootConfig->tracees, link) { /* Its children are now orphan. */ if (relative->parent == tracee) @@ -221,8 +218,18 @@ static uint64_t next_vpid = 1; * an error occurred (ENOMEM), otherwise it returns the newly * allocated structure. 
*/ -static Tracee *new_tracee(pid_t pid) +static Tracee *new_tracee(pid_t pid, Tracee *current_tracee) { + if (current_tracee == NULL) + { + current_tracee = new_dummy_tracee(NULL); + if (current_tracee == NULL) + return NULL; + + talloc_set_destructor(current_tracee, remove_tracee); + current_tracee->RootConfig = (tracee_root *)talloc_zero(current_tracee, tracee_root); + current_tracee->RootConfig->next_vpid = 1; + } Tracee *tracee; tracee = new_dummy_tracee(NULL); @@ -236,13 +243,12 @@ static Tracee *new_tracee(pid_t pid) do { - if (((tracee)->link.le_next = (&tracees)->lh_first) != ((void *)0)) - (&tracees)->lh_first->link.le_prev = &(tracee)->link.le_next; - (&tracees)->lh_first = (tracee); - (tracee)->link.le_prev = &(&tracees)->lh_first; - } while (0) - - tracee->life_context = talloc_new(tracee); + if (((tracee)->link.le_next = (current_tracee->RootConfig->tracees)->lh_first) != ((void *)0)) + (current_tracee->RootConfig->tracees)->lh_first->link.le_prev = &(tracee)->link.le_next; + (current_tracee->RootConfig->tracees)->lh_first = (tracee); + (tracee)->link.le_prev = &(current_tracee->RootConfig->tracees)->lh_first; + } while (0); + tracee->life_context = talloc_new(tracee); return tracee; } @@ -273,7 +279,7 @@ Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, return ptracee; } - LIST_FOREACH(ptracee, &tracees, link) + LIST_FOREACH(ptracee, ptracer->RootConfig->tracees, link) { /* Discard tracees that don't have this ptracer. 
*/ if (PTRACEE.ptracer != ptracer) @@ -344,7 +350,7 @@ Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create) if (current_tracee != NULL && current_tracee->pid == pid) return (Tracee *)current_tracee; - for ((tracee) = ((&tracees)->lh_first); (tracee); (tracee) = ((tracee)->link.le_next)) + for ((tracee) = ((current_tracee->RootConfig->tracees)->lh_first); (tracee); (tracee) = ((tracee)->link.le_next)) { if (tracee->pid == pid) { @@ -356,7 +362,7 @@ Tracee *get_tracee(const Tracee *current_tracee, pid_t pid, bool create) } } - return (create ? new_tracee(pid) : NULL); + return (create ? new_tracee(pid, current_tracee) : NULL); } /** @@ -372,19 +378,19 @@ void terminate_tracee(Tracee *tracee) if (tracee->killall_on_exit) { VERBOSE(tracee, 1, "terminating all tracees on exit"); - kill_all_tracees(); + kill_all_tracees(tracee); } } /** * Free all tracees marked as terminated. */ -void free_terminated_tracees() +void free_terminated_tracees(Tracee *tracee) { Tracee *next; /* Items can't be deleted when using LIST_FOREACH. */ - next = tracees.lh_first; + next = tracee->RootConfig->tracees->lh_first; while (next != NULL) { Tracee *tracee = next; @@ -630,10 +636,10 @@ int swap_config(Tracee *tracee1, Tracee *tracee2) } /* Send the KILL signal to all tracees. */ -void kill_all_tracees() +void kill_all_tracees(Tracee *current_tracee) { Tracee *tracee; - LIST_FOREACH(tracee, &tracees, link) + LIST_FOREACH(tracee, current_tracee->RootConfig->tracees, link) kill(tracee->pid, SIGKILL); } diff --git a/proot/binding_proot_linux/tracee/tracee.h b/proot/binding_proot_linux/tracee/tracee.h index 3e71e7d..50f1a7c 100644 --- a/proot/binding_proot_linux/tracee/tracee.h +++ b/proot/binding_proot_linux/tracee/tracee.h @@ -76,8 +76,18 @@ typedef struct { bool disabled; } Heap; +typedef LIST_HEAD(tracees, tracee) Tracees; + +typedef struct tracee_root +{ + Tracees *tracees; + uint64_t next_vpid; +} tracee_root; + /* Information related to a tracee process. 
*/ typedef struct tracee { + tracee_root *RootConfig; + /********************************************************************** * Private resources * **********************************************************************/ @@ -284,8 +294,8 @@ extern bool has_ptracees(const Tracee *ptracer, pid_t pid, word_t wait_options); extern int new_child(Tracee *parent, word_t clone_flags); extern Tracee *new_dummy_tracee(TALLOC_CTX *context); extern void terminate_tracee(Tracee *tracee); -extern void free_terminated_tracees(); +extern void free_terminated_tracees(Tracee *tracee); extern int swap_config(Tracee *tracee1, Tracee *tracee2); -extern void kill_all_tracees(); +extern void kill_all_tracees(Tracee *tracee); #endif /* TRACEE_H */ diff --git a/proot/cmd/main.go b/proot/cmd/main.go deleted file mode 100644 index f1335da..0000000 --- a/proot/cmd/main.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -import ( - "fmt" - "os" - - "sirherobrine23.com.br/go-bds/exec/exec" - "sirherobrine23.com.br/go-bds/exec/proot" -) - -func main() { - cmd, _ := proot.NewProc() - - err := cmd.Start(&exec.Exec{ - Arguments: os.Args[min(1, len(os.Args)):], - Stdout: os.Stdout, - Stderr: os.Stderr, - Stdin: os.Stdin, - }) - - if err != nil { - fmt.Fprintf(os.Stderr, err.Error()) - os.Exit(-1) - return - } - - if err = cmd.Wait(); err != nil { - fmt.Fprintf(os.Stderr, err.Error()) - os.Exit(-1) - return - } -} diff --git a/proot/proot.go b/proot/proot.go index 50e8ef7..4bd82ac 100644 --- a/proot/proot.go +++ b/proot/proot.go @@ -14,14 +14,9 @@ type Proot struct { Qemu string // qemu tool to call Cmd *exec.Cmd // Golang process - EventLocked bool NoSeccomp bool Err error - last_exit_status int - Tracees []*Tracee - vpid int - doneEnd *exec.ExitError done context.Context donefFn context.CancelFunc diff --git a/proot/proot_linux.go b/proot/proot_linux.go index 7b07470..f8e9941 100644 --- a/proot/proot_linux.go +++ b/proot/proot_linux.go @@ -4,18 +4,10 @@ package proot import ( "context" - "fmt" - 
"log" "os" "os/exec" - "reflect" "runtime" - "slices" "syscall" - "time" - "unsafe" - - "golang.org/x/sys/unix" goexec "sirherobrine23.com.br/go-bds/exec/exec" ) @@ -90,832 +82,8 @@ func (proot *Proot) Start(options *goexec.Exec) error { } runtime.LockOSThread() // Lock thread to use PTRACE - proot.EventLocked = true proot.done, proot.donefFn = context.WithCancel(context.Background()) - go proot.Event() // Start background loop event + // go proot.Event() // Start background loop event return proot.Cmd.Start() } - -func mountProcessState(pid int, status unix.WaitStatus, rusage *unix.Rusage) *os.ProcessState { - newState := &os.ProcessState{} - ptr := reflect.ValueOf(newState).Elem() - ptrType := ptr.Type() - - for index := range ptr.Type().NumField() { - ptr, ptrType := ptr.Field(index), ptrType.Field(index) - switch ptrType.Name { - case "pid": - ptr.SetInt(int64(pid)) - case "status": - ptr.Set(reflect.ValueOf(syscall.WaitStatus(status))) - case "rusage": - ptr.Set(reflect.ValueOf(any(rusage).(*syscall.Rusage))) - } - } - - return newState -} - -func (proot *Proot) Event() { - var wstatus unix.WaitStatus - var rusage unix.Rusage - for { - if proot.Cmd == nil || proot.Cmd.Process == nil { - <-time.After(time.Microsecond) // wait 1ms to check - continue - } - - wpid, err := unix.Wait4(proot.Cmd.Process.Pid, &wstatus, unix.WALL, &rusage) - fmt.Println(err) - switch err { - case nil: - case syscall.Errno(10): - proot.doneEnd = &exec.ExitError{ProcessState: mountProcessState(proot.Cmd.Process.Pid, wstatus, &rusage)} - proot.donefFn() - return - case syscall.Errno(3): - unix.Kill(wpid, unix.PTRACE_CONT) - continue - default: - panic(err) - } - fmt.Printf("wpid: %d, status %08d, Exited: %5t, Signaled: %t, Stopped: %t, Continued: %t\n", - wpid, - wstatus, - wstatus.Exited(), - wstatus.Signaled(), - wstatus.Stopped(), - wstatus.Continued(), - ) - - log.Printf("Getting tracee to wpid %d\n", wpid) - tracee := proot.GetTracee(nil, wpid, true) - tracee.Running = false - - if 
tracee.AsPtracee != nil { - log.Printf("handle_ptracee_event to %d\n", wpid) - keep_stopped := proot.handle_ptracee_event(tracee, wstatus) - if keep_stopped { - continue - } - } - - log.Printf("handle_tracee_event to %d\n", wpid) - signal := proot.handle_tracee_event(tracee, wstatus) - log.Printf("restarting tracee to %d with %s\n", wpid, unix.Signal(signal)) - if signal == -255 && proot.Err != nil { - fmt.Println(proot.Err) - signal = 0 - } - proot.restart_tracee(tracee, unix.Signal(signal)) - } -} - -func (proot *Proot) GetTracee(current_tracee *Tracee, wpid int, create bool) *Tracee { - if current_tracee != nil && current_tracee.Pid == wpid { - return current_tracee - } - - for _, tracee := range proot.Tracees { - if tracee.Pid == wpid { - return tracee - } - } - - if create { - tracee := new(Tracee) - tracee.Pid = wpid - proot.vpid++ - tracee.Vpid = uint64(proot.vpid) - proot.Tracees = append(proot.Tracees, tracee) - - return tracee - } - return nil -} - -func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus) (keep_stopped bool) { - ptracer := ptracee.AsPtracee.Ptracer - var handled_by_proot_first bool - - ptracee.AsPtracee.Event4.Proot.Value = int(event) - ptracee.AsPtracee.Event4.Proot.Pending = true - - keep_stopped = true - - if event.Stopped() { - switch unix.Signal((event & 0xfff00) >> 8) { - case unix.SIGTRAP | 0x80: - if ptracee.AsPtracee.IgnoreSyscalls || ptracee.AsPtracee.IgnoreLoaderSyscalls { - return false - } else if ptracee.AsPtracee.Options&unix.PTRACE_O_TRACESYSGOOD == 0 { - // event &= ~(0x80 << 8); - // event &= ^(0x80 << 8) - handled_by_proot_first = ptracee.Status == 0 - } - - // FORK - case unix.SIGTRAP | unix.PTRACE_EVENT_FORK<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEFORK) == 0 { - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - - // VFORK - case unix.SIGTRAP | unix.PTRACE_EVENT_VFORK<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEVFORK) == 0 
{ - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - // VFORKDONE - case unix.SIGTRAP | unix.PTRACE_EVENT_VFORK_DONE<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEVFORKDONE) == 0 { - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - // CLONE - case unix.SIGTRAP | unix.PTRACE_EVENT_CLONE<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACECLONE) == 0 { - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - // EXIT - case unix.SIGTRAP | unix.PTRACE_EVENT_EXIT<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEEXIT) == 0 { - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - // EXEC - case unix.SIGTRAP | unix.PTRACE_EVENT_EXEC<<8: - if (ptracer.AsPtracee.Options & unix.PTRACE_O_TRACEEXEC) == 0 { - return false - } - ptracer.AsPtracee.TracingStarted = true - handled_by_proot_first = true - - case unix.SIGTRAP | unix.PTRACE_EVENT_SECCOMP<<8: - return false - default: - ptracee.AsPtracee.TracingStarted = true - } - } else if event.Exited() || event.Signaled() { - ptracee.AsPtracee.TracingStarted = true - keep_stopped = false - } - - if !ptracee.AsPtracee.TracingStarted { - return false - } - - if handled_by_proot_first { - ptracee.AsPtracee.Event4.Proot.Value = proot.handle_tracee_event(ptracee, unix.WaitStatus(ptracee.AsPtracee.Event4.Proot.Value)) - } - - ptracee.AsPtracee.Event4.Ptracer.Value = int(event) - ptracee.AsPtracee.Event4.Ptracer.Pending = true - - unix.Kill(ptracee.Pid, unix.SIGCHLD) - - if ptracer.AsPtracer.WaitPid == -1 || ptracer.AsPtracer.WaitPid == ptracee.Pid && - (ptracer.AsPtracer.WaitOptions&unix.WALL != 0 || (ptracer.AsPtracer.WaitOptions&unix.WCLONE != 0 && ptracee.Clone) || (ptracer.AsPtracer.WaitOptions&unix.WCLONE == 0 && !ptracee.Clone)) { - status := proot.update_wait_status(ptracer, ptracee) - if status != 0 { - // proot.poke_reg(ptracer, SYSARG_RESULT, 
(word_t) status); - } - /* Write ptracer's register cache back. */ - // proot.push_regs(ptracer); - - ptracer.AsPtracer.WaitPid = 0 - restarted := proot.restart_tracee(ptracer, 0) - if !restarted { - keep_stopped = false - } - } - - return -} - -func (proot *Proot) update_wait_status(ptracer, ptracee *Tracee) (result int) { - /* Special case: the Linux kernel reports the terminating - * event issued by a process to both its parent and its - * tracer, except when they are the same. In this case the - * Linux kernel reports the terminating event only once to the - * tracing parent ... */ - if ptracee.AsPtracee.Ptracer == ptracee.Parent && - (unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Exited() || - unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Signaled()) { - /* ... So hide this terminating event (toward its - * tracer, ie. PRoot) and make the second one appear - * (towards its parent, ie. the ptracer). This will - * ensure its exit status is collected from a kernel - * point-of-view (ie. it doesn't stay a zombie - * forever). */ - // restart_original_syscall(ptracer); - - /* Detach this ptracee from its ptracer, PRoot doesn't - * have anything else to emulate. */ - // detach_from_ptracer(ptracee); - - /* Zombies can rest in peace once the ptracer is notified. */ - // if (PTRACEE.is_zombie) - // TALLOC_FREE(ptracee); - - return 0 - } - - // address = peek_reg(ptracer, ORIGINAL, SYSARG_2); - // if (address != 0) { - // poke_int32(ptracer, address, PTRACEE.event4.ptracer.value); - // if (errno != 0) - // return -errno; - // } - - // PTRACEE.event4.ptracer.pending = false; - - /* Be careful; ptracee might get freed before its pid is returned. */ - // result = ptracee->pid; - - // /* Zombies can rest in peace once the ptracer is notified. 
*/ - // if (PTRACEE.is_zombie) { - // detach_from_ptracer(ptracee); - // TALLOC_FREE(ptracee); - // } - - // return result; - - return 0 -} - -func (proot *Proot) restart_tracee(tracee *Tracee, signal unix.Signal) bool { - // tracee->as_ptracer.wait_pid != 0 || signal == -1 - if tracee.AsPtracer.WaitPid != 0 || signal == -1 { - return false - } - - err := ptrace(tracee.RestartHow, tracee.Pid, 0, uintptr(signal)) - if err != nil { - return false - } - - tracee.RestartHow = 0 - tracee.Running = true - return true -} - -func is_kernel_4_8() bool { - var ust unix.Utsname - unix.Uname(&ust) - - var major, minor int - fmt.Sscanf(string(ust.Release[:]), "%d.%d", &major, &minor) - - return (major == 4 && minor >= 8) || major > 4 -} - -func (proot *Proot) terminate_tracee(tracee *Tracee) { - tracee.Terminated = true - if tracee.KillallOnExit { - for _, tracee := range slices.Backward(proot.Tracees) { - unix.Kill(tracee.Pid, unix.SIGKILL) - } - } -} - -func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status unix.WaitStatus) int { - /* Don't overwrite restart_how if it is explicitly set - * elsewhere, i.e in the ptrace emulation when single - * stepping. */ - if tracee.RestartHow == 0 { - /* When seccomp is enabled, all events are restarted in - * non-stop mode, but this default choice could be overwritten - * later if necessary. The check against "sysexit_pending" - * ensures PTRACE_SYSCALL (used to hit the exit stage under - * seccomp) is not cleared due to an event that would happen - * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit - * stage of execve(2). */ - if tracee.Seccomp == 2 && !tracee.SysexitPending { - tracee.RestartHow = unix.PTRACE_CONT - } else { - tracee.RestartHow = unix.PTRACE_SYSCALL - } - } - - var signal int - var seccomp_detected, seccomp_enabled bool - - /* Not a signal-stop by default. 
*/ - signal = 0 - - if tracee_status.Exited() { - proot.last_exit_status = tracee_status.ExitStatus() - proot.terminate_tracee(tracee) - } else if tracee_status.Signaled() { - // check_architecture(tracee); - proot.terminate_tracee(tracee) - } else if tracee_status.Stopped() { - /* Don't use WSTOPSIG() to extract the signal since it clears the PTRACE_EVENT_* bits. */ - signal = (int(tracee_status) & 0xfff00) >> 8 - var deliver_sigtrap bool - switch signal { - case int(unix.SIGTRAP): - default_ptrace_options := - unix.PTRACE_O_TRACESYSGOOD | - unix.PTRACE_O_TRACEFORK | - unix.PTRACE_O_TRACEVFORK | - unix.PTRACE_O_TRACEVFORKDONE | - unix.PTRACE_O_TRACEEXEC | - unix.PTRACE_O_TRACECLONE | - unix.PTRACE_O_TRACEEXIT - if deliver_sigtrap { - break - } - deliver_sigtrap = true - /* Try to enable seccomp mode 2... */ - err := ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options|unix.PTRACE_O_TRACESECCOMP)) - if err != nil { - seccomp_enabled = false - /* ... otherwise use default options only. */ - err = ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options)) - if err != nil { - proot.Err = fmt.Errorf("ptrace(PTRACE_SETOPTIONS): %s", err) - return -255 - } - } else { - if proot.NoSeccomp { - seccomp_enabled = true - } - } - fallthrough - case int(unix.SIGTRAP | unix.PTRACE_EVENT_SECCOMP<<8): - if !seccomp_detected && seccomp_enabled { - // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); - tracee.Seccomp = ENABLED - seccomp_detected = true - } - if signal == int(unix.SIGTRAP|unix.PTRACE_EVENT_SECCOMP<<8) { - signal = 0 - flags := 0 - /* Use the common ptrace flow if seccomp was - * explicitly disabled for this tracee. 
*/ - if tracee.Seccomp != ENABLED { - break - } - err := ptrace(unix.PTRACE_GETEVENTMSG, tracee.Pid, 0, uintptr(unsafe.Pointer(&flags))) - if err != nil { - break - } - if (flags & FILTER_SYSEXIT) == 0 { - tracee.RestartHow = unix.PTRACE_CONT - // translate_syscall(tracee); - if tracee.Seccomp == DISABLING { - tracee.RestartHow = unix.PTRACE_SYSCALL - } - break - } - } - fallthrough - case int(unix.SIGTRAP | 0x80): - signal = 0 - /* This tracee got signaled then freed during the - sysenter stage but the kernel reports the sysexit - stage; just discard this spurious tracee/event. */ - if tracee.Exe == "" { - tracee.RestartHow = unix.PTRACE_CONT /* SYSCALL OR CONT */ - return 0 - } - switch tracee.Seccomp { - case ENABLED: - if tracee.AsPtracee.Ptracer.Status == 0 { - /* sysenter: ensure the sysexit - * stage will be hit under seccomp. */ - tracee.RestartHow = unix.PTRACE_SYSCALL - tracee.SysexitPending = true - } else { - /* sysexit: the next sysenter - * will be notified by seccomp. */ - tracee.RestartHow = unix.PTRACE_CONT - tracee.SysexitPending = false - } - fallthrough - case DISABLED: - // translate_syscall(tracee); - /* This syscall has disabled seccomp. */ - if tracee.Seccomp == DISABLING { - tracee.RestartHow = unix.PTRACE_SYSCALL - tracee.Seccomp = DISABLED - } - case DISABLING: - /* Seccomp was disabled by the - * previous syscall, but its sysenter - * stage was already handled. 
*/ - tracee.Seccomp = DISABLED - if tracee.AsPtracee.Ptracer.Status == 0 { - tracee.Status = 1 - } - } - case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK<<8): - signal = 0 - // proot.new_child(tracee, CLONE_VFORK); - case int(unix.SIGTRAP | unix.PTRACE_EVENT_FORK<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_CLONE<<8): - signal = 0 - // proot.new_child(tracee, 0); - case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK_DONE<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_EXEC<<8), - int(unix.SIGTRAP | unix.PTRACE_EVENT_EXIT<<8): - signal = 0 - case int(unix.SIGSTOP): - /* Stop this tracee until PRoot has received - * the EVENT_*FORK|CLONE notification. */ - if tracee.Exe == "" { - tracee.Sigstop = SIGSTOP_PENDING - signal = -1 - } - /* For each tracee, the first SIGSTOP - * is only used to notify the tracer. */ - if tracee.Sigstop == SIGSTOP_IGNORED { - tracee.Sigstop = SIGSTOP_ALLOWED - signal = 0 - } - } - } - - // /* Clear the pending event, if any. */ - tracee.AsPtracee.Event4.Proot.Pending = false - - return signal -} - -func (proot *Proot) handle_tracee_event(tracee *Tracee, tracee_status unix.WaitStatus) int { - if is_kernel_4_8() { - return proot.handle_tracee_event_kernel_4_8(tracee, tracee_status) - } - - // static bool seccomp_detected = false; - // long status; - // int signal; - // /* Don't overwrite restart_how if it is explicitly set - // * elsewhere, i.e in the ptrace emulation when single - // * stepping. */ - // if (tracee->restart_how == 0) { - // /* When seccomp is enabled, all events are restarted in - // * non-stop mode, but this default choice could be overwritten - // * later if necessary. The check against "sysexit_pending" - // * ensures PTRACE_SYSCALL (used to hit the exit stage under - // * seccomp) is not cleared due to an event that would happen - // * before the exit stage, eg. PTRACE_EVENT_EXEC for the exit - // * stage of execve(2). 
*/ - // if (tracee->seccomp == ENABLED && !tracee->sysexit_pending) - // tracee->restart_how = PTRACE_CONT; - // else - // tracee->restart_how = PTRACE_SYSCALL; - // } - - // /* Not a signal-stop by default. */ - // signal = 0; - - // if (WIFEXITED(tracee_status)) { - // last_exit_status = WEXITSTATUS(tracee_status); - // VERBOSE(tracee, 1, - // "vpid %" PRIu64 ": exited with status %d", - // tracee->vpid, last_exit_status); - // terminate_tracee(tracee); - // } - // else if (WIFSIGNALED(tracee_status)) { - // check_architecture(tracee); - // VERBOSE(tracee, 1, - // "vpid %" PRIu64 ": terminated with signal %d", - // tracee->vpid, WTERMSIG(tracee_status)); - // terminate_tracee(tracee); - // } - // else if (WIFSTOPPED(tracee_status)) { - // /* Don't use WSTOPSIG() to extract the signal - // * since it clears the PTRACE_EVENT_* bits. */ - // signal = (tracee_status & 0xfff00) >> 8; - - // switch (signal) { - // static bool deliver_sigtrap = false; - - // case SIGTRAP: { - // const unsigned long default_ptrace_options = ( - // PTRACE_O_TRACESYSGOOD | - // PTRACE_O_TRACEFORK | - // PTRACE_O_TRACEVFORK | - // PTRACE_O_TRACEVFORKDONE | - // PTRACE_O_TRACEEXEC | - // PTRACE_O_TRACECLONE | - // PTRACE_O_TRACEEXIT); - - // /* Distinguish some events from others and - // * automatically trace each new process with - // * the same options. - // * - // * Note that only the first bare SIGTRAP is - // * related to the tracing loop, others SIGTRAP - // * carry tracing information because of - // * TRACE*FORK/CLONE/EXEC. */ - // if (deliver_sigtrap) - // break; /* Deliver this signal as-is. */ - - // deliver_sigtrap = true; - - // /* Try to enable seccomp mode 2... */ - // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, - // default_ptrace_options | PTRACE_O_TRACESECCOMP); - // if (status < 0) { - // /* ... otherwise use default options only. 
*/ - // status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL, - // default_ptrace_options); - // if (status < 0) { - // note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)"); - // exit(EXIT_FAILURE); - // } - // } - // } - - // /* Fall through. */ - // case SIGTRAP | 0x80: - // signal = 0; - - // /* This tracee got signaled then freed during the - // sysenter stage but the kernel reports the sysexit - // stage; just discard this spurious tracee/event. */ - // if (tracee->exe == NULL) { - // tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */ - // return 0; - // } - - // switch (tracee->seccomp) { - // case ENABLED: - // if (IS_IN_SYSENTER(tracee)) { - // /* sysenter: ensure the sysexit - // * stage will be hit under seccomp. */ - // tracee->restart_how = PTRACE_SYSCALL; - // tracee->sysexit_pending = true; - // } - // else { - // /* sysexit: the next sysenter - // * will be notified by seccomp. */ - // tracee->restart_how = PTRACE_CONT; - // tracee->sysexit_pending = false; - // } - // /* Fall through. */ - // case DISABLED: - // translate_syscall(tracee); - - // /* This syscall has disabled seccomp. */ - // if (tracee->seccomp == DISABLING) { - // tracee->restart_how = PTRACE_SYSCALL; - // tracee->seccomp = DISABLED; - // } - - // break; - - // case DISABLING: - // /* Seccomp was disabled by the - // * previous syscall, but its sysenter - // * stage was already handled. */ - // tracee->seccomp = DISABLED; - // if (IS_IN_SYSENTER(tracee)) - // tracee->status = 1; - // break; - // } - // break; - - // case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8: - // case SIGTRAP | PTRACE_EVENT_SECCOMP << 8: { - // unsigned long flags = 0; - - // signal = 0; - - // if (!seccomp_detected) { - // VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled"); - // tracee->seccomp = ENABLED; - // seccomp_detected = true; - // } - - // /* Use the common ptrace flow if seccomp was - // * explicitely disabled for this tracee. 
*/ - // if (tracee->seccomp != ENABLED) - // break; - - // status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags); - // if (status < 0) - // break; - - // /* Use the common ptrace flow when - // * sysexit has to be handled. */ - // if ((flags & FILTER_SYSEXIT) != 0) { - // tracee->restart_how = PTRACE_SYSCALL; - // break; - // } - - // /* Otherwise, handle the sysenter - // * stage right now. */ - // tracee->restart_how = PTRACE_CONT; - // translate_syscall(tracee); - - // /* This syscall has disabled seccomp, so move - // * the ptrace flow back to the common path to - // * ensure its sysexit will be handled. */ - // if (tracee->seccomp == DISABLING) - // tracee->restart_how = PTRACE_SYSCALL; - // break; - // } - - // case SIGTRAP | PTRACE_EVENT_VFORK << 8: - // signal = 0; - // (void) new_child(tracee, CLONE_VFORK); - // break; - - // case SIGTRAP | PTRACE_EVENT_FORK << 8: - // case SIGTRAP | PTRACE_EVENT_CLONE << 8: - // signal = 0; - // (void) new_child(tracee, 0); - // break; - - // case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8: - // case SIGTRAP | PTRACE_EVENT_EXEC << 8: - // case SIGTRAP | PTRACE_EVENT_EXIT << 8: - // signal = 0; - // break; - - // case SIGSTOP: - // /* Stop this tracee until PRoot has received - // * the EVENT_*FORK|CLONE notification. */ - // if (tracee->exe == NULL) { - // tracee->sigstop = SIGSTOP_PENDING; - // signal = -1; - // } - - // /* For each tracee, the first SIGSTOP - // * is only used to notify the tracer. */ - // if (tracee->sigstop == SIGSTOP_IGNORED) { - // tracee->sigstop = SIGSTOP_ALLOWED; - // signal = 0; - // } - // break; - - // default: - // /* Deliver this signal as-is. */ - // break; - // } - // } - - // /* Clear the pending event, if any. 
*/ - // tracee->as_ptracee.event4.proot.pending = false; - // return signal; - - return 0 -} - -func ptrace(request int, pid int, addr uintptr, data uintptr) (err error) { - _, _, e1 := unix.Syscall6(unix.SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -func (proot *Proot) getPtraceRegs(pid int) (*unix.PtraceRegs, error) { - var regs unix.PtraceRegs - if err := unix.PtraceGetRegs(pid, ®s); err != nil { - return nil, err - } - return ®s, nil -} - -type Tracee struct { - // Private - Pid int - Vpid uint64 - Running bool - Terminated bool - KillallOnExit bool - Parent *Tracee - Clone bool - - AsPtracer struct { - NbPtracees int - WaitPid int - WaitOptions uint64 - WaitsIn int // enum: 0 = DOESNT_WAIT, 1 = WAITS_IN_KERNEL, 2 = WAITS_IN_PROOT - } - - AsPtracee *struct { - Ptracer *Tracee - Event4 struct { - Proot struct { - Value int - Pending bool - } - Ptracer struct { - Value int - Pending bool - } - } - TracingStarted bool - IgnoreLoaderSyscalls bool - IgnoreSyscalls bool - Options uint64 - IsZombie bool - } - - Status int - RestartHow int // syscall.PTRACE_CONT etc. 
- Regs [NB_REG_VERSION]syscall.PtraceRegs - RegsWereChanged bool - RestoreOriginalRegs bool - - Sigstop int // enum: 0 = IGNORED, 1 = ALLOWED, 2 = PENDING - - GlueType uint32 // mode_t is often uint32 - Reconf struct { - Tracee *Tracee - Paths string - } - - Chain struct { - Syscalls *ChainedSyscalls - ForceFinalResult bool - FinalResult uint64 - } - - LoadInfo *LoadInfo - MixedMode bool - - // Inherited - Verbose int - Seccomp int - SysexitPending bool - - // Shared or private - FS *FileSystemNameSpace - Heap *Heap - - // Shared until execve - Exe string - NewExe string - - // (Re)configuration - Qemu []string - Glue string - Extensions *Extensions - - // Read-only - HostLdsoPaths string - GuestLdsoPaths string - ToolName string -} - -type Heap struct { - Base uint64 // assuming word_t = uint64 - Size uint64 // size_t - Disabled bool -} - -type FileSystemNameSpace struct { - Bindings struct { - Pending *Bindings - Guest *Bindings - Host *Bindings - } - Cwd string -} - -type RegVersion int - -const ( - CURRENT RegVersion = iota - ORIGINAL - MODIFIED - NB_REG_VERSION -) - -type Mapping struct { - Addr uint64 - Length uint64 - ClearLength uint64 - Prot uint64 - Flags uint64 - Fd uint64 - Offset uint64 -} - -type LoadInfo struct { - HostPath string - UserPath string - RawPath string - Mappings []*Mapping - ElfHeader ElfHeader - NeedsExecutableStack bool - Interp *LoadInfo -} - -type Bindings struct{} -type Extensions struct{} -type ChainedSyscalls struct{} -- 2.51.0 From 94176dc6bf05efc0756d2ff262a21a07fcfd4960 Mon Sep 17 00:00:00 2001 From: Matheus Sampaio Queiroga Date: Sun, 3 Aug 2025 16:16:19 -0300 Subject: [PATCH 5/5] Sync Signed-off-by: Matheus Sampaio Queiroga --- proot/binding_proot_linux/path/binding.h | 8 +- proot/binding_proot_linux/tracee/reg.c | 6 +- proot/binding_proot_linux/tracee/tracee.h | 2 +- proot/proot/execve/elf.go | 233 +++++++++++++ proot/proot/execve/execve.go | 14 + proot/proot/path/binding.go | 31 ++ proot/proot/path/glue.go | 3 + 
proot/proot/path/path.go | 76 +++++ proot/proot/path/proc.go | 16 + proot/proot/syscall/syscall.go | 40 +++ proot/proot/tracee/abi.go | 19 ++ proot/proot/tracee/abi_amd64.go | 28 ++ proot/proot/tracee/abi_other.go | 6 + proot/proot/tracee/event.go | 317 ++++++++++++++++++ proot/proot/tracee/mem.go | 1 + proot/proot/tracee/reg.go | 79 +++++ proot/proot/tracee/reg_386.go | 66 ++++ proot/proot/tracee/reg_amd64.go | 119 +++++++ proot/proot/tracee/reg_arm.go | 66 ++++ proot/proot/tracee/reg_arm64.go | 67 ++++ proot/proot/tracee/tracee.go | 379 ++++++++++++++++++++++ 21 files changed, 1567 insertions(+), 9 deletions(-) create mode 100644 proot/proot/execve/elf.go create mode 100644 proot/proot/execve/execve.go create mode 100644 proot/proot/path/binding.go create mode 100644 proot/proot/path/glue.go create mode 100644 proot/proot/path/path.go create mode 100644 proot/proot/path/proc.go create mode 100644 proot/proot/syscall/syscall.go create mode 100644 proot/proot/tracee/abi.go create mode 100644 proot/proot/tracee/abi_amd64.go create mode 100644 proot/proot/tracee/abi_other.go create mode 100644 proot/proot/tracee/event.go create mode 100644 proot/proot/tracee/mem.go create mode 100644 proot/proot/tracee/reg.go create mode 100644 proot/proot/tracee/reg_386.go create mode 100644 proot/proot/tracee/reg_amd64.go create mode 100644 proot/proot/tracee/reg_arm.go create mode 100644 proot/proot/tracee/reg_arm64.go create mode 100644 proot/proot/tracee/tracee.go diff --git a/proot/binding_proot_linux/path/binding.h b/proot/binding_proot_linux/path/binding.h index b7f8d46..6939673 100644 --- a/proot/binding_proot_linux/path/binding.h +++ b/proot/binding_proot_linux/path/binding.h @@ -37,13 +37,13 @@ typedef struct binding { bool must_exist; struct { - CIRCLEQ_ENTRY(binding) pending; - CIRCLEQ_ENTRY(binding) guest; - CIRCLEQ_ENTRY(binding) host; + struct { struct binding *cqe_next; struct binding *cqe_prev; } pending; + struct { struct binding *cqe_next; struct binding 
*cqe_prev; } guest; + struct { struct binding *cqe_next; struct binding *cqe_prev; } host; } link; } Binding; -typedef CIRCLEQ_HEAD(bindings, binding) Bindings; +typedef struct bindings { struct binding *cqh_first; struct binding *cqh_last; } Bindings; extern Binding *insort_binding3(const Tracee *tracee, const TALLOC_CTX *context, const char host_path[PATH_MAX], const char guest_path[PATH_MAX]); diff --git a/proot/binding_proot_linux/tracee/reg.c b/proot/binding_proot_linux/tracee/reg.c index a0c1784..6011b83 100644 --- a/proot/binding_proot_linux/tracee/reg.c +++ b/proot/binding_proot_linux/tracee/reg.c @@ -48,8 +48,7 @@ * Compute the offset of the register @reg_name in the USER area. */ #define USER_REGS_OFFSET(reg_name) \ - (offsetof(struct user, regs) \ - + offsetof(struct user_regs_struct, reg_name)) + (offsetof(struct user, regs) + offsetof(struct user_regs_struct, reg_name)) #define REG(tracee, version, index) \ (*(word_t*) (((uint8_t *) &tracee->_regs[version]) + reg_offset[index])) @@ -182,8 +181,7 @@ word_t peek_reg(const Tracee *tracee, RegVersion version, Reg reg) result = REG(tracee, version, reg); - /* Use only the 32 least significant bits (LSB) when running - * 32-bit processes on a 64-bit kernel. */ + /* Use only the 32 least significant bits (LSB) when running 32-bit processes on a 64-bit kernel. */ if (is_32on64_mode(tracee)) result &= 0xFFFFFFFF; diff --git a/proot/binding_proot_linux/tracee/tracee.h b/proot/binding_proot_linux/tracee/tracee.h index 50f1a7c..813f2cc 100644 --- a/proot/binding_proot_linux/tracee/tracee.h +++ b/proot/binding_proot_linux/tracee/tracee.h @@ -93,7 +93,7 @@ typedef struct tracee { **********************************************************************/ /* Link for the list of all tracees. */ - LIST_ENTRY(tracee) link; + struct { struct tracee *le_next; struct tracee **le_prev; } link; /* Process identifier. 
*/ pid_t pid; diff --git a/proot/proot/execve/elf.go b/proot/proot/execve/elf.go new file mode 100644 index 0000000..6eb5720 --- /dev/null +++ b/proot/proot/execve/elf.go @@ -0,0 +1,233 @@ +package execve + +/* +#define EI_NIDENT 16 + +typedef struct { + unsigned char e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} ElfHeader32; + +typedef struct { + unsigned char e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} ElfHeader64; + +typedef union { + ElfHeader32 class32; + ElfHeader64 class64; +} ElfHeader; + +typedef struct { + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} ProgramHeader32; + +typedef struct { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} ProgramHeader64; + +typedef union { + ProgramHeader32 class32; + ProgramHeader64 class64; +} ProgramHeader; + +// Object type: +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 + +// Segment flags: +#define PF_X 1 +#define PF_W 2 +#define PF_R 4 + +typedef enum { + PT_LOAD = 1, + PT_DYNAMIC = 2, + PT_INTERP = 3, + PT_GNU_STACK = 0x6474e551, +} SegmentType; + +typedef struct { + int32_t d_tag; + uint32_t d_val; +} DynamicEntry32; + +typedef struct { + int64_t d_tag; + uint64_t d_val; +} DynamicEntry64; + +typedef union { + DynamicEntry32 class32; + DynamicEntry64 class64; +} 
DynamicEntry; + +typedef enum { + DT_STRTAB = 5, + DT_RPATH = 15, + DT_RUNPATH = 29 +} DynamicType; + +// The following macros are also compatible with ELF 64-bit. +#define ELF_IDENT(header, index) (header).class32.e_ident[(index)] +#define ELF_CLASS(header) ELF_IDENT(header, 4) +#define IS_CLASS32(header) (ELF_CLASS(header) == 1) +#define IS_CLASS64(header) (ELF_CLASS(header) == 2) + +// Helper to access a @field of the structure ElfHeaderXX. +#define ELF_FIELD(header, field) \ + (IS_CLASS64(header) \ + ? (header).class64. e_ ## field \ + : (header).class32. e_ ## field) + +// Helper to access a @field of the structure ProgramHeaderXX +#define PROGRAM_FIELD(ehdr, phdr, field) \ + (IS_CLASS64(ehdr) \ + ? (phdr).class64. p_ ## field \ + : (phdr).class32. p_ ## field) + +// Helper to access a @field of the structure DynamicEntryXX +#define DYNAMIC_FIELD(ehdr, dynent, field) \ + (IS_CLASS64(ehdr) \ + ? (dynent).class64. d_ ## field \ + : (dynent).class32. d_ ## field) + +#define KNOWN_PHENTSIZE(header, size) \ + ( (IS_CLASS32(header) && (size) == sizeof(ProgramHeader32)) \ + || (IS_CLASS64(header) && (size) == sizeof(ProgramHeader64))) + +#define IS_POSITION_INDENPENDANT(elf_header) \ + (ELF_FIELD((elf_header), type) == ET_DYN) + +*/ + +const EI_NIDENT = 16 + +type ElfHeader32 struct { + Ident [EI_NIDENT]byte + Type uint16 + Machine uint16 + Version uint32 + Entry uint32 + Phoff uint32 + Shoff uint32 + Flags uint32 + Ehsize uint16 + Phentsize uint16 + Phnum uint16 + Shentsize uint16 + Shnum uint16 + Shstrndx uint16 +} + +type ElfHeader64 struct { + Ident [EI_NIDENT]byte + Type uint16 + Machine uint16 + Version uint32 + Entry uint64 + Phoff uint64 + Shoff uint64 + Flags uint32 + Ehsize uint16 + Phentsize uint16 + Phnum uint16 + Shentsize uint16 + Shnum uint16 + Shstrndx uint16 +} + +type ElfHeader struct { + class32 ElfHeader32 + class64 ElfHeader64 +} + +type ProgramHeader32 struct { + Type uint32 + Offset uint32 + Vaddr uint32 + Paddr uint32 + Filesz uint32 + Memsz 
uint32 + Flags uint32 + Align uint32 +} + +type ProgramHeader64 struct { + Type uint32 + Flags uint32 + Offset uint64 + Vaddr uint64 + Paddr uint64 + Filesz uint64 + Memsz uint64 + Align uint64 +} + +type ProgramHeader struct { + class32 ProgramHeader32 + class64 ProgramHeader64 +} + +type DynamicEntry32 struct { + Tag int32 + Val uint32 +} + +type DynamicEntry64 struct { + Tag int64 + Val uint64 +} + +type DynamicEntry struct { + class32 DynamicEntry32 + class64 DynamicEntry64 +} + +type DynamicType int32 + +const ( + DT_STRTAB DynamicType = 5 + DT_RPATH DynamicType = 15 + DT_RUNPATH DynamicType = 29 +) \ No newline at end of file diff --git a/proot/proot/execve/execve.go b/proot/proot/execve/execve.go new file mode 100644 index 0000000..39805a4 --- /dev/null +++ b/proot/proot/execve/execve.go @@ -0,0 +1,14 @@ +package execve + +type Mapping struct { + Addr, Lenght, ClearLength, Prot, Flags, FD, Offset uint64 +} + +type LoadInfo struct { + HostPath, UserPath, RawPath string + Mappings *Mapping + ElfHeader *ElfHeader + needsExecutableStack bool + + Interp *LoadInfo +} diff --git a/proot/proot/path/binding.go b/proot/proot/path/binding.go new file mode 100644 index 0000000..4cff1a9 --- /dev/null +++ b/proot/proot/path/binding.go @@ -0,0 +1,31 @@ +package path + +type LinkBinding struct { + Next, Prev *Binding +} + +type Binding struct { + Host, Guest Path + + NeedSubstitution, MustExist bool + + Link struct { + Pending, Guest, Host LinkBinding + } +} + +// struct bindings { struct binding *cqh_first; struct binding *cqh_last; } Bindings +type Bindings struct { + First, Last *Binding +} + +/* +extern Binding *insort_binding3(const Tracee *tracee, const TALLOC_CTX *context, const char host_path[PATH_MAX], const char guest_path[PATH_MAX]); +extern Binding *new_binding(Tracee *tracee, const char *host, const char *guest, bool must_exist); +extern int initialize_bindings(Tracee *tracee); +extern const char *get_path_binding(const Tracee* tracee, Side side, const char 
path[PATH_MAX]); +extern Binding *get_binding(const Tracee *tracee, Side side, const char path[PATH_MAX]); +extern const char *get_root(const Tracee* tracee); +extern int substitute_binding(const Tracee* tracee, Side side, char path[PATH_MAX]); +extern void remove_binding_from_all_lists(const Tracee *tracee, Binding *binding); +*/ \ No newline at end of file diff --git a/proot/proot/path/glue.go b/proot/proot/path/glue.go new file mode 100644 index 0000000..52b1a2d --- /dev/null +++ b/proot/proot/path/glue.go @@ -0,0 +1,3 @@ +package path + +// extern mode_t build_glue(Tracee *tracee, const char *guest_path, char host_path[PATH_MAX], Finality finality); \ No newline at end of file diff --git a/proot/proot/path/path.go b/proot/proot/path/path.go new file mode 100644 index 0000000..f646994 --- /dev/null +++ b/proot/proot/path/path.go @@ -0,0 +1,76 @@ +package path + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +type Side int +type Type int +type Finality int +type Comparison int + +const ( + Guest Side = iota + Host + + // Used for bindings as specified by the user but not canonicalized yet (new_binding, initialize_binding). + Pedding +) + +// Type type +const ( + Regular Type = iota + Symlink +) + +// Path ending type. +const ( + NOT_FINAL Finality = iota + FINAL_NORMAL + FINAL_SLASH + FINAL_DOT +) + +// Comparison between two paths. 
+const ( + PATHS_ARE_EQUAL Comparison = iota + PATH1_IS_PREFIX + PATH2_IS_PREFIX + PATHS_ARE_NOT_COMPARABLE +) + +type Path struct { + Path [syscall.PathMax]byte + Length int + Side Side +} + +/* +extern int which(Tracee *tracee, const char *paths, char host_path[PATH_MAX], const char *command); +extern int realpath2(Tracee *tracee, char host_path[PATH_MAX], const char *path, bool deref_final); +extern int getcwd2(Tracee *tracee, char guest_path[PATH_MAX]); +extern void chop_finality(char *path); + +extern int translate_path(Tracee *tracee, char host_path[PATH_MAX], int dir_fd, const char *guest_path, bool deref_final); + +extern int detranslate_path(Tracee *tracee, char path[PATH_MAX], const char t_referrer[PATH_MAX]); +extern bool belongs_to_guestfs(const Tracee *tracee, const char *path); + +extern int join_paths(int number_paths, char result[PATH_MAX], ...); +extern int list_open_fd(const Tracee *tracee); + +extern Comparison compare_paths(const char *path1, const char *path2); +extern Comparison compare_paths2(const char *path1, size_t length1, const char *path2, size_t length2); + +extern size_t substitute_path_prefix(char path[PATH_MAX], size_t old_prefix_length, const char *new_prefix, size_t new_prefix_length); + +extern int readlink_proc_pid_fd(pid_t pid, int fd, char path[PATH_MAX]); +*/ + +// #define AT_FD(dirfd, path) ((dirfd) != AT_FDCWD && ((path) != NULL && (path)[0] != '/')) +/* Check if path interpretable relatively to dirfd, see openat(2) for details. */ +func IsAtFD(dirfd int, path string) bool { + return dirfd != unix.AT_FDCWD && path[0] != '/' +} diff --git a/proot/proot/path/proc.go b/proot/proot/path/proc.go new file mode 100644 index 0000000..e538a3f --- /dev/null +++ b/proot/proot/path/proc.go @@ -0,0 +1,16 @@ +package path + +type Action int + +// Action to do after a call to readlink_proc(). +const ( + DEFAULT Action = iota // Nothing special to do, treat it as a regular link. 
+ CANONICALIZE // The symlink was dereferenced, now canonicalize it. + DONT_CANONICALIZE // The symlink shouldn't be dereferenced nor canonicalized. +) + +/* +extern Action readlink_proc(const Tracee *tracee, char result[PATH_MAX], const char path[PATH_MAX], const char component[NAME_MAX], Comparison comparison); + +extern ssize_t readlink_proc2(const Tracee *tracee, char result[PATH_MAX], const char path[PATH_MAX]); +*/ \ No newline at end of file diff --git a/proot/proot/syscall/syscall.go b/proot/proot/syscall/syscall.go new file mode 100644 index 0000000..e960020 --- /dev/null +++ b/proot/proot/syscall/syscall.go @@ -0,0 +1,40 @@ +package syscall + +import ( + "syscall" + + "golang.org/x/sys/unix" + origin_tracee "sirherobrine23.com.br/go-bds/exec/proot/proot/tracee" +) + +type Tracee origin_tracee.Tracee + +func (tracee *Tracee) TranslateSyscall() { + isEnterStage := tracee.Status == 0 + if tracee.Exe == "" { + panic("Empty exe") + } + + if err := (*origin_tracee.Tracee)(tracee).FetchRegs(); err != nil { + return + } + + var status error + if isEnterStage { + if tracee.Chain.Syscalls == nil { + (*origin_tracee.Tracee)(tracee).SaveCurrentRegs(origin_tracee.ORIGINAL) + // status = tracee.TranslateSyscallEnter() + (*origin_tracee.Tracee)(tracee).SaveCurrentRegs(origin_tracee.MODIFIED) + } else { + // notify_extensions + // status = (*origin_tracee.Tracee)(tracee).NotifyExtensions() + tracee.RestartHow = unix.PTRACE_SYSCALL + } + + if status != nil { + // tracee.SetSysnum(origin_tracee.PR_void) + (*origin_tracee.Tracee)(tracee).PokeReg(origin_tracee.CURRENT, origin_tracee.SYSARG_RESULT, uint64(status.(syscall.Errno))) + tracee.Status = int(status.(syscall.Errno)) + } + } +} diff --git a/proot/proot/tracee/abi.go b/proot/proot/tracee/abi.go new file mode 100644 index 0000000..dd0deda --- /dev/null +++ b/proot/proot/tracee/abi.go @@ -0,0 +1,19 @@ +package tracee + +import "unsafe" + +type Abi uint8 + +const ( + ABI_DEFAULT Abi = iota + ABI_2 // x86_32 on x86_64. 
+ ABI_3 // x32 on x86_64. + NB_MAX_ABIS +) + +func (tracee *Tracee) SizeofWord() int { + if tracee.Is32On64() { + return int(unsafe.Sizeof(uint64(0))) / 2 + } + return int(unsafe.Sizeof(uint64(0))) +} diff --git a/proot/proot/tracee/abi_amd64.go b/proot/proot/tracee/abi_amd64.go new file mode 100644 index 0000000..9c03ee6 --- /dev/null +++ b/proot/proot/tracee/abi_amd64.go @@ -0,0 +1,28 @@ +//go:build amd64 && (linux || android) + +package tracee + +func (tracee *Tracee) GetAbi() Abi { + switch tracee.Regs[ORIGINAL].Cs { + case 0x23: + return ABI_2 + case 0x33: + if tracee.Regs[ORIGINAL].Ds == 0x2b { + return ABI_3 + } + } + return ABI_DEFAULT +} + +func (tracee *Tracee) Is32On64() bool { + switch tracee.Regs[ORIGINAL].Cs { + case 0x23: + return tracee.Regs[ORIGINAL].Ds == 0x2b + case 0x33: + if tracee.Regs[ORIGINAL].Ds == 0x2b { + return true + } + } + + return false +} diff --git a/proot/proot/tracee/abi_other.go b/proot/proot/tracee/abi_other.go new file mode 100644 index 0000000..3f87347 --- /dev/null +++ b/proot/proot/tracee/abi_other.go @@ -0,0 +1,6 @@ +//go:build !amd64 && (linux || android) + +package tracee + +func (tracee *Tracee) GetAbi() Abi { return ABI_DEFAULT } +func (tracee *Tracee) Is32On64() bool { return false } diff --git a/proot/proot/tracee/event.go b/proot/proot/tracee/event.go new file mode 100644 index 0000000..2112000 --- /dev/null +++ b/proot/proot/tracee/event.go @@ -0,0 +1,317 @@ +package tracee + +import ( + "fmt" + "os/exec" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +func ptrace(request int, pid int, addr uintptr, data uintptr) (err error) { + _, _, e1 := unix.Syscall6(unix.SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = syscall.Errno(e1) + } + return +} + +func (tracee *Tracee) RestartTracee(signal syscall.Signal) bool { + if tracee.AsPtracer.WaitPID != 0 || signal == -1 { + return false + } else if err := ptrace(tracee.RestartHow, tracee.PID, 0, uintptr(signal)); 
err != nil { + return false + } + + tracee.RestartHow = 0 + tracee.Running = true + return true +} + +func (tracee *Tracee) LaunchProces(cmd *exec.Cmd) error { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Ptrace = true + + if err := cmd.Start(); err != nil { + return err + } + + if !tracee.RootConfig.NoSeccmp { + // tracee.EnableSyscallFiltering() + } + + tracee.PID = cmd.Process.Pid + return nil +} + +func (tracee *Tracee) EventLoop() error { return nil } + +func isKernel4_8() bool { + var uts unix.Utsname + if err := unix.Uname(&uts); err != nil { + return false + } + var major, minor int + fmt.Sscanf(string(uts.Release[:]), "%d.%d", &major, &minor) + return major == 4 && minor >= 8 || major > 4 +} + +const PTRACE_EVENT_SECCOMP2 = unix.PTRACE_EVENT_SECCOMP + 1 +const defaultPtraceOption = unix.PTRACE_O_TRACESYSGOOD | + unix.PTRACE_O_TRACEFORK | + unix.PTRACE_O_TRACEVFORK | + unix.PTRACE_O_TRACEVFORKDONE | + unix.PTRACE_O_TRACEEXEC | + unix.PTRACE_O_TRACECLONE | + unix.PTRACE_O_TRACEEXIT + +func (tracee *Tracee) HandleTraceeEvent4_8(traceeStatus unix.WaitStatus) error { + if tracee.RestartHow == 0 { + if tracee.Seccomp == Enabled && !tracee.SysexitPending { + tracee.RestartHow = unix.PTRACE_CONT + } else { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + } + + var seccomp_detected, seccomp_enabled bool + var signal syscall.Errno + + if traceeStatus.Exited() { + tracee.RootConfig.LastExitStatus = traceeStatus.ExitStatus() + tracee.TerminateTracee() + return nil + } else if traceeStatus.Signaled() { + // check_architecture(tracee); + tracee.TerminateTracee() + } else if traceeStatus.Stopped() { + deliverSigtrap := false + signal = syscall.Errno(traceeStatus&0xfff00) >> 8 + // static bool deliver_sigtrap = false; + fall_t: + switch signal { + case syscall.Errno(unix.SIGTRAP): + if deliverSigtrap { + break + } + deliverSigtrap = true + + err := ptrace(unix.PTRACE_SETOPTIONS, tracee.PID, 0, 
defaultPtraceOption|unix.PTRACE_O_TRACESECCOMP) + if err == nil { + if !tracee.RootConfig.NoSeccmp { + seccomp_enabled = true + } + } else { + seccomp_enabled = false + if err = ptrace(unix.PTRACE_SETOPTIONS, tracee.PID, 0, defaultPtraceOption); err != nil { + return err + } + } + goto fall_t + case syscall.Errno(unix.SIGTRAP) | PTRACE_EVENT_SECCOMP2<<8, + syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_SECCOMP<<8: + if !seccomp_detected && seccomp_enabled { + tracee.Seccomp = Enabled + seccomp_detected = true + } + + if signal == (syscall.Errno(unix.SIGTRAP)|PTRACE_EVENT_SECCOMP2<<8) || signal == (syscall.Errno(unix.SIGTRAP)|unix.PTRACE_EVENT_SECCOMP<<8) { + flags := 0 + signal = 0 + if tracee.Seccomp != Enabled { + break + } + + err := ptrace(unix.PTRACE_GETEVENTMSG, tracee.PID, 0, uintptr(unsafe.Pointer(&flags))) + if err != nil { + break + } else if flags&0x1 == 0 { + tracee.RestartHow = unix.PTRACE_CONT + tracee.TranslateSyscall() + if tracee.Seccomp == Disabling { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + break + } + } + goto fall_t + case syscall.Errno(unix.SIGTRAP) | 0x80: + signal = 0 + if tracee.Exe == "" { + tracee.RestartHow = unix.PTRACE_CONT + return nil + } + + switch tracee.Seccomp { + case Enabled: + if tracee.Status == 0 { + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.SysexitPending = true + } else { + tracee.RestartHow = unix.PTRACE_CONT + tracee.SysexitPending = false + } + fallthrough + case Disable: + // tracee.TranslateSyscall() + if tracee.Seccomp == Disabling { + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.Seccomp = Disable + } + case Disabling: + tracee.Seccomp = Disable + if tracee.Status == 0 { + tracee.Status = 1 + } + } + case syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_VFORK<<8: + signal = 0 + tracee.NewChild(unix.PTRACE_EVENT_VFORK) + case syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_FORK<<8, + syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_CLONE<<8: + signal = 0 + tracee.NewChild(0) + case 
syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_VFORK_DONE<<8, + syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_EXEC<<8, + syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_EXIT<<8: + signal = 0 + case syscall.Errno(unix.SIGSTOP): + if tracee.Exe == "" { + tracee.Sigstop = SIGSTOP_PENDING + // signal = -1 + signal = 0 + } + if tracee.Sigstop == SIGSTOP_IGNORED { + tracee.Sigstop = SIGSTOP_ALLOWED + signal = 0 + } + } + } + + tracee.AsPtracee.Event4.Proot.Pending = true + + return signal +} + +func (tracee *Tracee) HandleTraceeEvent(traceeStatus unix.WaitStatus) error { + if isKernel4_8() { + return tracee.HandleTraceeEvent4_8(traceeStatus) + } + + if tracee.RestartHow == 0 { + if tracee.Seccomp == Enabled && !tracee.SysexitPending { + tracee.RestartHow = unix.PTRACE_CONT + } else { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + } + + seccomp_dected := false + + var signal error + if traceeStatus.Exited() { + tracee.RootConfig.LastExitStatus = traceeStatus.ExitStatus() + tracee.TerminateTracee() + return nil + } else if traceeStatus.Signaled() { + // check_architecture(tracee); + tracee.TerminateTracee() + } else if traceeStatus.Stopped() { + deliverSigtrap := false + signal = syscall.Errno(traceeStatus&0xfff00) >> 8 + + godo: + switch signal { + case syscall.Errno(unix.SIGTRAP): + if deliverSigtrap { + break + } + deliverSigtrap = true + + status := ptrace(unix.PTRACE_SETOPTIONS, tracee.PID, 0, defaultPtraceOption|unix.PTRACE_O_TRACESECCOMP) + if status != nil { + status = ptrace(unix.PTRACE_SETOPTIONS, tracee.PID, 0, defaultPtraceOption) + if status != nil { + return status + } + } + goto godo + case syscall.Errno(unix.SIGTRAP) | 0x80: + signal = unix.Errno(0) + if tracee.Exe == "" { + tracee.RestartHow = unix.PTRACE_CONT + return nil + } + + switch tracee.Seccomp { + case Enabled: + if tracee.Status == 0 { + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.SysexitPending = true + } else { + tracee.RestartHow = unix.PTRACE_CONT + tracee.SysexitPending = false + } 
+ fallthrough + case Disable: + tracee.TranslateSyscall() + if tracee.Seccomp == Disabling { + tracee.RestartHow = unix.PTRACE_SYSCALL + tracee.Seccomp = Disable + } + case Disabling: + tracee.Seccomp = Disable + if tracee.Status == 0 { + tracee.Status = 1 + } + } + case syscall.Errno(unix.SIGTRAP) | PTRACE_EVENT_SECCOMP2<<8, + syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_SECCOMP<<8: + flags := 0 + signal = unix.Errno(0) + + if !seccomp_dected { + tracee.Seccomp = Enabled + seccomp_dected = true + } + if tracee.Seccomp != Enabled { + break + } + err := ptrace(unix.PTRACE_GETEVENTMSG, tracee.PID, 0, uintptr(unsafe.Pointer(&flags))) + if err != nil { + break + } else if flags&0x1 != 0 { + tracee.RestartHow = unix.PTRACE_SYSCALL + break + } + tracee.RestartHow = unix.PTRACE_CONT + tracee.TranslateSyscall() + if tracee.Seccomp == Disabling { + tracee.RestartHow = unix.PTRACE_SYSCALL + } + case syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_VFORK<<8: + signal = unix.Errno(0) + tracee.NewChild(unix.PTRACE_EVENT_VFORK) + case syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_FORK<<8, syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_CLONE<<8: + signal = unix.Errno(0) + tracee.NewChild(0) + case syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_VFORK_DONE<<8, syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_EXEC<<8, syscall.Errno(unix.SIGTRAP) | unix.PTRACE_EVENT_EXIT<<8: + signal = unix.Errno(0) + case syscall.Errno(unix.SIGSTOP): + if tracee.Exe == "" { + tracee.Sigstop = SIGSTOP_PENDING + signal = unix.Errno(0) + } + if tracee.Sigstop == SIGSTOP_IGNORED { + tracee.Sigstop = SIGSTOP_ALLOWED + signal = unix.Errno(0) + } + } + } + + tracee.AsPtracee.Event4.Proot.Pending = false + return signal +} diff --git a/proot/proot/tracee/mem.go b/proot/proot/tracee/mem.go new file mode 100644 index 0000000..786d4db --- /dev/null +++ b/proot/proot/tracee/mem.go @@ -0,0 +1 @@ +package tracee \ No newline at end of file diff --git a/proot/proot/tracee/reg.go b/proot/proot/tracee/reg.go 
new file mode 100644 index 0000000..1cf6ec8 --- /dev/null +++ b/proot/proot/tracee/reg.go @@ -0,0 +1,79 @@ +package tracee + +import "golang.org/x/sys/unix" + +type RegVersion int + +const ( + CURRENT RegVersion = iota + ORIGINAL + MODIFIED + NB_REG_VERSION +) + +type Reg int + +const ( + SYSARG_NUM Reg = iota + SYSARG_1 + SYSARG_2 + SYSARG_3 + SYSARG_4 + SYSARG_5 + SYSARG_6 + SYSARG_RESULT + STACK_POINTER + INSTR_POINTER + RTLD_FINI + STATE_FLAGS + USERARG_1 +) + +var __user = &unix.PtraceRegs{} + +func (tracee *Tracee) PeekReg(Version RegVersion, Reg Reg) (result uint64) { + if Version < NB_REG_VERSION { + panic("invalid version") + } + + // result = REG(tracee, version, reg); + result = tracee.Reg(Version, Reg) + + // Use only the 32 least significant bits (LSB) when running 32-bit processes on a 64-bit kernel. + if tracee.Is32On64() { + result &= 0xFFFFFFFF + } + + return +} + +// Save the @tracee's current register bank into the @version register bank. +func (tracee *Tracee) SaveCurrentRegs(Ver RegVersion) { + if Ver == ORIGINAL { + tracee.RegsWereChanged = false + } + tracee.Regs[Ver] = tracee.Regs[CURRENT] +} + +func (tracee *Tracee) FetchRegs() error { + return unix.PtraceGetRegs(tracee.PID, &tracee.Regs[CURRENT]) +} + +func (tracee *Tracee) PushRegs() error { + if tracee.RegsWereChanged { + if tracee.RestoreOrigianlRegs { + tracee.Restore(SYSARG_NUM) + tracee.Restore(SYSARG_1) + tracee.Restore(SYSARG_2) + tracee.Restore(SYSARG_3) + tracee.Restore(SYSARG_4) + tracee.Restore(SYSARG_5) + tracee.Restore(SYSARG_6) + tracee.Restore(STACK_POINTER) + } + + return unix.PtraceSetRegs(tracee.PID, &tracee.Regs[CURRENT]) + } + + return nil +} diff --git a/proot/proot/tracee/reg_386.go b/proot/proot/tracee/reg_386.go new file mode 100644 index 0000000..d8059a3 --- /dev/null +++ b/proot/proot/tracee/reg_386.go @@ -0,0 +1,66 @@ +//go:build 386 && (linux || android) + +package tracee + +import ( + "unsafe" +) + +/** + * Compute the offset of the register @reg_name in the 
USER area.
+ */
+// #define USER_REGS_OFFSET(reg_name) \
+// (offsetof(struct user, regs) \
+// + offsetof(struct user_regs_struct, reg_name))
+
+// #define REG(tracee, version, index) \
+// (*(word_t*) (((uint8_t *) &tracee->_regs[version]) + reg_offset[index]))
+
+// static off_t reg_offset[] = {
+// [SYSARG_NUM] = USER_REGS_OFFSET(orig_eax),
+// [SYSARG_1] = USER_REGS_OFFSET(ebx),
+// [SYSARG_2] = USER_REGS_OFFSET(ecx),
+// [SYSARG_3] = USER_REGS_OFFSET(edx),
+// [SYSARG_4] = USER_REGS_OFFSET(esi),
+// [SYSARG_5] = USER_REGS_OFFSET(edi),
+// [SYSARG_6] = USER_REGS_OFFSET(ebp),
+// [SYSARG_RESULT] = USER_REGS_OFFSET(eax),
+// [STACK_POINTER] = USER_REGS_OFFSET(esp),
+// [INSTR_POINTER] = USER_REGS_OFFSET(eip),
+// [RTLD_FINI] = USER_REGS_OFFSET(edx),
+// [STATE_FLAGS] = USER_REGS_OFFSET(eflags),
+// [USERARG_1] = USER_REGS_OFFSET(eax),
+// };
+
+var RegOffset = []uintptr{
+	SYSARG_NUM:    unsafe.Offsetof(__user.Orig_eax),
+	SYSARG_1:      unsafe.Offsetof(__user.Ebx),
+	SYSARG_2:      unsafe.Offsetof(__user.Ecx),
+	SYSARG_3:      unsafe.Offsetof(__user.Edx),
+	SYSARG_4:      unsafe.Offsetof(__user.Esi),
+	SYSARG_5:      unsafe.Offsetof(__user.Edi),
+	SYSARG_6:      unsafe.Offsetof(__user.Ebp),
+	SYSARG_RESULT: unsafe.Offsetof(__user.Eax),
+	STACK_POINTER: unsafe.Offsetof(__user.Esp),
+	INSTR_POINTER: unsafe.Offsetof(__user.Eip),
+	RTLD_FINI:     unsafe.Offsetof(__user.Edx),
+	STATE_FLAGS:   unsafe.Offsetof(__user.Eflags),
+	USERARG_1:     unsafe.Offsetof(__user.Eax),
+}
+
+// regAddr returns the address of register Reg inside bank Version. Every
+// field of the 386 unix.PtraceRegs is 32 bits wide, so registers must be
+// read and written as 32-bit words; a 64-bit store would also overwrite the
+// field that follows.
+func (tracee *Tracee) regAddr(Version RegVersion, Reg Reg) *uint32 {
+	return (*uint32)(unsafe.Add(unsafe.Pointer(&tracee.Regs[Version]), int(RegOffset[Reg])))
+}
+
+// Reg returns the value of register Reg in bank Version, zero-extended to
+// 64 bits. It dereferences the register slot rather than returning its address.
+func (tracee *Tracee) Reg(Version RegVersion, Reg Reg) uint64 {
+	return uint64(*tracee.regAddr(Version, Reg))
+}
+
+// PokeReg stores the low 32 bits of Value into register Reg of bank Version
+// and sets RegsWereChanged. Nothing happens when PeekReg already reports
+// Value for that register in the bank being written.
+func (tracee *Tracee) PokeReg(Version RegVersion, Reg Reg, Value uint64) {
+	if tracee.PeekReg(Version, Reg) == Value {
+		return
+	}
+
+	// REG(tracee, CURRENT, reg) = value;
+	*tracee.regAddr(Version, Reg) = uint32(Value)
+	tracee.RegsWereChanged = true
+}
+
+// Restore copies register Reg from the ORIGINAL bank into the CURRENT bank.
+func (tracee *Tracee) Restore(Reg Reg) {
+	*tracee.regAddr(CURRENT, Reg) = *tracee.regAddr(ORIGINAL, Reg)
+}
diff --git a/proot/proot/tracee/reg_amd64.go b/proot/proot/tracee/reg_amd64.go
new file mode 100644
index 0000000..9d11e54
--- /dev/null
+++ b/proot/proot/tracee/reg_amd64.go
@@ -0,0 +1,119 @@
+//go:build amd64 && (linux || android)
+
+package tracee
+
+import (
+	"unsafe"
+)
+
+// /**
+// * Compute the offset of the register @reg_name in the USER area.
+// */
+// #define USER_REGS_OFFSET(reg_name) \
+// (offsetof(struct user, regs) \
+// + offsetof(struct user_regs_struct, reg_name))
+
+// static off_t reg_offset[] = {
+// [SYSARG_NUM] = USER_REGS_OFFSET(orig_rax),
+// [SYSARG_1] = USER_REGS_OFFSET(rdi),
+// [SYSARG_2] = USER_REGS_OFFSET(rsi),
+// [SYSARG_3] = USER_REGS_OFFSET(rdx),
+// [SYSARG_4] = USER_REGS_OFFSET(r10),
+// [SYSARG_5] = USER_REGS_OFFSET(r8),
+// [SYSARG_6] = USER_REGS_OFFSET(r9),
+// [SYSARG_RESULT] = USER_REGS_OFFSET(rax),
+// [STACK_POINTER] = USER_REGS_OFFSET(rsp),
+// [INSTR_POINTER] = USER_REGS_OFFSET(rip),
+// [RTLD_FINI] = USER_REGS_OFFSET(rdx),
+// [STATE_FLAGS] = USER_REGS_OFFSET(eflags),
+// [USERARG_1] = USER_REGS_OFFSET(rdi),
+// };
+
+// static off_t reg_offset_x86[] = {
+// [SYSARG_NUM] = USER_REGS_OFFSET(orig_rax),
+// [SYSARG_1] = USER_REGS_OFFSET(rbx),
+// [SYSARG_2] = USER_REGS_OFFSET(rcx),
+// [SYSARG_3] = USER_REGS_OFFSET(rdx),
+// [SYSARG_4] = USER_REGS_OFFSET(rsi),
+// [SYSARG_5] = USER_REGS_OFFSET(rdi),
+// [SYSARG_6] = USER_REGS_OFFSET(rbp),
+// [SYSARG_RESULT] = USER_REGS_OFFSET(rax),
+// [STACK_POINTER] = USER_REGS_OFFSET(rsp),
+// [INSTR_POINTER] = USER_REGS_OFFSET(rip),
+// [RTLD_FINI] = USER_REGS_OFFSET(rdx),
+// [STATE_FLAGS] = USER_REGS_OFFSET(eflags),
+// [USERARG_1] = USER_REGS_OFFSET(rax),
+// };
+
+// #define REG(tracee, version, index) \
+// (*(word_t*) (tracee->_regs[version].cs == 0x23 \
+// ?
(((uint8_t *) &tracee->_regs[version]) + reg_offset_x86[index]) \
+// : (((uint8_t *) &tracee->_regs[version]) + reg_offset[index])))
+
+var (
+	RegOffset = []uintptr{
+		SYSARG_NUM:    unsafe.Offsetof(__user.Orig_rax),
+		SYSARG_1:      unsafe.Offsetof(__user.Rdi),
+		SYSARG_2:      unsafe.Offsetof(__user.Rsi),
+		SYSARG_3:      unsafe.Offsetof(__user.Rdx),
+		SYSARG_4:      unsafe.Offsetof(__user.R10),
+		SYSARG_5:      unsafe.Offsetof(__user.R8),
+		SYSARG_6:      unsafe.Offsetof(__user.R9),
+		SYSARG_RESULT: unsafe.Offsetof(__user.Rax),
+		STACK_POINTER: unsafe.Offsetof(__user.Rsp),
+		INSTR_POINTER: unsafe.Offsetof(__user.Rip),
+		RTLD_FINI:     unsafe.Offsetof(__user.Rdx),
+		STATE_FLAGS:   unsafe.Offsetof(__user.Eflags),
+		USERARG_1:     unsafe.Offsetof(__user.Rdi),
+	}
+	RegOffsetX86 = []uintptr{
+		SYSARG_NUM:    unsafe.Offsetof(__user.Orig_rax),
+		SYSARG_1:      unsafe.Offsetof(__user.Rbx),
+		SYSARG_2:      unsafe.Offsetof(__user.Rcx),
+		SYSARG_3:      unsafe.Offsetof(__user.Rdx),
+		SYSARG_4:      unsafe.Offsetof(__user.Rsi),
+		SYSARG_5:      unsafe.Offsetof(__user.Rdi),
+		SYSARG_6:      unsafe.Offsetof(__user.Rbp),
+		SYSARG_RESULT: unsafe.Offsetof(__user.Rax),
+		STACK_POINTER: unsafe.Offsetof(__user.Rsp),
+		INSTR_POINTER: unsafe.Offsetof(__user.Rip),
+		RTLD_FINI:     unsafe.Offsetof(__user.Rdx),
+		STATE_FLAGS:   unsafe.Offsetof(__user.Eflags),
+		USERARG_1:     unsafe.Offsetof(__user.Rax),
+	}
+)
+
+// regAddr returns the address of register Reg inside bank Version. As in the
+// REG macro quoted above, a Cs value of 0x23 in that bank selects the
+// RegOffsetX86 table; any other value selects RegOffset.
+func (tracee *Tracee) regAddr(Version RegVersion, Reg Reg) *uint64 {
+	offsets := RegOffset
+	if tracee.Regs[Version].Cs == 0x23 {
+		offsets = RegOffsetX86
+	}
+	return (*uint64)(unsafe.Add(unsafe.Pointer(&tracee.Regs[Version]), int(offsets[Reg])))
+}
+
+// Reg returns the value of register Reg in bank Version; it dereferences the
+// register slot rather than returning its address. It panics if Version is
+// not one of CURRENT, ORIGINAL or MODIFIED.
+func (tracee *Tracee) Reg(Version RegVersion, Reg Reg) uint64 {
+	// NB_REG_VERSION is the bank count, so valid indices are strictly below it.
+	if Version < 0 || Version >= NB_REG_VERSION {
+		panic("invalid version")
+	}
+
+	return *tracee.regAddr(Version, Reg)
+}
+
+// PokeReg stores Value into register Reg of bank Version and sets
+// RegsWereChanged. Nothing happens when PeekReg already reports Value for
+// that register in the bank being written.
+func (tracee *Tracee) PokeReg(Version RegVersion, Reg Reg, Value uint64) {
+	if tracee.PeekReg(Version, Reg) == Value {
+		return
+	}
+
+	// REG(tracee, CURRENT, reg) = value;
+	*tracee.regAddr(Version, Reg) = Value
+	tracee.RegsWereChanged = true
+}
+
+// Restore copies register Reg from the ORIGINAL bank into the CURRENT bank.
+// Each side is addressed through the table chosen by its own bank's Cs value,
+// so exactly one slot of the CURRENT bank is written.
+func (tracee *Tracee) Restore(Reg Reg) {
+	*tracee.regAddr(CURRENT, Reg) = *tracee.regAddr(ORIGINAL, Reg)
+}
diff --git a/proot/proot/tracee/reg_arm.go b/proot/proot/tracee/reg_arm.go
new file mode 100644
index 0000000..d410f10
--- /dev/null
+++ b/proot/proot/tracee/reg_arm.go
@@ -0,0 +1,78 @@
+//go:build arm && (linux || android)
+
+package tracee
+
+import (
+	"unsafe"
+)
+
+// /**
+// * Compute the offset of the register @reg_name in the USER area.
+// */
+// #define USER_REGS_OFFSET(reg_name) \
+// (offsetof(struct user, regs) \
+// + offsetof(struct user_regs_struct, reg_name))
+
+// #define REG(tracee, version, index) \
+// (*(word_t*) (((uint8_t *) &tracee->_regs[version]) + reg_offset[index]))
+
+// off_t reg_offset[] = {
+// [SYSARG_NUM] = USER_REGS_OFFSET(uregs[7]),
+// [SYSARG_1] = USER_REGS_OFFSET(uregs[0]),
+// [SYSARG_2] = USER_REGS_OFFSET(uregs[1]),
+// [SYSARG_3] = USER_REGS_OFFSET(uregs[2]),
+// [SYSARG_4] = USER_REGS_OFFSET(uregs[3]),
+// [SYSARG_5] = USER_REGS_OFFSET(uregs[4]),
+// [SYSARG_6] = USER_REGS_OFFSET(uregs[5]),
+// [SYSARG_RESULT] = USER_REGS_OFFSET(uregs[0]),
+// [STACK_POINTER] = USER_REGS_OFFSET(uregs[13]),
+// [INSTR_POINTER] = USER_REGS_OFFSET(uregs[15]),
+// [USERARG_1] = USER_REGS_OFFSET(uregs[0]),
+// };
+
+var RegOffset = []uintptr{
+	SYSARG_NUM:    unsafe.Offsetof(__user.Uregs) + uintptr(7*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_1:      unsafe.Offsetof(__user.Uregs) + uintptr(0*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_2:      unsafe.Offsetof(__user.Uregs) + uintptr(1*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_3:      unsafe.Offsetof(__user.Uregs) + uintptr(2*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_4:      unsafe.Offsetof(__user.Uregs) + uintptr(3*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_5:      unsafe.Offsetof(__user.Uregs) + uintptr(4*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_6:      unsafe.Offsetof(__user.Uregs) + uintptr(5*unsafe.Sizeof(__user.Uregs[0])),
+	SYSARG_RESULT: unsafe.Offsetof(__user.Uregs) + uintptr(0*unsafe.Sizeof(__user.Uregs[0])),
+	STACK_POINTER: unsafe.Offsetof(__user.Uregs) + uintptr(13*unsafe.Sizeof(__user.Uregs[0])),
+	INSTR_POINTER: unsafe.Offsetof(__user.Uregs) + uintptr(15*unsafe.Sizeof(__user.Uregs[0])),
+	USERARG_1:     unsafe.Offsetof(__user.Uregs) + uintptr(0*unsafe.Sizeof(__user.Uregs[0])),
+}
+
+// regAddr returns the address of register Reg inside bank Version. Every
+// element of the arm unix.PtraceRegs.Uregs array is 32 bits wide.
+func (tracee *Tracee) regAddr(Version RegVersion, Reg Reg) *uint32 {
+	return (*uint32)(unsafe.Add(unsafe.Pointer(&tracee.Regs[Version]), int(RegOffset[Reg])))
+}
+
+// Reg returns the value of register Reg in bank Version, zero-extended to 64 bits.
+func (tracee *Tracee) Reg(Version RegVersion, Reg Reg) uint64 {
+	return uint64(*tracee.regAddr(Version, Reg))
+}
+
+// PokeReg stores the low 32 bits of Value into register Reg of bank Version
+// and sets RegsWereChanged. Nothing happens when PeekReg already reports
+// Value for that register in the bank being written.
+func (tracee *Tracee) PokeReg(Version RegVersion, Reg Reg, Value uint64) {
+	if tracee.PeekReg(Version, Reg) == Value {
+		return
+	}
+
+	// REG(tracee, CURRENT, reg) = value;
+	*tracee.regAddr(Version, Reg) = uint32(Value)
+	tracee.RegsWereChanged = true
+}
+
+// #define RESTORE(sysarg) (void) (reg_offset[SYSARG_RESULT] != reg_offset[sysarg] && (REG(tracee, CURRENT, sysarg) = REG(tracee, ORIGINAL, sysarg)))
+// Restore copies register Reg from the ORIGINAL bank into the CURRENT bank,
+// unless Reg occupies the same slot as SYSARG_RESULT.
+func (tracee *Tracee) Restore(Reg Reg) {
+	if RegOffset[SYSARG_RESULT] != RegOffset[Reg] {
+		*tracee.regAddr(CURRENT, Reg) = *tracee.regAddr(ORIGINAL, Reg)
+	}
+}
diff --git a/proot/proot/tracee/reg_arm64.go b/proot/proot/tracee/reg_arm64.go
new file mode 100644
index 0000000..5874545
--- /dev/null
+++ b/proot/proot/tracee/reg_arm64.go
@@ -0,0 +1,67 @@
+//go:build arm64 && (linux || android)
+
+package tracee
+
+import (
+	"unsafe"
+)
+
+/**
+ * Compute the offset of the register @reg_name in the USER area.
+ */ +// #define USER_REGS_OFFSET(reg_name) \ +// (offsetof(struct user, regs) \ +// + offsetof(struct user_regs_struct, reg_name)) + +// #define REG(tracee, version, index) \ +// (*(word_t*) (((uint8_t *) &tracee->_regs[version]) + reg_offset[index])) + +// #undef USER_REGS_OFFSET +// #define USER_REGS_OFFSET(reg_name) offsetof(struct user_regs_struct, reg_name) + +// static off_t reg_offset[] = { +// [SYSARG_NUM] = USER_REGS_OFFSET(regs[8]), +// [SYSARG_1] = USER_REGS_OFFSET(regs[0]), +// [SYSARG_2] = USER_REGS_OFFSET(regs[1]), +// [SYSARG_3] = USER_REGS_OFFSET(regs[2]), +// [SYSARG_4] = USER_REGS_OFFSET(regs[3]), +// [SYSARG_5] = USER_REGS_OFFSET(regs[4]), +// [SYSARG_6] = USER_REGS_OFFSET(regs[5]), +// [SYSARG_RESULT] = USER_REGS_OFFSET(regs[0]), +// [STACK_POINTER] = USER_REGS_OFFSET(sp), +// [INSTR_POINTER] = USER_REGS_OFFSET(pc), +// [USERARG_1] = USER_REGS_OFFSET(regs[0]), +// }; + +var RegOffset = []uintptr{ + // + uintptr(7*unsafe.Sizeof(__user.Uregs[0])) + SYSARG_NUM: unsafe.Offsetof(__user.Regs) + uintptr(8*unsafe.Sizeof(__user.Regs[0])), + SYSARG_1: unsafe.Offsetof(__user.Regs) + uintptr(0*unsafe.Sizeof(__user.Regs[0])), + SYSARG_2: unsafe.Offsetof(__user.Regs) + uintptr(1*unsafe.Sizeof(__user.Regs[0])), + SYSARG_3: unsafe.Offsetof(__user.Regs) + uintptr(2*unsafe.Sizeof(__user.Regs[0])), + SYSARG_4: unsafe.Offsetof(__user.Regs) + uintptr(3*unsafe.Sizeof(__user.Regs[0])), + SYSARG_5: unsafe.Offsetof(__user.Regs) + uintptr(4*unsafe.Sizeof(__user.Regs[0])), + SYSARG_6: unsafe.Offsetof(__user.Regs) + uintptr(5*unsafe.Sizeof(__user.Regs[0])), + SYSARG_RESULT: unsafe.Offsetof(__user.Regs) + uintptr(0*unsafe.Sizeof(__user.Regs[0])), + STACK_POINTER: unsafe.Offsetof(__user.Sp), + INSTR_POINTER: unsafe.Offsetof(__user.Pc), + USERARG_1: unsafe.Offsetof(__user.Regs) + uintptr(0*unsafe.Sizeof(__user.Regs[0])), +} + +func (tracee *Tracee) Reg(Version RegVersion, Reg Reg) uint64 { + return uint64(uintptr(unsafe.Pointer(&tracee.Regs[Version])) + 
uintptr(RegOffset[Reg])) +} +func (tracee *Tracee) PokeReg(Version RegVersion, Reg Reg, Value uint64) { + if tracee.PeekReg(CURRENT, Reg) == Value { + return + } + + // REG(tracee, CURRENT, reg) = value; + *(*uint64)(unsafe.Add(unsafe.Pointer(&tracee.Regs[Version]), int(RegOffset[Reg]))) = Value + tracee.RegsWereChanged = true +} +func (tracee *Tracee) Restore(Reg Reg) { + if RegOffset[SYSARG_RESULT] != RegOffset[Reg] { + (*(*uint64)(unsafe.Add(unsafe.Pointer(&tracee.Regs[CURRENT]), int(RegOffset[Reg])))) = tracee.Reg(ORIGINAL, Reg) + } +} diff --git a/proot/proot/tracee/tracee.go b/proot/proot/tracee/tracee.go new file mode 100644 index 0000000..f1ffa03 --- /dev/null +++ b/proot/proot/tracee/tracee.go @@ -0,0 +1,379 @@ +//go:build linux || android + +package tracee + +import ( + "syscall" + + "golang.org/x/sys/unix" + "sirherobrine23.com.br/go-bds/exec/proot/proot/execve" + "sirherobrine23.com.br/go-bds/exec/proot/proot/path" +) + +const HostRootfs string = "/host-rootfs" + +// struct { struct tracee *le_next; struct tracee **le_prev; } +type TraceeLink struct { + Next *Tracee + Prev **Tracee +} + +type WaitsIn int + +const ( + DOESNT_WAIT = iota + WAITS_IN_KERNEL + WAITS_IN_PROOT +) + +type AsPtracer struct { + NbPtracees int + Zombies struct{ lh_first *Tracee } + WaitPID int + WaitOption uint + + WaitsIn WaitsIn +} + +type AsPtracerEvent struct { + Value int + Pending bool +} + +type AsPtracee struct { + Ptracer *Tracee + Event4 struct { + Proot AsPtracerEvent + Ptracer AsPtracerEvent + } + + TracingStarted bool + IgnoreLoeaderSyscall bool + IgnoreSyscall bool + Option uint + IsZombie bool +} + +type Sigstop int + +const ( + SIGSTOP_IGNORED Sigstop = iota + SIGSTOP_ALLOWED + SIGSTOP_PENDING +) + +type Seccomp int + +const ( + Disable Seccomp = iota + Disabling + Enabled +) + +// Information related to a file-system name-space. +type FileSystemNameSpace struct { + Cwd string // Current working directory, à la /proc/self/pwd. 
+ + Bindings struct { + // List of bindings as specified by the user but not canonicalized yet. + Pending *path.Bindings + + // List of bindings canonicalized and sorted in the "guest" order. + Guest *path.Binding + + // List of bindings canonicalized and sorted in the "host" order. + Host *path.Bindings + } +} + +// Virtual heap, emulated with a regular memory mapping. +type Heap struct { + Base uint64 + Size uint64 + Disabled bool +} + +type Tracees struct { + First *Tracee +} + +type TraceeRoot struct { + Tracees Tracees + NextVPID uint + NoSeccmp bool + LastExitStatus int +} + +// Information related to a tracee process. +type Tracee struct { + RootConfig *TraceeRoot + + Link TraceeLink // Link for the list of all tracees. + PID int // Process identifier. + Vpid uint // Unique tracee identifier. + Running bool // Is it currently running or not? + Terminated bool // Is this tracee ready to be freed? TODO: move to a list dedicated to terminated tracees instead. + KillallOnExit bool // Whether termination of this tracee implies an immediate kill of all tracees. + Parent *Tracee // Parent of this tracee, NULL if none. + Clone bool // Is it a "clone", i.e has the same parent as its creator. + AsPtracer AsPtracer // Support for ptrace emulation (tracer side). + AsPtracee AsPtracee // Support for ptrace emulation (tracee side). + + /* Current status: + * 0: enter syscall + * 1: exit syscall no error + * -errno: exit syscall with error. + */ + Status int + + /* + #define IS_IN_SYSENTER(tracee) ((tracee)->status == 0) + #define IS_IN_SYSEXIT(tracee) (!IS_IN_SYSENTER(tracee)) + #define IS_IN_SYSEXIT2(tracee, sysnum) (IS_IN_SYSEXIT(tracee) && get_sysnum((tracee), ORIGINAL) == sysnum) + */ + + // PTRACE_REQUEST_TYPE + + RestartHow int // How this tracee is restarted. + + /* Value of the tracee's general purpose registers. 
*/ + // struct user_regs_struct _regs[NB_REG_VERSION]; + // bool _regs_were_changed; + // bool restore_original_regs; + RestoreOrigianlRegs bool + RegsWereChanged bool + Regs [NB_REG_VERSION]unix.PtraceRegs + + Sigstop Sigstop // State for the special handling of SIGSTOP. + + GlueType uint // Specify the type of the final component during the initialization of a binding. This variable is first defined in bind_path() then used in build_glue(). + + // During a sub-reconfiguration, the new setup is relatively to @tracee's file-system name-space. Also, @paths holds its $PATH environment variable in order to emulate the execvp(3) behavior. + Reconf struct { + Tracee *Tracee + paths string + } + + // Unrequested syscalls inserted by PRoot after an actual syscall. + Chain struct { + // struct chained_syscalls *syscalls; + Syscalls *any // Assuming chained_syscalls is a slice of some `any` + ForceFinalResult bool + FinalResult uint64 + } + + // Load info generated during execve sysenter and used during execve sysexit. + LoadInfo *execve.LoadInfo + + MixedMode bool // Disable mixed-execution (native host) check + Verbose int // Verbosity level. + Seccomp Seccomp // State of the seccomp acceleration for this tracee. + SysexitPending bool // Ensure the sysexit stage is always hit under seccomp. + + FS *FileSystemNameSpace // Information related to a file-system name-space. + Heap *Heap // Virtual heap, emulated with a regular memory mapping. + + // Path to the executable, à la /proc/self/exe. + Exe, NewExe string + + Qemu *string // Runner command-line. + Glue string // Path to glue between the guest rootfs and the host rootfs. + + // List of extensions enabled for this tracee. + // struct extensions *extensions; + + HostLDSOPaths, GuestLDSOPaths string // For the mixed-mode, the guest LD_LIBRARY_PATH is saved during the "guest -> host" transition, in order to be restored during the "host -> guest" transition (only if the host LD_LIBRARY_PATH hasn't changed). 
+ ToolName string // For diagnostic purpose. +} + +/* +extern Tracee *get_tracee(const Tracee *tracee, pid_t pid, bool create); +extern Tracee *get_ptracee(const Tracee *ptracer, pid_t pid, bool only_stopped, bool only_with_pevent, word_t wait_options); +extern Tracee *get_stopped_ptracee(const Tracee *ptracer, pid_t pid, bool only_with_pevent, word_t wait_options); +extern bool has_ptracees(const Tracee *ptracer, pid_t pid, word_t wait_options); +extern int new_child(Tracee *parent, word_t clone_flags); +extern Tracee *new_dummy_tracee(TALLOC_CTX *context); +extern void terminate_tracee(Tracee *tracee); +extern void free_terminated_tracees(Tracee *tracee); +extern int swap_config(Tracee *tracee1, Tracee *tracee2); +extern void kill_all_tracees(Tracee *tracee); +*/ + +// #define PTRACEE (ptracee->as_ptracee) +// #define PTRACER (ptracer->as_ptracer) + +func (tracee *Tracee) Ptracer() *AsPtracer { return &tracee.AsPtracer } +func (tracee *Tracee) Ptracee() *AsPtracee { return &tracee.AsPtracee } + +func NewDummyTracee() (tracee *Tracee) { + tracee = new(Tracee) + tracee.Link = TraceeLink{} + + tracee.FS = new(FileSystemNameSpace) + tracee.Heap = new(Heap) + + return +} + +func newTracee(pid int, currentTracee *Tracee) *Tracee { + if currentTracee == nil { + currentTracee = NewDummyTracee() + currentTracee.RootConfig = &TraceeRoot{Tracees: Tracees{}, NextVPID: 1} + } + + tracee := NewDummyTracee() + tracee.PID = pid + currentTracee.RootConfig.NextVPID++ + tracee.Vpid = currentTracee.RootConfig.NextVPID + + /* + do { + if (((tracee)->link.le_next = (current_tracee->RootConfig->tracees)->lh_first) != ((void *)0)) { + (current_tracee->RootConfig->tracees)->lh_first->link.le_prev = &(tracee)->link.le_next; + } + + (current_tracee->RootConfig->tracees)->lh_first = (tracee); + (tracee)->link.le_prev = &(current_tracee->RootConfig->tracees)->lh_first; + } while(true) + */ + if tracee.Link.Next = currentTracee.RootConfig.Tracees.First; tracee.Link.Next != nil { + 
tracee.Link.Next.Link.Prev = &tracee.Link.Next + } + currentTracee.RootConfig.Tracees.First = tracee + tracee.Link.Prev = ¤tTracee.RootConfig.Tracees.First + + return tracee +} + +// Return the entry related to the tracee @pid. If no entry were found, a new one is created if @create is true, otherwise NULL is returned. +func (tracee *Tracee) GetTracee(Pid int, create bool) *Tracee { + if tracee != nil && tracee.PID == Pid { + return tracee + } + + for tracee := tracee.RootConfig.Tracees.First; tracee != nil; tracee = tracee.Link.Next { + if tracee.PID == Pid { + return tracee + } + } + + if create { + return newTracee(Pid, tracee) + } + return nil +} + +// #define EXPECTED_WAIT_CLONE(wait_options, tracee) \ +// ((((wait_options) & __WALL) != 0) || ((((wait_options) & __WCLONE) != 0) && (tracee)->clone) || ((((wait_options) & __WCLONE) == 0) && !(tracee)->clone)) +func ExpectedWaitClone(wait_options uint, tracee *Tracee) bool { + return (((wait_options & syscall.WALL) != 0) || + (((wait_options & syscall.WCLONE) != 0) && tracee.Clone) || + (((wait_options & syscall.WCLONE) == 0) && !tracee.Clone)) +} + +// Return the first [stopped?] tracee with the given @pid (-1 for any) which has the given @ptracer, +// and which has a pending event for its ptracer if @only_with_pevent is true. +// See wait(2) manual for the meaning of @wait_options. This function returns NULL if there's no such ptracee. 
+func (tracee *Tracee) GetPtracee(Pid int, onlyStopped, onlyWithPevent bool, wait_options uint) *Tracee {
+	if tracee == nil {
+		return nil
+	}
+
+	// Zombies first.
+	for ptracee := tracee.AsPtracer.Zombies.lh_first; ptracee != nil; ptracee = ptracee.Link.Next {
+		// Not the ptracee we are looking for? A Pid of -1 matches any ptracee.
+		if Pid != ptracee.PID && Pid != -1 {
+			continue
+		}
+
+		if !ExpectedWaitClone(wait_options, ptracee) {
+			continue
+		}
+
+		return ptracee
+	}
+
+	for ptracee := tracee.RootConfig.Tracees.First; ptracee != nil; ptracee = ptracee.Link.Next {
+		// if ((ptracee->as_ptracee).ptracer != ptracer)
+		// continue;
+		// if (pid != ptracee->pid && pid != -1)
+		// continue;
+		// if (!EXPECTED_WAIT_CLONE(wait_options, ptracee))
+		// continue;
+		// if (!only_stopped)
+		// return ptracee;
+		// if (ptracee->running)
+		// continue;
+		// if ((ptracee->as_ptracee).event4.ptracer.pending || !only_with_pevent)
+		// return ptracee;
+		// if (pid == ptracee->pid)
+		// return NULL;
+
+		// Only tracees whose ptracer is the receiver are candidates.
+		if ptracee.AsPtracee.Ptracer != tracee {
+			continue
+		}
+		// A Pid of -1 matches any ptracee.
+		if Pid != ptracee.PID && Pid != -1 {
+			continue
+		}
+		if !ExpectedWaitClone(wait_options, ptracee) {
+			continue
+		}
+		if !onlyStopped {
+			return ptracee
+		}
+		if ptracee.Running {
+			continue
+		}
+		if ptracee.AsPtracee.Event4.Ptracer.Pending || !onlyWithPevent {
+			return ptracee
+		}
+		// The ptracee asked for by exact PID is stopped but not eligible:
+		// stop searching. With Pid == -1 the scan continues instead.
+		if Pid == ptracee.PID {
+			return nil
+		}
+	}
+
+	return nil
+}
+
+// GetStoppedPtracee wraps GetPtracee with onlyStopped set, so only a stopped
+// ptracee (or nil) is returned.
+func (tracee *Tracee) GetStoppedPtracee(Pid int, onlyWithPevent bool, wait_options uint) *Tracee {
+	return tracee.GetPtracee(Pid, true, onlyWithPevent, wait_options)
+}
+
+// HasPtracees reports whether GetPtracee finds a ptracee for Pid (-1 for any)
+// and wait_options, whether it is stopped or running.
+func (tracee *Tracee) HasPtracees(Pid int, wait_options uint) bool {
+	return tracee.GetPtracee(Pid, false, false, wait_options) != nil
+}
+
+// TerminateTracee marks the tracee as terminated and, when KillallOnExit is
+// set, sends SIGKILL to every tracee through KillAllTracees.
+func (tracee *Tracee) TerminateTracee() {
+	tracee.Terminated = true
+	if tracee.KillallOnExit {
+		tracee.KillAllTracees()
+	}
+}
+
+// Free all tracees marked as terminated.
+func (tracee *Tracee) FreeTerminatedTracees() {
+	if tracee == nil || tracee.RootConfig == nil {
+		return
+	}
+
+	// The successor is read before an entry is unlinked, because unlinking
+	// clears the entry's Link.
+	next := tracee.RootConfig.Tracees.First
+	for next != nil {
+		current := next
+		next = current.Link.Next
+		if !current.Terminated {
+			continue
+		}
+
+		// Unlink the entry (LIST_REMOVE) so nothing in the list references it
+		// any more. TerminateTracee must not be called again here: it would
+		// re-send SIGKILL to every tracee when KillallOnExit is set.
+		if current.Link.Next != nil {
+			current.Link.Next.Link.Prev = current.Link.Prev
+		}
+		if current.Link.Prev != nil {
+			*current.Link.Prev = current.Link.Next
+		}
+		current.Link = TraceeLink{}
+	}
+}
+
+// KillAllTracees sends the KILL signal to all tracees in the root list.
+func (tracee *Tracee) KillAllTracees() {
+	for target := tracee.RootConfig.Tracees.First; target != nil; target = target.Link.Next {
+		// Best effort: a tracee may already be gone, so the error is ignored.
+		_ = syscall.Kill(target.PID, syscall.SIGKILL)
+	}
+}
+
+// NewChild is meant to make a new child of the receiver inherit from it;
+// depending on cloneFlags, some information is copied or shared.
+// It is currently a stub: nothing is created and the returned error is always nil.
+func (tracee *Tracee) NewChild(cloneFlags uint) error {
+
+	return nil
+}
\ No newline at end of file
-- 
2.51.0