WIP: Golang Proot code #1

Draft
Sirherobrine23 wants to merge 5 commits from proot_binding into main
6 changed files with 313 additions and 196 deletions
Showing only changes of commit 68b4eeb842 - Show all commits

2
go.mod
View File

@@ -5,6 +5,7 @@ go 1.24.4
require (
github.com/docker/docker v28.3.3+incompatible
github.com/docker/go-connections v0.5.0
golang.org/x/sys v0.34.0
)
require (
@@ -37,7 +38,6 @@ require (
go.opentelemetry.io/otel/trace v1.37.0 // indirect
go.opentelemetry.io/proto/otlp v1.7.0 // indirect
golang.org/x/net v0.41.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/time v0.8.0 // indirect
google.golang.org/grpc v1.73.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect

View File

@@ -474,7 +474,7 @@ static int handle_tracee_event_kernel_4_8(Tracee *tracee, int tracee_status)
default_ptrace_options);
if (status < 0) {
note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)");
exit(EXIT_FAILURE);
exit(EXIT_FAILUtRE);
}
}
else {

32
proot/cmd/main.go Normal file
View File

@@ -0,0 +1,32 @@
package main
import (
"fmt"
"os"
"sirherobrine23.com.br/go-bds/exec/exec"
"sirherobrine23.com.br/go-bds/exec/proot"
)
// main runs the command given on the command line under a proot tracer,
// wiring the tracee's standard streams to this process's own.
//
// Any failure is reported on stderr and the process exits through
// os.Exit(-1).
func main() {
	if err := run(); err != nil {
		// Fprintln, not Fprintf: the error text is data, not a format string,
		// so any '%' it contains must be printed verbatim.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(-1)
	}
}

// run creates the tracer, starts the requested command and waits for it,
// returning the first error met along the way.
func run() error {
	cmd, err := proot.NewProc()
	if err != nil {
		return err
	}

	err = cmd.Start(&exec.Exec{
		// Drop the program name; min keeps the slice expression valid even
		// if os.Args is empty.
		Arguments: os.Args[min(1, len(os.Args)):],
		Stdout:    os.Stdout,
		Stderr:    os.Stderr,
		Stdin:     os.Stdin,
	})
	if err != nil {
		return err
	}

	return cmd.Wait()
}

View File

@@ -2,6 +2,7 @@
package proot
import (
"context"
"io"
"os/exec"
@@ -14,12 +15,17 @@ type Proot struct {
Cmd *exec.Cmd // Golang process
EventLocked bool
NoSeccomp bool
Err error
last_exit_status int
Tracees []*Tracee
vpid int
doneEnd *exec.ExitError
done context.Context
donefFn context.CancelFunc
Stdin io.Reader
Stdout, Stderr io.Writer
}
@@ -32,7 +38,8 @@ func (proot *Proot) Wait() error {
}
return nil
case proot.Cmd != nil && proot.Cmd.Process != nil:
return proot.Cmd.Wait()
<-proot.done.Done()
return proot.doneEnd
default:
return goexec.ErrNoProcess
}

View File

@@ -3,18 +3,37 @@
package proot
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"reflect"
"runtime"
"slices"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
goexec "sirherobrine23.com.br/go-bds/exec/exec"
)
// Seccomp acceleration states assigned to a tracee's Seccomp field.
const (
	// DISABLED: seccomp is not in use for the tracee.
	DISABLED = iota
	// DISABLING: seccomp was switched off by the previous syscall, but that
	// syscall's sysenter stage had already been handled.
	DISABLING
	// ENABLED: seccomp is in use for the tracee.
	ENABLED
)

// FILTER_SYSEXIT is the bit tested in the value read back with
// PTRACE_GETEVENTMSG on a seccomp event.
const FILTER_SYSEXIT = 0x1
// SIGSTOP handling modes assigned to a tracee's Sigstop field.
const (
SIGSTOP_IGNORED = iota /* Ignore SIGSTOP (once the parent is known). */
SIGSTOP_ALLOWED /* Allow SIGSTOP (once the parent is known). */
SIGSTOP_PENDING /* Hold SIGSTOP back while the parent is still unknown. */
)
var _ = goexec.Register("proot", NewProc)
func NewProc() (*Proot, error) {
@@ -28,6 +47,9 @@ func (proot *Proot) Kill() error { return proot.Cmd.Process.Kill()
// Signal sends s to the traced process.
//
// It returns an error, rather than dereferencing a nil pointer, when no
// process has been started yet.
func (proot *Proot) Signal(s os.Signal) error {
	if proot.Cmd == nil || proot.Cmd.Process == nil {
		return fmt.Errorf("proot: signal %v: process not started", s)
	}
	return proot.Cmd.Process.Signal(s)
}
// Close asks the traced process to stop by sending it os.Interrupt.
//
// It is a no-op returning nil when there is no command or the command has no
// process yet; the guard mirrors the one used by Wait.
func (proot *Proot) Close() error {
	if proot.Cmd == nil || proot.Cmd.Process == nil {
		return nil
	}
	return proot.Cmd.Process.Signal(os.Interrupt)
}
@@ -43,13 +65,56 @@ func (proot *Proot) Start(options *goexec.Exec) error {
proot.Cmd.SysProcAttr.Ptrace = true
proot.Cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER
// attach stdin
switch {
case options.Stdin != nil:
proot.Cmd.Stdin = options.Stdin
case proot.Stdin != nil:
proot.Cmd.Stdin = proot.Stdin
}
// attach stdout
switch {
case options.Stdout != nil:
proot.Cmd.Stdout = options.Stdout
case proot.Stdout != nil:
proot.Cmd.Stdout = proot.Stdout
}
// attach stderr
switch {
case options.Stderr != nil:
proot.Cmd.Stderr = options.Stderr
case proot.Stderr != nil:
proot.Cmd.Stderr = proot.Stderr
}
runtime.LockOSThread() // Lock thread to use PTRACE
proot.EventLocked = true
// go proot.Event() // Start background loop event
proot.Cmd.Start()
proot.done, proot.donefFn = context.WithCancel(context.Background())
go proot.Event() // Start background loop event
proot.Event() // Start background loop event
return nil
return proot.Cmd.Start()
}
// mountProcessState builds an *os.ProcessState out of the raw values of a
// wait4 call so that it can be wrapped in an *exec.ExitError.
//
// os.ProcessState has no constructor and only unexported fields, so the
// fields named "pid", "status" and "rusage" are filled in through reflection.
// Fields with any other name are left at their zero value.
//
// NOTE(review): this relies on the private layout of os.ProcessState on the
// target platform (field names and types) — confirm when upgrading Go.
func mountProcessState(pid int, status unix.WaitStatus, rusage *unix.Rusage) *os.ProcessState {
	state := &os.ProcessState{}
	fields := reflect.ValueOf(state).Elem()
	layout := fields.Type()

	for i := range layout.NumField() {
		field := fields.Field(i)
		// A Value obtained through an unexported field is read-only and any
		// Set* call on it panics. Re-derive a writable Value from the field's
		// address instead.
		writable := reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())).Elem()

		switch layout.Field(i).Name {
		case "pid":
			writable.SetInt(int64(pid))
		case "status":
			writable.Set(reflect.ValueOf(syscall.WaitStatus(status)))
		case "rusage":
			if rusage == nil {
				continue
			}
			// unix.Rusage and syscall.Rusage are distinct named types, so a
			// type assertion between them can never succeed.
			// NOTE(review): the pointer cast assumes both structs share the
			// same memory layout on the target platform — confirm.
			writable.Set(reflect.ValueOf((*syscall.Rusage)(unsafe.Pointer(rusage))))
		}
	}
	return state
}
func (proot *Proot) Event() {
@@ -62,9 +127,14 @@ func (proot *Proot) Event() {
}
wpid, err := unix.Wait4(proot.Cmd.Process.Pid, &wstatus, unix.WALL, &rusage)
fmt.Println(err)
switch err {
case nil:
case syscall.Errno(3), syscall.Errno(10):
case syscall.Errno(10):
proot.doneEnd = &exec.ExitError{ProcessState: mountProcessState(proot.Cmd.Process.Pid, wstatus, &rusage)}
proot.donefFn()
return
case syscall.Errno(3):
unix.Kill(wpid, unix.PTRACE_CONT)
continue
default:
@@ -79,17 +149,25 @@ func (proot *Proot) Event() {
wstatus.Continued(),
)
log.Printf("Getting tracee to wpid %d\n", wpid)
tracee := proot.GetTracee(nil, wpid, true)
tracee.Running = false
if tracee.AsPtracee != nil {
log.Printf("handle_ptracee_event to %d\n", wpid)
keep_stopped := proot.handle_ptracee_event(tracee, wstatus)
if keep_stopped {
continue
}
}
log.Printf("handle_tracee_event to %d\n", wpid)
signal := proot.handle_tracee_event(tracee, wstatus)
log.Printf("restarting tracee to %d with %s\n", wpid, unix.Signal(signal))
if signal == -255 && proot.Err != nil {
fmt.Println(proot.Err)
signal = 0
}
proot.restart_tracee(tracee, unix.Signal(signal))
}
}
@@ -204,16 +282,14 @@ func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus)
unix.Kill(ptracee.Pid, unix.SIGCHLD)
//#define EXPECTED_WAIT_CLONE(wait_options,tracee) ((((wait_options) & __WALL) != 0) || ((((wait_options) & __WCLONE) != 0) && (tracee)->clone) || ((((wait_options) & __WCLONE) == 0) && !(tracee)->clone))
// if ( (PTRACER.wait_pid == -1 || PTRACER.wait_pid == ptracee->pid) && EXPECTED_WAIT_CLONE(PTRACER.wait_options, ptracee))
if ptracer.AsPtracer.WaitPid == -1 || ptracer.AsPtracer.WaitPid == ptracee.Pid &&
(ptracer.AsPtracer.WaitOptions&unix.WALL != 0 || (ptracer.AsPtracer.WaitOptions&unix.WCLONE != 0 && ptracee.Clone) || (ptracer.AsPtracer.WaitOptions&unix.WCLONE == 0 && !ptracee.Clone)) {
status := proot.update_wait_status(ptracer, ptracee)
if status != 0 {
// poke_reg(ptracer, SYSARG_RESULT, (word_t) status);
// proot.poke_reg(ptracer, SYSARG_RESULT, (word_t) status);
}
/* Write ptracer's register cache back. */
// (void) push_regs(ptracer);
// proot.push_regs(ptracer);
ptracer.AsPtracer.WaitPid = 0
restarted := proot.restart_tracee(ptracer, 0)
@@ -226,31 +302,52 @@ func (proot *Proot) handle_ptracee_event(ptracee *Tracee, event unix.WaitStatus)
}
// update_wait_status appears to be an in-progress port of the PRoot routine
// of the same name: the commented-out C statements in the body mark the logic
// that has not been translated yet.
//
// As written, the only live logic is the check for a ptracee whose ptracer is
// also its parent and whose pending ptracer event is an exit or a termination
// by signal. Both return paths yield 0, and ptracer is not used by the live
// code.
func (proot *Proot) update_wait_status(ptracer, ptracee *Tracee) (result int) {
/* Special case: the Linux kernel reports the terminating
* event issued by a process to both its parent and its
* tracer, except when they are the same. In this case the
* Linux kernel reports the terminating event only once to the
* tracing parent ... */
if ptracee.AsPtracee.Ptracer == ptracee.Parent &&
(unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Exited() ||
unix.WaitStatus(ptracee.AsPtracee.Event4.Ptracer.Value).Signaled()) {
/* ... So hide this terminating event (toward its
* tracer, ie. PRoot) and make the second one appear
* (towards its parent, ie. the ptracer). This will
* ensure its exit status is collected from a kernel
* point-of-view (ie. it doesn't stay a zombie
* forever). */
// restart_original_syscall(ptracer);
// /* Special case: the Linux kernel reports the terminating
// * event issued by a process to both its parent and its
// * tracer, except when they are the same. In this case the
// * Linux kernel reports the terminating event only once to the
// * tracing parent ... */
// if (PTRACEE.ptracer == ptracee->parent
// && (WIFEXITED(PTRACEE.event4.ptracer.value)
// || WIFSIGNALED(PTRACEE.event4.ptracer.value))) {
// /* ... So hide this terminating event (toward its
// * tracer, ie. PRoot) and make the second one appear
// * (towards its parent, ie. the ptracer). This will
// * ensure its exit status is collected from a kernel
// * point-of-view (ie. it doesn't stay a zombie
// * forever). */
// restart_original_syscall(ptracer);
/* Detach this ptracee from its ptracer, PRoot doesn't
* have anything else to emulate. */
// detach_from_ptracer(ptracee);
// /* Detach this ptracee from its ptracer, PRoot doesn't
// * have anything else to emulate. */
/* Zombies can rest in peace once the ptracer is notified. */
// if (PTRACEE.is_zombie)
// TALLOC_FREE(ptracee);
return 0
}
// address = peek_reg(ptracer, ORIGINAL, SYSARG_2);
// if (address != 0) {
// poke_int32(ptracer, address, PTRACEE.event4.ptracer.value);
// if (errno != 0)
// return -errno;
// }
// PTRACEE.event4.ptracer.pending = false;
/* Be careful; ptracee might get freed before its pid is returned. */
// result = ptracee->pid;
// /* Zombies can rest in peace once the ptracer is notified. */
// if (PTRACEE.is_zombie) {
// detach_from_ptracer(ptracee);
// TALLOC_FREE(ptracee);
// }
// /* Zombies can rest in peace once the ptracer is
// * notified. */
// if (PTRACEE.is_zombie)
// TALLOC_FREE(ptracee);
// return result;
return 0
}
@@ -281,6 +378,15 @@ func is_kernel_4_8() bool {
return (major == 4 && minor >= 8) || major > 4
}
// terminate_tracee flags tracee as terminated. If the tracee has
// KillallOnExit set, SIGKILL is then sent to every entry of proot.Tracees,
// walking the list from the last entry to the first. Errors from kill are
// ignored.
func (proot *Proot) terminate_tracee(tracee *Tracee) {
	tracee.Terminated = true
	if !tracee.KillallOnExit {
		return
	}

	all := proot.Tracees
	for i := len(all) - 1; i >= 0; i-- {
		unix.Kill(all[i].Pid, unix.SIGKILL)
	}
}
func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status unix.WaitStatus) int {
/* Don't overwrite restart_how if it is explicitly set
* elsewhere, i.e in the ptrace emulation when single
@@ -300,181 +406,146 @@ func (proot *Proot) handle_tracee_event_kernel_4_8(tracee *Tracee, tracee_status
}
}
// var seccomp_detected, seccomp_enabled bool
var
// status,
signal int
var signal int
var seccomp_detected, seccomp_enabled bool
/* Not a signal-stop by default. */
signal = 0
if tracee_status.Exited() {
// if (WIFEXITED(tracee_status)) {
// last_exit_status = WEXITSTATUS(tracee_status);
// VERBOSE(tracee, 1,
// "vpid %" PRIu64 ": exited with status %d",
// tracee->vpid, last_exit_status);
// terminate_tracee(tracee);
// }
proot.last_exit_status = tracee_status.ExitStatus()
proot.terminate_tracee(tracee)
} else if tracee_status.Signaled() {
// else if (WIFSIGNALED(tracee_status)) {
// check_architecture(tracee);
// VERBOSE(tracee, 1,
// "vpid %" PRIu64 ": terminated with signal %d",
// tracee->vpid, WTERMSIG(tracee_status));
// terminate_tracee(tracee);
// }
proot.terminate_tracee(tracee)
} else if tracee_status.Stopped() {
/* Don't use WSTOPSIG() to extract the signal
* since it clears the PTRACE_EVENT_* bits. */
/* Don't use WSTOPSIG() to extract the signal since it clears the PTRACE_EVENT_* bits. */
signal = (int(tracee_status) & 0xfff00) >> 8
// static bool deliver_sigtrap = false;
// switch (signal) {
// case SIGTRAP: {
// const unsigned long default_ptrace_options = (
// PTRACE_O_TRACESYSGOOD |
// PTRACE_O_TRACEFORK |
// PTRACE_O_TRACEVFORK |
// PTRACE_O_TRACEVFORKDONE |
// PTRACE_O_TRACEEXEC |
// PTRACE_O_TRACECLONE |
// PTRACE_O_TRACEEXIT);
// /* Distinguish some events from others and
// * automatically trace each new process with
// * the same options.
// *
// * Note that only the first bare SIGTRAP is
// * related to the tracing loop, others SIGTRAP
// * carry tracing information because of
// * TRACE*FORK/CLONE/EXEC. */
// if (deliver_sigtrap)
// break; /* Deliver this signal as-is. */
// deliver_sigtrap = true;
// /* Try to enable seccomp mode 2... */
// status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL,
// default_ptrace_options | PTRACE_O_TRACESECCOMP);
// if (status < 0) {
// seccomp_enabled = false;
// /* ... otherwise use default options only. */
// status = ptrace(PTRACE_SETOPTIONS, tracee->pid, NULL,
// default_ptrace_options);
// if (status < 0) {
// note(tracee, ERROR, SYSTEM, "ptrace(PTRACE_SETOPTIONS)");
// exit(EXIT_FAILURE);
// }
// }
// else {
// if (getenv("PROOT_NO_SECCOMP") == NULL)
// seccomp_enabled = true;
// }
// }
// /* Fall through. */
// case SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8:
// case SIGTRAP | PTRACE_EVENT_SECCOMP << 8:
// if (!seccomp_detected && seccomp_enabled) {
// VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled");
// tracee->seccomp = ENABLED;
// seccomp_detected = true;
// }
// if (signal == (SIGTRAP | PTRACE_EVENT_SECCOMP2 << 8) ||
// signal == (SIGTRAP | PTRACE_EVENT_SECCOMP << 8)) {
// unsigned long flags = 0;
// signal = 0;
// /* Use the common ptrace flow if seccomp was
// * explicitly disabled for this tracee. */
// if (tracee->seccomp != ENABLED)
// break;
// status = ptrace(PTRACE_GETEVENTMSG, tracee->pid, NULL, &flags);
// if (status < 0)
// break;
// if ((flags & FILTER_SYSEXIT) == 0) {
// tracee->restart_how = PTRACE_CONT;
// translate_syscall(tracee);
// if (tracee->seccomp == DISABLING)
// tracee->restart_how = PTRACE_SYSCALL;
// break;
// }
// }
// /* Fall through. */
// case SIGTRAP | 0x80:
// signal = 0;
// /* This tracee got signaled then freed during the
// sysenter stage but the kernel reports the sysexit
// stage; just discard this spurious tracee/event. */
// if (tracee->exe == NULL) {
// tracee->restart_how = PTRACE_CONT; /* SYSCALL OR CONT */
// return 0;
// }
// switch (tracee->seccomp) {
// case ENABLED:
// if (IS_IN_SYSENTER(tracee)) {
// /* sysenter: ensure the sysexit
// * stage will be hit under seccomp. */
// tracee->restart_how = PTRACE_SYSCALL;
// tracee->sysexit_pending = true;
// }
// else {
// /* sysexit: the next sysenter
// * will be notified by seccomp. */
// tracee->restart_how = PTRACE_CONT;
// tracee->sysexit_pending = false;
// }
// /* Fall through. */
// case DISABLED:
// translate_syscall(tracee);
// /* This syscall has disabled seccomp. */
// if (tracee->seccomp == DISABLING) {
// tracee->restart_how = PTRACE_SYSCALL;
// tracee->seccomp = DISABLED;
// }
// break;
// case DISABLING:
// /* Seccomp was disabled by the
// * previous syscall, but its sysenter
// * stage was already handled. */
// tracee->seccomp = DISABLED;
// if (IS_IN_SYSENTER(tracee))
// tracee->status = 1;
// break;
// }
// break;
// case SIGTRAP | PTRACE_EVENT_VFORK << 8:
// signal = 0;
// (void) new_child(tracee, CLONE_VFORK);
// break;
// case SIGTRAP | PTRACE_EVENT_FORK << 8:
// case SIGTRAP | PTRACE_EVENT_CLONE << 8:
// signal = 0;
// (void) new_child(tracee, 0);
// break;
// case SIGTRAP | PTRACE_EVENT_VFORK_DONE << 8:
// case SIGTRAP | PTRACE_EVENT_EXEC << 8:
// case SIGTRAP | PTRACE_EVENT_EXIT << 8:
// signal = 0;
// break;
// case SIGSTOP:
// /* Stop this tracee until PRoot has received
// * the EVENT_*FORK|CLONE notification. */
// if (tracee->exe == NULL) {
// tracee->sigstop = SIGSTOP_PENDING;
// signal = -1;
// }
// /* For each tracee, the first SIGSTOP
// * is only used to notify the tracer. */
// if (tracee->sigstop == SIGSTOP_IGNORED) {
// tracee->sigstop = SIGSTOP_ALLOWED;
// signal = 0;
// }
// break;
// default:
// /* Deliver this signal as-is. */
// break;
// }
var deliver_sigtrap bool
switch signal {
case int(unix.SIGTRAP):
default_ptrace_options :=
unix.PTRACE_O_TRACESYSGOOD |
unix.PTRACE_O_TRACEFORK |
unix.PTRACE_O_TRACEVFORK |
unix.PTRACE_O_TRACEVFORKDONE |
unix.PTRACE_O_TRACEEXEC |
unix.PTRACE_O_TRACECLONE |
unix.PTRACE_O_TRACEEXIT
if deliver_sigtrap {
break
}
deliver_sigtrap = true
/* Try to enable seccomp mode 2... */
err := ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options|unix.PTRACE_O_TRACESECCOMP))
if err != nil {
seccomp_enabled = false
/* ... otherwise use default options only. */
err = ptrace(unix.PTRACE_SETOPTIONS, tracee.Pid, 0, uintptr(default_ptrace_options))
if err != nil {
proot.Err = fmt.Errorf("ptrace(PTRACE_SETOPTIONS): %s", err)
return -255
}
} else {
if proot.NoSeccomp {
seccomp_enabled = true
}
}
fallthrough
case int(unix.SIGTRAP | unix.PTRACE_EVENT_SECCOMP<<8):
if !seccomp_detected && seccomp_enabled {
// VERBOSE(tracee, 1, "ptrace acceleration (seccomp mode 2) enabled");
tracee.Seccomp = ENABLED
seccomp_detected = true
}
if signal == int(unix.SIGTRAP|unix.PTRACE_EVENT_SECCOMP<<8) {
signal = 0
flags := 0
/* Use the common ptrace flow if seccomp was
* explicitly disabled for this tracee. */
if tracee.Seccomp != ENABLED {
break
}
err := ptrace(unix.PTRACE_GETEVENTMSG, tracee.Pid, 0, uintptr(unsafe.Pointer(&flags)))
if err != nil {
break
}
if (flags & FILTER_SYSEXIT) == 0 {
tracee.RestartHow = unix.PTRACE_CONT
// translate_syscall(tracee);
if tracee.Seccomp == DISABLING {
tracee.RestartHow = unix.PTRACE_SYSCALL
}
break
}
}
fallthrough
case int(unix.SIGTRAP | 0x80):
signal = 0
/* This tracee got signaled then freed during the
sysenter stage but the kernel reports the sysexit
stage; just discard this spurious tracee/event. */
if tracee.Exe == "" {
tracee.RestartHow = unix.PTRACE_CONT /* SYSCALL OR CONT */
return 0
}
switch tracee.Seccomp {
case ENABLED:
if tracee.AsPtracee.Ptracer.Status == 0 {
/* sysenter: ensure the sysexit
* stage will be hit under seccomp. */
tracee.RestartHow = unix.PTRACE_SYSCALL
tracee.SysexitPending = true
} else {
/* sysexit: the next sysenter
* will be notified by seccomp. */
tracee.RestartHow = unix.PTRACE_CONT
tracee.SysexitPending = false
}
fallthrough
case DISABLED:
// translate_syscall(tracee);
/* This syscall has disabled seccomp. */
if tracee.Seccomp == DISABLING {
tracee.RestartHow = unix.PTRACE_SYSCALL
tracee.Seccomp = DISABLED
}
case DISABLING:
/* Seccomp was disabled by the
* previous syscall, but its sysenter
* stage was already handled. */
tracee.Seccomp = DISABLED
if tracee.AsPtracee.Ptracer.Status == 0 {
tracee.Status = 1
}
}
case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK<<8):
signal = 0
// proot.new_child(tracee, CLONE_VFORK);
case int(unix.SIGTRAP | unix.PTRACE_EVENT_FORK<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_CLONE<<8):
signal = 0
// proot.new_child(tracee, 0);
case int(unix.SIGTRAP | unix.PTRACE_EVENT_VFORK_DONE<<8), int(unix.SIGTRAP | unix.PTRACE_EVENT_EXEC<<8),
int(unix.SIGTRAP | unix.PTRACE_EVENT_EXIT<<8):
signal = 0
case int(unix.SIGSTOP):
/* Stop this tracee until PRoot has received
* the EVENT_*FORK|CLONE notification. */
if tracee.Exe == "" {
tracee.Sigstop = SIGSTOP_PENDING
signal = -1
}
/* For each tracee, the first SIGSTOP
* is only used to notify the tracer. */
if tracee.Sigstop == SIGSTOP_IGNORED {
tracee.Sigstop = SIGSTOP_ALLOWED
signal = 0
}
}
}
// /* Clear the pending event, if any. */
// tracee.AsPtracee.Event4.Proot.Pending = false
tracee.AsPtracee.Event4.Proot.Pending = false
return signal
}

View File

@@ -1,6 +1,7 @@
package proot
import (
"os"
"testing"
"sirherobrine23.com.br/go-bds/exec/exec"
@@ -11,6 +12,9 @@ func TestProot(t *testing.T) {
err := cmd.Start(&exec.Exec{
Arguments: []string{"go", "version"},
Stdout: os.Stdout,
Stderr: os.Stderr,
Stdin: os.Stdin,
})
if err != nil {
@@ -18,5 +22,8 @@ func TestProot(t *testing.T) {
return
}
cmd.Wait()
if err = cmd.Wait(); err != nil {
t.Error(err)
return
}
}