2334 lines
67 KiB
Go
2334 lines
67 KiB
Go
//go:build linux || android
|
|
|
|
package proot
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"sirherobrine23.com.br/go-bds/exec/v2/process"
|
|
prootext "sirherobrine23.com.br/go-bds/exec/v2/proot/extensions/extensions"
|
|
)
|
|
|
|
// Tracer-wide constants.
const (
	// atFDCWD has the same value as Linux's AT_FDCWD (-100), the directory
	// file descriptor sentinel meaning "relative to the current directory".
	atFDCWD = -100

	// waitTraceOptions is the option set every tracer wait4 call starts from.
	// A Linux ptrace tracer has to wait with __WALL: the clone/vfork/thread
	// stops produced by PTRACE_O_TRACE* belong to tasks that wait4(2) does not
	// treat as ordinary children. With a plain wait4(-1, ...) those events
	// stay hidden, so helpers spawned by apt/dpkg can vanish or remain stopped
	// while the tracer blocks on an event it will never be shown.
	waitTraceOptions = syscall.WALL

	// stackScratchGap is the distance, in bytes, kept between the tracee's
	// stack pointer and any scratch strings/vectors injected below it. The
	// area right under SP is the AMD64 red zone (and the equivalent
	// compiler scratch space on other architectures); leaf functions in libc,
	// dpkg, or apt helpers keep live locals there, so writing a rewritten path
	// immediately below SP can corrupt them even though the rewrite is only
	// needed for one syscall. The gap is deliberately small so the injected
	// data does not commonly land on an unmapped guard page.
	stackScratchGap = 256

	// processGroupShutdownTimeout is how long helper processes in the root's
	// process group may keep running after the ptrace loop finished cleanly
	// before they are interrupted.
	processGroupShutdownTimeout = 90 * time.Second

	// processGroupKillGrace is the delay between the polite SIGINT and the
	// follow-up SIGKILL.
	processGroupKillGrace = 5 * time.Second
)

// errProcessExited is returned by process-control methods when there is no
// running process left to act on.
var errProcessExited = errors.New("process already exited")
|
|
|
|
type nativeProcess struct {
|
|
process *os.Process
|
|
pid int
|
|
pgid int
|
|
|
|
stdin io.Reader
|
|
stdout, stderr io.Writer
|
|
|
|
stdinW *os.File
|
|
stdoutR *os.File
|
|
stderrR *os.File
|
|
copyWG sync.WaitGroup // stdin-only; never waited by Wait()
|
|
outputWG sync.WaitGroup
|
|
childClose []io.Closer
|
|
parentClose []io.Closer
|
|
cleanup []func() error
|
|
stdioStarted bool
|
|
|
|
tracer *tracer
|
|
done chan error
|
|
once sync.Once
|
|
|
|
mu sync.Mutex
|
|
exitCode int
|
|
exited bool
|
|
}
|
|
|
|
func startNative(pr *Proot, config prootext.Config, cleanups []func() error, options *process.Exec) (*nativeProcess, error) {
|
|
pm, err := newPathMapper(config.Rootfs, config.PathResolvers)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cwd := options.Cwd
|
|
if cwd == "" {
|
|
cwd = "/"
|
|
}
|
|
cwd = cleanGuestPath(cwd)
|
|
hostCwd := pm.GuestToHost(cwd)
|
|
|
|
args := append([]string(nil), options.Arguments...)
|
|
execPath := args[0]
|
|
if resolution, handled, err := resolveExecExtensions(config.ExecResolvers, pm, cwd, args); err != nil {
|
|
return nil, err
|
|
} else if handled {
|
|
execPath = resolution.ExecPath
|
|
args = resolution.Args
|
|
} else {
|
|
guestCmd, hostCmd := pm.Translate(cwd, args[0])
|
|
if rw, changed := pm.resolveExec(hostCmd, guestCmd, args); changed {
|
|
execPath = rw.ExecPath
|
|
args = rw.Argv
|
|
} else {
|
|
execPath = hostCmd
|
|
args[0] = hostCmd
|
|
}
|
|
}
|
|
|
|
p := &nativeProcess{done: make(chan error, 1), exitCode: -1, cleanup: cleanups}
|
|
if pr.proc != nil {
|
|
p.stdin, p.stdout, p.stderr = pr.proc.stdin, pr.proc.stdout, pr.proc.stderr
|
|
}
|
|
if options.Stdin != nil {
|
|
p.stdin = options.Stdin
|
|
}
|
|
if options.Stdout != nil {
|
|
p.stdout = options.Stdout
|
|
}
|
|
if options.Stderr != nil {
|
|
p.stderr = options.Stderr
|
|
}
|
|
|
|
files, err := p.prepareFiles()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
envOverlay := options.Environment
|
|
if envOverlay == nil {
|
|
envOverlay = process.Env{}
|
|
}
|
|
if lp := pm.libraryPath(); lp != "" {
|
|
if old := envOverlay["LD_LIBRARY_PATH"]; old != "" {
|
|
envOverlay["LD_LIBRARY_PATH"] = lp + ":" + old
|
|
} else {
|
|
envOverlay["LD_LIBRARY_PATH"] = lp
|
|
}
|
|
}
|
|
envOverlay["PWD"] = cwd
|
|
env := mergeEnv(os.Environ(), envOverlay)
|
|
attr := &os.ProcAttr{
|
|
Dir: hostCwd,
|
|
Env: env,
|
|
Files: files,
|
|
Sys: &syscall.SysProcAttr{
|
|
Ptrace: true,
|
|
Setpgid: true,
|
|
},
|
|
}
|
|
|
|
proc, err := os.StartProcess(execPath, args, attr)
|
|
if err != nil {
|
|
p.closeAllPipes()
|
|
return nil, err
|
|
}
|
|
// The parent must close the child-side pipe descriptors immediately.
|
|
// Otherwise stdout/stderr copy loops never observe EOF because the tracer
|
|
// process itself still owns a write end of the pipe.
|
|
p.closeChildPipes()
|
|
p.process = proc
|
|
p.pid = proc.Pid
|
|
p.pgid = proc.Pid
|
|
p.startCopyLoops()
|
|
|
|
p.tracer = newTracer(pm, cwd, uint32(pr.UID), uint32(pr.GID))
|
|
go func() {
|
|
runtime.LockOSThread()
|
|
defer runtime.UnlockOSThread()
|
|
|
|
if options.Context != nil {
|
|
go func() {
|
|
<-options.Context.Done()
|
|
p.interruptThenKillAfter(processGroupKillGrace)
|
|
}()
|
|
}
|
|
err := p.tracer.loop(p.pid)
|
|
|
|
// Only wait for helper processes after a clean ptrace loop. If tracer
|
|
// setup itself failed (for example a transient ESRCH from
|
|
// PTRACE_SETOPTIONS), the root can still be stopped under ptrace and
|
|
// remain visible in the process group forever. Kill that incomplete
|
|
// tracee tree immediately instead of waiting the normal 90-second
|
|
// package-manager shutdown window.
|
|
if err != nil {
|
|
p.tracer.debugf("ptrace loop failed for root pid=%d: %v; terminating process group", p.pid, err)
|
|
_ = syscall.Kill(-p.pgid, syscall.SIGKILL)
|
|
p.reapProcessGroupChildren()
|
|
} else {
|
|
// The ptrace root can legitimately disappear before package-manager
|
|
// helper children in the same process group have finished flushing
|
|
// stdout/stderr and modifying the rootfs. Do not report process
|
|
// completion until the whole process group is gone.
|
|
if waitErr := p.waitProcessGroupGone(processGroupShutdownTimeout); waitErr != nil {
|
|
err = waitErr
|
|
}
|
|
}
|
|
|
|
// Stop the parent-side stdio plumbing before waiting for copy loops.
|
|
// In the root-gone/detach paths the traced process tree may have already
|
|
// disappeared without delivering the final pipe EOF in the usual order.
|
|
// Closing stdout/stderr readers here wakes the copy goroutines so Wait()
|
|
// cannot hang after the ptrace loop has already completed.
|
|
p.closeParentInputPipes()
|
|
p.closeParentOutputPipes()
|
|
p.outputWG.Wait()
|
|
err = errors.Join(err, p.cleanupExtensions())
|
|
p.mu.Lock()
|
|
if p.tracer.exitCodeSet {
|
|
p.exitCode = p.tracer.exitCode
|
|
}
|
|
p.exited = true
|
|
p.mu.Unlock()
|
|
_ = p.process.Release()
|
|
p.done <- err
|
|
}()
|
|
|
|
return p, nil
|
|
}
|
|
|
|
func resolveExecExtensions(resolvers []prootext.ExecResolver, pm *pathMapper, cwd string, args []string) (prootext.ExecResolution, bool, error) {
|
|
for i := len(resolvers) - 1; i >= 0; i-- {
|
|
resolver := resolvers[i]
|
|
resolution, err := resolver.ResolveExec(prootext.ExecRequest{
|
|
Cwd: cwd,
|
|
Args: append([]string(nil), args...),
|
|
Translate: func(cwd, name string, mode prootext.PathMode) (string, string) {
|
|
return pm.TranslateMode(cwd, name, mode)
|
|
},
|
|
})
|
|
if err != nil {
|
|
return prootext.ExecResolution{}, false, err
|
|
}
|
|
if resolution.Handled {
|
|
return resolution, true, nil
|
|
}
|
|
}
|
|
return prootext.ExecResolution{}, false, nil
|
|
}
|
|
|
|
func (p *nativeProcess) cleanupExtensions() error {
|
|
var err error
|
|
for i := len(p.cleanup) - 1; i >= 0; i-- {
|
|
err = errors.Join(err, p.cleanup[i]())
|
|
}
|
|
p.cleanup = nil
|
|
return err
|
|
}
|
|
|
|
// mergeEnv returns a copy of base ("KEY=value" entries) with every key of
// overlay applied on top of it.
//
// A key that already exists in base is overwritten in place, at every position
// where it occurs: a duplicated key must not keep a stale earlier value, because
// first-match lookups (such as C getenv) in the child would otherwise still see
// it. Keys that are new are appended; their relative order follows Go's map
// iteration and is therefore unspecified. Entries of base without '=' are
// passed through untouched, and base itself is never modified.
func mergeEnv(base []string, overlay map[string]string) []string {
	out := append([]string(nil), base...)

	// applied records the overlay keys that were found (and replaced) in base.
	applied := make(map[string]bool, len(overlay))
	for i, kv := range out {
		key, _, ok := strings.Cut(kv, "=")
		if !ok {
			continue
		}
		if value, overridden := overlay[key]; overridden {
			out[i] = key + "=" + value
			applied[key] = true
		}
	}

	for key, value := range overlay {
		if !applied[key] {
			out = append(out, key+"="+value)
		}
	}
	return out
}
|
|
|
|
func (p *nativeProcess) prepareFiles() ([]*os.File, error) {
|
|
files := []*os.File{os.Stdin, os.Stdout, os.Stderr}
|
|
|
|
if p.stdin != nil {
|
|
if f, ok := p.stdin.(*os.File); ok {
|
|
files[0] = f
|
|
} else {
|
|
r, w, err := os.Pipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
p.stdinW = w
|
|
p.childClose = append(p.childClose, r)
|
|
p.parentClose = append(p.parentClose, w)
|
|
files[0] = r
|
|
}
|
|
}
|
|
if p.stdout != nil {
|
|
r, w, err := os.Pipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
p.stdoutR = r
|
|
p.childClose = append(p.childClose, w)
|
|
p.parentClose = append(p.parentClose, r)
|
|
files[1] = w
|
|
}
|
|
if p.stderr != nil {
|
|
r, w, err := os.Pipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
p.stderrR = r
|
|
p.childClose = append(p.childClose, w)
|
|
p.parentClose = append(p.parentClose, r)
|
|
files[2] = w
|
|
}
|
|
return files, nil
|
|
}
|
|
|
|
func (p *nativeProcess) startCopyLoops() {
|
|
if p.stdioStarted {
|
|
return
|
|
}
|
|
p.stdioStarted = true
|
|
if p.stdin != nil && p.stdinW != nil {
|
|
// Do not include stdin in the output wait path. A generic Reader can block
|
|
// forever waiting for terminal input, and Wait() must report process exit
|
|
// independently from stdin draining.
|
|
p.copyWG.Add(1)
|
|
go func() {
|
|
defer p.copyWG.Done()
|
|
_, _ = io.Copy(p.stdinW, p.stdin)
|
|
_ = p.stdinW.Close()
|
|
}()
|
|
}
|
|
if p.stdout != nil && p.stdoutR != nil {
|
|
p.outputWG.Add(1)
|
|
go func() {
|
|
defer p.outputWG.Done()
|
|
_, _ = io.Copy(p.stdout, p.stdoutR)
|
|
}()
|
|
}
|
|
if p.stderr != nil && p.stderrR != nil {
|
|
p.outputWG.Add(1)
|
|
go func() {
|
|
defer p.outputWG.Done()
|
|
_, _ = io.Copy(p.stderr, p.stderrR)
|
|
}()
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) waitProcessGroupGone(timeout time.Duration) error {
|
|
if p.pgid <= 0 {
|
|
return nil
|
|
}
|
|
|
|
deadline := time.Now().Add(timeout)
|
|
interruptSent := false
|
|
killSent := false
|
|
for {
|
|
p.reapProcessGroupChildren()
|
|
if !p.processGroupExists() {
|
|
return nil
|
|
}
|
|
|
|
now := time.Now()
|
|
switch {
|
|
case !interruptSent && now.After(deadline):
|
|
p.tracer.debugf("process group %d still alive after %s; sending SIGINT", p.pgid, timeout)
|
|
_ = syscall.Kill(-p.pgid, syscall.SIGINT)
|
|
interruptSent = true
|
|
deadline = now.Add(processGroupKillGrace)
|
|
case interruptSent && !killSent && now.After(deadline):
|
|
p.tracer.debugf("process group %d ignored SIGINT; sending SIGKILL", p.pgid)
|
|
_ = syscall.Kill(-p.pgid, syscall.SIGKILL)
|
|
killSent = true
|
|
deadline = now.Add(time.Second)
|
|
case killSent && now.After(deadline):
|
|
p.reapProcessGroupChildren()
|
|
if p.processGroupExists() {
|
|
return fmt.Errorf("proot: process group %d did not exit after SIGKILL", p.pgid)
|
|
}
|
|
return nil
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) processGroupExists() bool {
|
|
return processGroupExists(p.pgid)
|
|
}
|
|
|
|
func processGroupExists(pgid int) bool {
|
|
if pgid <= 0 {
|
|
return false
|
|
}
|
|
live, err := processGroupLiveMembers(pgid)
|
|
if err == nil {
|
|
return len(live) > 0
|
|
}
|
|
err = syscall.Kill(-pgid, 0)
|
|
return err == nil || errors.Is(err, syscall.EPERM)
|
|
}
|
|
|
|
func processGroupLiveMembers(pgid int) ([]int, error) {
|
|
entries, err := os.ReadDir("/proc")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var live []int
|
|
for _, entry := range entries {
|
|
if !entry.Type().IsDir() {
|
|
continue
|
|
}
|
|
pid, err := strconv.Atoi(entry.Name())
|
|
if err != nil {
|
|
continue
|
|
}
|
|
procPGID, state, err := readProcStatProcessGroup(pid)
|
|
if err != nil || procPGID != pgid || state == 'Z' {
|
|
continue
|
|
}
|
|
live = append(live, pid)
|
|
}
|
|
return live, nil
|
|
}
|
|
|
|
// readProcStatProcessGroup reads /proc/<pid>/stat and returns the process
// group ID and the one-character state of that process.
//
// Parsing starts after the last ')' so that a command name containing spaces
// or parentheses cannot shift the fields: the byte two positions after it is
// the state, followed by the parent PID and then the process group.
func readProcStatProcessGroup(pid int) (int, byte, error) {
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return 0, 0, err
	}

	text := string(raw)
	closeParen := strings.LastIndexByte(text, ')')
	if closeParen < 0 || closeParen+2 >= len(text) {
		return 0, 0, fmt.Errorf("invalid /proc/%d/stat", pid)
	}

	// rest[0] is the parent PID, rest[1] the process group.
	rest := strings.Fields(text[closeParen+3:])
	if len(rest) < 2 {
		return 0, 0, fmt.Errorf("invalid /proc/%d/stat", pid)
	}
	group, err := strconv.Atoi(rest[1])
	if err != nil {
		return 0, 0, err
	}
	return group, text[closeParen+2], nil
}
|
|
|
|
func (p *nativeProcess) reapProcessGroupChildren() {
|
|
for {
|
|
var ws syscall.WaitStatus
|
|
var ru syscall.Rusage
|
|
pid, err := syscall.Wait4(-p.pgid, &ws, waitTraceOptions|syscall.WNOHANG, &ru)
|
|
if err == syscall.EINTR {
|
|
continue
|
|
}
|
|
if err != nil || pid <= 0 {
|
|
return
|
|
}
|
|
if p.tracer != nil {
|
|
p.tracer.debugf("reaped leftover process-group child pid=%d status=%#x", pid, int(ws))
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) interruptThenKillAfter(grace time.Duration) {
|
|
_ = p.Signal(os.Interrupt)
|
|
time.Sleep(grace)
|
|
if !p.isExited() {
|
|
_ = p.Kill()
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) isExited() bool {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
return p.exited
|
|
}
|
|
|
|
func (p *nativeProcess) closeChildPipes() {
|
|
for _, c := range p.childClose {
|
|
_ = c.Close()
|
|
}
|
|
p.childClose = nil
|
|
}
|
|
|
|
func (p *nativeProcess) closeParentInputPipes() {
|
|
if p.stdinW != nil {
|
|
_ = p.stdinW.Close()
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) closeParentOutputPipes() {
|
|
if p.stdoutR != nil {
|
|
_ = p.stdoutR.Close()
|
|
}
|
|
if p.stderrR != nil {
|
|
_ = p.stderrR.Close()
|
|
}
|
|
}
|
|
|
|
func (p *nativeProcess) closeAllPipes() {
|
|
p.closeChildPipes()
|
|
for _, c := range p.parentClose {
|
|
_ = c.Close()
|
|
}
|
|
p.parentClose = nil
|
|
}
|
|
|
|
func (p *nativeProcess) Kill() error {
|
|
if p.process == nil {
|
|
return errProcessExited
|
|
}
|
|
if p.pgid > 0 {
|
|
return syscall.Kill(-p.pgid, syscall.SIGKILL)
|
|
}
|
|
return p.process.Kill()
|
|
}
|
|
|
|
func (p *nativeProcess) Signal(sig os.Signal) error {
|
|
if p.process == nil {
|
|
return errProcessExited
|
|
}
|
|
s, ok := sig.(syscall.Signal)
|
|
if !ok {
|
|
return fmt.Errorf("unsupported signal %v", sig)
|
|
}
|
|
if p.pgid > 0 {
|
|
return syscall.Kill(-p.pgid, s)
|
|
}
|
|
return p.process.Signal(sig)
|
|
}
|
|
|
|
func (p *nativeProcess) Wait() error {
|
|
if p.done == nil {
|
|
return errProcessExited
|
|
}
|
|
err := <-p.done
|
|
p.done = nil
|
|
return err
|
|
}
|
|
|
|
func (p *nativeProcess) ExitCode() (int, error) {
|
|
p.mu.Lock()
|
|
if p.exited {
|
|
code := p.exitCode
|
|
p.mu.Unlock()
|
|
return code, nil
|
|
}
|
|
p.mu.Unlock()
|
|
if err := p.Wait(); err != nil {
|
|
return -1, err
|
|
}
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
return p.exitCode, nil
|
|
}
|
|
|
|
type tracer struct {
|
|
pm *pathMapper
|
|
rootPID int
|
|
rootCwd string
|
|
uid uint32
|
|
gid uint32
|
|
debug bool
|
|
tracees map[int]*traceeState
|
|
pendingTracees map[int]*traceeState
|
|
rootGone bool
|
|
rootGoneSince time.Time
|
|
lostTracee bool
|
|
lostTraceeSince time.Time
|
|
exitCode int
|
|
exitCodeSet bool
|
|
}
|
|
|
|
type traceeState struct {
|
|
pid int
|
|
parentPID int
|
|
inSyscall bool
|
|
optionsSet bool
|
|
cwd string
|
|
scratch uint64
|
|
pending *pendingExit
|
|
creds fakeCreds
|
|
|
|
// Keep the unmodified syscall-entry registers until the corresponding
|
|
// exit/SIGSYS. On ARM/ARM64 the first argument and return value share r0/x0,
|
|
// so an emulated exit can otherwise destroy the argument needed by the
|
|
// outer-seccomp handler.
|
|
originalRegs syscall.PtraceRegs
|
|
originalSysno uint64
|
|
originalValid bool
|
|
}
|
|
|
|
// pendingExit records what still has to be done for a syscall when its exit
// stop arrives.
type pendingExit struct {
	// kind selects the exit action (fakeErrno uses "errno"). The path fields
	// carry whatever paths that action needs.
	kind, guestPath, sourcePath, targetPath string

	// buf, size and fd are numeric arguments of the action; for kind "errno",
	// size holds the errno value to report.
	// NOTE(review): the meaning of buf/fd for other kinds is defined by the
	// exit handlers, which are outside this part of the file.
	buf, size, fd uint64
}
|
|
|
|
// fakeCreds is the emulated credential set of a tracee: real, effective and
// saved user/group IDs, the filesystem IDs, and the supplementary groups.
type fakeCreds struct {
	ruid, euid, suid uint32
	rgid, egid, sgid uint32
	fsuid, fsgid     uint32
	groups           []uint32
}

// newFakeCreds returns credentials in which every user ID is uid, every group
// ID is gid, and gid is the only supplementary group.
func newFakeCreds(uid, gid uint32) fakeCreds {
	var c fakeCreds
	c.ruid, c.euid, c.suid, c.fsuid = uid, uid, uid, uid
	c.rgid, c.egid, c.sgid, c.fsgid = gid, gid, gid, gid
	c.groups = []uint32{gid}
	return c
}

// cloneFakeCreds returns a copy of c whose supplementary group list does not
// share storage with the original.
func cloneFakeCreds(c fakeCreds) fakeCreds {
	if c.groups != nil {
		c.groups = append([]uint32(nil), c.groups...)
	}
	return c
}

// noCredID reports whether v is the all-ones value (uint32 -1) that the
// set*id family uses for "leave this ID unchanged".
func noCredID(v uint32) bool { return v == 0xFFFFFFFF }

// uidPrivileged reports whether the effective UID is root.
func (c fakeCreds) uidPrivileged() bool { return c.euid == 0 }

// gidPrivileged reports whether group IDs may be changed freely. It checks the
// effective *user* ID: the privilege follows root, not group 0.
func (c fakeCreds) gidPrivileged() bool { return c.euid == 0 }

// canUseUID reports whether v is acceptable as a new user ID: the "unchanged"
// marker, anything for a privileged caller, or one of the current
// real/effective/saved UIDs.
func (c fakeCreds) canUseUID(v uint32) bool {
	switch {
	case noCredID(v), c.uidPrivileged():
		return true
	default:
		return v == c.ruid || v == c.euid || v == c.suid
	}
}

// canUseGID is the group counterpart of canUseUID.
func (c fakeCreds) canUseGID(v uint32) bool {
	switch {
	case noCredID(v), c.gidPrivileged():
		return true
	default:
		return v == c.rgid || v == c.egid || v == c.sgid
	}
}

// canSetUID reports whether every value in vals passes canUseUID. It is true
// for an empty list.
func (c fakeCreds) canSetUID(vals ...uint32) bool {
	for i := range vals {
		if !c.canUseUID(vals[i]) {
			return false
		}
	}
	return true
}

// canSetGID reports whether every value in vals passes canUseGID. It is true
// for an empty list.
func (c fakeCreds) canSetGID(vals ...uint32) bool {
	for i := range vals {
		if !c.canUseGID(vals[i]) {
			return false
		}
	}
	return true
}
|
|
|
|
func (t *tracer) fakeErrno(st *traceeState, regs *syscall.PtraceRegs, errno syscall.Errno) error {
|
|
setSysno(regs, sc.getpid)
|
|
st.pending = &pendingExit{kind: "errno", size: uint64(errno)}
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
|
|
func newTracer(pm *pathMapper, cwd string, uid, gid uint32) *tracer {
|
|
return &tracer{
|
|
pm: pm,
|
|
rootCwd: cleanGuestPath(cwd),
|
|
uid: uid,
|
|
gid: gid,
|
|
debug: os.Getenv("PROOT_GO_DEBUG") == "1",
|
|
tracees: map[int]*traceeState{},
|
|
pendingTracees: map[int]*traceeState{},
|
|
exitCode: -1,
|
|
}
|
|
}
|
|
|
|
func (t *tracer) debugf(format string, args ...any) {
|
|
if t.debug {
|
|
fmt.Fprintf(os.Stderr, "proot-go: "+format+"\n", args...)
|
|
}
|
|
}
|
|
|
|
func (t *tracer) addTracee(pid int, parent *traceeState, cwd string) *traceeState {
|
|
st := &traceeState{pid: pid, cwd: cleanGuestPath(cwd), creds: newFakeCreds(t.uid, t.gid)}
|
|
if parent != nil {
|
|
st.parentPID = parent.pid
|
|
st.cwd = parent.cwd
|
|
st.creds = cloneFakeCreds(parent.creds)
|
|
}
|
|
t.tracees[pid] = st
|
|
delete(t.pendingTracees, pid)
|
|
return st
|
|
}
|
|
|
|
func (t *tracer) queueTracee(pid int, parent *traceeState) {
|
|
if pid <= 0 {
|
|
return
|
|
}
|
|
|
|
// A newly forked task can report its initial ptrace stop before the
|
|
// parent's PTRACE_EVENT_FORK/CLONE is consumed. In that ordering the task
|
|
// has already been adopted with the conservative cwd "/". Merge the
|
|
// parent state when the event finally arrives instead of returning early;
|
|
// otherwise only some members of a shell pipeline inherit chdir(), e.g.
|
|
// `cd /usr/share/ca-certificates; find . | sort`.
|
|
inheritParent := func(st *traceeState) {
|
|
if parent == nil {
|
|
return
|
|
}
|
|
st.parentPID = parent.pid
|
|
st.cwd = parent.cwd
|
|
st.creds = cloneFakeCreds(parent.creds)
|
|
}
|
|
|
|
if st, ok := t.tracees[pid]; ok {
|
|
inheritParent(st)
|
|
t.debugf("pid=%d merged late parent pid=%d cwd=%q", pid, st.parentPID, st.cwd)
|
|
return
|
|
}
|
|
if st, ok := t.pendingTracees[pid]; ok {
|
|
inheritParent(st)
|
|
return
|
|
}
|
|
|
|
st := &traceeState{pid: pid, cwd: "/", creds: newFakeCreds(t.uid, t.gid)}
|
|
inheritParent(st)
|
|
t.pendingTracees[pid] = st
|
|
}
|
|
|
|
func (t *tracer) adoptTracee(pid int) *traceeState {
|
|
if st := t.pendingTracees[pid]; st != nil {
|
|
delete(t.pendingTracees, pid)
|
|
t.tracees[pid] = st
|
|
return st
|
|
}
|
|
return t.addTracee(pid, nil, "/")
|
|
}
|
|
|
|
func (t *tracer) loop(rootPID int) error {
|
|
t.rootPID = rootPID
|
|
var ws syscall.WaitStatus
|
|
var ru syscall.Rusage
|
|
pid, err := syscall.Wait4(rootPID, &ws, waitTraceOptions, &ru)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
root := t.addTracee(pid, nil, t.rootCwd)
|
|
if ws.Exited() || ws.Signaled() {
|
|
t.setExit(ws)
|
|
return t.waitErr(ws)
|
|
}
|
|
if err := setPtraceOptionsRetry(pid, true); err != nil {
|
|
t.debugf("pid=%d initial ptrace options failed: %v", pid, err)
|
|
_ = syscall.Kill(pid, syscall.SIGKILL)
|
|
_ = syscall.PtraceCont(pid, int(syscall.SIGKILL))
|
|
return err
|
|
}
|
|
root.optionsSet = true
|
|
if !t.resumeTracee(pid, 0) {
|
|
return syscall.ESRCH
|
|
}
|
|
_ = root
|
|
|
|
for len(t.tracees) > 0 {
|
|
waitOptions := waitTraceOptions
|
|
if t.rootGone || t.lostTracee {
|
|
waitOptions |= syscall.WNOHANG
|
|
}
|
|
pid, err = syscall.Wait4(-1, &ws, waitOptions, &ru)
|
|
if err == syscall.EINTR {
|
|
continue
|
|
}
|
|
if pid == 0 {
|
|
if t.rootGone {
|
|
t.pruneDeadTracees()
|
|
if len(t.tracees) == 0 {
|
|
if !t.exitCodeSet {
|
|
t.exitCodeSet = true
|
|
t.exitCode = 0
|
|
}
|
|
break
|
|
}
|
|
if time.Since(t.rootGoneSince) > 2*time.Second {
|
|
t.debugf("root tracee gone; detaching remaining tracees after idle wait: %v", t.traceePids())
|
|
t.detachRemainingTracees()
|
|
if !t.exitCodeSet {
|
|
t.exitCodeSet = true
|
|
t.exitCode = 0
|
|
}
|
|
break
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
continue
|
|
}
|
|
if t.lostTracee {
|
|
t.pruneDeadTracees()
|
|
if time.Since(t.lostTraceeSince) > 500*time.Millisecond {
|
|
t.debugf("idle after lost tracee; waking possible waiters: %v", t.traceePids())
|
|
t.signalTracees(syscall.SIGCHLD)
|
|
t.lostTraceeSince = time.Now()
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
continue
|
|
}
|
|
continue
|
|
}
|
|
if err != nil {
|
|
if len(t.tracees) == 0 || err == syscall.ECHILD {
|
|
break
|
|
}
|
|
return err
|
|
}
|
|
st := t.tracees[pid]
|
|
if st == nil {
|
|
st = t.adoptTracee(pid)
|
|
}
|
|
|
|
if ws.Exited() || ws.Signaled() {
|
|
if pid == rootPID {
|
|
t.setExit(ws)
|
|
}
|
|
delete(t.tracees, pid)
|
|
continue
|
|
}
|
|
if !ws.Stopped() {
|
|
if !t.resumeTracee(pid, 0) {
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
continue
|
|
}
|
|
if !st.optionsSet {
|
|
if err := setPtraceOptionsRetry(pid, pid == rootPID); err == nil {
|
|
st.optionsSet = true
|
|
} else {
|
|
t.debugf("pid=%d ptrace options failed: %v", pid, err)
|
|
if isProcessGoneErr(err) {
|
|
t.markRootGone(pid, rootPID)
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
sig := ws.StopSignal()
|
|
event := ptraceEvent(ws)
|
|
if sig == syscall.SIGTRAP && event != 0 {
|
|
t.handleEvent(pid, st, event)
|
|
if !t.resumeTracee(pid, 0) {
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
continue
|
|
}
|
|
|
|
if isSyscallStop(ws) {
|
|
if err := t.handleSyscall(st); err != nil {
|
|
if isProcessGoneErr(err) {
|
|
// Some kernels/reporting paths can return ESRCH from PTRACE_PEEKDATA
|
|
// while a tracee is in the syscall-stop we are handling. Do not
|
|
// treat that as a fatal translation error by itself: first try to
|
|
// resume the original syscall so the real parent/children can finish
|
|
// normally. This is important for apt/dpkg where the root apt
|
|
// process can briefly report ESRCH while dpkg helpers are still
|
|
// unpacking packages.
|
|
if pid == rootPID {
|
|
t.debugf("pid=%d root syscall translation saw process-gone error, trying resume: %v", pid, err)
|
|
} else {
|
|
t.debugf("pid=%d syscall translation saw process-gone error, trying resume: %v", pid, err)
|
|
}
|
|
if t.resumeTracee(pid, 0) {
|
|
continue
|
|
}
|
|
t.markRootGone(pid, rootPID)
|
|
continue
|
|
}
|
|
// Do not kill the tracee for a best-effort translation failure;
|
|
// continue and let the kernel report the original error if possible.
|
|
t.debugf("pid=%d syscall translation failed: %v", pid, err)
|
|
}
|
|
if !t.resumeTracee(pid, 0) {
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
continue
|
|
}
|
|
|
|
if sig == syscall.SIGSYS {
|
|
handled, err := t.handleSeccompSIGSYS(st)
|
|
if err != nil {
|
|
if isProcessGoneErr(err) {
|
|
t.markRootGone(pid, rootPID)
|
|
continue
|
|
}
|
|
t.debugf("pid=%d failed handling seccomp SIGSYS: %v", pid, err)
|
|
} else if handled {
|
|
// SECCOMP_RET_TRAP skips the syscall and reports SIGSYS instead of
|
|
// producing the normal syscall-exit stop. Suppress the signal and
|
|
// continue at the instruction following the syscall.
|
|
if !t.resumeTracee(pid, 0) {
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Forward real signals, but do not reinject ptrace's synthetic SIGTRAP/SIGSTOP.
|
|
forward := int(sig)
|
|
if sig == syscall.SIGTRAP || sig == syscall.SIGSTOP {
|
|
forward = 0
|
|
}
|
|
if !t.resumeTracee(pid, forward) {
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
}
|
|
if t.exitCodeSet && t.exitCode != 0 {
|
|
return fmt.Errorf("exit status %d", t.exitCode)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (t *tracer) traceeGone(pid, rootPID int, where string, err error) {
|
|
t.debugf("pid=%d disappeared %s: %v", pid, where, err)
|
|
delete(t.tracees, pid)
|
|
t.markRootGone(pid, rootPID)
|
|
}
|
|
|
|
func (t *tracer) markRootGone(pid, rootPID int) {
|
|
if pid != rootPID {
|
|
return
|
|
}
|
|
|
|
if processExists(pid) {
|
|
t.terminateLostRoot(pid, rootPID)
|
|
}
|
|
|
|
if !t.rootGone {
|
|
t.rootGone = true
|
|
t.rootGoneSince = time.Now()
|
|
}
|
|
|
|
// Once the root tracee has disappeared there is no command owner left to
|
|
// wait for. Keeping the tracer attached to leftover helpers is unsafe here:
|
|
// apt/dpkg can leave transient method/sqv/extracttemplate children that have
|
|
// already notified their real parent, while the tracer no longer has a root
|
|
// process capable of driving the tree forward. Detach the remaining tracees
|
|
// immediately so Wait() can return instead of spinning in WNOHANG forever.
|
|
delete(t.tracees, pid)
|
|
if len(t.tracees) > 0 {
|
|
t.debugf("root tracee gone; detaching remaining tracees immediately: %v", t.traceePids())
|
|
t.detachRemainingTracees()
|
|
}
|
|
|
|
// This flag is what makes the main wait loop switch to WNOHANG + pruning.
|
|
// Without it, a process-gone root can leave the tracer blocked forever in
|
|
// wait4(-1, __WALL) if one of the queued/adopted helper PIDs never delivers
|
|
// another wait status. The log that ends with:
|
|
//
|
|
// root syscall translation saw process-gone error, trying resume: no such process
|
|
// disappeared before resume
|
|
//
|
|
// hits exactly this path.
|
|
if !t.rootGone {
|
|
t.rootGone = true
|
|
t.rootGoneSince = time.Now()
|
|
}
|
|
|
|
// If wait4 never delivered the root's final status, fall back to a clean exit.
|
|
// Real non-zero exits/signals still win when they were observed via setExit().
|
|
if !t.exitCodeSet {
|
|
t.exitCodeSet = true
|
|
t.exitCode = 0
|
|
}
|
|
}
|
|
|
|
func (t *tracer) pruneDeadTracees() {
|
|
for pid := range t.tracees {
|
|
if err := syscall.Kill(pid, 0); err != nil && errors.Is(err, syscall.ESRCH) {
|
|
t.debugf("pid=%d disappeared while pruning stale tracees", pid)
|
|
delete(t.tracees, pid)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *tracer) terminateLostRoot(pid, pgid int) {
|
|
t.debugf("root tracee pid=%d is alive but no longer ptrace-controllable; sending SIGINT to process group %d", pid, pgid)
|
|
_ = syscall.Kill(-pgid, syscall.SIGINT)
|
|
if err := syscall.PtraceCont(pid, int(syscall.SIGINT)); err != nil {
|
|
t.debugf("pid=%d ptrace continue with SIGINT failed: %v", pid, err)
|
|
}
|
|
|
|
time.Sleep(500 * time.Millisecond)
|
|
if !processGroupExists(pgid) {
|
|
return
|
|
}
|
|
|
|
t.debugf("root process group %d still alive after SIGINT; sending SIGKILL", pgid)
|
|
_ = syscall.Kill(-pgid, syscall.SIGKILL)
|
|
if err := syscall.PtraceCont(pid, int(syscall.SIGKILL)); err != nil {
|
|
t.debugf("pid=%d ptrace continue with SIGKILL failed: %v", pid, err)
|
|
if err := syscall.PtraceDetach(pid); err != nil && !isProcessGoneErr(err) {
|
|
t.debugf("pid=%d ptrace detach after SIGKILL failed: %v", pid, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *tracer) traceePids() []int {
|
|
pids := make([]int, 0, len(t.tracees))
|
|
for pid := range t.tracees {
|
|
pids = append(pids, pid)
|
|
}
|
|
return pids
|
|
}
|
|
|
|
func (t *tracer) detachRemainingTracees() {
|
|
for pid := range t.tracees {
|
|
if err := syscall.PtraceDetach(pid); err != nil {
|
|
if isProcessGoneErr(err) {
|
|
t.debugf("pid=%d disappeared before detach", pid)
|
|
} else {
|
|
t.debugf("pid=%d ptrace detach failed: %v", pid, err)
|
|
}
|
|
}
|
|
delete(t.tracees, pid)
|
|
}
|
|
}
|
|
|
|
func (t *tracer) noteLostTracee(pid int) {
|
|
if !t.lostTracee {
|
|
t.lostTracee = true
|
|
}
|
|
t.lostTraceeSince = time.Now()
|
|
}
|
|
|
|
func (t *tracer) signalTracees(sig syscall.Signal) {
|
|
for pid := range t.tracees {
|
|
if err := syscall.Kill(pid, sig); err != nil && !isProcessGoneErr(err) {
|
|
t.debugf("pid=%d signal %s failed: %v", pid, sig, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *tracer) signalTraceeParent(pid int, sig syscall.Signal) {
|
|
st := t.tracees[pid]
|
|
if st == nil || st.parentPID == 0 {
|
|
return
|
|
}
|
|
if _, ok := t.tracees[st.parentPID]; !ok {
|
|
return
|
|
}
|
|
if err := syscall.Kill(st.parentPID, sig); err != nil {
|
|
if !isProcessGoneErr(err) {
|
|
t.debugf("pid=%d signal parent %d with %s failed: %v", pid, st.parentPID, sig, err)
|
|
}
|
|
return
|
|
}
|
|
t.debugf("pid=%d disappeared; sent %s to parent pid=%d", pid, sig, st.parentPID)
|
|
}
|
|
|
|
func (t *tracer) reapGoneTracee(pid int) {
|
|
// A ptrace resume can return ESRCH after a tracee has already reached its
|
|
// final wait state. In that case, the real parent can remain blocked in
|
|
// waitpid() until the tracer consumes that pending ptrace status. Drain any
|
|
// immediately available status for this pid before forgetting it.
|
|
for {
|
|
var ws syscall.WaitStatus
|
|
var ru syscall.Rusage
|
|
r, err := syscall.Wait4(pid, &ws, waitTraceOptions|syscall.WNOHANG, &ru)
|
|
if r == 0 {
|
|
return
|
|
}
|
|
if err != nil {
|
|
if err != syscall.ECHILD && err != syscall.EINTR {
|
|
t.debugf("pid=%d reap after ESRCH failed: %v", pid, err)
|
|
}
|
|
return
|
|
}
|
|
if r != pid {
|
|
return
|
|
}
|
|
if ws.Exited() || ws.Signaled() {
|
|
t.debugf("pid=%d reaped after ESRCH: status=%#x", pid, int(ws))
|
|
delete(t.tracees, pid)
|
|
return
|
|
}
|
|
if ws.Stopped() {
|
|
// If it was merely stopped, release it; otherwise the real parent may wait
|
|
// forever for a helper that the tracer no longer intends to manage.
|
|
sig := 0
|
|
if s := ws.StopSignal(); s != syscall.SIGTRAP && s != syscall.SIGSTOP {
|
|
sig = int(s)
|
|
}
|
|
if err := syscall.PtraceSyscall(pid, sig); err != nil && !isProcessGoneErr(err) {
|
|
t.debugf("pid=%d resume while reaping after ESRCH failed: %v", pid, err)
|
|
}
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// setPtraceOptions applies the tracer's ptrace options to pid: syscall stops
// are marked (TRACESYSGOOD) and fork, vfork, clone and exec are reported as
// events. traceExit additionally requests the pre-exit event stop.
//
// TRACEEXIT is deliberately not requested for short-lived helper children. The
// extra synthetic exit-stop can race with helpers such as `stty -a` spawned by
// debconf: PTRACE_PEEKDATA/PTRACE_SYSCALL may report ESRCH before the real
// parent observes completion, leaving apt/dpkg waiting for a child that the
// tracer effectively consumed. The root tracee is different: enabling
// TRACEEXIT only for it gives the tracer one last deterministic stop before
// apt itself exits, avoiding the root "process-gone" path that can otherwise
// make us terminate before reporting the real wait status.
func setPtraceOptions(pid int, traceExit bool) error {
	const common = syscall.PTRACE_O_TRACESYSGOOD |
		syscall.PTRACE_O_TRACEFORK |
		syscall.PTRACE_O_TRACEVFORK |
		syscall.PTRACE_O_TRACECLONE |
		syscall.PTRACE_O_TRACEEXEC

	if traceExit {
		return syscall.PtraceSetOptions(pid, common|syscall.PTRACE_O_TRACEEXIT)
	}
	return syscall.PtraceSetOptions(pid, common)
}
|
|
|
|
func setPtraceOptionsRetry(pid int, traceExit bool) error {
|
|
var err error
|
|
for attempt := 0; attempt < 20; attempt++ {
|
|
err = setPtraceOptions(pid, traceExit)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
if errors.Is(err, syscall.EINTR) {
|
|
continue
|
|
}
|
|
if !isProcessGoneErr(err) || !processExists(pid) {
|
|
return err
|
|
}
|
|
// Android kernels can briefly report ESRCH while the newly exec'd
|
|
// tracee is transitioning into its first ptrace stop. The task still
|
|
// exists and remains stopped, so retry the option write on the same
|
|
// tracee instead of abandoning it.
|
|
time.Sleep(5 * time.Millisecond)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// isSyscallStop reports whether ws describes a stop whose stop signal is
// SIGTRAP with bit 0x80 set, which is how syscall stops are tagged when
// PTRACE_O_TRACESYSGOOD is in effect.
func isSyscallStop(ws syscall.WaitStatus) bool {
	const sysgoodTrap = syscall.SIGTRAP | 0x80

	if !ws.Stopped() {
		return false
	}
	return ws.StopSignal() == sysgoodTrap
}
|
|
|
|
// ptraceEvent returns the bits of ws above bit 15, i.e. the PTRACE_EVENT_*
// code carried by a ptrace event stop (0 when no event is encoded).
func ptraceEvent(ws syscall.WaitStatus) int {
	const eventShift = 16

	raw := uint32(ws)
	return int(raw >> eventShift)
}
|
|
|
|
func (t *tracer) handleEvent(pid int, st *traceeState, event int) {
|
|
switch event {
|
|
case syscall.PTRACE_EVENT_FORK, syscall.PTRACE_EVENT_VFORK, syscall.PTRACE_EVENT_CLONE:
|
|
msg, err := syscall.PtraceGetEventMsg(pid)
|
|
if err == nil && msg != 0 {
|
|
child := int(msg)
|
|
t.queueTracee(child, st)
|
|
}
|
|
case syscall.PTRACE_EVENT_EXIT:
|
|
// Root-only TRACEEXIT is used as a deterministic pre-exit stop. Do not
|
|
// set the final status here; resume and let the normal wait status report
|
|
// the real exit code/signal.
|
|
case syscall.PTRACE_EVENT_EXEC:
|
|
// Keep the syscall entry/exit phase unchanged. With PTRACE_SYSCALL,
|
|
// Linux may report PTRACE_EVENT_EXEC between execve-enter and
|
|
// execve-exit. Resetting inSyscall here makes the following
|
|
// execve-exit stop look like a new syscall-enter stop, which then
|
|
// flips the phase for the freshly loaded dynamic linker. Once the
|
|
// phase is inverted, the loader's openat() calls for the executable
|
|
// itself are missed, producing errors such as:
|
|
//
|
|
// /usr/lib/apt/methods/http: error while loading shared libraries:
|
|
// /usr/lib/apt/methods/http: cannot open shared object file
|
|
//
|
|
// The normal syscall-exit stop will clear st.inSyscall.
|
|
}
|
|
}
|
|
|
|
func (t *tracer) setExit(ws syscall.WaitStatus) {
|
|
t.exitCodeSet = true
|
|
if ws.Exited() {
|
|
t.exitCode = ws.ExitStatus()
|
|
} else if ws.Signaled() {
|
|
t.exitCode = 128 + int(ws.Signal())
|
|
} else {
|
|
t.exitCode = -1
|
|
}
|
|
}
|
|
|
|
func (t *tracer) waitErr(ws syscall.WaitStatus) error {
|
|
if ws.Exited() && ws.ExitStatus() == 0 {
|
|
return nil
|
|
}
|
|
if ws.Exited() {
|
|
return fmt.Errorf("exit status %d", ws.ExitStatus())
|
|
}
|
|
if ws.Signaled() {
|
|
return fmt.Errorf("process killed by %s", ws.Signal())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isProcessGoneErr reports whether err means the target process no longer
// exists: either it wraps ESRCH or its message contains "no such process".
// A nil error is never a "gone" error.
func isProcessGoneErr(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, syscall.ESRCH):
		return true
	default:
		return strings.Contains(err.Error(), "no such process")
	}
}
|
|
|
|
// processExists probes pid with signal 0. It reports true when the probe
// succeeds or fails with EPERM, and false for any other error.
func processExists(pid int) bool {
	if err := syscall.Kill(pid, 0); err != nil {
		return errors.Is(err, syscall.EPERM)
	}
	return true
}
|
|
|
|
func (t *tracer) resumeTracee(pid int, sig int) bool {
|
|
if err := syscall.PtraceSyscall(pid, sig); err != nil {
|
|
if isProcessGoneErr(err) {
|
|
if t.recoverTracee(pid, sig) {
|
|
return true
|
|
}
|
|
t.debugf("pid=%d disappeared before resume", pid)
|
|
t.reapGoneTracee(pid)
|
|
t.signalTraceeParent(pid, syscall.SIGCHLD)
|
|
t.noteLostTracee(pid)
|
|
} else {
|
|
t.debugf("pid=%d ptrace resume failed: %v", pid, err)
|
|
}
|
|
delete(t.tracees, pid)
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (t *tracer) recoverTracee(pid int, sig int) bool {
|
|
if !processExists(pid) {
|
|
return false
|
|
}
|
|
|
|
t.debugf("pid=%d still exists after ptrace ESRCH; trying PTRACE_ATTACH recovery", pid)
|
|
if err := syscall.PtraceAttach(pid); err != nil {
|
|
t.debugf("pid=%d ptrace attach recovery failed: %v", pid, err)
|
|
return false
|
|
}
|
|
|
|
var ws syscall.WaitStatus
|
|
var ru syscall.Rusage
|
|
for {
|
|
r, err := syscall.Wait4(pid, &ws, waitTraceOptions, &ru)
|
|
if err == syscall.EINTR {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
t.debugf("pid=%d wait after ptrace attach recovery failed: %v", pid, err)
|
|
_ = syscall.PtraceDetach(pid)
|
|
return false
|
|
}
|
|
if r == pid {
|
|
break
|
|
}
|
|
}
|
|
if !ws.Stopped() {
|
|
t.debugf("pid=%d was not stopped after ptrace attach recovery: status=%#x", pid, int(ws))
|
|
_ = syscall.PtraceDetach(pid)
|
|
return false
|
|
}
|
|
|
|
st := t.tracees[pid]
|
|
if st == nil {
|
|
st = t.adoptTracee(pid)
|
|
}
|
|
st.inSyscall = false
|
|
st.pending = nil
|
|
if err := setPtraceOptions(pid, pid == t.rootPID); err != nil {
|
|
t.debugf("pid=%d ptrace option recovery failed: %v", pid, err)
|
|
_ = syscall.PtraceDetach(pid)
|
|
return false
|
|
}
|
|
st.optionsSet = true
|
|
if err := syscall.PtraceSyscall(pid, sig); err != nil {
|
|
t.debugf("pid=%d ptrace resume after recovery failed: %v", pid, err)
|
|
_ = syscall.PtraceDetach(pid)
|
|
return false
|
|
}
|
|
t.debugf("pid=%d ptrace recovery succeeded", pid)
|
|
return true
|
|
}
|
|
|
|
// isLocallyEmulatedSyscall reports whether onSyscallEnter replaces the
|
|
// syscall with a harmless host call and synthesizes its guest-visible result.
|
|
// An outer Android seccomp filter may still report SIGSYS for the original
|
|
// syscall after that synthetic exit has already been processed. In that case
|
|
// the result must be preserved instead of being overwritten with ENOSYS.
|
|
func isLocallyEmulatedSyscall(nr uint64) bool {
|
|
switch nr {
|
|
case sc.chown, sc.lchown, sc.fchown, sc.fchownat,
|
|
sc.getuid, sc.geteuid, sc.getgid, sc.getegid,
|
|
sc.getresuid, sc.getresgid, sc.getgroups,
|
|
sc.setuid, sc.setgid, sc.setreuid, sc.setregid,
|
|
sc.setresuid, sc.setresgid, sc.setgroups,
|
|
sc.setfsuid, sc.setfsgid, sc.chroot:
|
|
return nr != noSys
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
func (t *tracer) handleSeccompSIGSYS(st *traceeState) (bool, error) {
|
|
info, err := ptraceGetSeccompSiginfo(st.pid)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if info.Code != sysSeccompCode {
|
|
t.debugf("pid=%d received non-seccomp SIGSYS: si_code=%d", st.pid, info.Code)
|
|
return false, nil
|
|
}
|
|
|
|
regs, err := getRegs(st.pid)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
trappedSysno := uint64(uint32(info.Syscall))
|
|
logRegs := regs
|
|
if st.originalValid && st.originalSysno == trappedSysno {
|
|
// This is the Go equivalent of PRoot's ORIGINAL register snapshot.
|
|
// In particular, it retains x0/r0 before a synthesized sysexit writes
|
|
// the return value into the same register.
|
|
logRegs = &st.originalRegs
|
|
}
|
|
|
|
name := "syscall"
|
|
if trappedSysno == sc.setRobust {
|
|
name = "set_robust_list"
|
|
}
|
|
// Some Android policies trap link/linkat instead of returning an errno.
|
|
// The translated host paths were retained at syscall-entry, so perform the
|
|
// same backup-copy fallback here and suppress the SIGSYS.
|
|
if st.originalValid && st.originalSysno == trappedSysno && st.pending != nil && st.pending.kind == "hardlink" {
|
|
p := st.pending
|
|
if err := emulateHardLinkFallback(p.sourcePath, p.targetPath, p.size); err != nil {
|
|
setRetval(regs, -int64(errnoFromError(err)))
|
|
t.debugf("pid=%d seccomp hardlink fallback failed source=%q target=%q: %v", st.pid, p.sourcePath, p.targetPath, err)
|
|
} else {
|
|
setRetval(regs, 0)
|
|
t.debugf("pid=%d seccomp hardlink fallback copied source=%q target=%q", st.pid, p.sourcePath, p.targetPath)
|
|
}
|
|
st.inSyscall = false
|
|
st.pending = nil
|
|
st.originalValid = false
|
|
if err := setRegs(st.pid, regs); err != nil {
|
|
return false, err
|
|
}
|
|
return true, nil
|
|
}
|
|
// When the syscall was already emulated by onSyscallEnter, the ordinary
|
|
// syscall-exit handler may have run before this outer-seccomp SIGSYS. This
|
|
// is the ordering handled by termux/proot's synthesized-sysexit path. Do not
|
|
// replace the emulated success/error with ENOSYS. If the exit handler has
|
|
// not run yet, run it now from the saved pending operation.
|
|
if st.originalValid && st.originalSysno == trappedSysno && isLocallyEmulatedSyscall(trappedSysno) {
|
|
if st.pending != nil {
|
|
st.inSyscall = false
|
|
if err := t.onSyscallExit(st, regs); err != nil {
|
|
return false, err
|
|
}
|
|
t.debugf("pid=%d synthesized syscall exit for emulated syscall=%d before suppressing SIGSYS", st.pid, trappedSysno)
|
|
} else {
|
|
t.debugf("pid=%d preserving completed emulation for syscall=%d while suppressing SIGSYS", st.pid, trappedSysno)
|
|
}
|
|
st.inSyscall = false
|
|
st.pending = nil
|
|
st.originalValid = false
|
|
return true, nil
|
|
}
|
|
|
|
t.debugf("pid=%d handling seccomp SIGSYS for %s=%d as ENOSYS (inSyscall=%t args=%d,%d,%d,%d,%d,%d)",
|
|
st.pid, name, trappedSysno, st.inSyscall,
|
|
arg(logRegs, 0), arg(logRegs, 1), arg(logRegs, 2),
|
|
arg(logRegs, 3), arg(logRegs, 4), arg(logRegs, 5))
|
|
|
|
// The blocked syscall has no ordinary exit stop to process. Reset the
|
|
// entry/exit state before resuming, discard any result rewrite associated
|
|
// with that call, and expose the same ENOSYS fallback used by PRoot for
|
|
// set_robust_list and unsupported outer-seccomp calls such as rseq.
|
|
st.inSyscall = false
|
|
st.pending = nil
|
|
st.originalValid = false
|
|
setRetval(regs, -int64(syscall.ENOSYS))
|
|
if err := setRegs(st.pid, regs); err != nil {
|
|
return false, err
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
func (t *tracer) handleSyscall(st *traceeState) error {
|
|
regs, err := getRegs(st.pid)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !st.inSyscall {
|
|
st.inSyscall = true
|
|
st.scratch = stackPtr(regs) - stackScratchGap
|
|
st.originalRegs = *regs
|
|
st.originalSysno = sysno(regs)
|
|
st.originalValid = true
|
|
return t.onSyscallEnter(st, regs)
|
|
}
|
|
st.inSyscall = false
|
|
// Keep ORIGINAL until the next syscall entry. Some Android kernels report
|
|
// the outer-seccomp SIGSYS after the synthesized syscall-exit stop; clearing
|
|
// it here would lose x0/r0 before the SIGSYS handler can inspect it.
|
|
return t.onSyscallExit(st, regs)
|
|
}
|
|
|
|
func (t *tracer) onSyscallEnter(st *traceeState, regs *syscall.PtraceRegs) error {
|
|
nr := sysno(regs)
|
|
st.pending = nil
|
|
|
|
switch nr {
|
|
case sc.execve:
|
|
return t.translateExecve(st, regs, 0, 1, atFDCWD)
|
|
case sc.execveat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateExecve(st, regs, 1, 2, dirfd)
|
|
case sc.open:
|
|
return t.translateArgPathMode(st, regs, 0, atFDCWD, pathModeFromOpenFlags(arg(regs, 1)))
|
|
case sc.openat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateArgPathMode(st, regs, 1, dirfd, pathModeFromOpenFlags(arg(regs, 2)))
|
|
case sc.openat2:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateArgPathMode(st, regs, 1, dirfd, t.pathModeFromOpenHow(st, arg(regs, 2)))
|
|
case sc.access, sc.stat, sc.lstat, sc.statfs, sc.statfs64:
|
|
return t.translateArgPath(st, regs, 0, atFDCWD)
|
|
case sc.chmod, sc.truncate, sc.utime, sc.utimes:
|
|
return t.translateArgPathMode(st, regs, 0, atFDCWD, prootext.PathWrite)
|
|
case sc.rmdir:
|
|
return t.translateArgPathMode(st, regs, 0, atFDCWD, prootext.PathDeleteDir)
|
|
case sc.chown, sc.lchown, sc.fchown, sc.fchownat:
|
|
setSysno(regs, sc.getpid)
|
|
st.pending = &pendingExit{kind: "fakezero"}
|
|
return setRegs(st.pid, regs)
|
|
case sc.faccessat, sc.faccessat2, sc.newfstatat, sc.fstatat, sc.statx:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateArgPath(st, regs, 1, dirfd)
|
|
case sc.mkdirat, sc.mknodat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateArgPathMode(st, regs, 1, dirfd, prootext.PathCreate)
|
|
case sc.unlinkat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
mode := prootext.PathDelete
|
|
if arg(regs, 2)&0x200 != 0 {
|
|
mode = prootext.PathDeleteDir
|
|
}
|
|
return t.translateArgPathMode(st, regs, 1, dirfd, mode)
|
|
case sc.fchmodat, sc.utimensat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
return t.translateArgPathMode(st, regs, 1, dirfd, prootext.PathWrite)
|
|
case sc.mkdir, sc.mknod:
|
|
return t.translateArgPathMode(st, regs, 0, atFDCWD, prootext.PathCreate)
|
|
case sc.unlink:
|
|
return t.translateArgPathMode(st, regs, 0, atFDCWD, prootext.PathDelete)
|
|
case sc.rename:
|
|
if err := t.translateArgPathMode(st, regs, 0, atFDCWD, prootext.PathWrite); err != nil {
|
|
return err
|
|
}
|
|
return t.translateArgPathMode(st, regs, 1, atFDCWD, prootext.PathCreate)
|
|
case sc.renameat, sc.renameat2:
|
|
oldfd := int(int64(arg(regs, 0)))
|
|
newfd := int(int64(arg(regs, 2)))
|
|
if err := t.translateArgPathMode(st, regs, 1, oldfd, prootext.PathWrite); err != nil {
|
|
return err
|
|
}
|
|
return t.translateArgPathMode(st, regs, 3, newfd, prootext.PathCreate)
|
|
case sc.link:
|
|
return t.translateHardLink(st, regs, 0, atFDCWD, 1, atFDCWD, 0)
|
|
case sc.linkat:
|
|
oldfd := int(int64(arg(regs, 0)))
|
|
newfd := int(int64(arg(regs, 2)))
|
|
return t.translateHardLink(st, regs, 1, oldfd, 3, newfd, arg(regs, 4))
|
|
case sc.symlink:
|
|
if err := t.translateSymlinkTarget(st, regs, 0); err != nil {
|
|
return err
|
|
}
|
|
return t.translateArgPathMode(st, regs, 1, atFDCWD, prootext.PathCreate)
|
|
case sc.symlinkat:
|
|
if err := t.translateSymlinkTarget(st, regs, 0); err != nil {
|
|
return err
|
|
}
|
|
dirfd := int(int64(arg(regs, 1)))
|
|
return t.translateArgPathMode(st, regs, 2, dirfd, prootext.PathCreate)
|
|
case sc.readlink:
|
|
guest, err := t.translateArgPathReturnGuest(st, regs, 0, atFDCWD)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
st.pending = &pendingExit{kind: "readlink", guestPath: guest, buf: arg(regs, 1), size: arg(regs, 2)}
|
|
return nil
|
|
case sc.readlinkat:
|
|
dirfd := int(int64(arg(regs, 0)))
|
|
guest, err := t.translateArgPathReturnGuest(st, regs, 1, dirfd)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
st.pending = &pendingExit{kind: "readlink", guestPath: guest, buf: arg(regs, 2), size: arg(regs, 3)}
|
|
return nil
|
|
case sc.chdir:
|
|
guest, err := t.translateArgPathReturnGuest(st, regs, 0, atFDCWD)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
st.pending = &pendingExit{kind: "chdir", guestPath: guest}
|
|
return nil
|
|
case sc.fchdir:
|
|
st.pending = &pendingExit{kind: "fchdir", fd: arg(regs, 0)}
|
|
return nil
|
|
case sc.getcwd:
|
|
st.pending = &pendingExit{kind: "getcwd", buf: arg(regs, 0), size: arg(regs, 1)}
|
|
return nil
|
|
case sc.getuid:
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(st.creds.ruid)}
|
|
return nil
|
|
case sc.geteuid:
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(st.creds.euid)}
|
|
return nil
|
|
case sc.getgid:
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(st.creds.rgid)}
|
|
return nil
|
|
case sc.getegid:
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(st.creds.egid)}
|
|
return nil
|
|
case sc.getresuid:
|
|
st.pending = &pendingExit{kind: "getresuid", buf: arg(regs, 0), size: arg(regs, 1), fd: arg(regs, 2)}
|
|
return nil
|
|
case sc.getresgid:
|
|
st.pending = &pendingExit{kind: "getresgid", buf: arg(regs, 0), size: arg(regs, 1), fd: arg(regs, 2)}
|
|
return nil
|
|
case sc.getgroups:
|
|
st.pending = &pendingExit{kind: "getgroups", size: arg(regs, 0), buf: arg(regs, 1)}
|
|
return nil
|
|
case sc.setuid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
uid := uint32(arg(regs, 0))
|
|
if !st.creds.canUseUID(uid) {
|
|
t.debugf("pid=%d deny setuid(%d): r/e/s=%d/%d/%d", st.pid, uid, st.creds.ruid, st.creds.euid, st.creds.suid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
kind := "setuid"
|
|
if !st.creds.uidPrivileged() {
|
|
kind = "seteuid"
|
|
}
|
|
st.pending = &pendingExit{kind: kind, size: uint64(uid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setgid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
gid := uint32(arg(regs, 0))
|
|
if !st.creds.canUseGID(gid) {
|
|
t.debugf("pid=%d deny setgid(%d): r/e/s=%d/%d/%d", st.pid, gid, st.creds.rgid, st.creds.egid, st.creds.sgid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
kind := "setgid"
|
|
if !st.creds.gidPrivileged() {
|
|
kind = "setegid"
|
|
}
|
|
st.pending = &pendingExit{kind: kind, size: uint64(gid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setreuid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
ruid, euid := uint32(arg(regs, 0)), uint32(arg(regs, 1))
|
|
if !st.creds.canSetUID(ruid, euid) {
|
|
t.debugf("pid=%d deny setreuid(%d,%d): r/e/s=%d/%d/%d", st.pid, ruid, euid, st.creds.ruid, st.creds.euid, st.creds.suid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
st.pending = &pendingExit{kind: "setreuid", buf: uint64(ruid), size: uint64(euid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setregid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
rgid, egid := uint32(arg(regs, 0)), uint32(arg(regs, 1))
|
|
if !st.creds.canSetGID(rgid, egid) {
|
|
t.debugf("pid=%d deny setregid(%d,%d): r/e/s=%d/%d/%d", st.pid, rgid, egid, st.creds.rgid, st.creds.egid, st.creds.sgid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
st.pending = &pendingExit{kind: "setregid", buf: uint64(rgid), size: uint64(egid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setresuid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
ruid, euid, suid := uint32(arg(regs, 0)), uint32(arg(regs, 1)), uint32(arg(regs, 2))
|
|
if !st.creds.canSetUID(ruid, euid, suid) {
|
|
t.debugf("pid=%d deny setresuid(%d,%d,%d): r/e/s=%d/%d/%d", st.pid, ruid, euid, suid, st.creds.ruid, st.creds.euid, st.creds.suid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
st.pending = &pendingExit{kind: "setresuid", buf: uint64(ruid), size: uint64(euid), fd: uint64(suid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setresgid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
rgid, egid, sgid := uint32(arg(regs, 0)), uint32(arg(regs, 1)), uint32(arg(regs, 2))
|
|
if !st.creds.canSetGID(rgid, egid, sgid) {
|
|
t.debugf("pid=%d deny setresgid(%d,%d,%d): r/e/s=%d/%d/%d", st.pid, rgid, egid, sgid, st.creds.rgid, st.creds.egid, st.creds.sgid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
st.pending = &pendingExit{kind: "setresgid", buf: uint64(rgid), size: uint64(egid), fd: uint64(sgid)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setgroups:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
if !st.creds.gidPrivileged() {
|
|
t.debugf("pid=%d deny setgroups: euid=%d", st.pid, st.creds.euid)
|
|
return t.fakeErrno(st, regs, syscall.EPERM)
|
|
}
|
|
groups, err := readTraceeUint32Array(st.pid, uintptr(arg(regs, 1)), arg(regs, 0))
|
|
if err != nil {
|
|
// setgroups() is part of apt's privilege drop. If the group vector
|
|
// cannot be read because the tracee is between exec mappings or uses a
|
|
// special pointer, still emulate success instead of letting the real
|
|
// unprivileged syscall run and fail with EPERM.
|
|
t.debugf("pid=%d failed reading setgroups vector: %v", st.pid, err)
|
|
groups = nil
|
|
}
|
|
st.creds.groups = groups
|
|
setSysno(regs, sc.getpid)
|
|
st.pending = &pendingExit{kind: "fakezero"}
|
|
return setRegs(st.pid, regs)
|
|
case sc.setfsuid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
old := st.creds.fsuid
|
|
uid := uint32(arg(regs, 0))
|
|
if st.creds.canUseUID(uid) {
|
|
st.creds.fsuid = uid
|
|
}
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(old)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.setfsgid:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
old := st.creds.fsgid
|
|
gid := uint32(arg(regs, 0))
|
|
if st.creds.canUseGID(gid) {
|
|
st.creds.fsgid = gid
|
|
}
|
|
st.pending = &pendingExit{kind: "ret", size: uint64(old)}
|
|
setSysno(regs, sc.getpid)
|
|
return setRegs(st.pid, regs)
|
|
case sc.chroot:
|
|
t.debugf("pid=%d fake credential syscall nr=%d args=%d,%d,%d", st.pid, nr, arg(regs, 0), arg(regs, 1), arg(regs, 2))
|
|
setSysno(regs, sc.getpid)
|
|
st.pending = &pendingExit{kind: "fakezero"}
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (t *tracer) onSyscallExit(st *traceeState, regs *syscall.PtraceRegs) error {
|
|
if st.pending == nil {
|
|
return nil
|
|
}
|
|
p := st.pending
|
|
st.pending = nil
|
|
retv := retval(regs)
|
|
|
|
switch p.kind {
|
|
case "hardlink":
|
|
if retv == 0 {
|
|
return nil
|
|
}
|
|
if !shouldFallbackHardLink(retv) {
|
|
return nil
|
|
}
|
|
if err := emulateHardLinkFallback(p.sourcePath, p.targetPath, p.size); err != nil {
|
|
t.debugf("pid=%d hardlink fallback failed source=%q target=%q: %v", st.pid, p.sourcePath, p.targetPath, err)
|
|
setRetval(regs, -int64(errnoFromError(err)))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
t.debugf("pid=%d hardlink fallback copied source=%q target=%q after native errno=%d", st.pid, p.sourcePath, p.targetPath, -retv)
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "chdir":
|
|
if retv == 0 {
|
|
st.cwd = cleanGuestPath(p.guestPath)
|
|
}
|
|
case "fchdir":
|
|
if retv == 0 {
|
|
if guest, ok := t.fdGuestPath(st.pid, int(p.fd)); ok {
|
|
st.cwd = cleanGuestPath(guest)
|
|
}
|
|
}
|
|
case "ret":
|
|
setRetval(regs, int64(uint32(p.size)))
|
|
return setRegs(st.pid, regs)
|
|
case "errno":
|
|
setRetval(regs, -int64(syscall.Errno(p.size)))
|
|
return setRegs(st.pid, regs)
|
|
case "fakezero":
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setuid":
|
|
uid := uint32(p.size)
|
|
st.creds.ruid, st.creds.euid, st.creds.suid, st.creds.fsuid = uid, uid, uid, uid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "seteuid":
|
|
uid := uint32(p.size)
|
|
st.creds.euid, st.creds.fsuid = uid, uid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setgid":
|
|
gid := uint32(p.size)
|
|
st.creds.rgid, st.creds.egid, st.creds.sgid, st.creds.fsgid = gid, gid, gid, gid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setegid":
|
|
gid := uint32(p.size)
|
|
st.creds.egid, st.creds.fsgid = gid, gid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setreuid":
|
|
if v := uint32(p.buf); v != ^uint32(0) {
|
|
st.creds.ruid = v
|
|
}
|
|
if v := uint32(p.size); v != ^uint32(0) {
|
|
st.creds.euid = v
|
|
st.creds.fsuid = v
|
|
}
|
|
st.creds.suid = st.creds.euid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setregid":
|
|
if v := uint32(p.buf); v != ^uint32(0) {
|
|
st.creds.rgid = v
|
|
}
|
|
if v := uint32(p.size); v != ^uint32(0) {
|
|
st.creds.egid = v
|
|
st.creds.fsgid = v
|
|
}
|
|
st.creds.sgid = st.creds.egid
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setresuid":
|
|
if v := uint32(p.buf); v != ^uint32(0) {
|
|
st.creds.ruid = v
|
|
}
|
|
if v := uint32(p.size); v != ^uint32(0) {
|
|
st.creds.euid = v
|
|
st.creds.fsuid = v
|
|
}
|
|
if v := uint32(p.fd); v != ^uint32(0) {
|
|
st.creds.suid = v
|
|
}
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "setresgid":
|
|
if v := uint32(p.buf); v != ^uint32(0) {
|
|
st.creds.rgid = v
|
|
}
|
|
if v := uint32(p.size); v != ^uint32(0) {
|
|
st.creds.egid = v
|
|
st.creds.fsgid = v
|
|
}
|
|
if v := uint32(p.fd); v != ^uint32(0) {
|
|
st.creds.sgid = v
|
|
}
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "getresuid":
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.buf), st.creds.ruid)
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.size), st.creds.euid)
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.fd), st.creds.suid)
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "getresgid":
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.buf), st.creds.rgid)
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.size), st.creds.egid)
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.fd), st.creds.sgid)
|
|
setRetval(regs, 0)
|
|
return setRegs(st.pid, regs)
|
|
case "getgroups":
|
|
if p.size == 0 {
|
|
setRetval(regs, int64(len(st.creds.groups)))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
if p.size < uint64(len(st.creds.groups)) {
|
|
setRetval(regs, -int64(syscall.EINVAL))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
for i, g := range st.creds.groups {
|
|
_ = writeTraceeUint32(st.pid, uintptr(p.buf)+uintptr(i*4), g)
|
|
}
|
|
setRetval(regs, int64(len(st.creds.groups)))
|
|
return setRegs(st.pid, regs)
|
|
case "getcwd":
|
|
if p.size == 0 {
|
|
setRetval(regs, -int64(syscall.ERANGE))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
data := append([]byte(cleanGuestPath(st.cwd)), 0)
|
|
if uint64(len(data)) > p.size {
|
|
setRetval(regs, -int64(syscall.ERANGE))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
if _, err := syscall.PtracePokeData(st.pid, uintptr(p.buf), data); err != nil {
|
|
return err
|
|
}
|
|
setRetval(regs, int64(len(data)))
|
|
return setRegs(st.pid, regs)
|
|
case "readlink":
|
|
if retv <= 0 {
|
|
return nil
|
|
}
|
|
buf := make([]byte, retv)
|
|
if _, err := syscall.PtracePeekData(st.pid, uintptr(p.buf), buf); err != nil {
|
|
return nil
|
|
}
|
|
target := string(buf)
|
|
if strings.HasPrefix(target, t.pm.root) || filepath.IsAbs(target) {
|
|
guest := t.pm.HostToGuest(target)
|
|
if guest != target && guest != "" {
|
|
out := []byte(guest)
|
|
if uint64(len(out)) > p.size {
|
|
out = out[:p.size]
|
|
}
|
|
if _, err := syscall.PtracePokeData(st.pid, uintptr(p.buf), out); err == nil {
|
|
setRetval(regs, int64(len(out)))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (t *tracer) translateSymlinkTarget(st *traceeState, regs *syscall.PtraceRegs, n int) error {
|
|
addr := arg(regs, n)
|
|
if addr == 0 {
|
|
return nil
|
|
}
|
|
old, err := readTraceeString(st.pid, uintptr(addr), 4096)
|
|
if err != nil || old == "" {
|
|
return err
|
|
}
|
|
|
|
// The link name is translated separately. The link *target*, however, is
|
|
// stored verbatim by the kernel. If a guest creates an absolute symlink such
|
|
// as /tmp/apt-dpkg-install/00-foo.deb -> /var/cache/apt/archives/foo.deb and
|
|
// we leave the target unchanged, the host kernel later follows it outside the
|
|
// rootfs and dpkg sees ENOENT. Store the host target for absolute links, and
|
|
// translate it back on readlink() exit so guest-visible semantics remain
|
|
// /var/cache/apt/archives/foo.deb.
|
|
if !filepath.IsAbs(old) {
|
|
return nil
|
|
}
|
|
|
|
guest, host, special := t.translateSpecialProcPath(st, "/", old)
|
|
if !special {
|
|
guest, host = t.pm.Translate("/", old)
|
|
}
|
|
t.debugf("pid=%d symlink-target old=%q guest=%q host=%q", st.pid, old, guest, host)
|
|
newAddr, err := writeTraceeString(st, uintptr(addr), old, host)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
setArg(regs, n, uint64(newAddr))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
|
|
// linkat(2) flag bits as defined by Linux.
const (
	atSymlinkFollow = 0x400  // AT_SYMLINK_FOLLOW
	atEmptyPath     = 0x1000 // AT_EMPTY_PATH
)
|
|
|
|
// translateHardLink rewrites both link/linkat path arguments and retains the
|
|
// translated host paths until syscall-exit. Android kernels and filesystems can
|
|
// reject otherwise valid hard links from an unprivileged tracee; dpkg relies on
|
|
// those links for rollback files, so the exit handler can create a regular copy
|
|
// when the native operation fails with a policy/filesystem error.
|
|
func (t *tracer) translateHardLink(st *traceeState, regs *syscall.PtraceRegs, oldArg, oldDirfd, newArg, newDirfd int, flags uint64) error {
|
|
if err := t.translateArgPath(st, regs, oldArg, oldDirfd); err != nil {
|
|
return err
|
|
}
|
|
source, err := readTraceeString(st.pid, uintptr(arg(regs, oldArg)), 4096)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if source == "" && flags&atEmptyPath != 0 && oldDirfd >= 0 {
|
|
source, err = os.Readlink(fmt.Sprintf("/proc/%d/fd/%d", st.pid, oldDirfd))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := t.translateArgPathMode(st, regs, newArg, newDirfd, prootext.PathCreate); err != nil {
|
|
return err
|
|
}
|
|
target, err := readTraceeString(st.pid, uintptr(arg(regs, newArg)), 4096)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
st.pending = &pendingExit{
|
|
kind: "hardlink",
|
|
sourcePath: source,
|
|
targetPath: target,
|
|
size: flags,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// shouldFallbackHardLink reports whether a link/linkat return value is one of
// the failures (EPERM, EACCES, EXDEV, ENOSYS, EOPNOTSUPP) for which the copy
// fallback is attempted. Non-negative results never trigger it.
func shouldFallbackHardLink(retv int64) bool {
	if retv >= 0 {
		return false
	}
	errno := syscall.Errno(-retv)
	return errno == syscall.EPERM ||
		errno == syscall.EACCES ||
		errno == syscall.EXDEV ||
		errno == syscall.ENOSYS ||
		errno == syscall.EOPNOTSUPP
}
|
|
|
|
// emulateHardLinkFallback implements the backup semantics needed by dpkg when
|
|
// the host rejects link/linkat. A regular file is copied with O_EXCL so the
|
|
// operation keeps link(2)'s no-overwrite behavior. Symlinks are recreated when
|
|
// linkat was not asked to follow them. This is deliberately a fallback: native
|
|
// hard links remain the primary path and retain full inode-sharing semantics.
|
|
func emulateHardLinkFallback(source, target string, flags uint64) (retErr error) {
|
|
info, err := os.Lstat(source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if info.IsDir() {
|
|
return syscall.EPERM
|
|
}
|
|
|
|
if info.Mode()&os.ModeSymlink != 0 && flags&atSymlinkFollow == 0 {
|
|
value, err := os.Readlink(source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.Symlink(value, target)
|
|
}
|
|
if info.Mode()&os.ModeSymlink != 0 {
|
|
info, err = os.Stat(source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if !info.Mode().IsRegular() {
|
|
return syscall.EPERM
|
|
}
|
|
|
|
src, err := os.Open(source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer src.Close()
|
|
|
|
dst, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_EXCL, info.Mode().Perm())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
created := true
|
|
defer func() {
|
|
if closeErr := dst.Close(); retErr == nil && closeErr != nil {
|
|
retErr = closeErr
|
|
}
|
|
if retErr != nil && created {
|
|
_ = os.Remove(target)
|
|
}
|
|
}()
|
|
|
|
if _, err := io.Copy(dst, src); err != nil {
|
|
return err
|
|
}
|
|
if err := dst.Sync(); err != nil {
|
|
return err
|
|
}
|
|
if err := dst.Chmod(info.Mode().Perm()); err != nil {
|
|
return err
|
|
}
|
|
created = false
|
|
return nil
|
|
}
|
|
|
|
// errnoFromError extracts the syscall.Errno wrapped anywhere in err's chain.
// Errors that carry no errno are reported as EIO.
func errnoFromError(err error) syscall.Errno {
	if code := syscall.Errno(0); errors.As(err, &code) {
		return code
	}
	return syscall.EIO
}
|
|
|
|
func (t *tracer) translateArgPath(st *traceeState, regs *syscall.PtraceRegs, n int, dirfd int) error {
|
|
_, err := t.translateArgPathReturnGuestMode(st, regs, n, dirfd, prootext.PathRead)
|
|
return err
|
|
}
|
|
|
|
func (t *tracer) translateArgPathMode(st *traceeState, regs *syscall.PtraceRegs, n int, dirfd int, mode prootext.PathMode) error {
|
|
_, err := t.translateArgPathReturnGuestMode(st, regs, n, dirfd, mode)
|
|
return err
|
|
}
|
|
|
|
func (t *tracer) translateArgPathReturnGuest(st *traceeState, regs *syscall.PtraceRegs, n int, dirfd int) (string, error) {
|
|
return t.translateArgPathReturnGuestMode(st, regs, n, dirfd, prootext.PathRead)
|
|
}
|
|
|
|
func (t *tracer) translateArgPathReturnGuestMode(st *traceeState, regs *syscall.PtraceRegs, n int, dirfd int, mode prootext.PathMode) (string, error) {
|
|
addr := arg(regs, n)
|
|
if addr == 0 {
|
|
return "", nil
|
|
}
|
|
old, err := readTraceeString(st.pid, uintptr(addr), 4096)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if old == "" {
|
|
return "", nil
|
|
}
|
|
cwd := st.cwd
|
|
if dirfd != atFDCWD && !strings.HasPrefix(old, "/") {
|
|
if guest, ok := t.fdGuestPath(st.pid, dirfd); ok {
|
|
cwd = guest
|
|
}
|
|
}
|
|
guest, host, special := t.translateSpecialProcPath(st, cwd, old)
|
|
if !special {
|
|
guest, host = t.pm.TranslateMode(cwd, old, mode)
|
|
}
|
|
t.debugf("pid=%d path mode=%d cwd=%q old=%q guest=%q host=%q", st.pid, mode, cwd, old, guest, host)
|
|
newAddr, err := writeTraceeString(st, uintptr(addr), old, host)
|
|
if err != nil {
|
|
return guest, err
|
|
}
|
|
setArg(regs, n, uint64(newAddr))
|
|
return guest, setRegs(st.pid, regs)
|
|
}
|
|
|
|
func pathModeFromOpenFlags(flags uint64) prootext.PathMode {
|
|
const accessMode = uint64(os.O_RDONLY | os.O_WRONLY | os.O_RDWR)
|
|
switch flags & accessMode {
|
|
case uint64(os.O_WRONLY), uint64(os.O_RDWR):
|
|
if flags&uint64(os.O_CREATE|os.O_TRUNC) != 0 {
|
|
return prootext.PathCreate
|
|
}
|
|
return prootext.PathWrite
|
|
default:
|
|
return prootext.PathRead
|
|
}
|
|
}
|
|
|
|
func (t *tracer) pathModeFromOpenHow(st *traceeState, addr uint64) prootext.PathMode {
|
|
flags, err := readTraceeUint64(st.pid, uintptr(addr))
|
|
if err != nil {
|
|
return prootext.PathWrite
|
|
}
|
|
return pathModeFromOpenFlags(flags)
|
|
}
|
|
|
|
func (t *tracer) fdGuestPath(pid int, fd int) (string, bool) {
|
|
if fd < 0 {
|
|
return "", false
|
|
}
|
|
link := fmt.Sprintf("/proc/%d/fd/%d", pid, fd)
|
|
target, err := os.Readlink(link)
|
|
if err != nil {
|
|
return "", false
|
|
}
|
|
return t.pm.HostToGuest(target), true
|
|
}
|
|
|
|
func (t *tracer) translateSpecialProcPath(st *traceeState, cwd, p string) (guest, host string, ok bool) {
|
|
guest = joinGuest(cwd, p)
|
|
|
|
// /proc is intentionally not bound by default because walking the host
|
|
// procfs from commands such as `find /` races disappearing tasks. Some
|
|
// programs, however, require the process-local procfs aliases, especially
|
|
// apt/dpkg probing /proc/self/fd. Provide just those aliases without
|
|
// exposing the entire host /proc tree as a guest bind.
|
|
if guest == "/proc/self" || strings.HasPrefix(guest, "/proc/self/") {
|
|
suffix := strings.TrimPrefix(guest, "/proc/self")
|
|
return guest, filepath.Clean(filepath.Join("/proc", strconv.Itoa(st.pid), suffix)), true
|
|
}
|
|
if guest == "/proc/thread-self" || strings.HasPrefix(guest, "/proc/thread-self/") {
|
|
suffix := strings.TrimPrefix(guest, "/proc/thread-self")
|
|
return guest, filepath.Clean(filepath.Join("/proc", strconv.Itoa(st.pid), "task", strconv.Itoa(st.pid), suffix)), true
|
|
}
|
|
|
|
// If the tracee uses the real PID returned by getpid(), keep self-like
|
|
// entries usable even with DefaultBinds not containing /proc. Do not map
|
|
// arbitrary /proc/<other-pid> paths; that would reintroduce the host procfs
|
|
// traversal problem this backend tries to avoid by default.
|
|
pidPrefix := "/proc/" + strconv.Itoa(st.pid)
|
|
if guest == pidPrefix || strings.HasPrefix(guest, pidPrefix+"/") {
|
|
suffix := strings.TrimPrefix(guest, pidPrefix)
|
|
switch {
|
|
case suffix == "", suffix == "/fd", strings.HasPrefix(suffix, "/fd/"),
|
|
suffix == "/fdinfo", strings.HasPrefix(suffix, "/fdinfo/"),
|
|
suffix == "/cwd", suffix == "/exe", suffix == "/root", suffix == "/status":
|
|
return guest, filepath.Clean(filepath.Join("/proc", strconv.Itoa(st.pid), suffix)), true
|
|
}
|
|
}
|
|
|
|
return "", "", false
|
|
}
|
|
|
|
// readTraceeString copies a NUL-terminated string out of the tracee's memory.
//
// pid identifies the tracee, addr is the address of the first byte and max is
// the largest number of bytes to return. A zero addr or a non-positive max
// yields "", nil. If no NUL byte is found within max bytes, the first max
// bytes are returned without an error.
//
// Memory is fetched in chunks of up to 256 bytes, never more than what is
// still needed to reach max. A string that ends shortly before an unmapped
// page (for example one stored at the very top of the stack) makes a chunk
// read fail part-way. syscall.PtracePeekData still reports how many bytes it
// fetched before failing, so those bytes are searched for the terminator
// first and the error is only returned when the string is really unreadable.
func readTraceeString(pid int, addr uintptr, max int) (string, error) {
	if addr == 0 || max <= 0 {
		return "", nil
	}

	var out []byte
	buf := make([]byte, 256)
	for len(out) < max {
		chunk := buf
		if room := max - len(out); room < len(chunk) {
			chunk = chunk[:room]
		}

		n, err := syscall.PtracePeekData(pid, addr+uintptr(len(out)), chunk)
		// Consume whatever was read, even when the read ended in an error.
		for _, b := range chunk[:n] {
			if b == 0 {
				return string(out), nil
			}
			out = append(out, b)
		}
		if err != nil {
			return "", err
		}
		if n == 0 {
			// Defensive: a successful read of a non-empty chunk should never
			// be empty, but do not spin if it ever is.
			break
		}
	}
	return string(out), nil
}
|
|
|
|
func writeTraceeString(st *traceeState, oldAddr uintptr, old, new string) (uintptr, error) {
|
|
data := append([]byte(new), 0)
|
|
addr := uintptr(st.scratch - uint64(len(data)+16))
|
|
addr &^= uintptr(15)
|
|
st.scratch = uint64(addr)
|
|
_, err := syscall.PtracePokeData(st.pid, addr, data)
|
|
return addr, err
|
|
}
|
|
|
|
func (t *tracer) translateExecve(st *traceeState, regs *syscall.PtraceRegs, pathArg, argvArg int, dirfd int) error {
|
|
pathAddr := arg(regs, pathArg)
|
|
if pathAddr == 0 {
|
|
return nil
|
|
}
|
|
old, err := readTraceeString(st.pid, uintptr(pathAddr), 4096)
|
|
if err != nil || old == "" {
|
|
return err
|
|
}
|
|
cwd := st.cwd
|
|
if dirfd != atFDCWD && !strings.HasPrefix(old, "/") {
|
|
if guest, ok := t.fdGuestPath(st.pid, dirfd); ok {
|
|
cwd = guest
|
|
}
|
|
}
|
|
guest, host, special := t.translateSpecialProcPath(st, cwd, old)
|
|
if !special {
|
|
guest, host = t.pm.Translate(cwd, old)
|
|
}
|
|
t.debugf("pid=%d exec cwd=%q old=%q guest=%q host=%q", st.pid, cwd, old, guest, host)
|
|
argv, _ := readTraceeStringVector(st.pid, uintptr(arg(regs, argvArg)), 4096)
|
|
if len(argv) == 0 {
|
|
argv = []string{old}
|
|
}
|
|
if rw, changed := t.pm.resolveExec(host, guest, argv); changed {
|
|
t.debugf("pid=%d exec rewrite execPath=%q argv=%q", st.pid, rw.ExecPath, strings.Join(rw.Argv, " "))
|
|
newPath, err := writeScratchString(st, rw.ExecPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
argvPtr, err := writeTraceeStringVector(st, rw.Argv)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
setArg(regs, pathArg, uint64(newPath))
|
|
setArg(regs, argvArg, uint64(argvPtr))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
newAddr, err := writeTraceeString(st, uintptr(pathAddr), old, host)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
setArg(regs, pathArg, uint64(newAddr))
|
|
return setRegs(st.pid, regs)
|
|
}
|
|
|
|
func writeTraceeUint32(pid int, addr uintptr, v uint32) error {
|
|
if addr == 0 {
|
|
return nil
|
|
}
|
|
data := make([]byte, 4)
|
|
putTraceeUint32(data, v)
|
|
_, err := syscall.PtracePokeData(pid, addr, data)
|
|
return err
|
|
}
|
|
|
|
func readTraceeUint32(pid int, addr uintptr) (uint32, error) {
|
|
buf := make([]byte, 4)
|
|
if _, err := syscall.PtracePeekData(pid, addr, buf); err != nil {
|
|
return 0, err
|
|
}
|
|
return traceeUint32(buf), nil
|
|
}
|
|
|
|
func readTraceeUint64(pid int, addr uintptr) (uint64, error) {
|
|
buf := make([]byte, 8)
|
|
if _, err := syscall.PtracePeekData(pid, addr, buf); err != nil {
|
|
return 0, err
|
|
}
|
|
if nativeLittleEndian {
|
|
var v uint64
|
|
for i := range buf {
|
|
v |= uint64(buf[i]) << (8 * uint(i))
|
|
}
|
|
return v, nil
|
|
}
|
|
var v uint64
|
|
for i := range buf {
|
|
v = (v << 8) | uint64(buf[i])
|
|
}
|
|
return v, nil
|
|
}
|
|
|
|
func readTraceeUint32Array(pid int, addr uintptr, count uint64) ([]uint32, error) {
|
|
if count == 0 {
|
|
return nil, nil
|
|
}
|
|
if addr == 0 {
|
|
return nil, syscall.EFAULT
|
|
}
|
|
out := make([]uint32, 0, count)
|
|
for i := uint64(0); i < count; i++ {
|
|
v, err := readTraceeUint32(pid, addr+uintptr(i*4))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, v)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func readTraceeStringVector(pid int, addr uintptr, max int) ([]string, error) {
|
|
if addr == 0 {
|
|
return nil, nil
|
|
}
|
|
out := []string{}
|
|
for i := 0; i < max; i++ {
|
|
p, err := readTraceePtr(pid, addr+uintptr(i*ptrSize))
|
|
if err != nil {
|
|
return out, err
|
|
}
|
|
if p == 0 {
|
|
return out, nil
|
|
}
|
|
s, err := readTraceeString(pid, uintptr(p), 4096)
|
|
if err != nil {
|
|
return out, err
|
|
}
|
|
out = append(out, s)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func readTraceePtr(pid int, addr uintptr) (uint64, error) {
|
|
buf := make([]byte, ptrSize)
|
|
_, err := syscall.PtracePeekData(pid, addr, buf)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return traceePtr(buf), nil
|
|
}
|
|
|
|
func writeScratchString(st *traceeState, s string) (uintptr, error) {
|
|
data := append([]byte(s), 0)
|
|
addr := uintptr(st.scratch - uint64(len(data)+16))
|
|
addr &^= uintptr(15)
|
|
st.scratch = uint64(addr)
|
|
_, err := syscall.PtracePokeData(st.pid, addr, data)
|
|
return addr, err
|
|
}
|
|
|
|
func writeTraceeStringVector(st *traceeState, values []string) (uintptr, error) {
|
|
ptrs := make([]uint64, 0, len(values)+1)
|
|
for _, s := range values {
|
|
addr, err := writeScratchString(st, s)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
ptrs = append(ptrs, uint64(addr))
|
|
}
|
|
ptrs = append(ptrs, 0)
|
|
data := make([]byte, len(ptrs)*ptrSize)
|
|
for i, p := range ptrs {
|
|
base := i * ptrSize
|
|
putTraceePtr(data[base:base+ptrSize], p)
|
|
}
|
|
addr := uintptr(st.scratch - uint64(len(data)+16))
|
|
addr &^= uintptr(15)
|
|
st.scratch = uint64(addr)
|
|
_, err := syscall.PtracePokeData(st.pid, addr, data)
|
|
return addr, err
|
|
}
|
|
|
|
func traceeUint32(buf []byte) uint32 {
|
|
if nativeLittleEndian {
|
|
return uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
|
|
}
|
|
return uint32(buf[3]) | uint32(buf[2])<<8 | uint32(buf[1])<<16 | uint32(buf[0])<<24
|
|
}
|
|
|
|
func putTraceeUint32(buf []byte, v uint32) {
|
|
if nativeLittleEndian {
|
|
buf[0] = byte(v)
|
|
buf[1] = byte(v >> 8)
|
|
buf[2] = byte(v >> 16)
|
|
buf[3] = byte(v >> 24)
|
|
return
|
|
}
|
|
buf[0] = byte(v >> 24)
|
|
buf[1] = byte(v >> 16)
|
|
buf[2] = byte(v >> 8)
|
|
buf[3] = byte(v)
|
|
}
|
|
|
|
func traceePtr(buf []byte) uint64 {
|
|
var v uint64
|
|
if nativeLittleEndian {
|
|
for i := 0; i < ptrSize; i++ {
|
|
v |= uint64(buf[i]) << (8 * uint(i))
|
|
}
|
|
return v
|
|
}
|
|
for i := 0; i < ptrSize; i++ {
|
|
v = (v << 8) | uint64(buf[i])
|
|
}
|
|
return v
|
|
}
|
|
|
|
func putTraceePtr(buf []byte, v uint64) {
|
|
if nativeLittleEndian {
|
|
for i := 0; i < ptrSize; i++ {
|
|
buf[i] = byte(v >> (8 * uint(i)))
|
|
}
|
|
return
|
|
}
|
|
for i := 0; i < ptrSize; i++ {
|
|
buf[i] = byte(v >> (8 * uint(ptrSize-1-i)))
|
|
}
|
|
}
|