subgraph-oz/Godeps/_workspace/src/github.com/subgraph/go-seccomp/seccomp.go

// Copyright 2015 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Package seccomp implements support for compiling and installing Seccomp-BPF policy files.
//   - http://www.chromium.org/chromium-os/developer-guide/chromium-os-sandboxing
//
// Typical usage:
//	// Check for the required kernel support for seccomp.
//	if err := seccomp.CheckSupport(); err != nil {
//		log.Fatal(err)
//	}
//
//	// Compile BPF program from a Chromium-OS policy file.
//	bpf, err := seccomp.Compile(path)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	// Install Seccomp-BPF filter program with the kernel.
//	if err := seccomp.Install(bpf); err != nil {
//		log.Fatal(err)
//	}
//
// For background and more information:
//   - http://www.tcpdump.org/papers/bpf-usenix93.pdf
//   - http://en.wikipedia.org/wiki/Seccomp
//   - http://lwn.net/Articles/475043/
//   - http://outflux.net/teach-seccomp/
//   - http://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt
//   - http://github.com/torvalds/linux/blob/master/kernel/seccomp.c
//
// TODO:
//   - Exit the program if any thread is killed because of seccomp violation.
//   - Provide a debug mode to log system calls used during normal operation.
package seccomp

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"syscall"
	"unsafe"
)

// #include <sys/prctl.h>
// #include "unistd_64.h"
// #include "seccomp.h"
import "C"

// SeccompData is the format the BPF program executes over.
// This struct mirrors struct seccomp_data from <linux/seccomp.h>.
type SeccompData struct {
	NR                 int32     // The system call number.
	Arch               uint32    // System call convention as an AUDIT_ARCH_* value.
	InstructionPointer uint64    // At the time of the system call.
	Args               [6]uint64 // System call arguments (always stored as 64-bit values).
}

// C version of the struct used for sanity checking.
type seccomp_data C.struct_seccomp_data

// bpfLoadNR returns the instruction to load the NR field in SeccompData.
func bpfLoadNR() SockFilter {
	return bpfLoad(unsafe.Offsetof(SeccompData{}.NR))
}

// bpfLoadArch returns the instruction to load the Arch field in SeccompData.
func bpfLoadArch() SockFilter {
	return bpfLoad(unsafe.Offsetof(SeccompData{}.Arch))
}

// bpfLoadArg returns the instruction to load one word of an argument in SeccompData.
func bpfLoadArg(arg, word int) SockFilter {
	return bpfLoad(unsafe.Offsetof(SeccompData{}.Args) + uintptr(((2*arg)+word)*4))
}

// retKill returns the code for seccomp kill action.
func retKill() uint32 {
	return C.SECCOMP_RET_KILL
}

// retTrap returns the code for seccomp trap action.
func retTrap() uint32 {
	return C.SECCOMP_RET_TRAP
}

// retErrno returns the code for seccomp errno action with the specified errno embedded.
func retErrno(errno syscall.Errno) uint32 {
	return C.SECCOMP_RET_ERRNO | (uint32(errno) & C.SECCOMP_RET_DATA)
}

// retAllow returns the code for seccomp allow action.
func retAllow() uint32 {
	return C.SECCOMP_RET_ALLOW
}

// policy represents the seccomp policy for a single syscall.
type policy struct {
	// name of the syscall.
	name string

	// expr is evaluated on the syscall arguments.
	// nil expr evaluates to false.
	expr orExpr

	// then is executed if the expr evaluates to true.
	// (cannot be specified in policy file, used in tests only).
	then SockFilter

	// default action (else) if the expr evaluates to false.
	// nil means jump to end of program for the overall default.
	def *SockFilter
}

// orExpr is a list of and expressions.
type orExpr []andExpr

// andExpr is a list of arg comparisons.
type andExpr []argComp

// argComp represents a basic argument comparison in the policy.
type argComp struct {
	idx  int    // 0..5 for indexing into SeccompData.Args.
	oper string // comparison operator: "==", "!=", or "&".
	val  uint64 // upper 32 bits compared only if nbits>32.
}

// String converts the internal policy representation back to policy file syntax.
func (p policy) String() string {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%s: ", p.name)

	for i, and := range p.expr {
		if i > 0 {
			fmt.Fprintf(&buf, " || ")
		}
		for j, arg := range and {
			if j > 0 {
				fmt.Fprintf(&buf, " && ")
			}
			fmt.Fprintf(&buf, "arg%d %s %#x", arg.idx, arg.oper, arg.val)
		}
	}

	pret := func(f SockFilter) {
		if f.Code == opRET {
			switch f.K & C.SECCOMP_RET_ACTION {
			case C.SECCOMP_RET_ALLOW:
				fmt.Fprintf(&buf, "1")
				return
			case C.SECCOMP_RET_ERRNO:
				fmt.Fprintf(&buf, "return %d", f.K&C.SECCOMP_RET_DATA)
				return
			}
		}
		fmt.Fprintf(&buf, "%s", f)
	}
	if p.then != bpfRet(retAllow()) {
		fmt.Fprintf(&buf, " ? ")
		pret(p.then)
	}
	if p.def != nil {
		if p.expr != nil {
			fmt.Fprintf(&buf, "; ")
		}
		pret(*p.def)
	}

	return buf.String()
}

// Syntax of policy line for a single syscall.
var (
	allowRE      = regexp.MustCompile(`^([[:word:]]+) *: *1$`)
	returnRE     = regexp.MustCompile(`^([[:word:]]+) *: *return *([[:word:]]+)$`)
	exprRE       = regexp.MustCompile(`^([[:word:]]+) *:([^;]+)$`)
	exprReturnRE = regexp.MustCompile(`^([[:word:]]+) *:([^;]+); *return *([[:word:]]+)$`)

	argRE = regexp.MustCompile(`^arg([0-5]) *(==|!=|&) *([[:word:]]+)$`)
)

// parseLine parses the policy line for a single syscall.
func parseLine(line string) (policy, error) {
	var name, expr, ret string
	var then SockFilter
	var def *SockFilter

	line = strings.TrimSpace(line)
	if match := allowRE.FindStringSubmatch(line); match != nil {
		name = match[1]
		def = ptr(bpfRet(retAllow()))
	} else if match = returnRE.FindStringSubmatch(line); match != nil {
		name = match[1]
		ret = match[2]
	} else if match = exprRE.FindStringSubmatch(line); match != nil {
		name = match[1]
		expr = match[2]
	} else if match = exprReturnRE.FindStringSubmatch(line); match != nil {
		name = match[1]
		expr = match[2]
		ret = match[3]
	} else {
		return policy{}, fmt.Errorf("invalid syntax")
	}

	if _, ok := syscallNum[name]; !ok {
		return policy{}, fmt.Errorf("unknown syscall: %s", name)
	}

	var or orExpr
	if expr != "" {
		for _, sub := range strings.Split(expr, "||") {
			var and andExpr
			for _, arg := range strings.Split(sub, "&&") {
				arg = strings.TrimSpace(arg)
				match := argRE.FindStringSubmatch(arg)
				if match == nil {
					return policy{}, fmt.Errorf("invalid expression: %s", arg)
				}
				idx, err := strconv.Atoi(match[1])
				if err != nil {
					return policy{}, fmt.Errorf("invalid arg: %s", arg)
				}
				oper := match[2]
				val, err := strconv.ParseUint(match[3], 0, 64)
				if err != nil {
					return policy{}, fmt.Errorf("invalid value: %s", arg)
				}
				and = append(and, argComp{idx, oper, val})
			}
			or = append(or, and)
		}
	}

	then = bpfRet(retAllow())

	if ret != "" {
		errno, err := strconv.ParseUint(ret, 0, 16)
		if err != nil {
			return policy{}, fmt.Errorf("invalid errno: %s", ret)
		}
		def = ptr(bpfRet(retErrno(syscall.Errno(errno))))
	}

	return policy{name, or, then, def}, nil
}

// parseLines parses multiple policy lines, each one for a single syscall.
// Empty lines and lines beginning with "#" are ignored.
// Multiple policies for a syscall are detected and reported as error.
func parseLines(lines []string) ([]policy, error) {
	var ps []policy
	seen := make(map[string]int)
	for i, line := range lines {
		lineno := i + 1
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		p, err := parseLine(line)
		if err != nil {
			return nil, fmt.Errorf("line %d: %v", lineno, err)
		}
		if seen[p.name] > 0 {
			return nil, fmt.Errorf("lines %d,%d: multiple policies for %s",
				seen[p.name], lineno, p.name)
		}
		seen[p.name] = lineno
		ps = append(ps, p)
	}
	return ps, nil
}

// parseFile reads a Chromium-OS Seccomp-BPF policy file and parses its contents.
func parseFile(path string) ([]policy, error) {
	file, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	return parseLines(strings.Split(string(file), "\n"))
}

// compile compiles a Seccomp-BPF program implementing the syscall policies.
// long specifies whether to generate 32-bit or 64-bit argument comparisons.
// def is the overall default action to take when the syscall does not match
// any policy in the filter.
func compile(ps []policy, long bool, def SockFilter) ([]SockFilter, error) {
	var bpf []SockFilter
	do := func(insn SockFilter) {
		bpf = append(bpf, insn)
	}

	// ref maps a label to addresses of all the instructions that jump to it.
	ref := make(map[string][]int)
	jump := func(name string) {
		// jump to a label with unresolved address: insert a placeholder instruction.
		ref[name] = append(ref[name], len(bpf))
		do(SockFilter{})
	}
	label := func(name string) {
		// label address resolved: replace placeholder instructions with actual jumps.
		for _, i := range ref[name] {
			bpf[i] = bpfJump(len(bpf) - (i + 1))
		}
		delete(ref, name)
	}

	// Conditional jumps: jump if condition is true, fall through otherwise.
	jeq := func(val uint32, target string) {
		// if A == val { goto target }
		do(bpfJeq(val, 0, 1))
		jump(target)
	}
	jne := func(val uint32, target string) {
		// if A != val { goto target }
		do(bpfJeq(val, 1, 0))
		jump(target)
	}
	jset := func(val uint32, target string) {
		// if A&val != 0 { goto target }
		do(bpfJset(val, 0, 1))
		jump(target)
	}
	jnset := func(val uint32, target string) {
		// if A&val == 0 { goto target }
		do(bpfJset(val, 1, 0))
		jump(target)
	}

	do(bpfLoadArch())
	do(bpfJeq(auditArch, 1, 0))
	do(bpfRet(retKill()))

	do(bpfLoadNR())
	for _, p := range ps {
		nr, ok := syscallNum[p.name]
		if !ok {
			return nil, fmt.Errorf("unknown syscall: %s", p.name)
		}
		jne(uint32(nr), "nextcall")
		for _, and := range p.expr {
			for _, arg := range and {
				val := struct{ high, low uint32 }{uint32(arg.val >> 32), uint32(arg.val)}
				switch arg.oper {
				case "==":
					if long {
						do(bpfLoadArg(arg.idx, 1))
						jne(val.high, "nextor")
					}
					do(bpfLoadArg(arg.idx, 0))
					jne(val.low, "nextor")
				case "!=":
					if long {
						do(bpfLoadArg(arg.idx, 1))
						jne(val.high, "nextand")
					}
					do(bpfLoadArg(arg.idx, 0))
					jeq(val.low, "nextor")
				case "&":
					if long {
						do(bpfLoadArg(arg.idx, 1))
						jset(val.high, "nextand")
					}
					do(bpfLoadArg(arg.idx, 0))
					jnset(val.low, "nextor")
				default:
					return nil, fmt.Errorf("unknown operator: %q", arg.oper)
				}

				// Comparison was satisfied. Move on to the next comparison in &&.
				label("nextand")
			}

			// All comparisons in && were satisfied.
			do(p.then)

			// Some comparison in && was false. Move on to the next expression in ||.
			label("nextor")
		}

		// All expressions in || evaluated to false (or expr was nil).
		if p.def != nil {
			do(*p.def)
		} else {
			jump("default")
		}

		label("nextcall")
	}

	label("default")
	do(def)

	if len(ref) > 0 {
		return nil, fmt.Errorf("unresolved labels: %v\n%v", ref, bpf)
	}
	return bpf, nil
}

// Compile reads a Chromium-OS policy file and compiles a
// Seccomp-BPF filter program implementing the policies.
func Compile(path string) ([]SockFilter, error) {
	ps, err := parseFile(path)
	if err != nil {
		return nil, err
	}
	return compile(ps, nbits > 32, bpfRet(retKill()))
}

// prctl is a wrapper for the 'prctl' system call.
// See 'man prctl' for details.
func prctl(option uintptr, args ...uintptr) error {
	if len(args) > 4 {
		return syscall.E2BIG
	}
	var arg [4]uintptr
	copy(arg[:], args)
	_, _, e := syscall.Syscall6(C.__NR_prctl, option, arg[0], arg[1], arg[2], arg[3], 0)
	if e != 0 {
		return e
	}
	return nil
}

// seccomp is a wrapper for the 'seccomp' system call.
// See <linux/seccomp.h> for valid op and flag values.
// uargs is typically a pointer to struct sock_fprog.
func seccomp(op, flags uintptr, uargs unsafe.Pointer) error {
	_, _, e := syscall.Syscall(C.__NR_seccomp, op, flags, uintptr(uargs))
	if e != 0 {
		return e
	}
	return nil
}

// CheckSupport checks for the required seccomp support in the kernel.
func CheckSupport() error {
	// This is based on http://outflux.net/teach-seccomp/autodetect.html.
	if err := prctl(C.PR_GET_SECCOMP); err != nil {
		return fmt.Errorf("seccomp not available: %v", err)
	}
	if err := prctl(C.PR_SET_SECCOMP, C.SECCOMP_MODE_FILTER, 0); err != syscall.EFAULT {
		return fmt.Errorf("seccomp filter not available: %v", err)
	}
	if err := seccomp(C.SECCOMP_SET_MODE_FILTER, 0, nil); err != syscall.EFAULT {
		return fmt.Errorf("seccomp syscall not available: %v", err)
	}
	if err := seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, nil); err != syscall.EFAULT {
		return fmt.Errorf("seccomp tsync not available: %v", err)
	}
	return nil
}

// Load makes the seccomp system call to install the bpf filter for
// all threads (with tsync). prctl(set_no_new_privs, 1) must have
// been called (from the same thread) before calling Load for the
// first time.
//   Most users of this library should use Install instead of calling
//   Load directly. There are a couple of situations where it may be
//   necessary to use Load instead of Install:
//   - If a previous call to Install has disabled the 'prctl' system
//     call, Install cannot be called again. In that case, it is safe
//     to add additional filters directly with Load.
//   - If the process is running as a priviledged user, and you want
//     to load the seccomp filter without setting no_new_privs.
func Load(bpf []SockFilter) error {
	if size, limit := len(bpf), 0xffff; size > limit {
		return fmt.Errorf("filter program too big: %d bpf instructions (limit = %d)", size, limit)
	}
	prog := &SockFprog{
		Filter: &bpf[0],
		Len:    uint16(len(bpf)),
	}
	return seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(prog))
}

// Install makes the necessary system calls to install the Seccomp-BPF
// filter for the current process (all threads). Install can be called
// multiple times to install additional filters.
func Install(bpf []SockFilter) error {
	// prctl(set_no_new_privs, 1) must be called (from the same thread)
	// before a seccomp filter can be installed by an unprivileged user:
	// - http://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	if err := prctl(C.PR_SET_NO_NEW_PRIVS, 1); err != nil {
		return err
	}
	return Load(bpf)
}