You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

495 lines
14 KiB

// Copyright 2015 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package seccomp implements support for compiling and installing Seccomp-BPF policy files.
// - http://www.chromium.org/chromium-os/developer-guide/chromium-os-sandboxing
//
// Typical usage:
//	// Check for the required kernel support for seccomp.
//	if err := seccomp.CheckSupport(); err != nil {
//		log.Fatal(err)
//	}
//
//	// Compile BPF program from a Chromium-OS policy file.
//	bpf, err := seccomp.Compile(path)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	// Install Seccomp-BPF filter program with the kernel.
//	if err := seccomp.Install(bpf); err != nil {
//		log.Fatal(err)
//	}
//
// For background and more information:
// - http://www.tcpdump.org/papers/bpf-usenix93.pdf
// - http://en.wikipedia.org/wiki/Seccomp
// - http://lwn.net/Articles/475043/
// - http://outflux.net/teach-seccomp/
// - http://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt
// - http://github.com/torvalds/linux/blob/master/kernel/seccomp.c
//
// TODO:
// - Exit the program if any thread is killed because of seccomp violation.
// - Provide a debug mode to log system calls used during normal operation.
package seccomp
import (
"bytes"
"fmt"
"io/ioutil"
"regexp"
"runtime"
"strconv"
"strings"
"syscall"
"unsafe"
)
// #include <sys/prctl.h>
// #include "unistd_64.h"
// #include "seccomp.h"
import "C"
// SeccompData is the format the BPF program executes over.
// This struct mirrors struct seccomp_data from <linux/seccomp.h>.
//
// The field order and field sizes are significant: the bpfLoadNR,
// bpfLoadArch and bpfLoadArg helpers in this file derive BPF load offsets
// from this layout with unsafe.Offsetof, so fields must not be reordered
// or resized.
type SeccompData struct {
NR int32 // The system call number.
Arch uint32 // System call convention as an AUDIT_ARCH_* value.
InstructionPointer uint64 // At the time of the system call.
Args [6]uint64 // System call arguments (always stored as 64-bit values).
}
// seccomp_data is a Go type defined (via cgo) from the C struct seccomp_data.
// It is the C version of SeccompData and is used for sanity checking; the
// checks themselves are not part of this section of the file.
type seccomp_data C.struct_seccomp_data
// bpfLoadNR builds the BPF load instruction that reads the NR (system call
// number) field of SeccompData, addressed by the field's byte offset.
func bpfLoadNR() SockFilter {
	var data SeccompData
	offset := unsafe.Offsetof(data.NR)
	return bpfLoad(offset)
}
// bpfLoadArch builds the BPF load instruction that reads the Arch field of
// SeccompData, addressed by the field's byte offset.
func bpfLoadArch() SockFilter {
	var data SeccompData
	offset := unsafe.Offsetof(data.Arch)
	return bpfLoad(offset)
}
// bpfLoadArg builds the BPF load instruction that reads one 32-bit word of a
// system call argument in SeccompData.
//
// arg selects the argument (an index into SeccompData.Args) and word selects
// which 4-byte half of that 64-bit argument to load. The word at the lower
// offset is word 0; compile in this file treats word 0 as the low half and
// word 1 as the high half of the value.
func bpfLoadArg(arg, word int) SockFilter {
	const wordSize = 4 // bytes per 32-bit word
	base := unsafe.Offsetof(SeccompData{}.Args)
	wordIndex := 2*arg + word // each argument spans two words
	return bpfLoad(base + uintptr(wordIndex*wordSize))
}
// retKill yields the seccomp return code for the kill action
// (SECCOMP_RET_KILL).
func retKill() uint32 {
	const action = C.SECCOMP_RET_KILL
	return action
}
// retTrap yields the seccomp return code for the trap action
// (SECCOMP_RET_TRAP).
func retTrap() uint32 {
	const action = C.SECCOMP_RET_TRAP
	return action
}
// retErrno yields the seccomp return code for the errno action
// (SECCOMP_RET_ERRNO) with errno embedded in the data bits. The errno value
// is masked with SECCOMP_RET_DATA, so bits outside that mask are dropped.
func retErrno(errno syscall.Errno) uint32 {
	data := uint32(errno) & C.SECCOMP_RET_DATA
	return C.SECCOMP_RET_ERRNO | data
}
// retAllow yields the seccomp return code for the allow action
// (SECCOMP_RET_ALLOW).
func retAllow() uint32 {
	const action = C.SECCOMP_RET_ALLOW
	return action
}
// policy represents the seccomp policy for a single syscall.
//
// When compiled, the policy applies only to the syscall called name. expr is
// evaluated against the syscall arguments; if it holds, then is executed,
// otherwise def is executed (or, when def is nil, control passes to the
// overall default action of the program).
type policy struct {
// name of the syscall.
name string
// expr is evaluated on the syscall arguments.
// nil expr evaluates to false.
expr orExpr
// then is executed if the expr evaluates to true.
// (cannot be specified in policy file, used in tests only).
// parseLine always sets it to the allow action.
then SockFilter
// default action (else) if the expr evaluates to false.
// nil means jump to end of program for the overall default.
def *SockFilter
}
// orExpr is a list of and expressions.
// It is satisfied when any one of its andExpr elements is satisfied
// (the "||" level of the policy syntax).
type orExpr []andExpr
// andExpr is a list of arg comparisons.
// It is satisfied only when every comparison in it is satisfied
// (the "&&" level of the policy syntax).
type andExpr []argComp
// argComp represents a basic argument comparison in the policy,
// written in the policy file as e.g. "arg0 == 0x1".
//
// For "&" the comparison is satisfied when the argument has at least one
// bit in common with val (see the "&" case in compile).
type argComp struct {
idx int // 0..5 for indexing into SeccompData.Args.
oper string // comparison operator: "==", "!=", or "&".
val uint64 // upper 32 bits compared only if nbits>32.
}
// String renders the policy using policy file syntax:
//
//	name: argN op val && ... || ... [ ? then ][; ][default]
//
// The " ? then" part is printed only when then differs from the plain allow
// action. The default action is printed only when def is set, preceded by
// "; " when the policy has an expression.
func (p policy) String() string {
	var out bytes.Buffer
	out.WriteString(p.name)
	out.WriteString(": ")

	for i, conj := range p.expr {
		if i != 0 {
			out.WriteString(" || ")
		}
		for j, c := range conj {
			if j != 0 {
				out.WriteString(" && ")
			}
			fmt.Fprintf(&out, "arg%d %s %#x", c.idx, c.oper, c.val)
		}
	}

	// writeAction prints an action instruction. Allow and errno returns use
	// their policy file spelling; anything else is printed via the
	// instruction's own %s formatting.
	writeAction := func(f SockFilter) {
		if f.Code != opRET {
			fmt.Fprintf(&out, "%s", f)
			return
		}
		switch f.K & C.SECCOMP_RET_ACTION {
		case C.SECCOMP_RET_ALLOW:
			out.WriteString("1")
		case C.SECCOMP_RET_ERRNO:
			fmt.Fprintf(&out, "return %d", f.K&C.SECCOMP_RET_DATA)
		default:
			fmt.Fprintf(&out, "%s", f)
		}
	}

	if allow := bpfRet(retAllow()); p.then != allow {
		out.WriteString(" ? ")
		writeAction(p.then)
	}
	if p.def == nil {
		return out.String()
	}
	if p.expr != nil {
		out.WriteString("; ")
	}
	writeAction(*p.def)
	return out.String()
}
// Syntax of policy line for a single syscall.
// parseLine tries the line-level patterns in the order allowRE, returnRE,
// exprRE, exprReturnRE; argRE is applied to each individual comparison of an
// expression after it has been split on "||" and "&&".
var (
// allowRE matches an unconditional allow, e.g. "read: 1".
allowRE = regexp.MustCompile(`^([[:word:]]+) *: *1$`)
// returnRE matches an unconditional errno return, e.g. "read: return 1".
returnRE = regexp.MustCompile(`^([[:word:]]+) *: *return *([[:word:]]+)$`)
// exprRE matches a syscall name followed by an expression that contains
// no ';', e.g. "read: arg0 == 1 || arg1 & 0x2".
exprRE = regexp.MustCompile(`^([[:word:]]+) *:([^;]+)$`)
// exprReturnRE matches an expression followed by an errno return,
// e.g. "read: arg0 == 1; return 13".
exprReturnRE = regexp.MustCompile(`^([[:word:]]+) *:([^;]+); *return *([[:word:]]+)$`)
// argRE matches a single comparison: argument index 0-5, an operator
// ("==", "!=" or "&") and a value, e.g. "arg0 == 0x1".
argRE = regexp.MustCompile(`^arg([0-5]) *(==|!=|&) *([[:word:]]+)$`)
)
// parseLine parses the policy line for a single syscall.
//
// Surrounding whitespace is ignored. Four forms are accepted:
//
//	name: 1                    always allow
//	name: return ERRNO         always fail with ERRNO
//	name: EXPR                 allow if EXPR holds, otherwise overall default
//	name: EXPR; return ERRNO   allow if EXPR holds, otherwise fail with ERRNO
//
// EXPR is a "||"-separated list of "&&"-separated comparisons of the form
// "argN OP VALUE". VALUE and ERRNO are parsed with base auto-detection
// (decimal, 0x hex, 0 octal); ERRNO must fit in 16 bits.
//
// An error is returned for a line matching none of the forms, an unknown
// syscall name, a malformed comparison, or an unparsable value or errno.
func parseLine(line string) (policy, error) {
	line = strings.TrimSpace(line)

	// Classify the line and pull out its raw pieces.
	var (
		name, expr, ret string
		def             *SockFilter
	)
	if m := allowRE.FindStringSubmatch(line); m != nil {
		name = m[1]
		def = ptr(bpfRet(retAllow()))
	} else if m = returnRE.FindStringSubmatch(line); m != nil {
		name, ret = m[1], m[2]
	} else if m = exprRE.FindStringSubmatch(line); m != nil {
		name, expr = m[1], m[2]
	} else if m = exprReturnRE.FindStringSubmatch(line); m != nil {
		name, expr, ret = m[1], m[2], m[3]
	} else {
		return policy{}, fmt.Errorf("invalid syntax")
	}

	if _, known := syscallNum[name]; !known {
		return policy{}, fmt.Errorf("unknown syscall: %s", name)
	}

	// Break the expression into OR-ed groups of AND-ed comparisons.
	var alternatives orExpr
	if expr != "" {
		for _, clause := range strings.Split(expr, "||") {
			var conj andExpr
			for _, term := range strings.Split(clause, "&&") {
				term = strings.TrimSpace(term)
				fields := argRE.FindStringSubmatch(term)
				if fields == nil {
					return policy{}, fmt.Errorf("invalid expression: %s", term)
				}
				idx, err := strconv.Atoi(fields[1])
				if err != nil {
					return policy{}, fmt.Errorf("invalid arg: %s", term)
				}
				val, err := strconv.ParseUint(fields[3], 0, 64)
				if err != nil {
					return policy{}, fmt.Errorf("invalid value: %s", term)
				}
				conj = append(conj, argComp{idx: idx, oper: fields[2], val: val})
			}
			alternatives = append(alternatives, conj)
		}
	}

	// An explicit "return ERRNO" becomes the policy's default action.
	if ret != "" {
		errno, err := strconv.ParseUint(ret, 0, 16)
		if err != nil {
			return policy{}, fmt.Errorf("invalid errno: %s", ret)
		}
		def = ptr(bpfRet(retErrno(syscall.Errno(errno))))
	}

	return policy{
		name: name,
		expr: alternatives,
		then: bpfRet(retAllow()), // a satisfied expression always allows
		def:  def,
	}, nil
}
// parseLines parses multiple policy lines, each one for a single syscall.
//
// Lines are trimmed of surrounding whitespace before being classified, so
// blank lines, whitespace-only lines (including a lone "\r" left over from
// CRLF line endings) and comment lines whose first non-blank character is
// "#" are all ignored.
//
// Multiple policies for a syscall are detected and reported as error.
// Every error is prefixed with the 1-based number(s) of the offending
// line(s); on error the returned slice is nil.
func parseLines(lines []string) ([]policy, error) {
	var ps []policy
	// seen maps a syscall name to the line number of its policy.
	seen := make(map[string]int)
	for i, line := range lines {
		lineno := i + 1
		// Trim first: parseLine trims as well, so an untrimmed check would
		// reject whitespace-only lines and indented comments as invalid
		// syntax instead of skipping them.
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		p, err := parseLine(line)
		if err != nil {
			return nil, fmt.Errorf("line %d: %v", lineno, err)
		}
		if first, dup := seen[p.name]; dup {
			return nil, fmt.Errorf("lines %d,%d: multiple policies for %s",
				first, lineno, p.name)
		}
		seen[p.name] = lineno
		ps = append(ps, p)
	}
	return ps, nil
}
// parseFile loads the Chromium-OS Seccomp-BPF policy file at path, splits it
// into lines on "\n" and hands them to parseLines. A read failure is returned
// unchanged.
func parseFile(path string) ([]policy, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	lines := strings.Split(string(data), "\n")
	return parseLines(lines)
}
// compile compiles a Seccomp-BPF program implementing the syscall policies.
// long specifies whether to generate 32-bit or 64-bit argument comparisons.
// def is the overall default action to take when the syscall does not match
// any policy in the filter.
//
// Layout of the generated program:
//  1. Load the arch field and return the kill action unless it equals
//     auditArch.
//  2. Load the syscall number.
//  3. For each policy, in the order given: skip the policy if the syscall
//     number differs; otherwise evaluate expr as an OR of AND groups,
//     emitting p.then after each group, and finally emit p.def (or a jump
//     to the overall default when p.def is nil).
//  4. The overall default action def.
//
// Forward jumps are emitted as placeholders and patched once their target
// label is reached. The labels used are "nextand", "nextor", "nextcall" and
// "default".
//
// An error is returned for an unknown syscall name, an unknown comparison
// operator, or if any label is left unresolved at the end.
func compile(ps []policy, long bool, def SockFilter) ([]SockFilter, error) {
var bpf []SockFilter
// do appends one instruction to the program.
do := func(insn SockFilter) {
bpf = append(bpf, insn)
}
// ref maps a label to addresses of all the instructions that jump to it.
ref := make(map[string][]int)
jump := func(name string) {
// jump to a label with unresolved address: insert a placeholder instruction.
ref[name] = append(ref[name], len(bpf))
do(SockFilter{})
}
label := func(name string) {
// label address resolved: replace placeholder instructions with actual jumps.
// The offset is relative to the instruction following the jump, and the
// label refers to the next instruction to be emitted (index len(bpf)).
for _, i := range ref[name] {
bpf[i] = bpfJump(len(bpf) - (i + 1))
}
// Forget the label so that the same name can be reused for a later target.
delete(ref, name)
}
// Conditional jumps: jump if condition is true, fall through otherwise.
// Each helper emits a conditional instruction followed by an unconditional
// jump placeholder: the conditional either falls into the jump or skips
// over it.
jeq := func(val uint32, target string) {
// if A == val { goto target }
do(bpfJeq(val, 0, 1))
jump(target)
}
jne := func(val uint32, target string) {
// if A != val { goto target }
do(bpfJeq(val, 1, 0))
jump(target)
}
jset := func(val uint32, target string) {
// if A&val != 0 { goto target }
do(bpfJset(val, 0, 1))
jump(target)
}
jnset := func(val uint32, target string) {
// if A&val == 0 { goto target }
do(bpfJset(val, 1, 0))
jump(target)
}
// Prologue: if the arch matches auditArch, skip over the kill instruction;
// otherwise return the kill action.
do(bpfLoadArch())
do(bpfJeq(auditArch, 1, 0))
do(bpfRet(retKill()))
// The syscall number is loaded once; every policy's number check below
// runs before any argument word is loaded for that policy.
do(bpfLoadNR())
for _, p := range ps {
nr, ok := syscallNum[p.name]
if !ok {
return nil, fmt.Errorf("unknown syscall: %s", p.name)
}
// Not this syscall: skip the whole policy.
jne(uint32(nr), "nextcall")
for _, and := range p.expr {
for _, arg := range and {
// Split the 64-bit value into the two 32-bit words that are compared
// separately.
val := struct{ high, low uint32 }{uint32(arg.val >> 32), uint32(arg.val)}
switch arg.oper {
case "==":
// Equal only if both words match; any mismatch fails this && group.
if long {
do(bpfLoadArg(arg.idx, 1))
jne(val.high, "nextor")
}
do(bpfLoadArg(arg.idx, 0))
jne(val.low, "nextor")
case "!=":
// Differing high words already satisfy the comparison. Otherwise the
// comparison fails only if the low words are equal too.
if long {
do(bpfLoadArg(arg.idx, 1))
jne(val.high, "nextand")
}
do(bpfLoadArg(arg.idx, 0))
jeq(val.low, "nextor")
case "&":
// A common bit in the high word already satisfies the comparison.
// Otherwise the comparison fails if the low word has no common bit.
if long {
do(bpfLoadArg(arg.idx, 1))
jset(val.high, "nextand")
}
do(bpfLoadArg(arg.idx, 0))
jnset(val.low, "nextor")
default:
return nil, fmt.Errorf("unknown operator: %q", arg.oper)
}
// Comparison was satisfied. Move on to the next comparison in &&.
label("nextand")
}
// All comparisons in && were satisfied.
do(p.then)
// Some comparison in && was false. Move on to the next expression in ||.
label("nextor")
}
// All expressions in || evaluated to false (or expr was nil).
if p.def != nil {
do(*p.def)
} else {
jump("default")
}
// Target for the syscall number mismatch above: the next policy starts here.
label("nextcall")
}
// Reached by jumps to "default" and by falling through after the last
// policy when no syscall number matched.
label("default")
do(def)
// Every label used above is resolved by now; anything left is a bug in
// the code generation.
if len(ref) > 0 {
return nil, fmt.Errorf("unresolved labels: %v\n%v", ref, bpf)
}
return bpf, nil
}
// Compile reads the Chromium-OS policy file at path and compiles it into a
// Seccomp-BPF filter program implementing the policies.
//
// Argument comparisons cover both 32-bit words when nbits is greater than
// 32, and the overall default action of the program is the kill action.
// Errors from reading, parsing or compiling the policy are returned as is.
func Compile(path string) ([]SockFilter, error) {
	policies, err := parseFile(path)
	if err != nil {
		return nil, err
	}
	long := nbits > 32
	defaultAction := bpfRet(retKill())
	return compile(policies, long, defaultAction)
}
// prctl is a wrapper for the 'prctl' system call.
// See 'man prctl' for details.
//
// At most four arguments may follow option; missing ones are passed as zero.
// More than four yields syscall.E2BIG without making the system call. A
// failing system call is reported as its syscall.Errno.
func prctl(option uintptr, args ...uintptr) error {
	const maxArgs = 4
	if len(args) > maxArgs {
		return syscall.E2BIG
	}
	var a [maxArgs]uintptr // zero-padded copy of args
	copy(a[:], args)
	if _, _, errno := syscall.Syscall6(C.__NR_prctl, option, a[0], a[1], a[2], a[3], 0); errno != 0 {
		return errno
	}
	return nil
}
// seccomp is a wrapper for the 'seccomp' system call.
// See <linux/seccomp.h> for valid op and flag values.
// uargs is typically a pointer to struct sock_fprog.
//
// A failing system call is reported as its syscall.Errno.
func seccomp(op, flags uintptr, uargs unsafe.Pointer) error {
	if _, _, errno := syscall.Syscall(C.__NR_seccomp, op, flags, uintptr(uargs)); errno != 0 {
		return errno
	}
	return nil
}
// CheckSupport checks for the required seccomp support in the kernel.
//
// Four probes run in order: seccomp itself (PR_GET_SECCOMP), filter mode via
// prctl, filter mode via the seccomp system call, and the TSYNC flag. The
// last three pass a null filter pointer on purpose and treat EFAULT as the
// sign that the request was recognised; any other outcome is reported as
// missing support. The error of the first failing probe is returned.
func CheckSupport() error {
	// This is based on http://outflux.net/teach-seccomp/autodetect.html.
	err := prctl(C.PR_GET_SECCOMP)
	if err != nil {
		return fmt.Errorf("seccomp not available: %v", err)
	}

	err = prctl(C.PR_SET_SECCOMP, C.SECCOMP_MODE_FILTER, 0)
	if err != syscall.EFAULT {
		return fmt.Errorf("seccomp filter not available: %v", err)
	}

	err = seccomp(C.SECCOMP_SET_MODE_FILTER, 0, nil)
	if err != syscall.EFAULT {
		return fmt.Errorf("seccomp syscall not available: %v", err)
	}

	err = seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, nil)
	if err != syscall.EFAULT {
		return fmt.Errorf("seccomp tsync not available: %v", err)
	}
	return nil
}
// Load makes the seccomp system call to install the bpf filter for
// all threads (with tsync). prctl(set_no_new_privs, 1) must have
// been called (from the same thread) before calling Load for the
// first time.
//
// Most users of this library should use Install instead of calling
// Load directly. There are a couple of situations where it may be
// necessary to use Load instead of Install:
//   - If a previous call to Install has disabled the 'prctl' system
//     call, Install cannot be called again. In that case, it is safe
//     to add additional filters directly with Load.
//   - If the process is running as a privileged user, and you want
//     to load the seccomp filter without setting no_new_privs.
//
// Load returns an error, without making the system call, if bpf is empty
// or has more than 0xffff instructions (the most the 16-bit length field
// of SockFprog can describe; the kernel may enforce a lower limit of its
// own). Otherwise it returns the result of the seccomp system call.
func Load(bpf []SockFilter) error {
	// Taking &bpf[0] below would panic on an empty program, so reject it
	// explicitly with an error instead.
	if len(bpf) == 0 {
		return fmt.Errorf("empty filter program")
	}
	if size, limit := len(bpf), 0xffff; size > limit {
		return fmt.Errorf("filter program too big: %d bpf instructions (limit = %d)", size, limit)
	}
	prog := &SockFprog{
		Filter: &bpf[0],
		Len:    uint16(len(bpf)),
	}
	return seccomp(C.SECCOMP_SET_MODE_FILTER, C.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(prog))
}
// Install makes the necessary system calls to install the Seccomp-BPF
// filter for the current process (all threads). Install can be called
// multiple times to install additional filters.
//
// It sets no_new_privs and then calls Load. The goroutine is pinned to its
// OS thread for the duration so that both system calls are made from the
// same thread. The first error encountered is returned.
func Install(bpf []SockFilter) error {
	// prctl(set_no_new_privs, 1) must be called (from the same thread)
	// before a seccomp filter can be installed by an unprivileged user:
	// - http://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	err := prctl(C.PR_SET_NO_NEW_PRIVS, 1)
	if err == nil {
		err = Load(bpf)
	}
	return err
}