Skip to content

Commit 2b88c03

Browse files
committed
refactor: move some c code to go
Move all of the stage-0 C code and some of the stage-2 C code to Go, because this code is not related to namespaces and can be implemented in Go. Signed-off-by: lifubang <[email protected]>
1 parent c873744 commit 2b88c03

14 files changed

+555
-560
lines changed

libcontainer/configs/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ type IDMap struct {
2727
Size int64 `json:"size"`
2828
}
2929

30+
// ToString is to serialize the IDMap to a string.
31+
func (i IDMap) ToString() string {
32+
return fmt.Sprintf("%d %d %d", i.ContainerID, i.HostID, i.Size)
33+
}
34+
3035
// Seccomp represents syscall restrictions
3136
// By default, only the native architecture of the kernel is allowed to be used
3237
// for syscalls. Additional architectures can be added by specifying them in

libcontainer/container_linux.go

Lines changed: 6 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
536536
cmd.Env = append(cmd.Env,
537537
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
538538
)
539+
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
540+
cmd.Env = append(cmd.Env,
541+
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
542+
)
539543
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
540544
cmd.Env = append(cmd.Env,
541545
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
@@ -1022,17 +1026,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
10221026
return paths, nil
10231027
}
10241028

1025-
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
1026-
data := bytes.NewBuffer(nil)
1027-
for _, im := range idMap {
1028-
line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
1029-
if _, err := data.WriteString(line); err != nil {
1030-
return nil, err
1031-
}
1032-
}
1033-
return data.Bytes(), nil
1034-
}
1035-
10361029
// netlinkError is an error wrapper type for use by custom netlink message
10371030
// types. Panics with errors are wrapped in netlinkError so that the recover
10381031
// in bootstrapData can distinguish intentional panics.
@@ -1079,59 +1072,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
10791072
})
10801073
}
10811074

1082-
// write namespace paths only when we are not joining an existing user ns
1083-
_, joinExistingUser := nsMaps[configs.NEWUSER]
1084-
if !joinExistingUser {
1085-
// write uid mappings
1086-
if len(c.config.UIDMappings) > 0 {
1087-
if c.config.RootlessEUID {
1088-
// We resolve the paths for new{u,g}idmap from
1089-
// the context of runc to avoid doing a path
1090-
// lookup in the nsexec context.
1091-
if path, err := exec.LookPath("newuidmap"); err == nil {
1092-
r.AddData(&Bytemsg{
1093-
Type: UidmapPathAttr,
1094-
Value: []byte(path),
1095-
})
1096-
}
1097-
}
1098-
b, err := encodeIDMapping(c.config.UIDMappings)
1099-
if err != nil {
1100-
return nil, err
1101-
}
1102-
r.AddData(&Bytemsg{
1103-
Type: UidmapAttr,
1104-
Value: b,
1105-
})
1106-
}
1107-
1108-
// write gid mappings
1109-
if len(c.config.GIDMappings) > 0 {
1110-
b, err := encodeIDMapping(c.config.GIDMappings)
1111-
if err != nil {
1112-
return nil, err
1113-
}
1114-
r.AddData(&Bytemsg{
1115-
Type: GidmapAttr,
1116-
Value: b,
1117-
})
1118-
if c.config.RootlessEUID {
1119-
if path, err := exec.LookPath("newgidmap"); err == nil {
1120-
r.AddData(&Bytemsg{
1121-
Type: GidmapPathAttr,
1122-
Value: []byte(path),
1123-
})
1124-
}
1125-
}
1126-
if requiresRootOrMappingTool(c.config) {
1127-
r.AddData(&Boolmsg{
1128-
Type: SetgroupAttr,
1129-
Value: true,
1130-
})
1131-
}
1132-
}
1133-
}
1134-
11351075
if c.config.OomScoreAdj != nil {
11361076
// write oom_score_adj
11371077
r.AddData(&Bytemsg{
@@ -1140,12 +1080,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
11401080
})
11411081
}
11421082

1143-
// write rootless
1144-
r.AddData(&Boolmsg{
1145-
Type: RootlessEUIDAttr,
1146-
Value: c.config.RootlessEUID,
1147-
})
1148-
11491083
// write boottime and monotonic time ns offsets.
11501084
if c.config.TimeOffsets != nil {
11511085
var offsetSpec bytes.Buffer
@@ -1186,9 +1120,9 @@ func ignoreTerminateErrors(err error) error {
11861120
return err
11871121
}
11881122

1189-
func requiresRootOrMappingTool(c *configs.Config) bool {
1123+
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
11901124
gidMap := []configs.IDMap{
11911125
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
11921126
}
1193-
return !reflect.DeepEqual(c.GIDMappings, gidMap)
1127+
return !reflect.DeepEqual(gidMappings, gidMap)
11941128
}

libcontainer/container_setup_linux.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package libcontainer
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
"io"
7+
"os"
8+
9+
"github.com/opencontainers/runc/libcontainer/configs"
10+
"github.com/opencontainers/runc/libcontainer/system"
11+
"github.com/sirupsen/logrus"
12+
"github.com/vishvananda/netlink/nl"
13+
"golang.org/x/sys/execabs"
14+
"golang.org/x/sys/unix"
15+
)
16+
17+
// NsExecSyncMsg is a 32-bit message code exchanged between the parent and the
18+
// nsexec child during container setup.
19+
type NsExecSyncMsg uint32
20+
21+
// Sync message codes. They are numbered consecutively starting at 0x40; the
// parent answers each *Pls request from nsexec with the matching *Ack.
const (
22+
// SyncUsermapPls: nsexec requests the user namespace mappings.
SyncUsermapPls NsExecSyncMsg = iota + 0x40
23+
// SyncUsermapAck: the parent has set up the mappings.
SyncUsermapAck
24+
// SyncRecvPidPls: nsexec reports the container's pid; the pid follows the
// message as a uint32.
SyncRecvPidPls
25+
// SyncRecvPidAck: the parent has received the pid.
SyncRecvPidAck
26+
// SyncTimeOffsetsPls: nsexec requests that the timens offsets be configured.
SyncTimeOffsetsPls
27+
// SyncTimeOffsetsAck: the parent has configured the timens offsets.
SyncTimeOffsetsAck
28+
)
29+
30+
// bufSize is the size in bytes of one NsExecSyncMsg as read from or written to
// the sync socket.
const bufSize = 4
31+
32+
// setupNsExec is used to help nsexec to setup the container and wait the container's pid.
33+
func (s *containerProcess) setupNsExec(syncSock *os.File) error {
34+
logrus.Debugf("waiting nsexec to report the container's pid")
35+
err := ParseNsExecSync(syncSock, func(msg NsExecSyncMsg) error {
36+
switch msg {
37+
case SyncUsermapPls:
38+
logrus.Debugf("nsexec has requested userns mappings")
39+
if err := s.setupUsermap(); err != nil {
40+
return err
41+
}
42+
return AckNsExecSync(syncSock, SyncUsermapAck)
43+
case SyncTimeOffsetsPls:
44+
logrus.Debugf("nsexec has requested to configure timens offsets")
45+
if err := system.UpdateTimeNsOffsets(s.cmd.Process.Pid, s.container.config.TimeOffsets); err != nil {
46+
return err
47+
}
48+
return AckNsExecSync(syncSock, SyncTimeOffsetsAck)
49+
case SyncRecvPidPls:
50+
logrus.Debugf("nsexec has reported pid")
51+
var pid uint32
52+
if err := binary.Read(syncSock, nl.NativeEndian(), &pid); err != nil {
53+
return err
54+
}
55+
s.childPid = int(pid)
56+
return AckNsExecSync(syncSock, SyncRecvPidAck)
57+
default:
58+
return fmt.Errorf("unexpected message %d", msg)
59+
}
60+
})
61+
62+
return err
63+
}
64+
65+
// ParseNsExecSync runs the given callback function on each message received
66+
// from the child. It will return once the child sends SYNC_RECVPID_PLS.
67+
func ParseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
68+
var (
69+
msg NsExecSyncMsg
70+
buf [bufSize]byte
71+
)
72+
73+
native := nl.NativeEndian()
74+
75+
for {
76+
if _, err := io.ReadAtLeast(r, buf[:], bufSize); err != nil {
77+
return err
78+
}
79+
msg = NsExecSyncMsg(native.Uint32(buf[:]))
80+
if err := fn(msg); err != nil {
81+
return err
82+
}
83+
if msg == SyncRecvPidPls {
84+
break
85+
}
86+
}
87+
return nil
88+
}
89+
90+
// AckNsExecSync is used to send a message to the child.
91+
func AckNsExecSync(f *os.File, msg NsExecSyncMsg) error {
92+
var buf [bufSize]byte
93+
native := nl.NativeEndian()
94+
native.PutUint32(buf[:], uint32(msg))
95+
if _, err := unix.Write(int(f.Fd()), buf[:]); err != nil {
96+
logrus.Debugf("failed to write message to nsexec: %v", err)
97+
return err
98+
}
99+
return nil
100+
}
101+
102+
// setupUsermap is used to set up the user mappings.
103+
func (s *containerProcess) setupUsermap() error {
104+
var uidMapPath, gidMapPath string
105+
106+
// Enable setgroups(2) if we've been asked to. But we also have to explicitly
107+
// disable setgroups(2) if we're creating a rootless container for single-entry
108+
// mapping. (this is required since Linux 3.19).
109+
// For rootless multi-entry mapping, we should use newuidmap/newgidmap
110+
// to do mapping user namespace.
111+
if s.config.Config.RootlessEUID && !requiresRootOrMappingTool(s.config.Config.GIDMappings) {
112+
_ = system.UpdateSetgroups(s.cmd.Process.Pid, system.SetgroupsDeny)
113+
}
114+
115+
nsMaps := make(map[configs.NamespaceType]string)
116+
for _, ns := range s.container.config.Namespaces {
117+
if ns.Path != "" {
118+
nsMaps[ns.Type] = ns.Path
119+
}
120+
}
121+
_, joinExistingUser := nsMaps[configs.NEWUSER]
122+
if !joinExistingUser {
123+
// write uid mappings
124+
if len(s.container.config.UIDMappings) > 0 {
125+
if s.container.config.RootlessEUID {
126+
if path, err := execabs.LookPath("newuidmap"); err == nil {
127+
uidMapPath = path
128+
}
129+
}
130+
}
131+
132+
// write gid mappings
133+
if len(s.container.config.GIDMappings) > 0 {
134+
if s.container.config.RootlessEUID {
135+
if path, err := execabs.LookPath("newgidmap"); err == nil {
136+
gidMapPath = path
137+
}
138+
}
139+
}
140+
}
141+
142+
/* Set up mappings. */
143+
if err := system.UpdateUidmap(uidMapPath, s.cmd.Process.Pid, s.container.config.UIDMappings); err != nil {
144+
return err
145+
}
146+
return system.UpdateGidmap(gidMapPath, s.cmd.Process.Pid, s.container.config.GIDMappings)
147+
}

libcontainer/init_linux.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"runtime/debug"
1313
"strconv"
1414
"syscall"
15+
"unsafe"
1516

1617
"github.com/containerd/console"
1718
"github.com/opencontainers/runtime-spec/specs-go"
@@ -33,11 +34,6 @@ const (
3334
initStandard initType = "standard"
3435
)
3536

36-
type pid struct {
37-
Pid int `json:"stage2_pid"`
38-
PidFirstChild int `json:"stage1_pid"`
39-
}
40-
4137
// network is an internal struct used to setup container networks.
4238
type network struct {
4339
configs.Network
@@ -174,6 +170,11 @@ func startInitialization() (retErr error) {
174170

175171
logrus.SetOutput(logPipe)
176172
logrus.SetFormatter(new(logrus.JSONFormatter))
173+
174+
/* For debugging. */
175+
procName := append([]byte("runc:[2:INIT]"), 0)
176+
_ = unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&procName[0])), 0, 0, 0)
177+
177178
logrus.Debug("child process in init()")
178179

179180
// Only init processes have FIFOFD.
@@ -227,6 +228,24 @@ func startInitialization() (retErr error) {
227228
return err
228229
}
229230

231+
if _, err := unix.Setsid(); err != nil {
232+
return os.NewSyscallError("setsid", err)
233+
}
234+
235+
if err := unix.Setuid(0); err != nil {
236+
return os.NewSyscallError("setuid", err)
237+
}
238+
239+
if err := unix.Setgid(0); err != nil {
240+
return os.NewSyscallError("setgid", err)
241+
}
242+
243+
if !config.Config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
244+
if err := unix.Setgroups([]int{0}); err != nil {
245+
return os.NewSyscallError("setgroups", err)
246+
}
247+
}
248+
230249
// If init succeeds, it will not return, hence none of the defers will be called.
231250
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe)
232251
}

libcontainer/message_linux.go

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,11 @@ import (
1111
// list of known message types we want to send to bootstrap program
1212
// The number is randomly chosen to not conflict with known netlink types
1313
const (
14-
InitMsg uint16 = 62000
15-
CloneFlagsAttr uint16 = 27281
16-
NsPathsAttr uint16 = 27282
17-
UidmapAttr uint16 = 27283
18-
GidmapAttr uint16 = 27284
19-
SetgroupAttr uint16 = 27285
20-
OomScoreAdjAttr uint16 = 27286
21-
RootlessEUIDAttr uint16 = 27287
22-
UidmapPathAttr uint16 = 27288
23-
GidmapPathAttr uint16 = 27289
24-
TimeOffsetsAttr uint16 = 27290
14+
InitMsg uint16 = 62000
15+
CloneFlagsAttr uint16 = 27281
16+
NsPathsAttr uint16 = 27282
17+
OomScoreAdjAttr uint16 = 27286
18+
TimeOffsetsAttr uint16 = 27290
2519
)
2620

2721
type Int32msg struct {

libcontainer/nsenter/log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ void write_log(int level, const char *format, ...)
5858
if (stage == NULL)
5959
goto out;
6060
} else {
61-
ret = asprintf(&stage, "nsexec-%d", current_stage);
61+
ret = asprintf(&stage, "nsexec-%d", current_stage + 1);
6262
if (ret < 0) {
6363
stage = NULL;
6464
goto out;

0 commit comments

Comments
 (0)