Skip to content

Commit

Permalink
Add memory monitor measurement logics (#408)
Browse files Browse the repository at this point in the history
* Adding mem monitoring measurement

* Adding unit test for measureCELEvents
  • Loading branch information
yawangwang authored Feb 9, 2024
1 parent fd156ad commit f1aa3d2
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 24 deletions.
70 changes: 48 additions & 22 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,23 @@ func appendTokenMounts(mounts []specs.Mount) []specs.Mount {
return append(mounts, m)
}

// measureCELEvents measures all launch-time events into the COS event log via
// the AttestationAgent, in this order:
//
//  1. the container claims (measureContainerClaims),
//  2. the memory monitoring state, only when the EnableMeasureMemoryMonitor
//     experiment is set on the launch spec,
//  3. the launch separator event.
//
// It returns the first error encountered; errors from the first two steps are
// wrapped so callers can still inspect the underlying cause with errors.Is/As.
func (r *ContainerRunner) measureCELEvents(ctx context.Context) error {
	if err := r.measureContainerClaims(ctx); err != nil {
		return fmt.Errorf("failed to measure container claims: %w", err)
	}

	// The memory monitoring measurement is gated behind an experiment flag.
	if r.launchSpec.Experiments.EnableMeasureMemoryMonitor {
		if err := r.measureMemoryMonitor(); err != nil {
			return fmt.Errorf("failed to measure memory monitoring state: %w", err)
		}
	}

	// The separator is measured last, after every other event succeeded.
	separator := cel.CosTlv{
		EventType:    cel.LaunchSeparatorType,
		EventContent: nil, // Success
	}
	return r.attestAgent.MeasureEvent(separator)
}

// measureContainerClaims will measure various container claims into the COS
// eventlog in the AttestationAgent.
func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error {
Expand Down Expand Up @@ -334,11 +351,21 @@ func (r *ContainerRunner) measureContainerClaims(ctx context.Context) error {
}
}

separator := cel.CosTlv{
EventType: cel.LaunchSeparatorType,
EventContent: nil, // Success
return nil
}

// measureMemoryMonitor will measure memory monitoring claims into the COS
// eventlog in the AttestationAgent.
//
// The event content is a single byte: 1 when
// launchSpec.MemoryMonitoringEnabled is set, 0 otherwise. A log line is
// written once the event has been measured successfully.
func (r *ContainerRunner) measureMemoryMonitor() error {
	var enabled uint8
	if r.launchSpec.MemoryMonitoringEnabled {
		enabled = 1
	}

	event := cel.CosTlv{EventType: cel.MemoryMonitorType, EventContent: []byte{enabled}}
	if err := r.attestAgent.MeasureEvent(event); err != nil {
		return err
	}
	r.logger.Println("Successfully measured memory monitoring event")
	return nil
}

// Retrieves the default OIDC token from the attestation service, and returns how long
Expand Down Expand Up @@ -494,9 +521,10 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()

if err := r.measureContainerClaims(ctx); err != nil {
return fmt.Errorf("failed to measure container claims: %v", err)
if err := r.measureCELEvents(ctx); err != nil {
return fmt.Errorf("failed to measure CEL events: %v", err)
}

if err := r.fetchAndWriteToken(ctx); err != nil {
return fmt.Errorf("failed to fetch and write OIDC token: %v", err)
}
Expand All @@ -513,24 +541,22 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
defer teeServer.Shutdown(ctx)
}

if r.launchSpec.Experiments.EnableMemoryMonitoring {
// start node-problem-detector.service to collect memory related metrics.
if r.launchSpec.MemoryMonitoringEnabled {
r.logger.Println("MemoryMonitoring is enabled by the VM operator")
s, err := systemctl.New()
if err != nil {
return fmt.Errorf("failed to create systemctl client: %v", err)
}
defer s.Close()
// start node-problem-detector.service to collect memory related metrics.
if r.launchSpec.MemoryMonitoringEnabled {
r.logger.Println("MemoryMonitoring is enabled by the VM operator")
s, err := systemctl.New()
if err != nil {
return fmt.Errorf("failed to create systemctl client: %v", err)
}
defer s.Close()

r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service")
if err := s.Start("node-problem-detector.service"); err != nil {
return fmt.Errorf("failed to start node-problem-detector.service: %v", err)
}
r.logger.Println("node-problem-detector.service successfully started.")
} else {
r.logger.Println("MemoryMonitoring is disabled by the VM operator")
r.logger.Println("Starting a systemctl operation: systemctl start node-problem-detector.service")
if err := s.Start("node-problem-detector.service"); err != nil {
return fmt.Errorf("failed to start node-problem-detector.service: %v", err)
}
r.logger.Println("node-problem-detector.service successfully started.")
} else {
r.logger.Println("MemoryMonitoring is disabled by the VM operator")
}

var streamOpt cio.Opt
Expand Down
127 changes: 127 additions & 0 deletions launcher/container_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@ import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/defaults"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/golang-jwt/jwt/v4"
"github.com/google/go-cmp/cmp"
"github.com/google/go-tpm-tools/cel"
"github.com/google/go-tpm-tools/launcher/agent"
"github.com/google/go-tpm-tools/launcher/internal/experiments"
"github.com/google/go-tpm-tools/launcher/launcherfile"
"github.com/google/go-tpm-tools/launcher/spec"
"github.com/opencontainers/go-digest"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/oauth2"
)

Expand Down Expand Up @@ -540,3 +544,126 @@ func TestInitImageDockerPublic(t *testing.T) {
}
}
}

// TestMeasureCELEvents checks that measureCELEvents hands the expected
// sequence of COS event types to the attestation agent, both with and without
// the EnableMeasureMemoryMonitor experiment.
func TestMeasureCELEvents(t *testing.T) {
	ctx := context.Background()
	container := &fakeContainer{
		image: &fakeImage{
			name:   "fake image name",
			digest: "fake digest",
			id:     "fake id",
		},
		args: []string{"fake args"},
		env:  []string{"fake env"},
	}

	testCases := []struct {
		name          string
		wantCELEvents []cel.CosType
		launchSpec    spec.LaunchSpec
	}{
		{
			name: "measure full container events and launch separator event",
			wantCELEvents: []cel.CosType{
				cel.ImageRefType,
				cel.ImageDigestType,
				cel.RestartPolicyType,
				cel.ImageIDType,
				cel.ArgType,
				cel.EnvVarType,
				cel.OverrideEnvType,
				cel.OverrideArgType,
				cel.LaunchSeparatorType,
			},
			launchSpec: spec.LaunchSpec{
				Envs: []spec.EnvVar{{Name: "hello", Value: "world"}},
				Cmd:  []string{"hello world"},
			},
		},
		{
			name: "measure partial container events, memory monitoring event, and launch separator event",
			wantCELEvents: []cel.CosType{
				cel.ImageRefType,
				cel.ImageDigestType,
				cel.RestartPolicyType,
				cel.ImageIDType,
				cel.ArgType,
				cel.EnvVarType,
				cel.MemoryMonitorType,
				cel.LaunchSeparatorType,
			},
			launchSpec: spec.LaunchSpec{Experiments: experiments.Experiments{EnableMeasureMemoryMonitor: true}},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			gotEvents := []cel.CosType{}

			fakeAgent := &fakeAttestationAgent{
				// Record the COS event type of every measured event. Decoding
				// failures are returned so that they surface through
				// measureCELEvents instead of being recorded as a bogus type.
				measureEventFunc: func(content cel.Content) error {
					got, err := content.GetTLV()
					if err != nil {
						return err
					}
					// The outer TLV's value is itself an encoded TLV whose
					// type is the COS event type.
					tlv := &cel.TLV{}
					if err := tlv.UnmarshalBinary(got.Value); err != nil {
						return err
					}
					gotEvents = append(gotEvents, cel.CosType(tlv.Type))
					return nil
				},
			}

			r := ContainerRunner{
				attestAgent: fakeAgent,
				container:   container,
				launchSpec:  tc.launchSpec,
				logger:      log.Default(),
			}

			if err := r.measureCELEvents(ctx); err != nil {
				t.Errorf("failed to measureCELEvents: %v", err)
			}

			if !cmp.Equal(gotEvents, tc.wantCELEvents) {
				t.Errorf("failed to measure CEL events, got %v, but want %v", gotEvents, tc.wantCELEvents)
			}
		})
	}
}

// Compile-time checks that the test fakes satisfy the containerd interfaces
// they stand in for.
var (
	_ containerd.Container = (*fakeContainer)(nil)
	_ containerd.Image     = (*fakeImage)(nil)
)

// fakeContainer is a test double for containerd.Container. It embeds the
// interface and stores the canned image, process args, and environment that
// its overridden methods hand back.
type fakeContainer struct {
	containerd.Container

	image     containerd.Image
	args, env []string
}

// Image returns the canned image stored on the fake; the error is always nil.
func (c *fakeContainer) Image(_ context.Context) (containerd.Image, error) {
	img := c.image
	return img, nil
}

// Spec returns an OCI spec whose process section carries the fake's args and
// env; the error is always nil.
func (c *fakeContainer) Spec(_ context.Context) (*oci.Spec, error) {
	process := &specs.Process{
		Args: c.args,
		Env:  c.env,
	}
	return &oci.Spec{Process: process}, nil
}

// fakeImage is a test double for containerd.Image. It embeds the interface
// and stores the canned name, target digest, and config digest (id) that its
// overridden methods hand back.
type fakeImage struct {
	containerd.Image

	name       string
	digest, id digest.Digest
}

// Name returns the canned image name.
func (i *fakeImage) Name() string {
	name := i.name
	return name
}

// Target returns a descriptor carrying only the canned image digest.
func (i *fakeImage) Target() v1.Descriptor {
	var desc v1.Descriptor
	desc.Digest = i.digest
	return desc
}

// Config returns a descriptor carrying only the canned image id as its
// digest; the error is always nil.
func (i *fakeImage) Config(context.Context) (v1.Descriptor, error) {
	var desc v1.Descriptor
	desc.Digest = i.id
	return desc, nil
}
6 changes: 4 additions & 2 deletions launcher/image/test/test_memory_monitoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ steps:
- name: 'gcr.io/cloud-builders/gcloud'
id: CheckMemoryMonitoringEnabled
entrypoint: 'bash'
args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'node-problem-detector.service successfully started']
# Search for a regex pattern that ensures memory monitoring is enabled and measured into the COS event logs.
args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-enable-${BUILD_ID}', '${_ZONE}', 'Successfully measured memory monitoring event.*node-problem-detector.service successfully started']
waitFor: ['CreateVMMemoryMemonitorEnabled']
- name: 'gcr.io/cloud-builders/gcloud'
id: CleanUpVMMemoryMonitorEnabled
Expand All @@ -47,7 +48,8 @@ steps:
- name: 'gcr.io/cloud-builders/gcloud'
id: CheckMemoryMonitoringDisabled
entrypoint: 'bash'
args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'MemoryMonitoring is disabled by the VM operator']
# Search for a regex pattern that ensures memory monitoring is disabled and measured into the COS event logs.
args: ['scripts/test_memory_monitoring.sh', '${_VM_NAME_PREFIX}-disable-${BUILD_ID}', '${_ZONE}', 'Successfully measured memory monitoring event.*MemoryMonitoring is disabled by the VM operator']
waitFor: ['CreateVMMemoryMemonitorDisabled']
- name: 'gcr.io/cloud-builders/gcloud'
id: CleanUpVMMemoryMonitorDisabled
Expand Down
1 change: 1 addition & 0 deletions launcher/internal/experiments/experiments.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type Experiments struct {
EnableOnDemandAttestation bool
EnableMemoryMonitoring bool
EnableSignedContainerCache bool
EnableMeasureMemoryMonitor bool
}

// New takes a filepath, opens the file, and calls ReadJsonInput with the contents
Expand Down
14 changes: 14 additions & 0 deletions launcher/internal/systemctl/systemctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
type Systemd interface {
	// Start starts the given unit.
	Start(unit string) error
	// Stop stops the given unit.
	Stop(unit string) error
	// IsActive reports the active state of the given unit, like
	// `systemctl is-active $unit`.
	IsActive(ctx context.Context, unit string) (string, error)
	// Close releases the client's underlying connection.
	Close()
}

Expand Down Expand Up @@ -42,6 +43,19 @@ func (s *Systemctl) Stop(unit string) error {
return runSystemdCmd(s.dbus.StopUnitContext, "stop", unit)
}

// IsActive mirrors `systemctl is-active $unit`: it looks the unit up over
// dbus by name and returns its active state.
// The status can be "active", "activating", "deactivating", "inactive" or "failed".
//
// An error is returned when the dbus lookup fails or when it does not yield
// exactly one unit.
func (s *Systemctl) IsActive(ctx context.Context, unit string) (string, error) {
	units, err := s.dbus.ListUnitsByNamesContext(ctx, []string{unit})
	switch {
	case err != nil:
		return "", err
	case len(units) != 1:
		return "", fmt.Errorf("want 1 unit from ListUnitsByNames, got %d", len(units))
	}
	return units[0].ActiveState, nil
}

// Close tears down the client's dbus connection.
func (s *Systemctl) Close() {
	s.dbus.Close()
}

Expand Down
40 changes: 40 additions & 0 deletions launcher/internal/systemctl/systemctl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,43 @@ func TestRunSystmedCmd(t *testing.T) {
})
}
}

// TestGetStatus exercises IsActive against two units: `-.mount`, which should
// exist (and be active) on all systemd systems, and
// node-problem-detector.service, which is expected to be reported as inactive
// in the test environment.
//
// The test is skipped when a systemctl client cannot be created.
func TestGetStatus(t *testing.T) {
	s, err := New()
	if err != nil {
		t.Skipf("Failed to create systemctl client: %v", err)
	}

	t.Cleanup(s.Close)

	testCases := []struct {
		name string
		unit string
		want string
	}{
		{
			name: "success",
			unit: "-.mount", // `-.mount` should exist on all systemd systems.
			want: "active",
		},
		{
			name: "success with an inactive unit",
			unit: "node-problem-detector.service",
			want: "inactive",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := s.IsActive(context.Background(), tc.unit)
			if err != nil {
				// Report the error itself; the returned status is empty on failure.
				t.Fatalf("failed to read status for unit [%s]: %v", tc.unit, err)
			}
			if got != tc.want {
				t.Errorf("IsActive returned unexpected status for unit [%s], got %s, but want %s", tc.unit, got, tc.want)
			}
		})
	}
}

0 comments on commit f1aa3d2

Please sign in to comment.