firecracker/tests/integration_tests/functional/test_pause_resume.py at main · firecracker-microvm/firecracker · GitHub

Name: firecracker/tests/integration_tests/functional/test_pause_resume.py at main · firecracker-microvm/firecracker · GitHub
Rating: 4.6 (4761 reviews)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Basic test scenarios for pausing and resuming a microVM."""

import platform
import time
from subprocess import TimeoutExpired

import pytest


def verify_net_emulation_paused(metrics):
    """Verify net emulation is paused based on provided metrics.

    Args:
        metrics: Mapping with a ``"net"`` entry that maps counter names to
            their values.

    Raises:
        AssertionError: If any of the checked counters is not zero.
        KeyError: If ``"net"`` or one of the checked counters is missing.
    """
    net_metrics = metrics["net"]

    # Every RX/TX/tap counter below must be zero. The counters are checked in
    # this order, so the first non-zero one is the one reported.
    idle_counters = (
        "rx_queue_event_count",
        "rx_partial_writes",
        "rx_tap_event_count",
        "rx_bytes_count",
        "rx_packets_count",
        "rx_fails",
        "rx_count",
        "tap_read_fails",
        "tap_write_fails",
        "tx_bytes_count",
        "tx_fails",
        "tx_count",
        "tx_packets_count",
        "tx_queue_event_count",
    )
    for counter in idle_counters:
        value = net_metrics[counter]
        assert value == 0, f"net metric {counter!r} is {value!r}, expected 0"

    # Dump the metrics so they show up in the captured test output.
    print(net_metrics)


def test_pause_resume(uvm_nano):
    """
    Test scenario: boot/pause/resume.

    Checks that pause/resume requests are rejected before boot, that a paused
    guest stops answering over SSH and handles no net events, and that
    repeating a `Paused` or `Resumed` request is accepted.
    """
    microvm = uvm_nano
    microvm.add_net_iface()

    # Pausing the microVM before being started is not allowed.
    with pytest.raises(RuntimeError):
        microvm.api.vm.patch(state="Paused")

    # Resuming the microVM before being started is also not allowed.
    with pytest.raises(RuntimeError):
        microvm.api.vm.patch(state="Resumed")

    microvm.start()

    # Pausing the microVM after it's been started is successful.
    microvm.api.vm.patch(state="Paused")

    # Flush and reset metrics as they contain pre-pause data.
    microvm.flush_metrics()

    # Verify guest is no longer active: the SSH command must time out.
    with pytest.raises(TimeoutExpired):
        microvm.ssh.check_output("true", timeout=1)

    # Verify emulation was indeed paused and no events from either
    # guest or host side were handled.
    verify_net_emulation_paused(microvm.flush_metrics())

    # Pausing the microVM when it is already `Paused` is allowed
    # (microVM remains in `Paused` state).
    microvm.api.vm.patch(state="Paused")

    # Resuming the microVM is successful.
    microvm.api.vm.patch(state="Resumed")

    # Verify guest is active again.
    microvm.ssh.check_output("true")

    # Resuming the microVM when it is already `Resumed` is allowed
    # (microVM remains in the running state).
    microvm.api.vm.patch(state="Resumed")

    # Verify guest is still active.
    microvm.ssh.check_output("true")

    microvm.kill()


def test_describe_instance(uvm_nano):
    """
    Test scenario: DescribeInstance different states.

    Walks the microVM through not-started, running, paused and resumed, and
    checks the state string in the describe response after each transition.
    """
    microvm = uvm_nano

    # Check MicroVM state is "Not started"
    response = microvm.api.describe.get()
    assert "Not started" in response.text

    # Start MicroVM
    microvm.start()

    # Check MicroVM state is "Running"
    response = microvm.api.describe.get()
    assert "Running" in response.text

    # Pause MicroVM
    microvm.api.vm.patch(state="Paused")

    # Check MicroVM state is "Paused"
    response = microvm.api.describe.get()
    assert "Paused" in response.text

    # Resume MicroVM (the PATCH response itself is not inspected)
    microvm.api.vm.patch(state="Resumed")

    # Check MicroVM state is "Running" after VM is resumed
    response = microvm.api.describe.get()
    assert "Running" in response.text

    microvm.kill()


def test_pause_resume_preboot(uvm_nano):
    """
    Test pause/resume operations are not allowed pre-boot.

    Both requests must raise a RuntimeError whose message contains the
    "not supported before starting the microVM" text.
    """
    basevm = uvm_nano

    # `match` is applied as a regex search against the error message; this
    # string contains no regex metacharacters, so it matches literally.
    expected_err = "not supported before starting the microVM"

    # Try to pause microvm when not running, it must fail.
    with pytest.raises(RuntimeError, match=expected_err):
        basevm.api.vm.patch(state="Paused")

    # Try to resume microvm when not running, it must fail.
    with pytest.raises(RuntimeError, match=expected_err):
        basevm.api.vm.patch(state="Resumed")


@pytest.mark.skipif(
    platform.machine() != "x86_64", reason="Only x86_64 supports pvclocks."
)
def test_kvmclock_ctrl(uvm_plain_any):
    """
    Test that pausing vCPUs does not trigger a soft lock-up
    """

    microvm = uvm_plain_any
    microvm.help.enable_console()
    microvm.spawn()

    # With 2 vCPUs under certain conditions soft lockup warnings can rarely be in dmesg causing this test to fail.
    # Example of the warning: `watchdog: BUG: soft lockup - CPU#0 stuck for (x)s! [(udev-worker):758]`
    # With 1 vCPU this intermittent issue doesn't occur. If the KVM_CLOCK_CTRL IOCTL is not made
    # the test will fail with 1 vCPU, so we can assert the call to the IOCTL is made.
    microvm.basic_config(vcpu_count=1)
    microvm.add_net_iface()
    microvm.start()

    # Launch reproducer in the guest (over SSH, in the background).
    # This runs `ls -R /` in a loop inside the guest for up to 60 seconds. The command writes
    # its output to the console. This detail is important, as writing to the console seems to
    # increase the probability that we will pause the execution inside the kernel and cause a
    # lock up. Setting the KVM_CLOCK_CTRL bit, which informs the guest we're pausing the vCPUs,
    # should avoid that lock up.
    microvm.ssh.check_output(
        "timeout 60 sh -c 'while true; do ls -R /; done' > /dev/ttyS0 2>&1 < /dev/null &"
    )

    # 12 pause/resume cycles with a 5 second pause each.
    for _ in range(12):
        microvm.api.vm.patch(state="Paused")
        time.sleep(5)
        microvm.api.vm.patch(state="Resumed")

    # None of the known stall / lock-up signatures may appear in the guest kernel log.
    dmesg = microvm.ssh.check_output("dmesg").stdout
    assert "rcu_sched self-detected stall on CPU" not in dmesg
    assert "rcu_preempt detected stalls on CPUs/tasks" not in dmesg
    assert "BUG: soft lockup -" not in dmesg