Skip to content

Commit c92b3c2

Browse files
committed
Refactor to make more resilient
1 parent 6e1d8b2 commit c92b3c2

31 files changed

+344
-127
lines changed

createdisk.sh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ wait_for_ssh ${VM_NAME} ${VM_IP}
5252
if [ ${BUNDLE_TYPE} != "microshift" ]; then
5353
# Disable kubelet service
5454
${SSH} core@${VM_IP} -- sudo systemctl disable kubelet
55-
55+
5656
# Stop the kubelet service so it will not reprovision the pods
5757
${SSH} core@${VM_IP} -- sudo systemctl stop kubelet
5858
fi
@@ -109,6 +109,8 @@ ${SSH} core@${VM_IP} 'sudo bash -x -s' <<EOF
109109
[Unit]
110110
Description=gvisor-tap-vsock Network Traffic Forwarder
111111
After=sys-devices-virtual-net-%i.device
112+
After=crc-env-file-exists.path
113+
After=crc-no-tap.service
112114
113115
[Service]
114116
Restart=on-failure

systemd/crc-cluster-status.service

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
[Unit]
22
Description=CRC Unit checking if cluster is ready
3+
After=crc-env-file-exists.path
34
After=crc-wait-apiserver-up.service crc-pullsecret.service
45
After=ocp-mco-sshkey.service ocp-cluster-ca.service
56
After=ocp-custom-domain.service ocp-userpasswords.service
67
After=ocp-clusterid.service
78
StartLimitIntervalSec=450
8-
StartLimitBurst=10
9+
StartLimitBurst=40
910

1011
[Service]
1112
Type=oneshot
1213
Restart=on-failure
13-
RestartSec=40
14-
EnvironmentFile=-/etc/sysconfig/crc-env
14+
RestartSec=10
1515
ExecCondition=/usr/local/bin/crc-self-sufficient-env.sh
1616
ExecStart=/usr/local/bin/crc-cluster-status.sh
1717
RemainAfterExit=true

systemd/crc-cluster-status.sh

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@ set -o errtrace
77
set -x
88

99
export KUBECONFIG=/opt/kubeconfig
10+
MAXIMUM_LOGIN_RETRY=10
11+
RETRY_DELAY=5
1012

1113
if [ ! -f /opt/crc/pass_kubeadmin ]; then
1214
echo "kubeadmin password file not found"
@@ -21,19 +23,41 @@ fi
2123

2224

2325
echo "Logging into OpenShift with kubeadmin user to update $KUBECONFIG"
24-
COUNTER=1
25-
MAXIMUM_LOGIN_RETRY=10
2626

27-
# use a `(set +x)` subshell to avoid leaking the password
28-
until (set +x ; oc login --insecure-skip-tls-verify=true -u kubeadmin -p "$(cat /opt/crc/pass_kubeadmin)" https://api.crc.testing:6443 > /dev/null 2>&1); do
29-
if [ "$COUNTER" -ge "$MAXIMUM_LOGIN_RETRY" ]; then
30-
echo "Unable to login to the cluster..., authentication failed."
27+
try_login() {
28+
( # use a `(set +x)` subshell to avoid leaking the password
29+
set +x
30+
set +e # don't abort on error in this subshell
31+
oc login --insecure-skip-tls-verify=true \
32+
-u kubeadmin \
33+
-p "$(cat /opt/crc/pass_kubeadmin)" \
34+
https://api.crc.testing:6443 > /dev/null 2>&1
35+
)
36+
success="$?"
37+
if [[ "$success" == 0 ]]; then
38+
echo "Login successed"
39+
else
40+
echo "Login didn't complete ..."
41+
fi
42+
43+
return "$success"
44+
}
45+
46+
for ((counter=1; counter<=MAXIMUM_LOGIN_RETRY; counter++)); do
47+
echo "Login attempt $counter/$MAXIMUM_LOGIN_RETRY"
48+
if try_login; then
49+
break
50+
fi
51+
if (( counter == MAXIMUM_LOGIN_RETRY )); then
52+
echo "Unable to login to the cluster after $counter attempts; authentication failed."
3153
exit 1
3254
fi
33-
echo "Logging into OpenShift with updated credentials try $COUNTER, hang on...."
34-
sleep 5
35-
((COUNTER++))
55+
sleep "$RETRY_DELAY"
3656
done
3757

3858
# need to set a marker to let `crc` know the cluster is ready
3959
touch /tmp/.crc-cluster-ready
60+
61+
echo "All done"
62+
63+
exit 0

systemd/crc-dnsmasq.service

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
[Unit]
22
Description=CRC Unit for configuring dnsmasq
33
Wants=ovs-configuration.service
4+
After=crc-env-file-exists.path
45
After=ovs-configuration.service
56
Before=kubelet-dependencies.target
67
StartLimitIntervalSec=30
78

89
[Service]
910
Type=oneshot
1011
Restart=on-failure
11-
EnvironmentFile=-/etc/sysconfig/crc-env
12-
ExecStartPre=/bin/systemctl start ovs-configuration.service
1312
ExecCondition=/usr/local/bin/crc-self-sufficient-env.sh
13+
ExecStartPre=/bin/systemctl start ovs-configuration.service
1414
ExecStart=/usr/local/bin/crc-dnsmasq.sh
1515
ExecStartPost=/usr/bin/systemctl restart NetworkManager.service
1616
ExecStartPost=/usr/bin/systemctl restart dnsmasq.service

systemd/crc-env-file-exists.path

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
[Unit]
2+
Description=Wait for /etc/sysconfig/crc-env file to be populated
3+
4+
[Path]
5+
PathExists=/etc/sysconfig/crc-env
6+
7+
[Install]
8+
WantedBy=multi-user.target

systemd/crc-needs-tap.sh

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
#!/bin/bash
2+
3+
set -o pipefail
4+
set -o errexit
5+
set -o nounset
6+
set -o errtrace
7+
set -x
8+
9+
source /etc/sysconfig/crc-env || echo "WARNING: crc-env not found"
10+
11+
running_on_aws() {
12+
# Set a timeout for the curl command
13+
TIMEOUT=1
14+
15+
# Check the metadata service first
16+
if curl -s -m $TIMEOUT http://169.254.169.254/latest/meta-data/instance-id > /dev/null 2>&1; then
17+
echo "✅ Running on an AWS EC2 instance."
18+
return 0
19+
20+
# As a fallback, check system information
21+
elif [[ $(sudo cat /sys/class/dmi/id/product_name) == *"Amazon EC2"* ]]; then
22+
echo "✅ Running on an AWS EC2 instance (detected via DMI)."
23+
return 0
24+
else
25+
echo "❌ Not running on an AWS EC2 instance."
26+
return 1
27+
fi
28+
}
29+
30+
NEED_TAP=0
31+
DONT_NEED_TAP=1
32+
33+
if running_on_aws ; then
34+
echo "Running on AWS. Don't need tap0."
35+
exit $DONT_NEED_TAP
36+
fi
37+
38+
if systemd-detect-virt | grep -E -q '^(kvm|apple|microsoft)$' ; then
39+
echo "Running with '$(systemd-detect-virt)' virtualization. Need tap0."
40+
exit $NEED_TAP
41+
fi
42+
43+
if /usr/local/bin/crc-self-sufficient-env.sh; then
44+
echo "Running with a self-sufficient bundle. Don't keep tap0"
45+
exit $DONT_NEED_TAP
46+
fi
47+
48+
echo "No particular environment detected. Don't keep tap0"
49+
50+
exit $DONT_NEED_TAP

systemd/crc-no-tap.service

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
[Unit]
2-
Description=Ensure that tap0 network configuration is absent on Apple Virtualization
2+
Description=Ensure that tap0 network configuration is disabled when not necessary
33
Before=NetworkManager.service
4+
45
After=local-fs.target
6+
After=crc-env-file-exists.path
57
RequiresMountsFor=/etc/NetworkManager/system-connections
68

79
[Service]
810
Type=oneshot
9-
EnvironmentFile=-/etc/sysconfig/crc-env
11+
# start under the condition that CRC doesn't need TAP
12+
ExecCondition=/bin/bash -c '! /usr/local/bin/crc-needs-tap.sh'
1013
ExecStart=/usr/local/bin/crc-no-tap.sh
1114

1215
[Install]

systemd/crc-no-tap.sh

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,14 @@
11
#!/bin/bash
22

3-
# Return true if running under Apple Virtualization or CRC_SELF_SUFFICIENT is set, otherwise false
3+
set -o pipefail
4+
set -o errexit
5+
set -o nounset
6+
set -o errtrace
7+
set -x
48

5-
if systemd-detect-virt | grep -q '^apple$' || [ -n "$CRC_SELF_SUFFICIENT" ]; then
6-
rm -f /etc/NetworkManager/system-connections/tap0.nmconnection
7-
systemctl disable --now gv-user-network@tap0.service
8-
fi
9+
echo "Disabling the tap0 network configuration ..."
10+
11+
rm -f /etc/NetworkManager/system-connections/tap0.nmconnection
12+
systemctl disable --now gv-user-network@tap0.service || true
913

1014
exit 0

systemd/crc-pullsecret.service

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
[Unit]
22
Description=CRC Unit for adding pull secret to cluster
3+
After=cloud-init-crc-env.path
34
After=crc-wait-apiserver-up.service
45
StartLimitIntervalSec=450
5-
StartLimitBurst=10
6+
StartLimitBurst=40
67
ConditionPathExists=!/opt/crc/%n.done
78

89
[Service]
910
Type=oneshot
1011
Restart=on-failure
11-
RestartSec=40
12-
EnvironmentFile=-/etc/sysconfig/crc-env
12+
RestartSec=10
1313
ExecCondition=/usr/local/bin/crc-self-sufficient-env.sh
1414
ExecStart=/usr/local/bin/crc-pullsecret.sh
1515
ExecStartPost=-touch /opt/crc/%n.done

systemd/crc-pullsecret.sh

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,14 @@ set -x
99
source /usr/local/bin/crc-systemd-common.sh
1010
export KUBECONFIG="/opt/kubeconfig"
1111

12-
wait_for_resource secret
12+
wait_for_resource_or_die secret
1313

14-
set +x # disable the logging to avoid leaking the pull secrets
14+
set +x # /!\ disable the logging to avoid leaking the pull secrets
1515

1616
# check if existing pull-secret is valid if not add the one from /opt/crc/pull-secret
17-
existingPsB64=$(oc get secret pull-secret -n openshift-config -o jsonpath="{['data']['\.dockerconfigjson']}")
18-
existingPs=$(echo "${existingPsB64}" | base64 -d)
17+
existingPs=$(oc get secret pull-secret -n openshift-config \
18+
-o jsonpath="{['data']['\.dockerconfigjson']}" \
19+
| base64 -d)
1920

2021
# check if the .auths field is there
2122
if echo "${existingPs}" | jq -e 'has("auths")' >/dev/null 2>&1; then
@@ -24,9 +25,12 @@ if echo "${existingPs}" | jq -e 'has("auths")' >/dev/null 2>&1; then
2425
fi
2526

2627
echo "Cluster doesn't have the pull secrets. Setting them from /opt/crc/pull-secret ..."
28+
2729
pullSecretB64=$(base64 -w0 < /opt/crc/pull-secret)
2830
# Create the JSON patch in memory and pipe it to the oc command
2931
printf '{"data":{".dockerconfigjson": "%s"}}' "${pullSecretB64}" | \
30-
oc patch secret pull-secret -n openshift-config --type merge --patch-file -
32+
oc patch secret pull-secret -n openshift-config --type merge --patch-file=/dev/stdin
33+
34+
echo "All done"
3135

3236
exit 0

0 commit comments

Comments
 (0)