1 | | -/* |
2 | | - This file is a jsonnet template of an eksctl cluster configuration file |
3 | | - that is used with the eksctl CLI to both update and initialize an AWS EKS |
4 | | - based cluster. |
5 | | -
|
6 | | - This file has in turn been generated from eksctl/template.jsonnet, which is |
7 | | - useful to compare against for changes over time. |
8 | | -
|
9 | | - To use jsonnet to generate an eksctl configuration file from this, do: |
10 | | -
|
11 | | - jsonnet projectpythia.jsonnet > projectpythia.eksctl.yaml |
12 | | -
|
13 | | - References: |
14 | | - - https://eksctl.io/usage/schema/ |
15 | | -*/ |
16 | | -local ng = import './libsonnet/nodegroup.jsonnet'; |
17 | | - |
18 | | -// place all cluster nodes here |
19 | | -local clusterRegion = 'us-west-2'; |
20 | | -local masterAzs = ['us-west-2a', 'us-west-2b', 'us-west-2c']; |
21 | | -local nodeAz = 'us-west-2a'; |
22 | | - |
23 | | -// Node definitions for notebook nodes. Config here is merged |
24 | | -// with our notebook node definition. |
25 | | -// A `node.kubernetes.io/instance-type` label is added, so pods |
26 | | -// can request a particular kind of node with a nodeSelector |
27 | | -local notebookNodes = [ |
28 | | - { instanceType: 'r5.xlarge' }, |
29 | | - { instanceType: 'r5.4xlarge' }, |
30 | | - { instanceType: 'r5.16xlarge' }, |
31 | | - { |
32 | | - instanceType: 'g4dn.xlarge', |
33 | | - minSize: 0, |
34 | | - tags+: { |
35 | | - 'k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu': '1', |
36 | | - 'k8s.io/cluster-autoscaler/node-template/label/k8s.amazonaws.com/accelerator': 'nvidia-tesla-t4', |
37 | | - }, |
38 | | - taints+: { |
39 | | - 'nvidia.com/gpu': 'present:NoSchedule', |
40 | | - }, |
41 | | - labels+: { |
42 | | - '2i2c/has-gpu': 'true', |
43 | | - 'k8s.amazonaws.com/accelerator': 'nvidia-tesla-t4', |
44 | | - }, |
45 | | - // Allow provisioning GPUs across all AZs, to prevent a situation where all |
46 | | - // GPUs in a single AZ are in use and no new nodes can be spawned |
47 | | - availabilityZones: masterAzs, |
48 | | - }, |
49 | | -]; |
50 | | -local daskNodes = []; |
51 | | - |
52 | | - |
53 | | -{ |
54 | | - apiVersion: 'eksctl.io/v1alpha5', |
55 | | - kind: 'ClusterConfig', |
56 | | - metadata+: { |
57 | | - name: 'projectpythia', |
58 | | - region: clusterRegion, |
59 | | - version: '1.32', |
60 | | - tags+: { |
61 | | - ManagedBy: '2i2c', |
62 | | - '2i2c.org/cluster-name': $.metadata.name, |
63 | | - }, |
64 | | - }, |
65 | | - availabilityZones: masterAzs, |
66 | | - iam: { |
67 | | - withOIDC: true, |
68 | | - }, |
69 | | - // If you add an addon to this config, run the create addon command. |
70 | | - // |
71 | | - // eksctl create addon --config-file=projectpythia.eksctl.yaml |
72 | | - // |
73 | | - addons: [ |
74 | | - { version: 'latest', tags: $.metadata.tags } + addon |
75 | | - for addon in |
76 | | - [ |
77 | | - { name: 'coredns' }, |
78 | | - { name: 'kube-proxy' }, |
79 | | - { |
80 | | - // vpc-cni is an Amazon-maintained container network interface |
81 | | - // (CNI), where a CNI is required for k8s networking. The aws-node |
82 | | - // DaemonSet in kube-system stems from installing this. |
83 | | - // |
84 | | - // Related docs: https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/network-plugins/ |
85 | | - // https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html |
86 | | - // |
87 | | - name: 'vpc-cni', |
88 | | - attachPolicyARNs: ['arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy'], |
89 | | - // configurationValues ref: https://github.com/aws/amazon-vpc-cni-k8s/blob/HEAD/charts/aws-vpc-cni/values.yaml |
90 | | - configurationValues: ||| |
91 | | - enableNetworkPolicy: "false" |
92 | | - |||, |
93 | | - }, |
94 | | - { |
95 | | - // aws-ebs-csi-driver ensures that our PVCs are bound to PVs that |
96 | | - // couple to AWS EBS based storage. Without it, expect to see pods |
97 | | - // mounting a PVC fail to schedule and PVC resources that remain |
98 | | - // unbound. |
99 | | - // |
100 | | - // Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html |
101 | | - // |
102 | | - name: 'aws-ebs-csi-driver', |
103 | | - wellKnownPolicies: { |
104 | | - ebsCSIController: true, |
105 | | - }, |
106 | | - // We enable detailed metrics collection to watch for issues with |
107 | | - // jupyterhub-home-nfs. |
108 | | - // configurationValues ref: https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/HEAD/charts/aws-ebs-csi-driver/values.yaml |
109 | | - configurationValues: ||| |
110 | | - defaultStorageClass: |
111 | | - enabled: true |
112 | | - controller: |
113 | | - enableMetrics: true |
114 | | - node: |
115 | | - enableMetrics: true |
116 | | - |||, |
117 | | - }, |
118 | | - ] |
| 1 | +local cluster = import './libsonnet/cluster.jsonnet'; |
| 2 | + |
| 3 | +local c = cluster.makeCluster( |
| 4 | + name='projectpythia', |
| 5 | + region='us-west-2', |
| 6 | + nodeAz='us-west-2a', |
| 7 | + version='1.34', |
| 8 | + coreNodeInstanceType='r8i-flex.large', |
| 9 | + notebookCPUInstanceTypes=[ |
| 10 | + 'r5.xlarge', |
| 11 | + 'r5.4xlarge', |
| 12 | + 'r5.16xlarge', |
119 | 13 | ], |
120 | | - nodeGroups: [ |
121 | | - n { clusterName: $.metadata.name } |
122 | | - for n in |
123 | | - [ |
124 | | - ng { |
125 | | - namePrefix: 'core', |
126 | | - nameSuffix: 'a', |
127 | | - nameIncludeInstanceType: false, |
128 | | - availabilityZones: [nodeAz], |
129 | | - instanceType: 'r5.xlarge', |
130 | | - minSize: 1, |
131 | | - maxSize: 6, |
132 | | - labels+: { |
133 | | - 'hub.jupyter.org/node-purpose': 'core', |
134 | | - 'k8s.dask.org/node-purpose': 'core', |
135 | | - }, |
136 | | - }, |
137 | | - ] + [ |
138 | | - ng { |
139 | | - namePrefix: 'nb', |
140 | | - availabilityZones: [nodeAz], |
141 | | - minSize: 0, |
142 | | - maxSize: 500, |
143 | | - instanceType: n.instanceType, |
144 | | - labels+: { |
145 | | - 'hub.jupyter.org/node-purpose': 'user', |
146 | | - 'k8s.dask.org/node-purpose': 'scheduler', |
147 | | - }, |
148 | | - taints+: { |
149 | | - 'hub.jupyter.org_dedicated': 'user:NoSchedule', |
150 | | - 'hub.jupyter.org/dedicated': 'user:NoSchedule', |
151 | | - }, |
152 | | - tags+: { |
153 | | - '2i2c:node-purpose': 'user', |
154 | | - }, |
155 | | - } + n |
156 | | - for n in notebookNodes |
157 | | - ] + ( |
158 | | - if daskNodes != null then |
159 | | - [ |
160 | | - ng { |
161 | | - namePrefix: 'dask', |
162 | | - availabilityZones: [nodeAz], |
163 | | - minSize: 0, |
164 | | - maxSize: 500, |
165 | | - labels+: { |
166 | | - 'k8s.dask.org/node-purpose': 'worker', |
167 | | - }, |
168 | | - taints+: { |
169 | | - 'k8s.dask.org_dedicated': 'worker:NoSchedule', |
170 | | - 'k8s.dask.org/dedicated': 'worker:NoSchedule', |
171 | | - }, |
172 | | - tags+: { |
173 | | - '2i2c:node-purpose': 'worker', |
174 | | - }, |
175 | | - instancesDistribution+: { |
176 | | - onDemandBaseCapacity: 0, |
177 | | - onDemandPercentageAboveBaseCapacity: 0, |
178 | | - spotAllocationStrategy: 'capacity-optimized', |
179 | | - }, |
180 | | - } + n |
181 | | - for n in daskNodes |
182 | | - ] else [] |
183 | | - ) |
| 14 | + daskInstanceTypes=[], |
| 15 | + hubs=['staging', 'prod', 'pythia-binder'], |
| 16 | + notebookGPUNodeGroups=[ |
| 17 | + { |
| 18 | + instanceType: 'g4dn.xlarge', |
| 19 | + }, |
184 | 20 | ], |
185 | | -} |
| 21 | + nodeGroupGenerations=['a'], |
| 22 | +); |
| 23 | + |
| 24 | +c |
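
For context, a minimal sketch that is not part of this commit: since `makeCluster` is plain jsonnet and presumably returns the same ClusterConfig object that the replaced inline template used to define, site-specific overrides could still be layered on top of its result before rendering with `jsonnet projectpythia.jsonnet > projectpythia.eksctl.yaml`. The arguments below are copied from this commit; the extra tag and the assumption that the returned object keeps a `metadata.tags` field are hypothetical.

local cluster = import './libsonnet/cluster.jsonnet';

// Same arguments as in this commit.
local c = cluster.makeCluster(
  name='projectpythia',
  region='us-west-2',
  nodeAz='us-west-2a',
  version='1.34',
  coreNodeInstanceType='r8i-flex.large',
  notebookCPUInstanceTypes=['r5.xlarge', 'r5.4xlarge', 'r5.16xlarge'],
  daskInstanceTypes=[],
  hubs=['staging', 'prod', 'pythia-binder'],
  notebookGPUNodeGroups=[{ instanceType: 'g4dn.xlarge' }],
  nodeGroupGenerations=['a'],
);

// Hypothetical override, assuming the returned object keeps the usual
// eksctl ClusterConfig shape with metadata.tags, as the inline template did:
c {
  metadata+: {
    tags+: { 'example/extra-tag': 'value' },
  },
}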