-
Notifications
You must be signed in to change notification settings - Fork 15
Add more instances and node groups to cover all templates by default #41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
16fcd99
eb4d82b
133934f
ffb59d2
444b8fd
826a5d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,16 +27,20 @@ locals { | |
| ) | ||
|
|
||
| # Map of GPU types to their product names and instance types | ||
| gpu_types = { | ||
| "T4" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.4xlarge"] | ||
| } | ||
| "A10G" = { | ||
| product_name = "NVIDIA-A10G" | ||
| instance_types = ["g5.4xlarge"] | ||
| } | ||
| } | ||
| # Additional GPU types can be added via gpu_instances.tfvars | ||
| gpu_types = merge( | ||
|
||
| { | ||
| "T4" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.4xlarge"] | ||
| } | ||
| "A10G" = { | ||
| product_name = "NVIDIA-A10G" | ||
| instance_types = ["g5.4xlarge"] | ||
| } | ||
| }, | ||
| var.additional_gpu_types | ||
| ) | ||
|
|
||
| # Base configuration for GPU node groups | ||
| gpu_node_group_base = { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| # GPU Instance Types for EKS | ||
| # | ||
| # This file contains additional GPU instance configurations that can be used | ||
| # with the EKS cluster. To use these configurations, include this file when | ||
| # running terraform: | ||
| # | ||
| # terraform plan -var-file="gpu_instances.tfvars" | ||
| # terraform apply -var-file="gpu_instances.tfvars" | ||
| # | ||
| # You can also selectively enable specific GPU types by setting the | ||
| # node_group_gpu_types variable. | ||
| # | ||
| # Note: Entries here will override defaults with the same key (e.g., T4 below | ||
| # overrides the default T4 to add more instance types). | ||
|
|
||
| # GPU types - overrides defaults and adds new types | ||
| additional_gpu_types = { | ||
| # Override default T4 to include additional instance types | ||
| "T4" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.xlarge", "g4dn.2xlarge", "g4dn.4xlarge"] | ||
| } | ||
| "T4-4x" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.12xlarge"] | ||
| } | ||
| "L4" = { | ||
| product_name = "NVIDIA-L4" | ||
| instance_types = ["g6.2xlarge", "g6.4xlarge"] | ||
| } | ||
| "L4-4x" = { | ||
| product_name = "NVIDIA-L4" | ||
| instance_types = ["g6.24xlarge"] | ||
| } | ||
| } | ||
|
|
||
| # Enable all GPU types (the built-in A10G plus the types defined above, including the overridden T4) | ||
| node_group_gpu_types = ["T4", "A10G", "T4-4x", "L4", "L4-4x"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,12 +81,41 @@ variable "eks_cluster_version" { | |
| variable "node_group_gpu_types" { | ||
| description = <<-EOT | ||
| (Optional) The GPU types of the EKS nodes. | ||
| Possible values: ["T4", "A10G"] | ||
| Possible values: ["T4", "A10G"] plus any keys defined in additional_gpu_types | ||
| EOT | ||
| type = list(string) | ||
| default = ["T4"] | ||
| } | ||
|
|
||
| variable "additional_gpu_types" { | ||
|
||
| description = <<-EOT | ||
| (Optional) Additional GPU types to add or override in the EKS cluster. | ||
| Entries with the same key as a default (e.g., "T4") will override the default entirely. | ||
| See gpu_instances.tfvars for examples. | ||
|
|
||
| ex: | ||
| ``` | ||
| additional_gpu_types = { | ||
| # Override default T4 with more instance types | ||
| "T4" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.xlarge", "g4dn.2xlarge", "g4dn.4xlarge"] | ||
| } | ||
| # Add new GPU type | ||
| "L4" = { | ||
| product_name = "NVIDIA-L4" | ||
| instance_types = ["g6.2xlarge", "g6.4xlarge"] | ||
| } | ||
| } | ||
| ``` | ||
| EOT | ||
| type = map(object({ | ||
| product_name = string | ||
| instance_types = list(string) | ||
| })) | ||
| default = {} | ||
| } | ||
|
|
||
| variable "enable_efs" { | ||
| description = <<-EOT | ||
| (Optional) Enable the creation of an EFS instance. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| # GPU Instance Types for EKS | ||
| # | ||
| # This file contains additional GPU instance configurations that can be used | ||
| # with the EKS cluster. To use these configurations, include this file when | ||
| # running terraform: | ||
| # | ||
| # terraform plan -var-file="gpu_instances.tfvars" | ||
| # terraform apply -var-file="gpu_instances.tfvars" | ||
| # | ||
| # You can also selectively enable specific GPU types by setting the | ||
| # node_group_gpu_types variable. | ||
| # | ||
| # Note: Entries here will override defaults with the same key (e.g., T4 below | ||
| # overrides the default T4 to add more instance types). | ||
|
|
||
| # GPU types - overrides defaults and adds new types | ||
| additional_gpu_types = { | ||
| # Override default T4 to include additional instance types | ||
| "T4" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.xlarge", "g4dn.2xlarge", "g4dn.4xlarge"] | ||
| } | ||
| "T4-4x" = { | ||
| product_name = "Tesla-T4" | ||
| instance_types = ["g4dn.12xlarge"] | ||
| } | ||
| "L4" = { | ||
| product_name = "NVIDIA-L4" | ||
| instance_types = ["g6.2xlarge", "g6.4xlarge"] | ||
| } | ||
| "L4-4x" = { | ||
| product_name = "NVIDIA-L4" | ||
| instance_types = ["g6.24xlarge"] | ||
| } | ||
| } | ||
|
|
||
| # Enable all GPU types (the built-in A10G plus the types defined above, including the overridden T4) | ||
| node_group_gpu_types = ["T4", "A10G", "T4-4x", "L4", "L4-4x"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's change the `gpu_instances.tfvars` to `gpu_instances.tfvars.example`