Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions config/clusters/maap/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,67 @@ jupyterhub:
admin_users:
- freitagb
- wildintellect
authenticator_class: generic-oauth
# Persist auth_state so the OAuth tokens are available later to the
# spawner (consumed by the 001-username-claim hook in extraConfig).
enable_auth_state: true
GenericOAuthenticator:
# We want to get user data from the JWT that's the id token,
# not from a separate userdata API call
userdata_from_id_token: true
scope:
- basic
- profile
- openid
username_claim: preferred_username
# Let JupyterHub manage group membership from the OAuth provider
# instead of from its own database.
manage_groups: true
# Group names are read from the "roles" key of the provider's user
# info stored in auth_state.
auth_state_groups_key: oauth_user.roles
admin_groups:
- Admin
# Being granted *any* jupyterhub related role should allow you
# to login
allowed_groups:
- Admin
- CPU:XS
- CPU:S
- CPU:M
- CPU:L
- CPU:XL
- CPU:XXL
- CPU:XXXL
- GPU:T4
extraConfig:
001-username-claim: |
  def populate_token(spawner, auth_state):
      # Copy the OAuth tokens from the authenticator's auth_state into
      # the single-user server's environment so MAAP tooling inside the
      # container can authenticate against MAAP services.
      #
      # For our deployment-service-check health check user, there is no auth_state.
      # So these env variables need not be set.
      if auth_state:
          spawner.environment.update({
              # Use single quotes inside the f-string expression: nesting
              # double quotes inside a double-quoted f-string is a
              # SyntaxError on Python < 3.12.
              "MAAP_PGT": f"jwt:{auth_state.get('id_token', '')}",
              "KC_ACCESS_TOKEN": auth_state.get("access_token", ""),
              "KC_ID_TOKEN": auth_state.get("id_token", ""),
              "KC_REFRESH_TOKEN": auth_state.get("refresh_token", ""),
          })

  c.Spawner.auth_state_hook = populate_token
00-volumes-and-volume-mounts-as-dict: |
  # The base jupyterhub config in zero-to-jupyterhub defines volumes and
  # volume_mounts as lists. List-valued config can only be replaced
  # wholesale by later config, so we convert both to dictionaries, which
  # lets us add individual volumes / volume_mounts without clobbering
  # the defaults.
  if isinstance(c.KubeSpawner.volumes, list):
      # Volume names are unique within a pod, so the name is a safe key.
      c.KubeSpawner.volumes = {v["name"]: v for v in c.KubeSpawner.volumes}
  if isinstance(c.KubeSpawner.volume_mounts, list):
      # The same volume may be mounted at several paths, so prefix the
      # key with the list index to keep keys unique.
      c.KubeSpawner.volume_mounts = {
          f"{idx}-{vm['name']}": vm
          for idx, vm in enumerate(c.KubeSpawner.volume_mounts)
      }

  # Shared emptyDir that the s3fs sidecar fills with FUSE bucket mounts.
  c.KubeSpawner.volumes["s3fs-volume"] = {"name": "s3fs-volume", "emptyDir": {}}
singleuser:
cloudMetadata:
blockWithIptables: false
Expand Down Expand Up @@ -85,6 +146,58 @@ jupyterhub:
mountPath: /home/rstudio/shared
subPath: _shared
readOnly: true
# S3 bucket mounts backed by the shared "s3fs-volume" emptyDir.
# HostToContainer propagation is required so FUSE mounts created by the
# s3fs sidecar after this container starts become visible here.
- name: s3fs-volume
mountPath: /home/jovyan/my-private-bucket
subPath: my-private-bucket
mountPropagation: HostToContainer
readOnly: false
- name: s3fs-volume
mountPath: /home/jovyan/my-public-bucket
subPath: my-public-bucket
mountPropagation: HostToContainer
readOnly: false
# shared-buckets and triaged-jobs are read-only for users.
- name: s3fs-volume
mountPath: /home/jovyan/shared-buckets
subPath: shared-buckets
mountPropagation: HostToContainer
readOnly: true
- name: s3fs-volume
mountPath: /home/jovyan/triaged-jobs
subPath: triaged-jobs
mountPropagation: HostToContainer
readOnly: true
# Sidecar that FUSE-mounts MAAP S3 buckets into the shared "s3fs-volume"
# emptyDir; the notebook container surfaces the same subPaths under
# /home/jovyan with HostToContainer propagation.
extraContainers:
- name: s3fs
image: mas.dit.maap-project.org/root/che-sidecar-s3fs:2i2c
# NOTE(review): raw Kubernetes container specs spell this camelCase
# ("imagePullPolicy"); confirm the consumer accepts the snake_case
# key here, otherwise it is silently ignored and the default pull
# policy applies.
image_pull_policy: Always
securityContext:
# presumably required for FUSE (/dev/fuse) access -- TODO confirm
privileged: true
resources:
limits:
memory: 512Mi
cpu: 1.0
requests:
# If we don't set requests, k8s sets requests == limits!
# So we set something tiny
memory: 64Mi
cpu: 0.01
# Bidirectional propagation so mounts the sidecar creates under these
# paths propagate back to the host, and from there into the notebook
# container.
volumeMounts:
- name: s3fs-volume
mountPath: /my-public-bucket
subPath: my-public-bucket
mountPropagation: Bidirectional
- name: s3fs-volume
mountPath: /my-private-bucket
subPath: my-private-bucket
mountPropagation: Bidirectional
- name: s3fs-volume
mountPath: /shared-buckets
subPath: shared-buckets
mountPropagation: Bidirectional
- name: s3fs-volume
mountPath: /triaged-jobs
subPath: triaged-jobs
mountPropagation: Bidirectional
profileList:
- display_name: Choose your environment and resources
default: true
Expand Down
245 changes: 217 additions & 28 deletions config/clusters/maap/prod.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,234 @@ userServiceAccount:
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::916098889494:role/maap-prod
jupyterhub:
custom:
2i2c:
add_staff_user_ids_to_admin_users: false
homepage:
gitRepoBranch: master
gitRepoUrl: https://github.com/MAAP-Project/maap-hub-homepage
singleuser:
extraEnv:
SCRATCH_BUCKET: s3://maap-scratch-prod/$(JUPYTERHUB_USER)
MAAP_API_HOST: api.maap-project.org
DOCKERIMAGE_PATH_DEFAULT: mas.maap-project.org/root/maap-workspaces/custom_images/maap_base:v5.0.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will set this as the environment variable no matter what image is used. Is that what was expected? In the last PR, I saw this was set to be the same as the name of the image, in which case it should use $(JUPYTER_IMAGE) as the value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, extracted out DOCKERIMAGE_PATH_BASE_IMAGE!

DOCKERIMAGE_PATH_BASE_IMAGE: $(JUPYTER_IMAGE)
WORKSPACE_BUCKET: maap-ops-workspace
nodeSelector:
2i2c/hub-name: prod
# NOTE the only difference between profileList for staging and prod is the image tags
# prod should always have the latest official release and staging has develop unless we are
# actively doing a release in which case staging images point to our release tag
profileList:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally, we would like to keep profileLists in common.yaml, and use the same image in staging and prod. The staging here is primarily for testing infrastructure changes, and we (2i2c) would like to generally keep it the exact same as prod. So that if we have tested something on staging, we're 99% confident it would work in prod.

having different images in staging and prod could cause problems here, in case the images being different causes failure when migrating. It could also cause the other parts of profile Lists (such as resource config) to drift out of sync between these two.

However, we also recognize that you want to probably test out different images as you're onboarding an existing userbase to this hub, and want to be flexible.

So I see two paths forward:

  1. Use the same image tags for staging and prod, and put it in common.yaml. Image testing happens purely via unlisted choice. This is the preferred way, and also where we should go long term.
  2. If (1) doesn't fit with your existing workflows for building images, leave a block comment above the profileList config in staging and prod, documenting that it's duplicated, and that whoever is modifying it should take care to make sure that the only differences between these two should be the image tags, and everything else should be kept in sync manually. We can then revisit this in 3-6 months, after the initial migration is completed and the pace of image changes has changed.

I wanna unblock y'all asap, so while I have a preference for (1) happy to do either.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry jumping into this conversation as I come back from leave.

Let me know if I am phrasing this correctly -
You are saying that staging and prod are meant for infrastructure testing and everything else remains the same. In that case, we (MAAP) as tenants of this infrastructure should be deploying 3 versions of your prod configuration for our own customers and venues (DIT, UAT and OPS). The tenant should not need to worry about your changes in your staging environment.
We should be able to deploy multiple 2i2c prod environments with different MAAP configurations for our testing.

Does that make sense?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On MAAP, the DIT, UAT and OPS venues come with their associated deployments of the API and data processing clusters which impact the jupyter extensions used in the images. So in terms of testing, we are not just testing the images, but also the entire deployment venue, which is isolated in its own cloud env.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a block comment above profileList and we would like to go with option 2

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @grallewellyn! I've retitled the PR slightly and merged this!

@sujen1412 I opened #7233 to split off the other conversation so we don't lose track of it!

- display_name: Choose your environment and resources
default: true
profile_options:
image:
display_name: Environment
dynamic_image_building:
enabled: true
unlisted_choice:
enabled: true
display_name: Custom image
validation_regex: ^.+:.+$
validation_message: Must be a publicly available docker image, of form <image-name>:<tag>
kubespawner_override:
image: '{value}'
choices:
01-pangeo:
display_name: Modified Pangeo Notebook
description: Pangeo based notebook with a Python environment
kubespawner_override:
image: mas.maap-project.org/root/maap-workspaces/2i2c/pangeo:v5.0.0
init_containers:
- name: jupyterhub-gitpuller-init
image: public.ecr.aws/nasa-veda/jupyterhub-gitpuller-init:97eb45f9d23b128aff810e45911857d5cffd05c2
env:
- name: TARGET_PATH
value: veda-docs
- name: SOURCE_REPO
value: https://github.com/NASA-IMPACT/veda-docs
volumeMounts:
- name: home
mountPath: /home/jovyan
subPath: '{escaped_username}'
securityContext:
runAsUser: 1000
runAsGroup: 1000
02-rocker:
display_name: Rocker Geospatial
description: JupyterHub environment with many R geospatial libraries pre-installed
kubespawner_override:
image: mas.maap-project.org/root/maap-workspaces/2i2c/r:v5.0.0
init_containers:
- name: jupyterhub-gitpuller-init
image: public.ecr.aws/nasa-veda/jupyterhub-gitpuller-init:97eb45f9d23b128aff810e45911857d5cffd05c2
env:
- name: TARGET_PATH
value: veda-docs
- name: SOURCE_REPO
value: https://github.com/NASA-IMPACT/veda-docs
volumeMounts:
- name: home
mountPath: /home/jovyan
subPath: '{escaped_username}'
securityContext:
runAsUser: 1000
runAsGroup: 1000
03-isce3:
display_name: isce3
description: Pangeo based notebook with a Python environment and isce3
kubespawner_override:
image: mas.maap-project.org/root/maap-workspaces/2i2c/isce3:v5.0.0
init_containers:
- name: jupyterhub-gitpuller-init
image: public.ecr.aws/nasa-veda/jupyterhub-gitpuller-init:97eb45f9d23b128aff810e45911857d5cffd05c2
env:
- name: TARGET_PATH
value: veda-docs
- name: SOURCE_REPO
value: https://github.com/NASA-IMPACT/veda-docs
volumeMounts:
- name: home
mountPath: /home/jovyan
subPath: '{escaped_username}'
securityContext:
runAsUser: 1000
runAsGroup: 1000
04-qgis:
display_name: QGIS on Linux Desktop
description: Linux desktop in the browser, with qgis installed
kubespawner_override:
# Launch people directly into the Linux desktop when they start
default_url: /desktop
# Built from https://github.com/2i2c-org/nasa-qgis-image
image: quay.io/2i2c/nasa-qgis-image:d76118ea0c15
resource_allocation:
display_name: Resource Allocation
choices:
mem_1_9:
display_name: 1.9 GB RAM, upto 3.7 CPUs
allowed_groups:
- CPU:XS
kubespawner_override:
mem_guarantee: 1991244775
mem_limit: 1991244775
cpu_guarantee: 0.2328125
cpu_limit: 3.725
node_selector:
node.kubernetes.io/instance-type: r5.xlarge
mem_3_7:
display_name: 3.7 GB RAM, upto 3.7 CPUs
allowed_groups:
- CPU:S
kubespawner_override:
mem_guarantee: 3982489550
mem_limit: 3982489550
cpu_guarantee: 0.465625
cpu_limit: 3.725
node_selector:
node.kubernetes.io/instance-type: r5.xlarge
mem_7_4:
display_name: 7.4 GB RAM, upto 3.7 CPUs
allowed_groups:
- CPU:M
kubespawner_override:
mem_guarantee: 7964979101
mem_limit: 7964979101
cpu_guarantee: 0.93125
cpu_limit: 3.725
node_selector:
node.kubernetes.io/instance-type: r5.xlarge
mem_14_8:
display_name: 14.8 GB RAM, upto 3.7 CPUs
allowed_groups:
- CPU:L
kubespawner_override:
mem_guarantee: 15929958203
mem_limit: 15929958203
cpu_guarantee: 1.8625
cpu_limit: 3.725
node_selector:
node.kubernetes.io/instance-type: r5.xlarge
default: true
mem_29_7:
display_name: 29.7 GB RAM, upto 3.7 CPUs
allowed_groups:
- CPU:XL
kubespawner_override:
mem_guarantee: 31859916406
mem_limit: 31859916406
cpu_guarantee: 3.725
cpu_limit: 3.725
node_selector:
node.kubernetes.io/instance-type: r5.xlarge
mem_60_6:
display_name: 60.6 GB RAM, upto 15.6 CPUs
allowed_groups:
- CPU:XXL
kubespawner_override:
mem_guarantee: 65094448840
mem_limit: 65094448840
cpu_guarantee: 7.8475
cpu_limit: 15.695
node_selector:
node.kubernetes.io/instance-type: r5.4xlarge
mem_121_2:
display_name: 121.2 GB RAM, upto 15.6 CPUs
allowed_groups:
- CPU:XXXL
kubespawner_override:
mem_guarantee: 130188897681
mem_limit: 130188897681
cpu_guarantee: 15.695
cpu_limit: 15.695
node_selector:
node.kubernetes.io/instance-type: r5.4xlarge
- display_name: NVIDIA Tesla T4, ~16 GB, ~4 CPUs
description: Start a container on a dedicated node with a GPU
slug: gpu
allowed_groups:
- GPU:T4
profile_options:
image:
display_name: Environment
dynamic_image_building:
enabled: true
unlisted_choice:
enabled: true
display_name: Custom image
validation_regex: ^.+:.+$
validation_message: Must be a publicly available docker image of form <image-name>:<tag>
kubespawner_override:
image: '{value}'
choices:
pytorch:
display_name: Pangeo PyTorch ML Notebook
default: false
slug: pytorch
kubespawner_override:
image: quay.io/pangeo/pytorch-notebook:2024.11.11
tensorflow2:
display_name: Pangeo Tensorflow2 ML Notebook
default: true
slug: tensorflow2
kubespawner_override:
image: quay.io/pangeo/ml-notebook:2024.11.11
kubespawner_override:
environment:
NVIDIA_DRIVER_CAPABILITIES: compute,utility
mem_limit:
mem_guarantee: 14G
node_selector:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: '1'
hub:
config:
JupyterHub:
authenticator_class: generic-oauth
Authenticator:
# Admin access is granted via admin_groups below, not by listing
# individual usernames.
admin_users: []
# NOTE(review): this block largely duplicates the GenericOAuthenticator
# config in common.values.yaml; only the three URL settings below are
# prod-specific. Keep the two in sync.
GenericOAuthenticator:
oauth_callback_url: https://hub.maap-project.org/hub/oauth_callback
token_url: https://auth.openveda.cloud/realms/maap/protocol/openid-connect/token
authorize_url: https://auth.openveda.cloud/realms/maap/protocol/openid-connect/auth
# We want to get user data from the JWT that's the id token,
# not from a separate userdata API call
userdata_from_id_token: true
scope:
- basic
- profile
- openid
username_claim: preferred_username
manage_groups: true
# Group names are read from the "roles" key of the provider's user
# info stored in auth_state.
auth_state_groups_key: oauth_user.roles
admin_groups:
- Admin
# Being granted *any* jupyterhub related role should allow you
# to login
allowed_groups:
- Admin
- CPU:XS
- CPU:S
- CPU:M
- CPU:L
- CPU:XL
- CPU:XXL
- CPU:XXXL
- GPU:T4
ingress:
hosts: [hub.maap-project.org]
tls:
Expand Down
Loading
Loading