Hi, I have been trying for a while now to figure out what I am missing, but I can't find it.
My submit.yml.erb looks like this:
---
batch_connect:
  template: basic
script:
  queue_name: <%= custom_queue %>
  native:
    - "--nodes"
    - "<%= bc_num_slots.blank? ? 1 : bc_num_slots.to_i %>"
    - "--ntasks"
    - "<%= num_ntasks.blank? ? 1 : num_ntasks.to_i %>"
    - "--cpus-per-task"
    - "<%= num_cpus.blank? ? 1 : num_cpus.to_i %>"
    - "--mem"
    - "<%= num_mem.blank? ? 1 : num_mem.to_i %>G"
    <%- unless nodelist.blank? -%>
    - "--nodelist"
    - "<%= nodelist %>"
    <%- end -%>
    <%- unless email.blank? -%>
    - "--mail-user"
    - "<%= email %>"
    - "--mail-type"
    - "BEGIN,END,FAIL"
    <%- end -%>
    - "--hint"
    - "nomultithread"
    <%- if num_gpus.to_i > 0 -%>
    - "--gpus-per-node"
    - "<%= num_gpus.to_i %>"
    <%- end -%>
    <%- if custom_queue.to_s == "backfill" -%>
    - "--requeue"
    <%- end -%>
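For reference, with the values from the example run shown at the end of this post, the native array above should render to roughly this plain sbatch call (job_script.sh is just a placeholder for the generated script):

```bash
# Hypothetical equivalent of the rendered native args; values are taken from
# the job_script_options.json dump further below. job_script.sh is a placeholder.
sbatch --partition=progress \
       --nodes=1 \
       --ntasks=1 \
       --cpus-per-task=4 \
       --mem=16G \
       --mail-user=xxxx@xxxx \
       --mail-type=BEGIN,END,FAIL \
       --hint=nomultithread \
       job_script.sh
```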
And my form.yml.erb looks like this:
<%-
  require 'open3'
  require 'json'

  # partition_error stays nil unless something goes wrong below
  partition_error = nil

  begin
    # Read partitions.json and get the partitions hash
    file = File.read(File.join(__dir__, 'partitions.json'))
    partitions_hash = JSON.parse(file)
    # Command to run
    script = 'sinfo -h --format="%P"'
    # Create a partitions array to dynamically populate the queues associated with the user
    partitions = []
    # Store the output and status
    output, status = Open3.capture2('bash', stdin_data: script)
    if status.success?
      # Add each queue to the partitions array by splitting the output at '\n'.
      output.split("\n").each do |queue|
        queue = queue.gsub("*", "")
        if partitions_hash.has_key?(queue)
          partitions.push(queue)
        end
      end
    else
      partition_error = "Error"
    end
  rescue => e
    partition_error = e.message.strip
  end
-%>
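(For context: partitions.json maps each partition name to the hash of data attributes that gets injected into the select options further down. The actual file isn't shown here; a hypothetical example with made-up keys and limits would look like this:)

```json
{
  "interactive": { "data-max-num-cpus": 16, "data-max-num-mem": 64 },
  "progress":    { "data-max-num-cpus": 16, "data-max-num-mem": 64 },
  "backfill":    { "data-max-num-cpus": 8,  "data-max-num-mem": 32 }
}
```

The rest of the file: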
# Batch Connect app configuration file
#
# @note Used to define the submitted cluster, title, description, and
# hard-coded/user-defined attributes that make up this Batch Connect app.
---
# **MUST** set cluster id here that matches cluster configuration file located
# under /etc/ood/config/clusters.d/*.yml
# @example Use the Owens cluster at Ohio Supercomputer Center
# cluster: "owens"
cluster: "omnia"
# Define attribute values that aren't meant to be modified by the user within
# the Dashboard form
cluster: "omnia"
form:
  - custom_queue
  - mode
  - profiles
  - working_dir
  - num_cpus
  - num_ntasks
  - gpu_options
  - num_gpus
  - num_mem
  - nodelist
  - bc_account
  - bc_num_slots
  - bc_num_hours
  - sub_type
  - bind_paths
  - version
  - sif_file
  - conda_path
  - conda_name
  #- extra_jupyter_args
  - email
submit: submit.yml.erb
id: submitForm
name: submitForm
title: Jupyter Lab
attributes:
  bc_num_slots:
    label: "Number of nodes"
  # Set the corresponding modules that need to be loaded for Jupyter to run
  #
  # @note It is called within the batch job as `module load <modules>` if
  #   defined
  # @example Do not load any modules
  #   modules: ""
  # @example Using default python module
  #   modules: "python"
  # @example Using specific python module
  #   modules: "python/3.5"
  # @example Using combination of modules
  #   modules: "python/3.5 cuda/8.0.44"
  custom_queue:
    label: Partition
    widget: select
    value: interactive
    cacheable: false
    help: |
      - [Partition Documentation](http://hpc-docs.iee.fraunhofer.de/partitions/)
      <%- if partition_error || partitions.blank? -%>
      <div class="text-danger">Error while fetching partitions. Please contact support!</div>
      <%- else -%>
    options:
      <%- partitions.each do |q| -%>
      - [
          "<%= q %>", "<%= q %>",
          <%= JSON.generate(partitions_hash[q]) %>
        ]
      <%- end -%>
    <%- end -%>
  # working_dir:
  #   label: Working Directory
  #   help: "Optionally select your Jupyter project directory. The default is your home directory ($HOME) when left empty."
  #   cacheable: false
  #   data-filepicker: true
  #   data-target-file-type: dirs
  #   readonly: false
  # Working_dir
  working_dir:
    widget: "path_selector"
    directory: "/mnt/"
    show_hidden: true
    show_files: true
    favorites: false
    help: |
      Optionally select your Jupyter project directory. The default is your home directory ($HOME) when left empty.
  mode:
    label: Mode
    help: "Choose between simple pre-defined profiles or advanced self-configuration"
    widget: select
    options:
      - [
          "simple", "x",
          data-hide-gpu-options: true,
          data-hide-num-cpus: true,
          data-hide-num-mem: true,
          data-hide-num-gpus: true
        ]
      - [
          "advanced", "y",
          data-hide-profiles: true
        ]
  profiles:
    label: Profile
    help: "Choose a profile"
    widget: select
    options:
      - [ "4 CPU, 8GB RAM", "4c8r" ]
      - [ "8 CPU, 16GB RAM", "8c16r" ]
      - [ "16 CPU, 32GB RAM", "16c32r" ]
  sub_type:
    label: Submission Environment
    widget: select
    help: Select the desired submission environment.
    cacheable: false
    options:
      - [
          "Mod (Basic)", "mod_basic",
          data-hide-sif-file: true,
          data-hide-conda-path: true,
          data-hide-conda-name: true,
          data-hide-bind-paths: true
        ]
      - [
          "Apptainer Container", "sif_basic",
          data-hide-conda-path: true,
          data-hide-version: true,
          data-hide-conda-name: true
        ]
      - [
          "Custom Conda Environment", "conda_env",
          data-hide-sif-file: true,
          data-hide-version: true,
          data-hide-bind-paths: true
        ]
  version:
    label: Jupyter Kernel
    help: Select the desired Jupyter kernel
    cacheable: false
    widget: select
    options:
      - [ "Python 3.10 Kernel", "py310" ]
      - [ "Tensorflow 2.12", "tensorflow/2.12" ]
      - [ "Pytorch 2.0", "pytorch/2.0" ]
  # sif_file:
  #   label: Container File
  #   help: |
  #     Select an Apptainer/Singularity container **(.sif/.simg) that includes JupyterLab**. This is required when using a container submission environment! **pip install jupyterlab** inside your container.
  #   cacheable: false
  #   data-filepicker: true
  #   data-target-file-type: files
  #   data-target-file-pattern: '(.simg|.sif)$'
  #   readonly: true
  # Containers
  sif_file:
    widget: "path_selector"
    directory: "/mnt/"
    show_hidden: true
    show_files: true
    favorites: false
    help: |
      Select an Apptainer/Singularity container **(.sif/.simg) that includes JupyterLab**. This is required when using a container submission environment! **pip install jupyterlab** inside your container.
  bind_paths:
    widget: "text_field"
    label: "Folders to bind into your container"
    value: ""
    help: |
      - Bind additional directories; default: $HOME
      - Example: /data:/mnt binds your /data folder to /mnt inside the container
  # conda_path:
  #   label: Conda Environment
  #   help: |
  #     Select a conda environment: path to the **/bin folder that includes JupyterLab**. This is required when using a conda submission environment! Use **pip install jupyterlab** inside your conda environment.
  #   cacheable: false
  #   data-filepicker: true
  #   data-show-hidden: true
  #   data-target-file-type: dirs
  #   data-target-file-pattern: 'bin'
  #   readonly: true
  #   initialdir: $HOME
  conda_path:
    widget: "path_selector"
    directory: "/mnt/"
    show_hidden: true
    show_files: true
    favorites: false
    help: |
      Select a conda environment: path to the **/bin folder that includes JupyterLab**. This is required when using a conda submission environment! Use **pip install jupyterlab** inside your conda environment.
  conda_name:
    widget: "text_field"
    label: "Name of your conda env"
    value: "envXY"
    help: |
      - Name of your custom conda environment **(required!)**
  num_cpus:
    label: CPUs (Cores)
    cacheable: false
    widget: number_field
    max: 16
    min: 1
    step: 1
    value: 1
  num_ntasks:
    label: Number of tasks
    cacheable: false
    widget: number_field
    min: 1
    step: 1
    value: 1
  # GPU options
  gpu_options:
    widget: select
    label: "GPU type"
    help: "Choose between full GPU use and MIG mode"
    options:
      - [
          "No GPU", "0",
          data-hide-num-gpus: true
        ]
      - [ "A100 40GB (1 GPU)", "a100" ]
  jupyter_type:
    widget: select
    label: "Jupyter Session Type"
    help: "Choose between Jupyter Notebook and Jupyter Lab"
    options:
      - [ "Jupyter Notebook", "jupyter notebook" ]
      - [ "Jupyter Lab", "jupyter-lab" ]
  # Number of GPUs
  num_gpus:
    label: "GPUs"
    help: "Number of GPU or CUDA devices"
    cacheable: false
    widget: number_field
    max: 2
    min: 0
    step: 1
    value: 0
  # Memory allocation
  num_mem:
    label: Memory (GB/Gigabytes)
    help: Total memory is shared by all assigned threads!
    cacheable: false
    widget: number_field
    max: 64
    min: 4
    step: 1
    value: 4
  email:
    label: Email Address
    help: |
      Enter your email address if you would like to receive job notifications (start, finish, fail, etc.). Otherwise, leave the field empty.
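As a side note, the partition detection from the header can be sanity-checked standalone, outside OOD. A minimal sketch, assuming the script sits next to partitions.json on a node where sinfo is on the PATH:

```ruby
# Standalone check of the partition detection used in form.yml.erb.
require 'open3'
require 'json'

partitions_hash = JSON.parse(File.read(File.join(__dir__, 'partitions.json')))
output, status = Open3.capture2('bash', stdin_data: 'sinfo -h --format="%P"')
abort 'sinfo failed' unless status.success?

partitions = output.split("\n")
                   .map { |q| q.delete('*') }            # strip the default-partition marker
                   .select { |q| partitions_hash.key?(q) }
p partitions
```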
When I start a Jupyter notebook from the UI and choose 4 CPUs, for example, it allocates 8, while the content of job_script_options.json shows the correct amount of 4:
"native": [
  "--nodes", "1",
  "--ntasks", "1",
  "--cpus-per-task", "4",
  "--mem", "16G",
  "--mail-user", "xxxx@xxxx",
  "--mail-type", "BEGIN,END,FAIL",
  "--hint", "nomultithread"
],
"wall_time": 3600,
"queue_name": "progress"
And scontrol show jobs shows:
ReqTRES=cpu=4,mem=4G,node=1,billing=4
**AllocTRES=cpu=8,mem=4G,node=1,billing=8**
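Could this be hyperthreading-related? If the nodes have two hardware threads per core, Slurm allocates whole cores, so a request for 4 CPUs with --hint=nomultithread might show up as 8 allocated threads in AllocTRES. I am not sure, though. The node topology can be checked with:

```bash
# Show hostname, CPU count, and sockets:cores:threads per node.
sinfo -o "%n %c %z"
# Or for a single node (<nodename> is a placeholder):
scontrol show node <nodename> | grep -E 'CoresPerSocket|ThreadsPerCore'
```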
Does somebody know why this is happening?

Kind regards,
Kreefd