-bash-4.1$ sacct -r gpu_open JobID JobName Partition Account AllocCPUS State NodeList ExitCode ------------ ---------- ---------- ---------- ---------- ---------- --------------- -------- 756822 wrap gpu_open e154466_g+ 1 RUNNING dcalph134 0:0 -bash-4.1$ squeue -w dcalph134 JOBID USER ST PARTITION NAME COMMAND SUBMIT_TIME CPUS GRES NODES NODELIST(REASON) -bash-4.1$ sacct -r gpu_opne JobID JobName Partition Account AllocCPUS State NodeList ExitCode ------------ ---------- ---------- ---------- ---------- ---------- --------------- -------- -bash-4.1$ sacct -r gpu_open JobID JobName Partition Account AllocCPUS State NodeList ExitCode ------------ ---------- ---------- ---------- ---------- ---------- --------------- -------- 756822 wrap gpu_open e154466_g+ 1 RUNNING dcalph134 0:0 -bash-4.1$ scontrol show jobid=756822 JobId=756822 JobName=wrap UserId=e154466(19383) GroupId=boks_users(2080) MCS_label=N/A Priority=1349999 Nice=0 Account=e154466_gpu QOS=normal JobState=RUNNING Reason=None Dependency=(null) Requeue=0 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0 RunTime=17:10:47 TimeLimit=UNLIMITED TimeMin=N/A SubmitTime=May 22 19:45 EligibleTime=May 22 19:45 StartTime=May 22 19:45 EndTime=Unknown Deadline=N/A PreemptTime=None SuspendTime=None SecsPreSuspend=0 Partition=gpu_open AllocNode:Sid=DCALPH000:93594 ReqNodeList=(null) ExcNodeList=(null) NodeList=dcalph134 BatchHost=dcalph134 NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:* TRES=cpu=1,node=1 Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=* MinCPUsNode=1 MinMemoryNode=0 MinTmpDiskNode=0 Features=(null) DelayBoot=00:00:00 Gres=(null) Reservation=(null) OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null) Command=(null) WorkDir=/user/e154466 Comment={"script":"#!/bin/sh\n# This script was created by sbatch --wrap.\n\nsleep 
100000\n","cmdline":"","comment":"","env":{"MODULE_VERSION_STACK":"3.2.10","MANPATH":"/cm/shared/apps/slurm/17.02.10/man:/usr/share/man/overrides:/usr/share/man/en:/usr/share/man:/opt/boksm/man:/usr/local/share/man:/cm/local/apps/environment-modules/current/share/man","HOSTNAME":"DCALPH000","TERM":"xterm","SHELL":"/bin/bash","HISTSIZE":"1000","SSH_CLIENT":"172.24.4.121 51346 22","LIBRARY_PATH":"/cm/shared/apps/slurm/17.02.10/lib64/slurm:/cm/shared/apps/slurm/17.02.10/lib64","QTDIR":"/usr/lib64/qt-3.3","QTINC":"/usr/lib64/qt-3.3/include","SSH_TTY":"/dev/pts/43","SQUEUE_PARTITION":"test,interact,license,lic_low,normal,low,open","USER":"e154466","LD_LIBRARY_PATH":"/usr/local/cuda-9.1/lib64:/cm/shared/apps/slurm/17.02.10/lib64/slurm:/cm/shared/apps/slurm/17.02.10/lib64","LS_COLORS":"rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arj=01;31:*.taz=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lz=01;31:*.xz=01;31:*.bz2=01;31:*.tbz=01;31:*.tbz2=01;31:*.bz=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.rar=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.jpg=01;35:*.jpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.axv=01;35:*.anx=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.w
av=01;36:*.axa=01;36:*.oga=01;36:*.spx=01;36:*.xspf=01;36:","SINFO_FORMAT":"%n %.10T %.5a %.8e %.7m %.4c %.8O %C","CPATH":"/cm/shared/apps/slurm/17.02.10/include","SACCT_FORMAT":"JobID,JobName,Partition,Account,AllocCPUS,State,NodeList,ExitCode","MODULE_VERSION":"3.2.10","MAIL":"/var/spool/mail/e154466","PATH":"/usr/local/cuda-9.1/bin:/sw/QuantumWise/VNL-ATK-2017.1/bin:/sw/QuantumWise/VNL-ATK-2017.1/bin:/sw/QuantumWise/VNL-ATK-2017.1/bin:/cm/shared/apps/slurm/17.02.10/sbin:/cm/shared/apps/slurm/17.02.10/bin:/usr/lib64/qt-3.3/bin:/bin:/usr/bin:/opt/boksm/bin:/usr/local/sbin:/usr/sbin:/sbin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/3.2.10/bin:/opt/dell/srvadmin/bin","SQUEUE_SORT":"U,P,N","PWD":"/user/e154466","_LMFILES_":"/cm/shared/modulefiles/slurm/17.02.10:/cm/shared/modulefiles/app_env/cuda-9.1","LANG":"en_US.UTF-8","MODULEPATH":"/cm/local/modulefiles:/cm/shared/modulefiles","ESI_HOME":"/hpc_lsf/application/ESI_Software","LOADEDMODULES":"slurm/17.02.10:app_env/cuda-9.1","SSH_ASKPASS":"/usr/libexec/openssh/gnome-ssh-askpass","HISTCONTROL":"ignoredups","SQUEUE_FORMAT2":"jobid:7,username:9,statecompact:3,partition:13,name:15,command:10,submittime:13,numcpus:5,gres:11,numnodes:6,reasonlist:50","SHLVL":"1","HOME":"/user/e154466","LOGNAME":"e154466","QTLIB":"/usr/lib64/qt-3.3/lib","CVS_RSH":"ssh","SSH_CONNECTION":"172.24.4.121 51346 10.41.26.200 22","MODULESHOME":"/cm/local/apps/environment-modules/3.2.10/Modules/3.2.10","SLURM_TIME_FORMAT":"%b %e %k:%M","LESSOPEN":"||/usr/bin/lesspipe.sh %s","G_BROKEN_FILENAMES":"1","BASH_FUNC_module()":"() { eval `/cm/local/apps/environment-modules/3.2.10/Modules/$MODULE_VERSION/bin/modulecmd bash 
$*`\n}","_":"/cm/shared/apps/slurm/17.02.10/bin/sbatch","SLURM_NPROCS":"1","SLURM_NTASKS":"1","SLURM_JOB_NAME":"wrap","SLURM_RLIMIT_CPU":"18446744073709551615","SLURM_RLIMIT_FSIZE":"18446744073709551615","SLURM_RLIMIT_DATA":"18446744073709551615","SLURM_RLIMIT_STACK":"18446744073709551615","SLURM_RLIMIT_CORE":"0","SLURM_RLIMIT_RSS":"18446744073709551615","SLURM_RLIMIT_NPROC":"2066973","SLURM_RLIMIT_NOFILE":"65536","SLURM_RLIMIT_MEMLOCK":"18446744073709551615","SLURM_RLIMIT_AS":"18446744073709551615","SLURM_PRIO_PROCESS":"0","SLURM_SUBMIT_DIR":"/user/e154466","SLURM_SUBMIT_HOST":"DCALPH000","SLURM_UMASK":"0022"}} StdErr=/user/e154466/slurm-756822.out StdIn=/dev/null StdOut=/user/e154466/slurm-756822.out Power= -bash-4.1$ squeue -w dcalph134 JOBID USER ST PARTITION NAME COMMAND SUBMIT_TIME CPUS GRES NODES NODELIST(REASON) -bash-4.1$ squeue -u e154466 JOBID USER ST PARTITION NAME COMMAND SUBMIT_TIME CPUS GRES NODES NODELIST(REASON) 755497 e154466 R test wrap (null) May 22 16:52 1 (null) 1 dcalph132 755498 e154466 R test wrap (null) May 22 16:53 1 gpu:2 1 dcalph198 -bash-4.1$ Why squeue is not showing #756822 ?
Created attachment 6923 [details] slurm.conf
Is there any debugging that could help? The slurmctld/slurmd logs seem useless here (at least I do not see any clue at all in them). Will attach them just in case...
Created attachment 6926 [details] slurmctld log
Created attachment 6927 [details] slurmd log from dcalph134
Created attachment 6928 [details] gres.conf
Tangential to your question, but in your slurm.conf you have LaunchParameters=slurmctld_memlock_all slurmctld_memlock_all doesn't exist - you're looking for "slurmstepd_memlock_all" SQUEUE environment variables affect what squeue will print, so I'd like to know what those are. What is the output of printenv | grep SQUEUE
(In reply to Marshall Garey from comment #7) > Tangential to your question, but in your slurm.conf you have > > LaunchParameters=slurmctld_memlock_all > > slurmctld_memlock_all doesn't exist - you're looking for > "slurmstepd_memlock_all" Thanks! > > > SQUEUE environment variables affect what squeue will print, so I'd like to > what those are. What is the output of > > printenv | grep SQUEUE Oops, indeed SQUEUE_PARTITION was set. Not a bug. I am closing this.
SQUEUE_PARTITION was set. Not a bug.