View | Details | Raw Unified | Return to ticket 954
Collapse All | Expand All

(-)a/doc/man/man1/salloc.1 (-2 / +6 lines)
Lines 653-660 round\-robin fashion). Link Here
653
.TP
653
.TP
654
\fB\-\-mail\-type\fR=<\fItype\fR>
654
\fB\-\-mail\-type\fR=<\fItype\fR>
655
Notify user by email when certain event types occur.
655
Notify user by email when certain event types occur.
656
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, and ALL (any state
656
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, ALL (equivalent to
657
change). The user to be notified is indicated with \fB\-\-mail\-user\fR.
657
BEGIN, END, FAIL and REQUEUE), TIME_LIMIT, TIME_LIMIT_90 (reached 90 percent of
658
time limit), TIME_LIMIT_80 (reached 80 percent of time limit), and TIME_LIMIT_50
659
(reached 50 percent of time limit).
660
Multiple \fItype\fR values may be specified in a comma separated list.
661
The user to be notified is indicated with \fB\-\-mail\-user\fR.
658
662
659
.TP
663
.TP
660
\fB\-\-mail\-user\fR=<\fIuser\fR>
664
\fB\-\-mail\-user\fR=<\fIuser\fR>
(-)a/doc/man/man1/sbatch.1 (-2 / +6 lines)
Lines 740-747 round\-robin fashion). Link Here
740
.TP
740
.TP
741
\fB\-\-mail\-type\fR=<\fItype\fR>
741
\fB\-\-mail\-type\fR=<\fItype\fR>
742
Notify user by email when certain event types occur.
742
Notify user by email when certain event types occur.
743
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, and ALL (any state
743
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, ALL (equivalent to
744
change). The user to be notified is indicated with \fB\-\-mail\-user\fR.
744
BEGIN, END, FAIL and REQUEUE), TIME_LIMIT, TIME_LIMIT_90 (reached 90 percent of
745
time limit), TIME_LIMIT_80 (reached 80 percent of time limit), and TIME_LIMIT_50
746
(reached 50 percent of time limit).
747
Multiple \fItype\fR values may be specified in a comma separated list.
748
The user to be notified is indicated with \fB\-\-mail\-user\fR.
745
749
746
.TP
750
.TP
747
\fB\-\-mail\-user\fR=<\fIuser\fR>
751
\fB\-\-mail\-user\fR=<\fIuser\fR>
(-)a/doc/man/man1/srun.1 (-2 / +6 lines)
Lines 799-806 round\-robin fashion). Link Here
799
.TP
799
.TP
800
\fB\-\-mail\-type\fR=<\fItype\fR>
800
\fB\-\-mail\-type\fR=<\fItype\fR>
801
Notify user by email when certain event types occur.
801
Notify user by email when certain event types occur.
802
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, and ALL (any state
802
Valid \fItype\fR values are BEGIN, END, FAIL, REQUEUE, ALL (equivalent to
803
change). The user to be notified is indicated with \fB\-\-mail\-user\fR.
803
BEGIN, END, FAIL and REQUEUE), TIME_LIMIT, TIME_LIMIT_90 (reached 90 percent of
804
time limit), TIME_LIMIT_80 (reached 80 percent of time limit), and TIME_LIMIT_50
805
(reached 50 percent of time limit).
806
Multiple \fItype\fR values may be specified in a comma separated list.
807
The user to be notified is indicated with \fB\-\-mail\-user\fR.
804
808
805
.TP
809
.TP
806
\fB\-\-mail\-user\fR=<\fIuser\fR>
810
\fB\-\-mail\-user\fR=<\fIuser\fR>
(-)a/slurm/slurm.h.in (+4 lines)
Lines 287-292 enum job_states { Link Here
287
#define MAIL_JOB_END      0x0002	/* notify when job ends */
287
#define MAIL_JOB_END      0x0002	/* notify when job ends */
288
#define MAIL_JOB_FAIL     0x0004	/* notify if job fails */
288
#define MAIL_JOB_FAIL     0x0004	/* notify if job fails */
289
#define MAIL_JOB_REQUEUE  0x0008	/* notify if job requeued */
289
#define MAIL_JOB_REQUEUE  0x0008	/* notify if job requeued */
290
#define MAIL_JOB_TIME100  0x0010	/* notify on reaching 100% of time limit */
291
#define MAIL_JOB_TIME90   0x0020	/* notify on reaching 90% of time limit */
292
#define MAIL_JOB_TIME80   0x0040	/* notify on reaching 80% of time limit */
293
#define MAIL_JOB_TIME50   0x0080	/* notify on reaching 50% of time limit */
290
294
291
#define NICE_OFFSET 10000	/* offset for job's nice value */
295
#define NICE_OFFSET 10000	/* offset for job's nice value */
292
296
(-)a/src/common/proc_args.c (-28 / +77 lines)
Lines 710-751 bool verify_hint(const char *arg, int *min_sockets, int *min_cores, Link Here
710
710
711
uint16_t parse_mail_type(const char *arg)
711
uint16_t parse_mail_type(const char *arg)
712
{
712
{
713
	uint16_t rc;
713
	char *buf, *tok, *save_ptr = NULL;
714
714
	uint16_t rc = 0;
715
	if (strcasecmp(arg, "BEGIN") == 0)
715
716
		rc = MAIL_JOB_BEGIN;
716
	if (!arg)
717
	else if  (strcasecmp(arg, "END") == 0)
717
		return rc;
718
		rc = MAIL_JOB_END;
718
719
	else if (strcasecmp(arg, "FAIL") == 0)
719
	buf = xstrdup(arg);
720
		rc = MAIL_JOB_FAIL;
720
	tok = strtok_r(buf, ",", &save_ptr);
721
	else if (strcasecmp(arg, "REQUEUE") == 0)
721
	while (tok) {
722
		rc = MAIL_JOB_REQUEUE;
722
		if (strcasecmp(tok, "BEGIN") == 0)
723
	else if (strcasecmp(arg, "ALL") == 0)
723
			rc |= MAIL_JOB_BEGIN;
724
		rc = MAIL_JOB_BEGIN |  MAIL_JOB_END |  MAIL_JOB_FAIL |
724
		else if  (strcasecmp(tok, "END") == 0)
725
		     MAIL_JOB_REQUEUE;
725
			rc |= MAIL_JOB_END;
726
	else
726
		else if (strcasecmp(tok, "FAIL") == 0)
727
		rc = 0;		/* failure */
727
			rc |= MAIL_JOB_FAIL;
728
		else if (strcasecmp(tok, "REQUEUE") == 0)
729
			rc |= MAIL_JOB_REQUEUE;
730
		else if (strcasecmp(tok, "ALL") == 0)
731
			rc |= MAIL_JOB_BEGIN |  MAIL_JOB_END |  MAIL_JOB_FAIL |
732
			      MAIL_JOB_REQUEUE;
733
		else if (strcasecmp(tok, "TIME_LIMIT") == 0)
734
			rc |= MAIL_JOB_TIME100;
735
		else if (strcasecmp(tok, "TIME_LIMIT_90") == 0)
736
			rc |= MAIL_JOB_TIME90;
737
		else if (strcasecmp(tok, "TIME_LIMIT_80") == 0)
738
			rc |= MAIL_JOB_TIME80;
739
		else if (strcasecmp(tok, "TIME_LIMIT_50") == 0)
740
			rc |= MAIL_JOB_TIME50;
741
		tok = strtok_r(NULL, ",", &save_ptr);
742
	}
743
	xfree(buf);
728
744
729
	return rc;
745
	return rc;
730
}
746
}
731
char *print_mail_type(const uint16_t type)
747
char *print_mail_type(const uint16_t type)
732
{
748
{
749
	static char buf[256];
750
751
	buf[0] = '\0';
752
733
	if (type == 0)
753
	if (type == 0)
734
		return "NONE";
754
		return "NONE";
735
755
736
	if (type == MAIL_JOB_BEGIN)
756
	if (type & MAIL_JOB_BEGIN) {
737
		return "BEGIN";
757
		if (buf[0])
738
	if (type == MAIL_JOB_END)
758
			strcat(buf, ",");
739
		return "END";
759
		strcat(buf, "BEGIN");
740
	if (type == MAIL_JOB_FAIL)
760
	}
741
		return "FAIL";
761
	if (type & MAIL_JOB_END) {
742
	if (type == MAIL_JOB_REQUEUE)
762
		if (buf[0])
743
		return "REQUEUE";
763
			strcat(buf, ",");
744
	if (type == (MAIL_JOB_BEGIN |  MAIL_JOB_END |  MAIL_JOB_FAIL |
764
		strcat(buf, "END");
745
		     MAIL_JOB_REQUEUE))
765
	}
746
		return "ALL";
766
	if (type & MAIL_JOB_FAIL) {
747
767
		if (buf[0])
748
	return "MULTIPLE";
768
			strcat(buf, ",");
769
		strcat(buf, "FAIL");
770
	}
771
	if (type & MAIL_JOB_REQUEUE) {
772
		if (buf[0])
773
			strcat(buf, ",");
774
		strcat(buf, "REQUEUE");
775
	}
776
	if (type & MAIL_JOB_TIME50) {
777
		if (buf[0])
778
			strcat(buf, ",");
779
		strcat(buf, "TIME_LIMIT_50");
780
	}
781
	if (type & MAIL_JOB_TIME80) {
782
		if (buf[0])
783
			strcat(buf, ",");
784
		strcat(buf, "TIME_LIMIT_80");
785
	}
786
	if (type & MAIL_JOB_TIME90) {
787
		if (buf[0])
788
			strcat(buf, ",");
789
		strcat(buf, "TIME_LIMIT_90");
790
	}
791
	if (type & MAIL_JOB_TIME100) {
792
		if (buf[0])
793
			strcat(buf, ",");
794
		strcat(buf, "TIME_LIMIT");
795
	}
796
797
	return buf;
749
}
798
}
750
799
751
static void
800
static void
(-)a/src/slurmctld/agent.c (+23 lines)
Lines 1499-1504 static char *_mail_type_str(uint16_t mail_type) Link Here
1499
		return "Failed";
1499
		return "Failed";
1500
	if (mail_type == MAIL_JOB_REQUEUE)
1500
	if (mail_type == MAIL_JOB_REQUEUE)
1501
		return "Requeued";
1501
		return "Requeued";
1502
	if (mail_type == MAIL_JOB_TIME100)
1503
		return "Reached time limit";
1504
	if (mail_type == MAIL_JOB_TIME90)
1505
		return "Reached 90% of time limit";
1506
	if (mail_type == MAIL_JOB_TIME80)
1507
		return "Reached 80% of time limit";
1508
	if (mail_type == MAIL_JOB_TIME50)
1509
		return "Reached 50% of time limit";
1502
	return "unknown";
1510
	return "unknown";
1503
}
1511
}
1504
1512
Lines 1513-1518 static void _set_job_time(struct job_record *job_ptr, uint16_t mail_type, Link Here
1513
		interval = job_ptr->start_time - job_ptr->details->submit_time;
1521
		interval = job_ptr->start_time - job_ptr->details->submit_time;
1514
		snprintf(buf, buf_len, ", Queued time ");
1522
		snprintf(buf, buf_len, ", Queued time ");
1515
		secs2time_str(interval, buf+14, buf_len-14);
1523
		secs2time_str(interval, buf+14, buf_len-14);
1524
		return;
1516
	}
1525
	}
1517
1526
1518
	if (((mail_type == MAIL_JOB_END) || (mail_type == MAIL_JOB_FAIL) ||
1527
	if (((mail_type == MAIL_JOB_END) || (mail_type == MAIL_JOB_FAIL) ||
Lines 1525-1530 static void _set_job_time(struct job_record *job_ptr, uint16_t mail_type, Link Here
1525
			interval = job_ptr->end_time - job_ptr->start_time;
1534
			interval = job_ptr->end_time - job_ptr->start_time;
1526
		snprintf(buf, buf_len, ", Run time ");
1535
		snprintf(buf, buf_len, ", Run time ");
1527
		secs2time_str(interval, buf+11, buf_len-11);
1536
		secs2time_str(interval, buf+11, buf_len-11);
1537
		return;
1538
	}
1539
1540
	if (((mail_type == MAIL_JOB_TIME100) ||
1541
	     (mail_type == MAIL_JOB_TIME90)  ||
1542
	     (mail_type == MAIL_JOB_TIME80)  ||
1543
	     (mail_type == MAIL_JOB_TIME50)) && job_ptr->start_time) {
1544
		if (job_ptr->suspend_time) {
1545
			interval  = time(NULL) - job_ptr->suspend_time;
1546
			interval += job_ptr->pre_sus_time;
1547
		} else
1548
			interval = time(NULL) - job_ptr->start_time;
1549
		snprintf(buf, buf_len, ", Run time ");
1550
		secs2time_str(interval, buf+11, buf_len-11);
1528
	}
1551
	}
1529
}
1552
}
1530
1553
(-)a/src/slurmctld/job_mgr.c (+23 lines)
Lines 5989-5994 void job_time_limit(void) Link Here
5989
				job_ptr->warn_signal = 0;
5989
				job_ptr->warn_signal = 0;
5990
				job_ptr->warn_time = 0;
5990
				job_ptr->warn_time = 0;
5991
			}
5991
			}
5992
			if ((job_ptr->mail_type & MAIL_JOB_TIME100) &&
5993
			    (now >= job_ptr->end_time)) {
5994
				job_ptr->mail_type &= (~MAIL_JOB_TIME100);
5995
				mail_job_info(job_ptr, MAIL_JOB_TIME100);
5996
			}
5997
			if ((job_ptr->mail_type & MAIL_JOB_TIME90) &&
5998
			    (now + (job_ptr->time_limit * 60 * 0.1) >=
5999
			     job_ptr->end_time)) {
6000
				job_ptr->mail_type &= (~MAIL_JOB_TIME90);
6001
				mail_job_info(job_ptr, MAIL_JOB_TIME90);
6002
			}
6003
			if ((job_ptr->mail_type & MAIL_JOB_TIME80) &&
6004
			    (now + (job_ptr->time_limit * 60 * 0.2) >=
6005
			     job_ptr->end_time)) {
6006
				job_ptr->mail_type &= (~MAIL_JOB_TIME80);
6007
				mail_job_info(job_ptr, MAIL_JOB_TIME80);
6008
			}
6009
			if ((job_ptr->mail_type & MAIL_JOB_TIME50) &&
6010
			    (now + (job_ptr->time_limit * 60 * 0.5) >=
6011
			     job_ptr->end_time)) {
6012
				job_ptr->mail_type &= (~MAIL_JOB_TIME50);
6013
				mail_job_info(job_ptr, MAIL_JOB_TIME50);
6014
			}
5992
			if (job_ptr->end_time <= over_run) {
6015
			if (job_ptr->end_time <= over_run) {
5993
				last_job_update = now;
6016
				last_job_update = now;
5994
				info("Time limit exhausted for JobId=%u",
6017
				info("Time limit exhausted for JobId=%u",

Return to ticket 954