Commit 1e24bf6e authored Aug 11, 2017 by Jens Korinth

Slurm fixes

parent 37331e13

Changes: 4 files
common/slurm.job.template
@@ -6,13 +6,9 @@
 #SBATCH --mem-per-cpu=@@MEM_PER_CPU@@
 #SBATCH -n @@CPUS@@
 #SBATCH -t @@TIMELIMIT@@
-#SBATCh --comment="@@COMMENT@@"
+#SBATCH --comment="@@COMMENT@@"
-export TAPASCO_HOME=@@TAPASCO_HOME@@
-source ~/vivado.sh
-pushd $TAPASCO_HOME
-source ./setup.sh
-popd
+source @@TAPASCO_HOME@@/setup.sh
 # user commands begin here
 echo "SLURM job #$SLURM_JOB_ID started at $(date)"
src/main/scala/tapasco/slurm/Slurm.scala
@@ -113,14 +113,15 @@ final object Slurm extends Publisher {
    * @param script Job script file to schedule via `sbatch`.
    * @return Either a positive integer (SLURM id), or an Exception.
    **/
-  def apply(script: Path, retries: Int = 3): Option[Int] = catchAllDefault[Option[Int]](None, "Slurm scheduling failed: ") {
+  def apply(script: Path, retries: Int = 10): Option[Int] = catchAllDefault[Option[Int]](None, "Slurm scheduling failed: ") {
     val cmd = "sbatch %s".format(script.toAbsolutePath().normalize().toString)
     logger.debug("running slurm batch job: '%s'".format(cmd))
     val res = cmd.!!
     val id = slurmSubmissionAck.findFirstMatchIn(res) map (_ group (1) toInt)
     if (id.isEmpty) {
       if (retries > 0) {
-        Thread.sleep(slurmRetryDelay) // wait 10 secs
+        // wait for 10 secs + random up to 5 secs to avoid congestion
+        Thread.sleep(slurmRetryDelay + scala.util.Random.nextInt() % (slurmRetryDelay / 2))
         apply(script, retries - 1)
       } else {
         throw new SlurmException(script.toString, res)
       }
     } else {
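Note on the retry change: adding a random offset (jitter) to the delay keeps many simultaneously failing submissions from retrying in lockstep. A standalone sketch of the same retry-with-jitter pattern; the function name, the 10 s base delay, and the success check are assumptions for illustration, not definitions from Slurm.scala:

import scala.sys.process._
import scala.util.Random

// Hypothetical standalone variant: retry a submission a limited number of times,
// spacing attempts by a base delay plus up to half that delay of random jitter.
def submitWithRetry(cmd: String, retries: Int = 10, baseDelayMs: Long = 10000): Option[String] = {
  val out = cmd.!!                              // run the command, capture stdout
  if (out.contains("Submitted batch job"))      // sbatch's acknowledgement line
    Some(out)
  else if (retries > 0) {
    Thread.sleep(baseDelayMs + Random.nextInt((baseDelayMs / 2).toInt))
    submitWithRetry(cmd, retries - 1, baseDelayMs)
  } else None
}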
src/main/scala/tapasco/task/ComposeTask.scala
@@ -28,7 +28,7 @@ class ComposeTask(composition: Composition,
   private[this] implicit val _logger = de.tu_darmstadt.cs.esa.tapasco.Logging.logger(getClass)
   private[this] val _slurm = Slurm.enabled
   private[this] var _composerResult: Option[Composer.Result] = None
-  private[this] val _outDir = cfg.outputDir(composition, target, designFrequency)
+  private[this] val _outDir = cfg.outputDir(composition, target, designFrequency, features getOrElse Seq())
   private[this] val _logFile = logFile getOrElse "%s/%s.log".format(_outDir, "tapasco")
   private[this] val _errorLogFile = Paths.get(_logFile).resolveSibling("slurm-compose.errors.log")
@@ -85,13 +85,13 @@ class ComposeTask(composition: Composition,
       )
       // define SLURM job
       val job = Slurm.Job(
-        name = "compose-%s-%s-%s-%1.2f".format(composition.id, target.ad.name, target.pd.name, designFrequency),
+        name = elementdesc,
         slurmLog = slgFile.toString,
         errorLog = _errorLogFile.toString,
         consumer = this,
         maxHours = ComposeTask.MAX_COMPOSE_HOURS,
         commands = Seq("tapasco --configFile %s".format(cfgFile.toString)),
-        comment = Some("%s".format(composition.composition map (ce => "%s % d".format(ce.kernel, ce.count)) mkString ", "))
+        comment = Some(_outDir.toString)
       )
       // generate non-SLURM config with single job
       val newCfg = cfg
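For orientation, the named arguments in the Slurm.Job(...) call suggest the rough shape of the job descriptor; a hypothetical sketch with field types inferred from this call site only (the actual definition in tapasco.slurm.Slurm may differ):

// Hypothetical reconstruction from the call site above, not the real definition.
final case class Job(
  name: String,                   // SLURM job name
  slurmLog: String,               // path of the SLURM stdout log
  errorLog: String,               // path of the SLURM stderr log
  consumer: Any,                  // task that consumes job status updates (type is a guess)
  maxHours: Int,                  // time limit in hours
  commands: Seq[String],          // commands to run inside the job script
  comment: Option[String] = None  // passed through to #SBATCH --comment
)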
@@ -144,7 +144,7 @@ class ComposeTask(composition: Composition,
 object ComposeTask {
   import scala.io._
   import de.tu_darmstadt.cs.esa.tapasco.reports._
-  private final val MAX_COMPOSE_HOURS = 48
+  private final val MAX_COMPOSE_HOURS = 23
   private final val RE_RESULT = """compose run .*result: ([^,]+)""".r.unanchored
   private final val RE_LOG = """compose run .*result: \S+.*logfile: '([^']+)'""".r.unanchored
   private final val RE_TIMING = """compose run .*result: \S+.*timing report: '([^']+)'""".r.unanchored
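The RE_* values are unanchored regexes used to pull results back out of a compose log. A minimal sketch of that extraction pattern; the log line is an invented example, not actual TaPaSCo output:

// Unanchored regexes match anywhere in the input, so they can be used
// directly in a pattern match against a whole log line.
val ReResult = """compose run .*result: ([^,]+)""".r.unanchored

val line = "2017-08-11 10:15:00 compose run #1 result: SUCCESS, logfile: '/tmp/tapasco.log'"
val status = line match {
  case ReResult(s) => Some(s)   // extracts "SUCCESS"
  case _           => None
}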
src/main/scala/tapasco/task/Tasks.scala
@@ -3,7 +3,6 @@ import de.tu_darmstadt.cs.esa.tapasco.util.Publisher
 import scala.collection.JavaConverters._
 import scala.concurrent.Future
 import scala.util.{ Failure, Success }
-import scala.concurrent.ExecutionContext.Implicits.global
 import java.util.concurrent.LinkedBlockingQueue
 import java.time.LocalDateTime
@@ -87,6 +86,9 @@ class Tasks extends Publisher {
   type Event = Tasks.Event
   import Tasks.Events._
   private[this] final val _logger = de.tu_darmstadt.cs.esa.tapasco.Logging.logger(getClass)
+  private[this] final implicit val _exectx = scala.concurrent.ExecutionContext.fromExecutorService(
+    java.util.concurrent.Executors.newCachedThreadPool())
   override def +=(el: EventListener): Unit = {
     super.+=(el)
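Background on this change: the global execution context removed above is backed by a pool sized to the number of CPU cores, so long-running or blocking tasks can starve it; a cached thread pool grows on demand. A small sketch of the replacement pattern (the blocking workload is an invented placeholder):

import java.util.concurrent.Executors
import scala.concurrent.{ ExecutionContext, Future }

// Dedicated pool instead of scala.concurrent.ExecutionContext.Implicits.global:
// threads are created as needed, so blocking futures do not exhaust a fixed-size pool.
implicit val exectx = ExecutionContext.fromExecutorService(Executors.newCachedThreadPool())

// Each future may block for a long time (e.g., waiting on a batch job)
// without preventing the others from being started.
val tasks = (1 to 32) map { i =>
  Future { Thread.sleep(1000); i }   // placeholder for a long, blocking task
}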