-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-22292][Mesos] Added spark.mem.max support for Mesos #19510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5f3b35b
9714694
dc4f441
e82f828
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -64,6 +64,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| private val MAX_SLAVE_FAILURES = 2 | ||||
|
|
||||
| private val maxCoresOption = conf.getOption("spark.cores.max").map(_.toInt) | ||||
| private val maxMemOption = conf.getOption("spark.mem.max").map(Utils.memoryStringToMb) | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we defend against minimum values and fail fast? For example default executor memory is 1.4MB. We could calculate the value returned by MesosSchedulerUtils.executorMemory. I don't think these values calculated in canLaunchTask ever change. Same applies for cpus.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @skonto Then at here, we parse the option, check the minimum and if it is too small, throw exception?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exception I think would be ok, the idea if something is never going to work let the user know, especially for the novice user. In general, the minimum would be a warning if we dont want an exception thrown.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should I add the check with this PR or a separate one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to have a check similar to Line 73 in 06df34d
|
||||
|
|
||||
| private val executorCoresOption = conf.getOption("spark.executor.cores").map(_.toInt) | ||||
|
|
||||
|
|
@@ -76,6 +77,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| cores - (cores % minCoresPerExecutor) | ||||
| } | ||||
|
|
||||
| private val maxMem = maxMemOption.getOrElse(Int.MaxValue) | ||||
|
|
||||
| private val useFetcherCache = conf.getBoolean("spark.mesos.fetcherCache.enable", false) | ||||
|
|
||||
| private val maxGpus = conf.getInt("spark.mesos.gpus.max", 0) | ||||
|
|
@@ -95,8 +98,10 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
|
|
||||
| // Cores we have acquired with each Mesos task ID | ||||
| private val coresByTaskId = new mutable.HashMap[String, Int] | ||||
| private val memByTaskId = new mutable.HashMap[String, Int] | ||||
| private val gpusByTaskId = new mutable.HashMap[String, Int] | ||||
| private var totalCoresAcquired = 0 | ||||
| private var totalMemAcquired = 0 | ||||
| private var totalGpusAcquired = 0 | ||||
|
|
||||
| // The amount of time to wait for locality scheduling | ||||
|
|
@@ -149,6 +154,10 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| private val rejectOfferDurationForReachedMaxCores = | ||||
| getRejectOfferDurationForReachedMaxCores(sc.conf) | ||||
|
|
||||
| // Reject offers when we reached the maximum amount of memory for this framework | ||||
| private val rejectOfferDurationForReachedMaxMem = | ||||
| getRejectOfferDurationForReachedMaxMem(sc.conf) | ||||
|
|
||||
| // A client for talking to the external shuffle service | ||||
| private val mesosExternalShuffleClient: Option[MesosExternalShuffleClient] = { | ||||
| if (shuffleServiceEnabled) { | ||||
|
|
@@ -398,6 +407,12 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| offer, | ||||
| Some("reached spark.cores.max"), | ||||
| Some(rejectOfferDurationForReachedMaxCores)) | ||||
| } else if (totalMemAcquired >= maxMem) { | ||||
| // Reject an offer for a configurable amount of time to avoid starving other frameworks | ||||
| declineOffer(driver, | ||||
| offer, | ||||
| Some("reached spark.mem.max"), | ||||
| Some(rejectOfferDurationForReachedMaxMem)) | ||||
| } else { | ||||
| declineOffer( | ||||
| driver, | ||||
|
|
@@ -462,7 +477,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| tasks(offer.getId) ::= taskBuilder.build() | ||||
| remainingResources(offerId) = resourcesLeft.asJava | ||||
| totalCoresAcquired += taskCPUs | ||||
| totalMemAcquired += taskMemory | ||||
| coresByTaskId(taskId) = taskCPUs | ||||
| memByTaskId(taskId) = taskMemory | ||||
| if (taskGPUs > 0) { | ||||
| totalGpusAcquired += taskGPUs | ||||
| gpusByTaskId(taskId) = taskGPUs | ||||
|
|
@@ -511,6 +528,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| cpus <= offerCPUs && | ||||
| cpus + totalCoresAcquired <= maxCores && | ||||
| mem <= offerMem && | ||||
| mem + totalMemAcquired <= maxMem && | ||||
| numExecutors < executorLimit && | ||||
| slaves.get(slaveId).map(_.taskFailures).getOrElse(0) < MAX_SLAVE_FAILURES && | ||||
| meetsPortRequirements && | ||||
|
|
@@ -584,6 +602,11 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( | |||
| totalCoresAcquired -= cores | ||||
| coresByTaskId -= taskId | ||||
| } | ||||
| // Also remove the memory we have remembered for this task, if it's in the hashmap | ||||
| for (mem <- memByTaskId.get(taskId)) { | ||||
| totalMemAcquired -= mem | ||||
| memByTaskId -= taskId | ||||
| } | ||||
| // Also remove the gpus we have remembered for this task, if it's in the hashmap | ||||
| for (gpus <- gpusByTaskId.get(taskId)) { | ||||
| totalGpusAcquired -= gpus | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add "across the cluster (not from each machine)". And, something about there is no maximum if this property is not set.