[学习交流] 【上海校区】spark-3.0 application 调度算法解析

spark 各个版本的 application 调度算法还是有着明显的不同之处的。从 spark 1.3.0 到 spark 1.6.1、spark 2.0，再到现在最新的 spark 3.0，调度算法有了一定的修改。下面大家一起学习一下最新的 spark 版本 spark-3.0 的 Application 调度机制。

/**
 * Walks the queue of waiting applications in order and, for each one that still needs at
 * least one executor's worth of cores, picks the usable workers, decides how many cores
 * each of them contributes, and then allocates those cores as executors.
 */
private def startExecutorsOnWorkers(): Unit = {
  // Plain FIFO scheduling: the first waiting app is served first, then the second, and so on.
  waitingApps.foreach { app =>
    // Cores per executor come from the application description when set; otherwise 1 is used.
    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
    // If the app needs fewer cores than one executor takes, nothing is allocated for it.
    if (app.coresLeft >= coresPerExecutor) {
      // Candidate workers: ALIVE, able to host one more executor of this app,
      // ordered by free cores with the largest first.
      val aliveWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
      val usableWorkers = aliveWorkers
        .filter(worker => canLaunchExecutor(worker, app.desc))
        .sortBy(_.coresFree)
        .reverse
      if (waitingApps.length == 1 && usableWorkers.isEmpty) {
        logWarning(s"App ${app.id} requires more resource than any of Workers could have.")
      }
      // Decide the per-worker core counts; the spreadOutApps flag is forwarded to the
      // scheduling routine, which uses it to spread executors over many workers.
      val coresPerWorker = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

      // The split is decided; now allocate on every worker that received at least one core.
      usableWorkers.indices.foreach { idx =>
        if (coresPerWorker(idx) > 0) {
          allocateWorkerResourceToExecutors(
            app, coresPerWorker(idx), app.desc.coresPerExecutor, usableWorkers(idx))
        }
      }
    }
  }
}
判断一个 worker 是否可以发布 executor
/**
 * Tells whether `worker` can host one executor of the application described by `desc`,
 * by delegating to `canLaunch` with the per-executor memory, cores and resource requirements.
 */
private def canLaunchExecutor(worker: WorkerInfo, desc: ApplicationDescription): Boolean = {
  val memoryNeeded = desc.memoryPerExecutorMB
  // A single core is requested when the description does not specify cores per executor.
  val coresNeeded = desc.coresPerExecutor.getOrElse(1)
  val resourcesNeeded = desc.resourceReqsPerExecutor
  canLaunch(worker, memoryNeeded, coresNeeded, resourcesNeeded)
}
让我们看一看里面的 canLaunch 方法
/**
 * Checks whether a worker's free capacity covers a request.
 *
 * @param worker               worker whose free memory, cores and resources are inspected
 * @param memoryReq            requested memory, compared against `worker.memoryFree`
 * @param coresReq             requested cores, compared against `worker.coresFree`
 * @param resourceRequirements requested resources, checked against `worker.resourcesAmountFree`
 * @return true only when memory, cores and resources are all sufficient
 */
private def canLaunch(
    worker: WorkerInfo,
    memoryReq: Int,
    coresReq: Int,
    resourceRequirements: Seq[ResourceRequirement]): Boolean = {
  // Free memory must be at least the requested memory.
  val memoryFits = worker.memoryFree >= memoryReq
  // Free cores must be at least the requested cores.
  val coresFit = worker.coresFree >= coresReq
  // Free resources must satisfy every requested resource.
  val resourcesFit =
    ResourceUtils.resourcesMeetRequirements(worker.resourcesAmountFree, resourceRequirements)
  memoryFits && coresFit && resourcesFit
}

回到上面的 scheduleExecutorsOnWorkers
/**
 * Decides how many cores each usable worker should contribute to `app`.
 *
 * Cores are handed out in steps of `minCoresPerExecutor` (the app's coresPerExecutor, or 1
 * when it is unset) until no worker passes `canLaunchExecutorForApp` any more.
 *
 * @param app           application being scheduled; its description supplies the per-executor
 *                      cores, memory and resource requirements
 * @param usableWorkers candidate workers; positions in the result refer to this array
 * @param spreadOutApps when true, each worker receives one allocation step per pass before
 *                      moving on to the next worker; when false, a worker keeps receiving
 *                      steps until it can take no more
 * @return array of the same length as `usableWorkers`; element i is the number of cores
 *         assigned on `usableWorkers(i)`
 */
private def scheduleExecutorsOnWorkers(
app: ApplicationInfo,
usableWorkers: Array[WorkerInfo],
spreadOutApps: Boolean): Array[Int] = {
  val coresPerExecutor = app.desc.coresPerExecutor
  val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
  // When coresPerExecutor is not set, one-executor-per-worker mode is on: at most one new
  // executor is counted per worker and extra cores are added to that executor.
  // When coresPerExecutor is set, a worker may receive several executors as long as it
  // passes the checks in canLaunchExecutorForApp below.
  val oneExecutorPerWorker = coresPerExecutor.isEmpty
  val memoryPerExecutor = app.desc.memoryPerExecutorMB
  val resourceReqsPerExecutor = app.desc.resourceReqsPerExecutor
  val numUsable = usableWorkers.length
  val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
  val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
  // Upper bound on what can be handed out: the smaller of the cores the app still needs
  // and the total free cores across the usable workers.
  var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)

  // Returns whether the worker at `pos` can take one more allocation step for this app,
  // taking into account what has already been assigned to it during this call.
  def canLaunchExecutorForApp(pos: Int): Boolean = {

    // There must still be at least one step's worth of cores left to hand out.
val keepScheduling = coresToAssign >= minCoresPerExecutor
    // The worker's free cores, minus what was already assigned here, must cover one step.
val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
val assignedExecutorNum = assignedExecutors(pos)

// If we allow multiple executors per worker, then we can always launch new executors.
// Otherwise, if there is already an executor on this worker, just give it more cores.

// A new executor is being launched when coresPerExecutor is set, or when this worker has
// not yet been given an executor for this application during this call.
val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutorNum == 0
    // New-executor path: memory, resources and the executor limit must be checked as well.
if (launchingNewExecutor) {
   // Memory already claimed by executors assigned to this worker in this call.
   val assignedMemory = assignedExecutorNum * memoryPerExecutor
   val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
   // Resource amounts already claimed by executors assigned to this worker in this call.
   val assignedResources = resourceReqsPerExecutor.map {
      req => req.resourceName -> req.amount * assignedExecutorNum
   }.toMap
   // Free resources on the worker after subtracting the amounts claimed above.
   val resourcesFree = usableWorkers(pos).resourcesAmountFree.map {
      case (rName, free) => rName -> (free - assignedResources.getOrElse(rName, 0))
   }
   val enoughResources = ResourceUtils.resourcesMeetRequirements(
      resourcesFree, resourceReqsPerExecutor)
   // New executors from this call plus the app's existing executors must stay below the limit.
   val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
   keepScheduling && enoughCores && enoughMemory && enoughResources && underLimit
} else {
   // We're adding cores to an existing executor, so no need
   // to check memory and executor limits
   // No new executor is launched on this path; the cores go to the executor already counted.
   keepScheduling && enoughCores
}
  }

  // Keep launching executors until no more workers can accommodate any
  // more executors, or if we have reached this application's limits

  var freeWorkers = (0 until numUsable).filter(canLaunchExecutorForApp)
  while (freeWorkers.nonEmpty) {
freeWorkers.foreach { pos =>
   var keepScheduling = true
   while (keepScheduling && canLaunchExecutorForApp(pos)) {
      coresToAssign -= minCoresPerExecutor
      assignedCores(pos) += minCoresPerExecutor

      // If we are launching one executor per worker, then every iteration assigns 1 core
      // to the executor. Otherwise, every iteration assigns cores to a new executor.
      if (oneExecutorPerWorker) {
      // One-executor-per-worker mode: the count stays at 1 and the step just added
      // (minCoresPerExecutor, which is 1 here because coresPerExecutor is unset) goes to it.
      assignedExecutors(pos) = 1
      } else {
      // coresPerExecutor is set: each step of minCoresPerExecutor cores (the configured
      // coresPerExecutor value) counts as one more new executor on this worker.
      assignedExecutors(pos) += 1
      }

      // Spreading out an application means spreading out its executors across as
      // many workers as possible. If we are not spreading out, then we should keep
      // scheduling executors on this worker until we use all of its resources.
      // Otherwise, just move on to the next worker.
      if (spreadOutApps) {
      // Stop after a single step on this worker so the foreach moves on to the next one;
      // the outer while loop revisits the workers that can still accept more.
      keepScheduling = false
      }
   }
}
freeWorkers = freeWorkers.filter(canLaunchExecutorForApp)
  }
  // Number of cores assigned on each worker, indexed like usableWorkers.
  assignedCores
}

再来分析 allocateWorkerResourceToExecutors
/**
 * Turns the cores assigned to one worker into executors for `app` and launches them.
 *
 * @param app              application receiving the executors
 * @param assignedCores    total number of cores assigned to this app on `worker`
 * @param coresPerExecutor cores per executor if specified; when empty, a single executor
 *                         takes all of `assignedCores`
 * @param worker           worker on which the executors are launched
 */
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  for (_ <- 1 to numExecutors) {
    // Acquire the per-executor resources requested by the app from this worker.
    val allocated = worker.acquireResources(app.desc.resourceReqsPerExecutor)
    // Add one more executor to the application with its cores and acquired resources.
    val exec = app.addExecutor(worker, coresToAssign, allocated)
    // Launch the executor on the worker (presumably by sending it a launch command;
    // launchExecutor is defined elsewhere).
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}

梦缠绕的时候 · 梦缠绕的时候

任何问题欢迎在评论区留言

梦缠绕的时候 · 梦缠绕的时候

或者添加学姐微信
DKA-2018

帐号		自动登录	找回密码
密码			加入黑马

[学习交流] 【上海校区】spark-3.0 application 调度算法解析

2 个回复

浏览过的版块