Spark 的资源调度其实也可以交给 YARN,YARN 自带三种调度算法。
Spark 的 SchedulingAlgorithm 有两种调度算法(FIFO 和 FAIR),它们的优先级比较规则如下:
FIFO:
- --计算优先级的差。注意,在程序中,大部分时候是优先级的数字越小,它优先级越高
- --如果优先级相同,那么stage编号越靠前,优先级越高
- --如果优先级字段和stage id都相同,comparator 返回 false,即 s1 不会排在 s2 之前。(有这种情况?)
FAIR:
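- --运行任务数(runningTasks)还没有达到最小资源(minShare)的调度实体(Schedulable),比已经达到最小资源的优先级高
- --如果两个调度实体都没达到最小资源,那么比较它们占用最小资源的比例(runningTasks / minShare)。比例越小越优先
- --否则(两者都已达到最小资源)比较运行任务数与权重的比例(runningTasks / weight),比例越小越优先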
- --如果所有上述的比较都相同,那么名字小的优先(哈哈,名字很重要);
- --名字也相同时,comparator 返回 false,即 s1 不会排在 s2 之前。
[mw_shl_code=java,true]
/**
 * An interface for the sort algorithm applied to [[Schedulable]] entities.
 *
 *  - FIFO: FIFO algorithm between TaskSetManagers
 *  - FS: fair-sharing algorithm between Pools, and FIFO or FS within Pools
 */
private[spark] trait SchedulingAlgorithm {
// Pairwise ordering predicate: the implementations in this file return true when
// s1 ranks strictly ahead of s2 and false otherwise (including on a full tie).
// NOTE(review): presumably used as a "less than" function when sorting — confirm at the call site.
def comparator(s1: Schedulable, s2: Schedulable): Boolean
}
/**
 * FIFO ordering: compares two schedulables by priority first and by stage id second.
 */
private[spark] class FIFOSchedulingAlgorithm extends SchedulingAlgorithm {
  /**
   * @param s1 first schedulable
   * @param s2 second schedulable
   * @return true when s1 ranks strictly ahead of s2; false otherwise, including
   *         when both the priority and the stage id are equal
   */
  override def comparator(s1: Schedulable, s2: Schedulable): Boolean = {
    // Sign of the priority difference: a smaller priority value ranks first.
    // NOTE(review): a plain subtraction can overflow for operands of opposite sign
    // and very large magnitude — confirm the value ranges of priority/stageId.
    val byPriority = math.signum(s1.priority - s2.priority)
    val res =
      if (byPriority != 0) {
        byPriority
      } else {
        // Same priority: the smaller (earlier) stage id ranks first.
        math.signum(s1.stageId - s2.stageId)
      }
    // A full tie yields false, i.e. s1 is not placed ahead of s2.
    res < 0
  }
}
/**
 * Fair ordering: a schedulable running fewer tasks than its minShare ("needy")
 * ranks ahead of one that is not; otherwise the two are compared by ratio, and
 * finally by name.
 */
private[spark] class FairSchedulingAlgorithm extends SchedulingAlgorithm {
  /**
   * @param s1 first schedulable
   * @param s2 second schedulable
   * @return true when s1 ranks strictly ahead of s2; false otherwise, including
   *         when every criterion (ratios and name) is equal
   */
  override def comparator(s1: Schedulable, s2: Schedulable): Boolean = {
    val minShare1 = s1.minShare
    val minShare2 = s2.minShare
    val runningTasks1 = s1.runningTasks
    val runningTasks2 = s2.runningTasks
    // "Needy" means the schedulable runs fewer tasks than its minimum share.
    val s1Needy = runningTasks1 < minShare1
    val s2Needy = runningTasks2 < minShare2
    // The divisor is clamped to at least 1.0 so a minShare of 0 cannot divide by zero.
    val minShareRatio1 = runningTasks1.toDouble / math.max(minShare1, 1.0)
    val minShareRatio2 = runningTasks2.toDouble / math.max(minShare2, 1.0)
    val taskToWeightRatio1 = runningTasks1.toDouble / s1.weight.toDouble
    val taskToWeightRatio2 = runningTasks2.toDouble / s2.weight.toDouble

    // Negative: s1 ranks first; positive: s2 ranks first; zero: undecided so far.
    val compare: Int =
      if (s1Needy && !s2Needy) {
        // Only s1 is below its minimum share, so it ranks first.
        -1
      } else if (!s1Needy && s2Needy) {
        // Only s2 is below its minimum share, so it ranks first.
        1
      } else if (s1Needy && s2Needy) {
        // Both are below their minimum share: the smaller runningTasks/minShare ratio ranks first.
        minShareRatio1.compareTo(minShareRatio2)
      } else {
        // Neither is needy: the smaller runningTasks/weight ratio ranks first.
        taskToWeightRatio1.compareTo(taskToWeightRatio2)
      }

    if (compare < 0) {
      true
    } else if (compare > 0) {
      false
    } else {
      // Everything above ties: the smaller name ranks first; equal names yield false.
      s1.name < s2.name
    }
  }
}[/mw_shl_code]
|