在 client 提交任务的源码分析那篇中,我们知道了客户端提交给 JobManager 的是一个 JobGraph 对象。那么当 JobManager 的 Dispatcher 组件接收到 JobGraph 后做了哪些处理呢?这篇我们从源码分析一下这个处理过程。
源码分析
NettyRPC 接收到请求调用的是channelRead0方法,所以在JM端程序的入口:
RedirectHandler.channelRead0
===> AbstractHandler.respondAsLeader
===>AbstractHandler.respondToRequest
===> JobSubmitHandler.handleRequest
===>gateway.submitJob(jobGraph, timeout) 实际调用的是 Dispatcher.submitJob,源码如下:
/**
 * Accepts a job graph for execution.
 *
 * <p>The job's scheduling status is looked up in the running-jobs registry first. The
 * submission is rejected when that status is {@code DONE} or when a job manager runner
 * future is already registered under the same job ID. Otherwise the job is handed to
 * {@code persistAndRunJob} through {@code waitForTerminatingJobManager}.
 *
 * @param jobGraph the job to submit
 * @param timeout not read by this method body
 * @return a future completed with an {@link Acknowledge} on success; completed
 *     exceptionally with a {@link FlinkException} when the scheduling status cannot be
 *     read, or with a {@link JobSubmissionException} when the job is a duplicate or the
 *     persist-and-run step fails
 */
public CompletableFuture<Acknowledge> submitJob(JobGraph jobGraph, Time timeout) {
    final JobID jobId = jobGraph.getJobID();

    log.info("Submitting job {} ({}).", jobId, jobGraph.getName());

    // Look up the registry's scheduling status for this job ID (PENDING, RUNNING or DONE).
    final RunningJobsRegistry.JobSchedulingStatus schedulingStatus;
    try {
        schedulingStatus = runningJobsRegistry.getJobSchedulingStatus(jobId);
    } catch (IOException ioe) {
        final String message = String.format("Failed to retrieve job scheduling status for job %s.", jobId);
        return FutureUtils.completedExceptionally(new FlinkException(message, ioe));
    }

    // Guard: a finished job, or one that already has a runner registered, is a duplicate.
    final boolean alreadyDone = schedulingStatus == RunningJobsRegistry.JobSchedulingStatus.DONE;
    if (alreadyDone || jobManagerRunnerFutures.containsKey(jobId)) {
        final String message = String.format("Job has already been submitted and is in state %s.", schedulingStatus);
        return FutureUtils.completedExceptionally(new JobSubmissionException(jobId, message));
    }

    // Persist and run the job, then map the result to an acknowledgement.
    final CompletableFuture<Acknowledge> acknowledgeFuture =
        waitForTerminatingJobManager(jobId, jobGraph, this::persistAndRunJob)
            .thenApply(ignored -> Acknowledge.get());

    return acknowledgeFuture.exceptionally(
        (Throwable failure) -> {
            // Unwrap the CompletionException layer so the log and the rethrown error carry the real cause.
            final Throwable cause = ExceptionUtils.stripCompletionException(failure);
            log.error("Failed to submit job {}.", jobId, cause);
            throw new CompletionException(
                new JobSubmissionException(jobId, "Failed to submit job.", cause));
        });
}
继续进到persistAndRunJob方法查看
/**
 * Stores the job graph in the submitted-job-graph store and then starts the job.
 *
 * <p>If the run future completes exceptionally, the stored job graph is removed again.
 *
 * @param jobGraph the job to persist and run
 * @return the future returned by {@code runJob}, with the cleanup callback attached
 * @throws Exception if storing the job graph fails
 */
private CompletableFuture<Void> persistAndRunJob(JobGraph jobGraph) throws Exception {
    // Store first, so the graph is in the store before the job starts.
    // NOTE(review): the original author notes that only HA mode actually writes to ZooKeeper
    // and that other modes do nothing here; confirm against the configured store.
    final SubmittedJobGraph submittedJobGraph = new SubmittedJobGraph(jobGraph, null);
    submittedJobGraphStore.putJobGraph(submittedJobGraph);

    // Run the job and undo the store step if running fails.
    return runJob(jobGraph).whenComplete(
        BiConsumerWithException.unchecked((Object ignored, Throwable failure) -> {
            if (failure == null) {
                return;
            }
            submittedJobGraphStore.removeJobGraph(jobGraph.getJobID());
        }));
}
/**
 * Creates a job manager runner for the job and registers its future under the job ID.
 *
 * <p>If creating the runner fails, the registration is removed again on the main thread
 * executor.
 *
 * @param jobGraph the job to run; no runner future may be registered for its ID yet
 * @return a {@code Void} future derived from the runner-creation future
 * @throws IllegalStateException if a runner future is already registered for this job ID
 */
private CompletableFuture<Void> runJob(JobGraph jobGraph) {
    final JobID jobId = jobGraph.getJobID();

    Preconditions.checkState(!jobManagerRunnerFutures.containsKey(jobId));

    // Start creating the runner and register the pending future under the job ID.
    final CompletableFuture<JobManagerRunner> runnerFuture = createJobManagerRunner(jobGraph);
    jobManagerRunnerFutures.put(jobId, runnerFuture);

    // Callers only need completion, so the runner instance is mapped away.
    final CompletableFuture<Void> creationDone = runnerFuture.thenApply(FunctionUtils.nullFn());

    return creationDone.whenCompleteAsync(
        (ignored, failure) -> {
            // On failure, drop the registration added above.
            if (failure != null) {
                jobManagerRunnerFutures.remove(jobId);
            }
        },
        getMainThreadExecutor());
}
private CompletableFuture<JobManagerRunner> createJobManagerRunner(JobGraph jobGraph) {
final RpcService rpcService = getRpcService();
final CompletableFuture<JobManagerRunner> jobManagerRunnerFuture = CompletableFuture.supplyAsync(
CheckedSupplier.unchecked(() ->
jobManagerRunnerFactory.createJobManagerRunner(// ==> DefaultJobManagerRunnerFactory,
ResourceID.generate(),
jobGraph,
configuration,
rpcService,
highAvailabilityServ