序
本文主要研究一下flink的CheckpointScheduler
CheckpointCoordinatorDeActivator
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorDeActivator.java
/**
* This actor listens to changes in the JobStatus and activates or deactivates the periodic
* checkpoint scheduler.
*/
public class CheckpointCoordinatorDeActivator implements JobStatusListener {
private final CheckpointCoordinator coordinator;
public CheckpointCoordinatorDeActivator(CheckpointCoordinator coordinator) {
this.coordinator = checkNotNull(coordinator);
}
@Override
public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
if (newJobStatus == JobStatus.RUNNING) {
// start the checkpoint scheduler
coordinator.startCheckpointScheduler();
} else {
// anything else should stop the trigger for now
coordinator.stopCheckpointScheduler();
}
}
}
CheckpointCoordinatorDeActivator实现了JobStatusListener接口,在jobStatusChanges的时候,根据状态来调用coordinator.startCheckpointScheduler或者coordinator.stopCheckpointScheduler
CheckpointCoordinator.ScheduledTrigger
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
/**
* The checkpoint coordinator coordinates the distributed snapshots of operators and state.
* It triggers the checkpoint by sending the messages to the relevant tasks and collects the
* checkpoint acknowledgements. It also collects and maintains the overview of the state handles
* reported by the tasks that acknowledge the checkpoint.
*/
public class CheckpointCoordinator {
/** Map from checkpoint ID to the pending checkpoint */
private final Map<Long, PendingCheckpoint> pendingCheckpoints;
/** The number of consecutive failed trigger attempts */
private final AtomicInteger numUnsuccessfulCheckpointsTriggers = new AtomicInteger(0);
//……
public void startCheckpointScheduler() {
synchronized (lock) {
if (shutdown) {
throw new IllegalArgumentException(“Checkpoint coordinator is shut down”);
}
// make sure all prior timers are cancelled
stopCheckpointScheduler();
periodicScheduling = true;
long initialDelay = ThreadLocalRandom.current().nextLong(
minPauseBetweenCheckpointsNanos / 1_000_000L, baseInterval + 1L);
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(), initialDelay, baseInterval, TimeUnit.MILLISECONDS);
}
}
public void stopCheckpointScheduler() {
synchronized (lock) {
triggerRequestQueued = false;
periodicScheduling = false;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
for (PendingCheckpoint p : pendingCheckpoints.values()) {
p.abortError(new Exception(“Checkpoint Coordinator is suspending.”));
}
pendingCheckpoints.clear();
numUnsuccessfulCheckpointsTriggers.set(0);
}
}
private final class ScheduledTrigger implements Runnable {
@Override
public void run() {
try {
triggerCheckpoint(System.currentTimeMillis(), true);
}
catch (Exception e) {
LOG.error(“Exception while triggering checkpoint for job {}.”, job, e);
}
}
}
//……
}
CheckpointCoordinator的startCheckpointScheduler方法首先调用stopCheckpointScheduler取消PendingCheckpoint,之后使用timer.scheduleAtFixedRate重新调度ScheduledTrigger
stopCheckpointScheduler会调用PendingCheckpoint.abortError来取消pendingCheckpoints,然后清空pendingCheckpoints(Map<Long, PendingCheckpoint>)以及numUnsuccessfulCheckpointsTriggers(AtomicInteger)
ScheduledTrigger实现了Runnable接口,其run方法主要是调用triggerCheckpoint,传递的isPeriodic参数为true
CheckpointCoordinator.triggerCheckpoint
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
/**
* The checkpoint coordinator coordinates the distributed snapshots of operators and state.
* It triggers the checkpoint by sending the messages to the relevant tasks and collects the
* checkpoint acknowledgements. It also collects and maintains the overview of the state handles
* reported by the tasks that acknowledge the checkpoint.
*/
public class CheckpointCoordinator {
/** Tasks who need to be sent a message when a checkpoint is started */
private final ExecutionVertex[] tasksToTrigger;
/** Tasks who need to acknowledge a checkpoint before it succeeds */
private final ExecutionVertex[] tasksToWaitFor;
/** Map from checkpoint ID to the pending checkpoint */
private final Map<Long, PendingCheckpoint> pendingCheckpoints;
/** The maximum number of checkpoints that may be in progress at the same time */
private final int maxConcurrentCheckpointAttempts;
/** The min time(in ns) to delay after a checkpoint could be triggered. Allows to
* enforce minimum processing time between checkpoint attempts */
private final long minPauseBetweenCheckpointsNanos;
/**
* Triggers a new standard checkpoint and uses the given timestamp as the checkpoint
* timestamp.
*
* @param timestamp The timestamp for the checkpoint.
* @param isPeriodic Flag indicating whether this triggered checkpoint is
* periodic. If this flag is true, but the periodic scheduler is disabled,
* the checkpoint will be declined.
* @return <code>true</code> if triggering the checkpoint succeeded.
*/
public boolean triggerCheckpoint(long timestamp, boolean isPeriodic) {
return triggerCheckpoint(timestamp, checkpointProperties, null, isPeriodic).isSuccess();
}
@VisibleForTesting
public CheckpointTriggerResult triggerCheckpoint(
long timestamp,
CheckpointProperties props,
@Nullable String externalSavepointLocation,
boolean isPeriodic) {
// make some eager pre-checks
synchronized (lock) {
// abort if the coordinator has been shutdown in the meantime
if (shutdown) {
return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
}
// Don’t allow periodic checkpoint if scheduling has been disabled
if (isPeriodic && !periodicScheduling) {
return new CheckpointTriggerResult(CheckpointDeclineReason.PERIODIC_SCHEDULER_SHUTDOWN);
}
// validate whether the checkpoint can be triggered, with respect to the limit of
// concurrent checkpoints, and the minimum time between checkpoints.
// these checks are not relevant for savepoints
if (!props.forceCheckpoint()) {
// sanity check: there should never be more than one trigger request queued
if (triggerRequestQueued) {
LOG.warn(“Trying to trigger another checkpoint for job {} while one was queued already.”, job);
return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
}
// if too many checkpoints are currently in progress, we need to mark that a request is queued
if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
triggerRequestQueued = true;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
}
// make sure the minimum interval between checkpoints has passed
final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
final long durationTillNextMillis = (earliestNext – System.nanoTime()) / 1_000_000;
if (durationTillNextMillis > 0) {
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
// Reassign the new trigger to the currentPeriodicTrigger
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(),
durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
return new CheckpointTriggerResult(CheckpointDeclineReason.MINIMUM_TIME_BETWEEN_CHECKPOINTS);
}
}
}
// check if all tasks that we need to trigger are running.
// if not, abort the checkpoint
Execution[] executions = new Execution[tasksToTrigger.length];
for (int i = 0; i < tasksToTrigger.length; i++) {
Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
if (ee == null) {
LOG.info(“Checkpoint triggering task {} of job {} is not being executed at the moment. Aborting checkpoint.”,
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
} else if (ee.getState() == ExecutionState.RUNNING) {
executions[i] = ee;
} else {
LOG.info(“Checkpoint triggering task {} of job {} is not in state {} but {} instead. Aborting checkpoint.”,
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job,
ExecutionState.RUNNING,
ee.getState());
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// next, check if all tasks that need to acknowledge the checkpoint are running.
// if not, abort the checkpoint
Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);
for (ExecutionVertex ev : tasksToWaitFor) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
ackTasks.put(ee.getAttemptId(), ev);
} else {
LOG.info(“Checkpoint acknowledging task {} of job {} is not being executed at the moment. Aborting checkpoint.”,
ev.getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// we will actually trigger this checkpoint!
// we lock with a special lock to make sure that trigger requests do not overtake each other.
// this is not done with the coordinator-wide lock, because the ‘checkpointIdCounter’
// may issue blocking operations. Using a different lock than the coordinator-wide lock,
// we avoid blocking the processing of ‘acknowledge/decline’ messages during that time.
synchronized (triggerLock) {
final CheckpointStorageLocation checkpointStorageLocation;
final long checkpointID;
try {
// this must happen outside the coordinator-wide lock, because it communicates
// with external services (in HA mode) and may block for a while.
checkpointID = checkpointIdCounter.getAndIncrement();
checkpointStorageLocation = props.isSavepoint() ?
checkpointStorage.initializeLocationForSavepoint(checkpointID, externalSavepointLocation) :
checkpointStorage.initializeLocationForCheckpoint(checkpointID);
}
catch (Throwable t) {
int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
LOG.warn(“Failed to trigger checkpoint for job {} ({} consecutive failed attempts so far).”,
job,
numUnsuccessful,
t);
return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
}
final PendingCheckpoint checkpoint = new PendingCheckpoint(
job,
checkpointID,
timestamp,
ackTasks,
props,
checkpointStorageLocation,
executor);
if (statsTracker != null) {
PendingCheckpointStats callback = statsTracker.reportPendingCheckpoint(
checkpointID,
timestamp,
props);
checkpoint.setStatsCallback(callback);
}
// schedule the timer that will clean up the expired checkpoints
final Runnable canceller = () -> {
synchronized (lock) {
// only do the work if the checkpoint is not discarded anyways
// note that checkpoint completion discards the pending checkpoint object
if (!checkpoint.isDiscarded()) {
LOG.info(“Checkpoint {} of job {} expired before completing.”, checkpointID, job);
checkpoint.abortExpired();
pendingCheckpoints.remove(checkpointID);
序
本文主要研究一下flink的CheckpointScheduler
CheckpointCoordinatorDeActivator
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorDeActivator.java
/**
* This actor listens to changes in the JobStatus and activates or deactivates the periodic
* checkpoint scheduler.
*/
public class CheckpointCoordinatorDeActivator implements JobStatusListener {
private final CheckpointCoordinator coordinator;
public CheckpointCoordinatorDeActivator(CheckpointCoordinator coordinator) {
this.coordinator = checkNotNull(coordinator);
}
@Override
public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
if (newJobStatus == JobStatus.RUNNING) {
// start the checkpoint scheduler
coordinator.startCheckpointScheduler();
} else {
// anything else should stop the trigger for now
coordinator.stopCheckpointScheduler();
}
}
}
CheckpointCoordinatorDeActivator实现了JobStatusListener接口,在jobStatusChanges的时候,根据状态来调用coordinator.startCheckpointScheduler或者coordinator.stopCheckpointScheduler
CheckpointCoordinator.ScheduledTrigger
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
/**
* The checkpoint coordinator coordinates the distributed snapshots of operators and state.
* It triggers the checkpoint by sending the messages to the relevant tasks and collects the
* checkpoint acknowledgements. It also collects and maintains the overview of the state handles
* reported by the tasks that acknowledge the checkpoint.
*/
public class CheckpointCoordinator {
/** Map from checkpoint ID to the pending checkpoint */
private final Map<Long, PendingCheckpoint> pendingCheckpoints;
/** The number of consecutive failed trigger attempts */
private final AtomicInteger numUnsuccessfulCheckpointsTriggers = new AtomicInteger(0);
//……
public void startCheckpointScheduler() {
synchronized (lock) {
if (shutdown) {
throw new IllegalArgumentException(“Checkpoint coordinator is shut down”);
}
// make sure all prior timers are cancelled
stopCheckpointScheduler();
periodicScheduling = true;
long initialDelay = ThreadLocalRandom.current().nextLong(
minPauseBetweenCheckpointsNanos / 1_000_000L, baseInterval + 1L);
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(), initialDelay, baseInterval, TimeUnit.MILLISECONDS);
}
}
public void stopCheckpointScheduler() {
synchronized (lock) {
triggerRequestQueued = false;
periodicScheduling = false;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
for (PendingCheckpoint p : pendingCheckpoints.values()) {
p.abortError(new Exception(“Checkpoint Coordinator is suspending.”));
}
pendingCheckpoints.clear();
numUnsuccessfulCheckpointsTriggers.set(0);
}
}
private final class ScheduledTrigger implements Runnable {
@Override
public void run() {
try {
triggerCheckpoint(System.currentTimeMillis(), true);
}
catch (Exception e) {
LOG.error(“Exception while triggering checkpoint for job {}.”, job, e);
}
}
}
//……
}
CheckpointCoordinator的startCheckpointScheduler方法首先调用stopCheckpointScheduler取消PendingCheckpoint,之后使用timer.scheduleAtFixedRate重新调度ScheduledTrigger
stopCheckpointScheduler会调用PendingCheckpoint.abortError来取消pendingCheckpoints,然后清空pendingCheckpoints(Map<Long, PendingCheckpoint>)以及numUnsuccessfulCheckpointsTriggers(AtomicInteger)
ScheduledTrigger实现了Runnable接口,其run方法主要是调用triggerCheckpoint,传递的isPeriodic参数为true
CheckpointCoordinator.triggerCheckpoint
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
/**
* The checkpoint coordinator coordinates the distributed snapshots of operators and state.
* It triggers the checkpoint by sending the messages to the relevant tasks and collects the
* checkpoint acknowledgements. It also collects and maintains the overview of the state handles
* reported by the tasks that acknowledge the checkpoint.
*/
public class CheckpointCoordinator {
/** Tasks who need to be sent a message when a checkpoint is started */
private final ExecutionVertex[] tasksToTrigger;
/** Tasks who need to acknowledge a checkpoint before it succeeds */
private final ExecutionVertex[] tasksToWaitFor;
/** Map from checkpoint ID to the pending checkpoint */
private final Map<Long, PendingCheckpoint> pendingCheckpoints;
/** The maximum number of checkpoints that may be in progress at the same time */
private final int maxConcurrentCheckpointAttempts;
/** The min time(in ns) to delay after a checkpoint could be triggered. Allows to
* enforce minimum processing time between checkpoint attempts */
private final long minPauseBetweenCheckpointsNanos;
/**
* Triggers a new standard checkpoint and uses the given timestamp as the checkpoint
* timestamp.
*
* @param timestamp The timestamp for the checkpoint.
* @param isPeriodic Flag indicating whether this triggered checkpoint is
* periodic. If this flag is true, but the periodic scheduler is disabled,
* the checkpoint will be declined.
* @return <code>true</code> if triggering the checkpoint succeeded.
*/
public boolean triggerCheckpoint(long timestamp, boolean isPeriodic) {
return triggerCheckpoint(timestamp, checkpointProperties, null, isPeriodic).isSuccess();
}
@VisibleForTesting
public CheckpointTriggerResult triggerCheckpoint(
long timestamp,
CheckpointProperties props,
@Nullable String externalSavepointLocation,
boolean isPeriodic) {
// make some eager pre-checks
synchronized (lock) {
// abort if the coordinator has been shutdown in the meantime
if (shutdown) {
return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
}
// Don’t allow periodic checkpoint if scheduling has been disabled
if (isPeriodic && !periodicScheduling) {
return new CheckpointTriggerResult(CheckpointDeclineReason.PERIODIC_SCHEDULER_SHUTDOWN);
}
// validate whether the checkpoint can be triggered, with respect to the limit of
// concurrent checkpoints, and the minimum time between checkpoints.
// these checks are not relevant for savepoints
if (!props.forceCheckpoint()) {
// sanity check: there should never be more than one trigger request queued
if (triggerRequestQueued) {
LOG.warn(“Trying to trigger another checkpoint for job {} while one was queued already.”, job);
return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
}
}
// if too many checkpoints are currently in progress, we need to mark that a request is queued
if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
triggerRequestQueued = true;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
}
// make sure the minimum interval between checkpoints has passed
final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
final long durationTillNextMillis = (earliestNext – System.nanoTime()) / 1_000_000;
if (durationTillNextMillis > 0) {
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
// Reassign the new trigger to the currentPeriodicTrigger
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(),
durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
return new CheckpointTriggerResult(CheckpointDeclineReason.MINIMUM_TIME_BETWEEN_CHECKPOINTS);
}
}
}
// check if all tasks that we need to trigger are running.
// if not, abort the checkpoint
Execution[] executions = new Execution[tasksToTrigger.length];
for (int i = 0; i < tasksToTrigger.length; i++) {
Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
if (ee == null) {
LOG.info(“Checkpoint triggering task {} of job {} is not being executed at the moment. Aborting checkpoint.”,
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
} else if (ee.getState() == ExecutionState.RUNNING) {
executions[i] = ee;
} else {
LOG.info(“Checkpoint triggering task {} of job {} is not in state {} but {} instead. Aborting checkpoint.”,
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job,
ExecutionState.RUNNING,
ee.getState());
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// next, check if all tasks that need to acknowledge the checkpoint are running.
// if not, abort the checkpoint
Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);
for (ExecutionVertex ev : tasksToWaitFor) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
ackTasks.put(ee.getAttemptId(), ev);
} else {
LOG.info(“Checkpoint acknowledging task {} of job {} is not being executed at the moment. Aborting checkpoint.”,
ev.getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// we will actually trigger this checkpoint!
// we lock with a special lock to make sure that trigger requests do not overtake each other.
// this is not done with the coordinator-wide lock, because the ‘checkpointIdCounter’
// may issue blocking operations. Using a different lock than the coordinator-wide lock,
// we avoid blocking the processing of ‘acknowledge/decline’ messages during that time.
synchronized (triggerLock) {
final CheckpointStorageLocation checkpointStorageLocation;
final long checkpointID;
try {
// this must happen outside the coordinator-wide lock, because it communicates
// with external services (in HA mode) and may block for a while.
checkpointID = checkpointIdCounter.getAndIncrement();
checkpointStorageLocation = props.isSavepoint() ?
checkpointStorage.initializeLocationForSavepoint(checkpointID, externalSavepointLocation) :
checkpointStorage.initializeLocationForCheckpoint(checkpointID);
}
catch (Throwable t) {
int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
LOG.warn(“Failed to trigger checkpoint for job {} ({} consecutive failed attempts so far).”,
job,
numUnsuccessful,
t);
return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
}
final PendingCheckpoint checkpoint = new PendingCheckpoint(
job,
checkpointID,
timestamp,
ackTasks,
props,
checkpointStorageLocation,
executor);
if (statsTracker != null) {
PendingCheckpointStats callback = statsTracker.reportPendingCheckpoint(
checkpointID,
timestamp,
props);
checkpoint.setStatsCallback(callback);
}
// schedule the timer that will clean up the expired checkpoints
final Runnable canceller = () -> {
synchronized (lock) {
// only do the work if the checkpoint is not discarded anyways
// note that checkpoint completion discards the pending checkpoint object
if (!checkpoint.isDiscarded()) {
LOG.info(“Checkpoint {} of job {} expired before completing.”, checkpointID, job);
checkpoint.abortExpired();
pendingCheckpoints.remove(checkpointID);
rememberRecentCheckpointId(checkpointID);
triggerQueuedRequests();
}
}
};
try {
// re-acquire the coordinator-wide lock
synchronized (lock) {
// since we released the lock in the meantime, we need to re-check
// that the conditions still hold.
if (shutdown) {
return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
}
else if (!props.forceCheckpoint()) {
if (triggerRequestQueued) {
LOG.warn(“Trying to trigger another checkpoint for job {} while one was queued already.”, job);
return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
}
if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
triggerRequestQueued = true;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
}
// make sure the minimum interval between checkpoints has passed
final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
final long durationTillNextMillis = (earliestNext – System.nanoTime()) / 1_000_000;
if (durationTillNextMillis > 0) {
if (currentPeriodicTrigger != null) {
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
} else if (ee.getState() == ExecutionState.RUNNING) {
executions[i] = ee;
} else {
LOG.info(“Checkpoint triggering task {} of job {} is not in state {} but {} instead. Aborting checkpoint.”,
tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
job,
ExecutionState.RUNNING,
ee.getState());
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// next, check if all tasks that need to acknowledge the checkpoint are running.
// if not, abort the checkpoint
Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);
for (ExecutionVertex ev : tasksToWaitFor) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
ackTasks.put(ee.getAttemptId(), ev);
} else {
LOG.info(“Checkpoint acknowledging task {} of job {} is not being executed at the moment. Aborting checkpoint.”,
ev.getTaskNameWithSubtaskIndex(),
job);
return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
}
}
// we will actually trigger this checkpoint!
// we lock with a special lock to make sure that trigger requests do not overtake each other.
// this is not done with the coordinator-wide lock, because the ‘checkpointIdCounter’
// may issue blocking operations. Using a different lock than the coordinator-wide lock,
// we avoid blocking the processing of ‘acknowledge/decline’ messages during that time.
synchronized (triggerLock) {
final CheckpointStorageLocation checkpointStorageLocation;
final long checkpointID;
try {
// this must happen outside the coordinator-wide lock, because it communicates
// with external services (in HA mode) and may block for a while.
checkpointID = checkpointIdCounter.getAndIncrement();
checkpointStorageLocation = props.isSavepoint() ?
checkpointStorage.initializeLocationForSavepoint(checkpointID, externalSavepointLocation) :
checkpointStorage.initializeLocationForCheckpoint(checkpointID);
}
catch (Throwable t) {
int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
LOG.warn(“Failed to trigger checkpoint for job {} ({} consecutive failed attempts so far).”,
job,
numUnsuccessful,
t);
return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
}
final PendingCheckpoint checkpoint = new PendingCheckpoint(
job,
checkpointID,
timestamp,
ackTasks,
props,
checkpointStorageLocation,
executor);
if (statsTracker != null) {
PendingCheckpointStats callback = statsTracker.reportPendingCheckpoint(
checkpointID,
timestamp,
props);
checkpoint.setStatsCallback(callback);
}
// schedule the timer that will clean up the expired checkpoints
final Runnable canceller = () -> {
synchronized (lock) {
// only do the work if the checkpoint is not discarded anyways
// note that checkpoint completion discards the pending checkpoint object
if (!checkpoint.isDiscarded()) {
LOG.info(“Checkpoint {} of job {} expired before completing.”, checkpointID, job);
checkpoint.abortExpired();
pendingCheckpoints.remove(checkpointID);
rememberRecentCheckpointId(checkpointID);
triggerQueuedRequests();
}
}
};
try {
// re-acquire the coordinator-wide lock
synchronized (lock) {
// since we released the lock in the meantime, we need to re-check
// that the conditions still hold.
if (shutdown) {
return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
}
else if (!props.forceCheckpoint()) {
if (triggerRequestQueued) {
LOG.warn(“Trying to trigger another checkpoint for job {} while one was queued already.”, job);
return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
}
if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
triggerRequestQueued = true;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
}
// make sure the minimum interval between checkpoints has passed
final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
final long durationTillNextMillis = (earliestNext – System.nanoTime()) / 1_000_000;
if (durationTillNextMillis > 0) {
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
// Reassign the new trigger to the currentPeriodicTrigger
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(),
durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
return new CheckpointTriggerResult(CheckpointDeclineReason.MINIMUM_TIME_BETWEEN_CHECKPOINTS);
}
}
LOG.info(“Triggering checkpoint {} @ {} for job {}.”, checkpointID, timestamp, job);
pendingCheckpoints.put(checkpointID, checkpoint);
ScheduledFuture<?> cancellerHandle = timer.schedule(
canceller,
checkpointTimeout, TimeUnit.MILLISECONDS);
if (!checkpoint.setCancellerHandle(cancellerHandle)) {
// checkpoint is already disposed!
cancellerHandle.cancel(false);
}
// trigger the master hooks for the checkpoint
final List<MasterState> masterStates = MasterHooks.triggerMasterHooks(masterHooks.values(),
checkpointID, timestamp, executor, Time.milliseconds(checkpointTimeout));
for (MasterState s : masterStates) {
checkpoint.addMasterState(s);
}
}
// end of lock scope
final CheckpointOptions checkpointOptions = new CheckpointOptions(
props.getCheckpointType(),
checkpointStorageLocation.getLocationReference());
// send the messages to the tasks that trigger their checkpoint
for (Execution execution: executions) {
execution.triggerCheckpoint(checkpointID, timestamp, checkpointOptions);
}
numUnsuccessfulCheckpointsTriggers.set(0);
return new CheckpointTriggerResult(checkpoint);
}
catch (Throwable t) {
// guard the map against concurrent modifications
synchronized (lock) {
pendingCheckpoints.remove(checkpointID);
}
int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
LOG.warn(“Failed to trigger checkpoint {} for job {}. ({} consecutive failed attempts so far)”,
checkpointID, job, numUnsuccessful, t);
if (!checkpoint.isDiscarded()) {
checkpoint.abortError(new Exception(“Failed to trigger checkpoint”, t));
}
try {
checkpointStorageLocation.disposeOnFailure();
}
catch (Throwable t2) {
LOG.warn(“Cannot dispose failed checkpoint storage location {}”, checkpointStorageLocation, t2);
}
return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
}
} // end trigger lock
}
//……
}
首先判断如果不是forceCheckpoint的话,则判断当前的pendingCheckpoints值是否超过