Skip to content

Commit

Permalink
Fix backup verification race condition causing missing notifications (#…
Browse files Browse the repository at this point in the history
…1034)

* Remove metaproxy validation it is never null in practice.

* Remove DateRange validation. It is never null in practice.

* Remove debug logging.

* Remove latest backup metadata validation. It is never null in practice.

* Consolidate repeated code into private verifyBackup.

* Change method names to better reflect what they do.

* Update latestResult wherever possible.

* Rewrite logic in findLatestVerfiedBackup to make it look more like verifyBackupsInRange.

* Change signature of BackupNotificationMgr.notify to not depend on BackupVerificationResult.

* Return all verified BackupMetadata instead of BackupVerificationResult when verifying en masse. It has enough information to skip the call to find the most recently verified backup.

Also, fix some tests that broke in this process: remove the check for the snapshot time in TestBackupVerification that only makes sense when the Path is for a file that does not exist. Also, mock the appropriate functions in MockBackupVerification in TestBackupVerificationTask.

* Rename findLatestVerifiedBackup responding to review comments.
  • Loading branch information
mattl-netflix committed Mar 27, 2023
1 parent d8c5cf7 commit 0fb87f9
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 185 deletions.
Expand Up @@ -34,9 +34,9 @@ public final class BackupMetadata implements Serializable {
private BackupVersion backupVersion;
private String snapshotLocation;

public BackupMetadata(BackupVersion backupVersion, String token, Date start) throws Exception {
public BackupMetadata(BackupVersion backupVersion, String token, Date start) {
if (start == null || token == null || StringUtils.isEmpty(token))
throw new Exception(
throw new IllegalArgumentException(
String.format(
"Invalid Input: Token: %s or start date: %s is null or empty.",
token, start));
Expand Down
120 changes: 47 additions & 73 deletions priam/src/main/java/com/netflix/priam/backup/BackupVerification.java
Expand Up @@ -14,13 +14,15 @@
package com.netflix.priam.backup;

import com.netflix.priam.backupv2.IMetaProxy;
import com.netflix.priam.scheduler.UnsupportedTypeException;
import com.netflix.priam.utils.DateUtil;
import com.netflix.priam.utils.DateUtil.DateRange;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Provider;
Expand Down Expand Up @@ -66,100 +68,72 @@ public IMetaProxy getMetaProxy(BackupVersion backupVersion) {
return null;
}

public Optional<BackupVerificationResult> verifyBackup(
public Optional<BackupVerificationResult> verifyLatestBackup(
BackupVersion backupVersion, boolean force, DateRange dateRange)
throws UnsupportedTypeException, IllegalArgumentException {
throws IllegalArgumentException {
IMetaProxy metaProxy = getMetaProxy(backupVersion);
if (metaProxy == null) {
throw new UnsupportedTypeException(
"BackupVersion type: " + backupVersion + " is not supported");
}

if (dateRange == null) {
throw new IllegalArgumentException("dateRange provided is null");
}

List<BackupMetadata> metadata =
backupStatusMgr.getLatestBackupMetadata(backupVersion, dateRange);
if (metadata == null || metadata.isEmpty()) return Optional.empty();
for (BackupMetadata backupMetadata : metadata) {
if (backupMetadata.getLastValidated() != null && !force) {
// Backup is already validated. Nothing to do.
latestResult = new BackupVerificationResult();
latestResult.valid = true;
latestResult.manifestAvailable = true;
latestResult.snapshotInstant = backupMetadata.getStart().toInstant();
Path snapshotLocation = Paths.get(backupMetadata.getSnapshotLocation());
latestResult.remotePath =
snapshotLocation.subpath(1, snapshotLocation.getNameCount()).toString();
for (BackupMetadata backupMetadata :
backupStatusMgr.getLatestBackupMetadata(backupVersion, dateRange)) {
if (backupMetadata.getLastValidated() == null || force) {
Optional<BackupVerificationResult> result = verifyBackup(metaProxy, backupMetadata);
if (result.isPresent()) {
return result;
}
} else {
updateLatestResult(backupMetadata);
return Optional.of(latestResult);
}
BackupVerificationResult backupVerificationResult =
verifyBackup(metaProxy, backupMetadata);
if (logger.isDebugEnabled())
logger.debug(
"BackupVerification: metadata: {}, result: {}",
backupMetadata,
backupVerificationResult);
if (backupVerificationResult.valid) {
backupMetadata.setLastValidated(new Date(DateUtil.getInstant().toEpochMilli()));
backupStatusMgr.update(backupMetadata);
latestResult = backupVerificationResult;
return Optional.of(backupVerificationResult);
}
}
latestResult = null;
return Optional.empty();
}

public List<BackupVerificationResult> verifyAllBackups(
BackupVersion backupVersion, DateRange dateRange)
throws UnsupportedTypeException, IllegalArgumentException {
public List<BackupMetadata> verifyBackupsInRange(
BackupVersion backupVersion, DateRange dateRange) throws IllegalArgumentException {
IMetaProxy metaProxy = getMetaProxy(backupVersion);
if (metaProxy == null) {
throw new UnsupportedTypeException(
"BackupVersion type: " + backupVersion + " is not supported");
}

if (dateRange == null) {
throw new IllegalArgumentException("dateRange provided is null");
}

List<BackupVerificationResult> result = new ArrayList<>();

List<BackupMetadata> metadata =
backupStatusMgr.getLatestBackupMetadata(backupVersion, dateRange);
if (metadata == null || metadata.isEmpty()) return result;
for (BackupMetadata backupMetadata : metadata) {
if (backupMetadata.getLastValidated() == null) {
BackupVerificationResult backupVerificationResult =
verifyBackup(metaProxy, backupMetadata);
if (logger.isDebugEnabled())
logger.debug(
"BackupVerification: metadata: {}, result: {}",
backupMetadata,
backupVerificationResult);
if (backupVerificationResult.valid) {
backupMetadata.setLastValidated(new Date(DateUtil.getInstant().toEpochMilli()));
backupStatusMgr.update(backupMetadata);
result.add(backupVerificationResult);
}
List<BackupMetadata> results = new ArrayList<>();
for (BackupMetadata backupMetadata :
backupStatusMgr.getLatestBackupMetadata(backupVersion, dateRange)) {
if (backupMetadata.getLastValidated() != null
|| verifyBackup(metaProxy, backupMetadata).isPresent()) {
results.add(backupMetadata);
}
}
return result;
return results;
}

/** returns the latest valid backup verification result if we have found one within the SLO * */
public Optional<Instant> getLatestVerfifiedBackupTime() {
return latestResult == null ? Optional.empty() : Optional.of(latestResult.snapshotInstant);
}

private BackupVerificationResult verifyBackup(
private Optional<BackupVerificationResult> verifyBackup(
IMetaProxy metaProxy, BackupMetadata latestBackupMetaData) {
Path metadataLocation = Paths.get(latestBackupMetaData.getSnapshotLocation());
metadataLocation = metadataLocation.subpath(1, metadataLocation.getNameCount());
AbstractBackupPath abstractBackupPath = abstractBackupPathProvider.get();
abstractBackupPath.parseRemote(metadataLocation.toString());
return metaProxy.isMetaFileValid(abstractBackupPath);
BackupVerificationResult result = metaProxy.isMetaFileValid(abstractBackupPath);
if (result.valid) {
updateLatestResult(latestBackupMetaData);
Date now = new Date(DateUtil.getInstant().toEpochMilli());
latestBackupMetaData.setLastValidated(now);
backupStatusMgr.update(latestBackupMetaData);
return Optional.of(result);
}
return Optional.empty();
}

private void updateLatestResult(BackupMetadata backupMetadata) {
Instant snapshotInstant = backupMetadata.getStart().toInstant();
if (latestResult == null || latestResult.snapshotInstant.isBefore(snapshotInstant)) {
latestResult = new BackupVerificationResult();
latestResult.valid = true;
latestResult.manifestAvailable = true;
latestResult.snapshotInstant = backupMetadata.getStart().toInstant();
Path snapshotLocation = Paths.get(backupMetadata.getSnapshotLocation());
latestResult.remotePath =
snapshotLocation.subpath(1, snapshotLocation.getNameCount()).toString();
}
}
}
Expand Up @@ -84,21 +84,24 @@ public void execute() throws Exception {
Instant slo =
now.minus(backupRestoreConfig.getBackupVerificationSLOInHours(), ChronoUnit.HOURS);
DateRange dateRange = new DateRange(slo, now);
List<BackupVerificationResult> verificationResults =
backupVerification.verifyAllBackups(BackupVersion.SNAPSHOT_META_SERVICE, dateRange);
List<BackupMetadata> verifiedBackups =
backupVerification.verifyBackupsInRange(
BackupVersion.SNAPSHOT_META_SERVICE, dateRange);

verificationResults.forEach(
result -> {
logger.info(
"Sending {} message for backup: {}",
AbstractBackupPath.BackupFileType.SNAPSHOT_VERIFIED,
result.snapshotInstant);
backupNotificationMgr.notify(result);
});
verifiedBackups
.stream()
.filter(result -> result.getLastValidated().toInstant().isAfter(now))
.forEach(
result -> {
logger.info(
"Sending {} message for backup: {}",
AbstractBackupPath.BackupFileType.SNAPSHOT_VERIFIED,
result.getSnapshotLocation());
backupNotificationMgr.notify(
result.getSnapshotLocation(), result.getStart().toInstant());
});

if (!backupVerification
.verifyBackup(BackupVersion.SNAPSHOT_META_SERVICE, false /* force */, dateRange)
.isPresent()) {
if (verifiedBackups.isEmpty()) {
logger.error(
"Not able to find any snapshot which is valid in our SLO window: {} hours",
backupRestoreConfig.getBackupVerificationSLOInHours());
Expand Down
Expand Up @@ -16,11 +16,11 @@
import com.amazonaws.services.sns.model.MessageAttributeValue;
import com.netflix.priam.backup.AbstractBackupPath;
import com.netflix.priam.backup.BackupRestoreException;
import com.netflix.priam.backup.BackupVerificationResult;
import com.netflix.priam.config.IBackupRestoreConfig;
import com.netflix.priam.config.IConfiguration;
import com.netflix.priam.identity.InstanceIdentity;
import com.netflix.priam.identity.config.InstanceInfo;
import java.time.Instant;
import java.util.*;
import javax.inject.Inject;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -61,26 +61,27 @@ public BackupNotificationMgr(
this.notifiedBackupFileTypes = "";
}

public void notify(BackupVerificationResult backupVerificationResult) {
public void notify(String remotePath, Instant snapshotInstant) {
JSONObject jsonObject = new JSONObject();
try {
jsonObject.put("s3bucketname", this.config.getBackupPrefix());
jsonObject.put("s3clustername", config.getAppName());
jsonObject.put("s3namespace", backupVerificationResult.remotePath);
jsonObject.put("s3namespace", remotePath);
jsonObject.put("region", instanceInfo.getRegion());
jsonObject.put("rack", instanceInfo.getRac());
jsonObject.put("token", instanceIdentity.getInstance().getToken());
jsonObject.put(
"backuptype", AbstractBackupPath.BackupFileType.SNAPSHOT_VERIFIED.name());
jsonObject.put("snapshotInstant", backupVerificationResult.snapshotInstant);
jsonObject.put("snapshotInstant", snapshotInstant);
// SNS Attributes for filtering messages. Cluster name and backup file type.
Map<String, MessageAttributeValue> messageAttributes = getMessageAttributes(jsonObject);

this.notificationService.notify(jsonObject.toString(), messageAttributes);
} catch (JSONException exception) {
logger.error(
"JSON exception during generation of notification for snapshot verification: {}. Msg: {}",
backupVerificationResult,
"JSON exception during generation of notification for snapshot verification: {}. path: {}, time: {}",
remotePath,
snapshotInstant,
exception.getLocalizedMessage());
}
}
Expand Down
Expand Up @@ -219,7 +219,8 @@ public Response validateSnapshotByDate(
throws Exception {
DateUtil.DateRange dateRange = new DateUtil.DateRange(daterange);
Optional<BackupVerificationResult> result =
backupVerification.verifyBackup(BackupVersion.SNAPSHOT_BACKUP, force, dateRange);
backupVerification.verifyLatestBackup(
BackupVersion.SNAPSHOT_BACKUP, force, dateRange);
if (!result.isPresent()) {
return Response.noContent()
.entity("No valid meta found for provided time range")
Expand Down
Expand Up @@ -125,7 +125,7 @@ public Response validateV2SnapshotByDate(
throws Exception {
DateUtil.DateRange dateRange = new DateUtil.DateRange(daterange);
Optional<BackupVerificationResult> result =
backupVerification.verifyBackup(
backupVerification.verifyLatestBackup(
BackupVersion.SNAPSHOT_META_SERVICE, force, dateRange);
if (!result.isPresent()) {
return Response.noContent()
Expand Down

0 comments on commit 0fb87f9

Please sign in to comment.