Skip to content

Commit

Permalink
feat: add fallback logic when loading the main tlog fails (aws-greeng…
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeDombo authored Apr 21, 2021
1 parent e8be1f2 commit 52c2765
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 19 deletions.
34 changes: 33 additions & 1 deletion src/main/java/com/aws/greengrass/config/ConfigurationReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

public final class ConfigurationReader {
private static final Logger logger = LogManager.getLogger(ConfigurationReader.class);
private static final TypeReference<Tlogline> TLOG_LINE_REF = new TypeReference<Tlogline>() {
};

private ConfigurationReader() {
}
Expand All @@ -42,7 +44,7 @@ public static void mergeTLogInto(Configuration config, Reader reader, boolean fo

for (l = in.readLine(); l != null; l = in.readLine()) {
try {
Tlogline tlogline = Coerce.toObject(l, new TypeReference<Tlogline>() {});
Tlogline tlogline = Coerce.toObject(l, TLOG_LINE_REF);
if (WhatHappened.changed.equals(tlogline.action)) {

Topic targetTopic = config.lookup(tlogline.timestamp, tlogline.topicPath);
Expand Down Expand Up @@ -100,6 +102,36 @@ private static void mergeTLogInto(Configuration c, Path p) throws IOException {
}
}

/**
 * Sanity-check the transaction log at the given path by parsing its first entry.
 *
 * <p>Only the first line of the file is inspected; an empty file passes validation.
 *
 * @param tlogPath path to the transaction log file to validate.
 * @throws IOException if the file cannot be read, or if parsing the first line fails.
 */
public static void validateTlog(Path tlogPath) throws IOException {
    // Why only the first line?
    // Very rarely the transaction log gets corrupted when a device (specifically a raspberry pi using
    // an SD card) loses power. The corruption happens at the hardware level and there is nothing we can
    // do about it right now. What we observe is a tlog file filled with kilobytes of null bytes,
    // depending on how large the configuration was before the entire config was dumped to disk.
    //
    // A corrupt leading section is not parsable, so parsing the first line surfaces this kind of
    // corruption as an exception. Corruption at the end of the file (from appending writes) is
    // deliberately NOT reported here, because the regular tlog reader is already set up to skip over it.
    try (BufferedReader tlogReader = Files.newBufferedReader(tlogPath)) {
        String firstLine = tlogReader.readLine();
        if (firstLine == null) {
            // readLine() returns null at EOF: the file has no lines, so there is nothing to check
            return;
        }
        Coerce.toObject(firstLine, TLOG_LINE_REF);
    }
}

/**
* Create a Configuration based on a transaction log's path.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,11 @@ void provision(Kernel kernel) throws IOException, DeviceConfigurationException {
kernel.getContext().get(DeviceConfiguration.class).getNucleusVersion());
}

// Dump the config now that we've just provisioned, so that the bootstrap config will enable us to
// reach the cloud when needed. We normally never overwrite the bootstrap file, but we must do so
// here because we've only just learned about our endpoints, certs, etc.
kernel.writeEffectiveConfigAsTransactionLog(kernel.getNucleusPaths().configPath()
.resolve(Kernel.DEFAULT_BOOTSTRAP_CONFIG_TLOG_FILE));
}

@SuppressWarnings("PMD.PreserveStackTrace")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public class Kernel {
static final String DEFAULT_CONFIG_YAML_FILE_READ = "config.yaml";
static final String DEFAULT_CONFIG_YAML_FILE_WRITE = "effectiveConfig.yaml";
static final String DEFAULT_CONFIG_TLOG_FILE = "config.tlog";
public static final String DEFAULT_BOOTSTRAP_CONFIG_TLOG_FILE = "bootstrap.tlog";
public static final String SERVICE_DIGEST_TOPIC_KEY = "service-digest";
private static final String DEPLOYMENT_STAGE_LOG_KEY = "stage";
protected static final ObjectMapper CONFIG_YAML_WRITER =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package com.aws.greengrass.lifecyclemanager;

import com.amazon.aws.iot.greengrass.component.common.DependencyType;
import com.aws.greengrass.config.ConfigurationReader;
import com.aws.greengrass.config.ConfigurationWriter;
import com.aws.greengrass.dependency.EZPlugins;
import com.aws.greengrass.dependency.ImplementsService;
Expand All @@ -23,6 +24,7 @@
import com.aws.greengrass.logging.impl.LogManager;
import com.aws.greengrass.logging.impl.config.LogConfig;
import com.aws.greengrass.telemetry.impl.config.TelemetryConfig;
import com.aws.greengrass.util.CommitableFile;
import com.aws.greengrass.util.NucleusPaths;
import com.aws.greengrass.util.Utils;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
Expand Down Expand Up @@ -137,35 +139,96 @@ void initConfigAndTlog(String configFilePath) {
void initConfigAndTlog() {
try {
Path transactionLogPath = nucleusPaths.configPath().resolve(Kernel.DEFAULT_CONFIG_TLOG_FILE);
boolean readFromNonTlog = false;

if (Objects.nonNull(kernelCommandLine.getProvidedConfigPathName())) {
// If a config file is provided, kernel will use the provided file as a new base
// and ignore existing config and tlog files.
// This is used by the nucleus bootstrap workflow
kernel.getConfig().read(kernelCommandLine.getProvidedConfigPathName());
readFromNonTlog = true;
} else {
Path externalConfig = nucleusPaths.configPath().resolve(Kernel.DEFAULT_CONFIG_YAML_FILE_READ);
boolean externalConfigFromCmd = Utils.isNotEmpty(kernelCommandLine.getProvidedInitialConfigPath());
if (externalConfigFromCmd) {
externalConfig = Paths.get(kernelCommandLine.getProvidedInitialConfigPath());
}

Path bootstrapTlogPath = nucleusPaths.configPath().resolve(Kernel.DEFAULT_BOOTSTRAP_CONFIG_TLOG_FILE);

boolean bootstrapTlogExists = Files.exists(bootstrapTlogPath);
boolean tlogExists = Files.exists(transactionLogPath);
boolean externalConfigExists = Files.exists(externalConfig);

// if tlog is present, read the tlog first because the yaml config file may not be up to date
IOException tlogValidationError = null;
if (tlogExists) {
try {
ConfigurationReader.validateTlog(transactionLogPath);
} catch (IOException e) {
tlogValidationError = e;
}
}

// if tlog is present, read the tlog first because the yaml config file may not be up to date
if (tlogExists && tlogValidationError == null) {
kernel.getConfig().read(transactionLogPath);
}

// tlog recovery logic if the main tlog isn't valid
if (tlogValidationError != null) {
// Attempt to load from backup tlog file
Path backupTlogPath = CommitableFile.getBackupFile(transactionLogPath);
boolean backupValid = false;
if (Files.exists(backupTlogPath)) {
try {
ConfigurationReader.validateTlog(backupTlogPath);
backupValid = true;
} catch (IOException e) {
logger.atError().log("Backup transaction log at {} is invalid", backupTlogPath, e);
}
}

if (backupValid) {
logger.atError()
.log("Transaction log {} is invalid and so is the backup at {}, will attempt to "
+ "load configuration from {}", transactionLogPath, backupTlogPath,
bootstrapTlogPath, tlogValidationError);
kernel.getConfig().read(backupTlogPath);
readFromNonTlog = true;
} else if (bootstrapTlogExists) {
// If no backup or if the backup was invalid, then try loading from bootstrap
logger.atError()
.log("Transaction log {} is invalid and no usable backup exists, will attempt to load "
+ "configuration from {}", transactionLogPath, bootstrapTlogPath,
tlogValidationError);
kernel.getConfig().read(bootstrapTlogPath);
readFromNonTlog = true;
} else {
// There are no files to load from
logger.atError()
.log("Transaction log {} is invalid and no usable backup exists", transactionLogPath,
tlogValidationError);
}
}

boolean externalConfigExists = Files.exists(externalConfig);
// If there is no tlog, or the path was provided via commandline, read in that file
if ((externalConfigFromCmd || !tlogExists) && externalConfigExists) {
kernel.getConfig().read(externalConfig);
readFromNonTlog = true;
}

// If no bootstrap tlog was present, write one out now that we've loaded our config so that we have
// something to fall back to
if (!bootstrapTlogExists) {
kernel.writeEffectiveConfigAsTransactionLog(bootstrapTlogPath);
}
}

// write new tlog and config files
kernel.writeEffectiveConfigAsTransactionLog(transactionLogPath);
// only dump out the current config if we read from a source which was not the tlog
if (readFromNonTlog) {
kernel.writeEffectiveConfigAsTransactionLog(transactionLogPath);
}
kernel.writeEffectiveConfig();

// hook tlog to config so that changes over time are persisted to the tlog
Expand Down
48 changes: 35 additions & 13 deletions src/main/java/com/aws/greengrass/util/CommitableFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,39 @@
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.logging.impl.LogManager;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;

/**
* Equivalent to FileOutputStream except that it has to be committed in order to be
* Equivalent to OutputStream except that it has to be committed in order to be
* made permanent. If it is closed or the process exits before the commit, the old
* version of the file remains.
*/
@SuppressWarnings("PMD.AvoidFileStream")
public final class CommitableFile extends FileOutputStream implements Commitable {
public final class CommitableFile extends OutputStream implements Commitable {
private static final Logger logger = LogManager.getLogger(CommitableFile.class);
private final Path newVersion;
private final Path target;
private final Path backup;
private final boolean commitOnClose;
private final OutputStream out;
private boolean closed;

/**
* Creates a new instance of SafeFileOutputStream.
* Creates a new instance of CommitableFile.
*/
private CommitableFile(Path n, Path b, Path t, boolean commitOnClose) throws IOException {
super(n.toFile());
super();
newVersion = n;
target = t;
backup = b;
this.commitOnClose = commitOnClose;
out = Files.newOutputStream(n, StandardOpenOption.CREATE, StandardOpenOption.WRITE,
StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC);
}

/**
Expand Down Expand Up @@ -88,7 +91,7 @@ public static Path getBackupFile(Path path) {
}

@Override
public void close() {
public void close() throws IOException {
if (!closed) {
if (commitOnClose) {
commit();
Expand All @@ -105,10 +108,7 @@ public void close() {
public void abandon() {
if (!closed) {
try {
super.close();
} catch (IOException ignored) {
}
try {
out.close();
Files.deleteIfExists(newVersion);
} catch (IOException ignore) {
}
Expand All @@ -117,14 +117,15 @@ public void abandon() {
}

/**
* Close the file and commit the new version. The old version becomes a backup
* Close the file and commit the new version. The old version becomes a backup
*/
@SuppressWarnings("ConvertToTryWithResources")
@Override
public void commit() {
if (!closed) {
try {
super.close();
flush();
out.close();
} catch (IOException ignore) {
}
if (Files.exists(newVersion)) {
Expand All @@ -135,6 +136,27 @@ public void commit() {
}
}

// Remaining methods pass through the calls to the underlying OutputStream
/**
 * Write the whole byte array by delegating to the wrapped stream {@code out}.
 *
 * @param b bytes to write.
 * @throws IOException if the wrapped stream fails to write.
 */
@Override
public void write(byte[] b) throws IOException {
out.write(b);
}

/**
 * Write {@code len} bytes of {@code b}, starting at offset {@code off}, by delegating to the wrapped
 * stream {@code out}.
 *
 * @param b   source byte array.
 * @param off index of the first byte to write.
 * @param len number of bytes to write.
 * @throws IOException if the wrapped stream fails to write.
 */
@Override
public void write(byte[] b, int off, int len) throws IOException {
out.write(b, off, len);
}

/**
 * Write a single byte by delegating to the wrapped stream {@code out}.
 *
 * @param b the byte value, passed through unchanged to {@code out.write(int)}.
 * @throws IOException if the wrapped stream fails to write.
 */
@Override
public void write(int b) throws IOException {
out.write(b);
}

/**
 * Flush the wrapped stream {@code out}.
 *
 * @throws IOException if the wrapped stream fails to flush.
 */
@Override
public void flush() throws IOException {
out.flush();
}

static void move(Path from, Path to) {
try {
if (Files.exists(from)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
class GreengrassSetupTest {
@Mock
private DeviceProvisioningHelper deviceProvisioningHelper;
@Mock
@Mock(answer = Answers.RETURNS_DEEP_STUBS)
private Kernel kernel;
@Mock
private Context context;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -205,7 +206,9 @@ void GIVEN_kernel_WHEN_launch_without_config_THEN_tlog_read_from_disk() throws E

kernelLifecycle.initConfigAndTlog();
verify(mockKernel.getConfig()).read(eq(configTlog.toPath()));
verify(mockKernel).writeEffectiveConfigAsTransactionLog(tempRootDir.resolve("config").resolve("config.tlog"));
// Since we read from the tlog, we don't need to re-write the same info
verify(mockKernel, never()).writeEffectiveConfigAsTransactionLog(
tempRootDir.resolve("config").resolve("config.tlog"));
verify(mockKernel).writeEffectiveConfig();
}

Expand Down

0 comments on commit 52c2765

Please sign in to comment.