stellar · ThomasBrady · Sep 18, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024
diff --git a/docs/software/commands.md b/docs/software/commands.md
@@ -209,8 +209,23 @@ apply.
   hash for a checkpoint ledger, and then verifies the entire earlier history
   of an archive that ends in that ledger hash, writing the output to a reference
   list of trusted checkpoint hashes.
-  Option **--output-filename <FILE-NAME>** is mandatory and specifies the file
-  to write the trusted checkpoint hashes to.
+  * Option **--history-hash <HASH>** is optional and specifies the hash of the ledger
+  at the end of the verification range. When provided, `stellar-core` will use the history
+  hash to verify the range, rather than the latest checkpoint hash obtained from consensus.
+  Used in conjunction with `--history-ledger`.
+  * Option **--history-ledger <LEDGER-NUMBER>** is optional and specifies the ledger
+  number to end the verification at.  Used in conjunction with `--history-hash`.
+  * Option **--output-filename <FILE-NAME>** is mandatory and specifies the file
+  to write the trusted checkpoint hashes to. The file will contain a JSON array 
+  of arrays, where each inner array contains the ledger number and the corresponding
+  checkpoint hash of the form `[[999, "hash-abc"], [935, "hash-def"], ... [0, "hash-xyz]]`.
+  * Option **--trusted-checkpoint-file <FILE-NAME>** is optional. If provided,
+  stellar-core will parse the latest checkpoint ledger number and hash from the file and verify from this ledger to the latest checkpoint ledger obtained from the network.
+  * Option **--from-ledger <LEDGER-NUMBER>** is optional and specifies the ledger
+  number to start the verification from.
+
+> Note: It is an error to provide both the `--trusted-checkpoint-hashes` and `--from-ledger` options.
+
 * **version**: Print version info and then exit.
 
 ## HTTP Commands

diff --git a/src/catchup/CatchupWork.cpp b/src/catchup/CatchupWork.cpp
@@ -177,7 +177,7 @@ CatchupWork::downloadVerifyLedgerChain(CatchupRange const& catchupRange,
 
     mVerifyLedgers = std::make_shared<VerifyLedgerChainWork>(
         mApp, *mDownloadDir, verifyRange, mLastClosedLedgerHashPair,
-        mRangeEndFuture, std::move(fatalFailurePromise));
+        std::nullopt, mRangeEndFuture, std::move(fatalFailurePromise));
 
     // Never retry the sequence: downloads already have retries, and there's no
     // point retrying verification

diff --git a/src/catchup/VerifyLedgerChainWork.cpp b/src/catchup/VerifyLedgerChainWork.cpp
@@ -107,6 +107,7 @@ trySetFuture(std::promise<T>& promise, T value)
 VerifyLedgerChainWork::VerifyLedgerChainWork(
     Application& app, TmpDir const& downloadDir, LedgerRange const& range,
     LedgerNumHashPair const& lastClosedLedger,
+    std::optional<LedgerNumHashPair> const& maxPrevVerified,
     std::shared_future<LedgerNumHashPair> trustedMaxLedger,
     std::promise<bool>&& fatalFailure,
     std::shared_ptr<std::ofstream> outputStream)
@@ -118,6 +119,7 @@ VerifyLedgerChainWork::VerifyLedgerChainWork(
                           : mApp.getHistoryManager().checkpointContainingLedger(
                                 mRange.last()))
     , mLastClosed(lastClosedLedger)
+    , mMaxPrevVerified(maxPrevVerified)
     , mFatalFailurePromise(std::move(fatalFailure))
     , mTrustedMaxLedger(trustedMaxLedger)
     , mVerifiedMinLedgerPrevFuture(mVerifiedMinLedgerPrev.get_future().share())
@@ -209,7 +211,7 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint()
         }
 
         // Verify ledger with local state by comparing to LCL
-        // When checking against LCL, see it the local node is in the bad state,
+        // When checking against LCL, see if the local node is in a bad state
         // or if the archive is in a bad state (in which case, retry)
         if (curr.header.ledgerSeq == mLastClosed.first)
         {
@@ -240,6 +242,20 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint()
                 mChainDisagreesWithLocalState = lclResult;
             }
         }
+        // If the curr history entry is the same ledger as our mMaxPrevVerified,
+        // verify that the hashes match.
+        if (mMaxPrevVerified &&
+            curr.header.ledgerSeq == mMaxPrevVerified->first &&
+            curr.hash != mMaxPrevVerified->second)
+        {
+            CLOG_ERROR(History,
+                       "Checkpoint {} does not agree with trusted "
+                       "checkpoint hash {}",
+                       LedgerManager::ledgerAbbrev(curr),
+                       LedgerManager::ledgerAbbrev(mMaxPrevVerified->first,
+                                                   *mMaxPrevVerified->second));
+            return HistoryManager::VERIFY_STATUS_ERR_BAD_HASH;
+        }
 
         if (beginCheckpoint)
         {
@@ -354,7 +370,7 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint()
     }
     else
     {
-        // Otherwise we just finished a checkpoint _after_ than the first call
+        // Otherwise we just finished a checkpoint _after_ the first call
         // to this method and the `incoming` value we read out of
         // `mVerifiedAhead` should have content, because the previous call
         // should have saved something in `mVerifiedAhead`.
@@ -409,6 +425,11 @@ VerifyLedgerChainWork::onSuccess()
     {
         for (auto const& pair : mVerifiedLedgers)
         {
+            if (mMaxPrevVerified && mMaxPrevVerified->first == pair.first)
+            {
+                // Skip writing the trusted hash to the output file.
+                continue;
+            }
             (*mOutputStream) << "\n[" << pair.first << ", \""
                              << binToHex(*pair.second) << "\"],";
         }

diff --git a/src/catchup/VerifyLedgerChainWork.h b/src/catchup/VerifyLedgerChainWork.h
@@ -26,6 +26,10 @@ class VerifyLedgerChainWork : public BasicWork
     LedgerRange const mRange;
     uint32_t mCurrCheckpoint;
     LedgerNumHashPair const mLastClosed;
+    // The max ledger number and hash that we have verified up to at some time
+    // in the past (or genesis if we have no previous verification). Invocations
+    // of VerifyLedgerChainWork will verify down to this ledger.
+    std::optional<LedgerNumHashPair> const mMaxPrevVerified;
 
     // Record any instance where the chain we're verifying disagrees with the
     // local node state. This _might_ mean we can't possibly catch up (eg. we're
@@ -78,6 +82,7 @@ class VerifyLedgerChainWork : public BasicWork
     VerifyLedgerChainWork(
         Application& app, TmpDir const& downloadDir, LedgerRange const& range,
         LedgerNumHashPair const& lastClosedLedger,
+        std::optional<LedgerNumHashPair> const& maxPrevVerified,
         std::shared_future<LedgerNumHashPair> trustedMaxLedger,
         std::promise<bool>&& fatalFailure,
         std::shared_ptr<std::ofstream> outputStream = nullptr);

diff --git a/src/history/test/HistoryTests.cpp b/src/history/test/HistoryTests.cpp
@@ -242,7 +242,7 @@ TEST_CASE("Ledger chain verification", "[ledgerheaderverification]")
         std::shared_future<bool> fatalFailureFuture =
             fataFailurePromise.get_future().share();
         auto w = wm.executeWork<VerifyLedgerChainWork>(
-            tmpDir, ledgerRange, lclPair, ledgerRangeEndFuture,
+            tmpDir, ledgerRange, lclPair, std::nullopt, ledgerRangeEndFuture,
             std::move(fataFailurePromise));
         REQUIRE(expectedState == w->getState());
         REQUIRE(fatalFailureFuture.valid());

diff --git a/src/historywork/WriteVerifiedCheckpointHashesWork.cpp b/src/historywork/WriteVerifiedCheckpointHashesWork.cpp
@@ -4,46 +4,86 @@
 
 #include "historywork/WriteVerifiedCheckpointHashesWork.h"
 #include "catchup/VerifyLedgerChainWork.h"
+#include "crypto/Hex.h"
 #include "history/HistoryManager.h"
 #include "historywork/BatchDownloadWork.h"
 #include "ledger/LedgerManager.h"
 #include "ledger/LedgerRange.h"
 #include "main/Application.h"
+#include "util/Fs.h"
 #include "util/GlobalChecks.h"
 #include "util/Logging.h"
 #include "work/ConditionalWork.h"
 #include <Tracy.hpp>
 #include <algorithm>
+#include <filesystem>
 #include <fmt/format.h>
 
 namespace stellar
 {
+LedgerNumHashPair
+WriteVerifiedCheckpointHashesWork::loadLatestHashPairFromJsonOutput(
+    std::filesystem::path const& path)
+{
+    if (!fs::exists(path.string()))
+    {
+        throw std::runtime_error("file not found: " + path.string());
+    }
+
+    std::ifstream in(path);
+    Json::Value root;
+    Json::Reader rdr;
+    if (!rdr.parse(in, root))
+    {
+        throw std::runtime_error("failed to parse JSON input " + path.string());
+    }
+    if (!root.isArray())
+    {
+        throw std::runtime_error("expected top-level array in " +
+                                 path.string());
+    }
+    if (root.size() < 2)
+    {
+        throw std::runtime_error(
+            "expected at least one trusted ledger, hash pair in " +
+            path.string());
+    }
+    // Latest hash is the first element in the array.
+    auto const& jpair = root[0];
+    if (!jpair.isArray() || (jpair.size() != 2))
+    {
+        throw std::runtime_error("expecting 2-element sub-array in " +
+                                 path.string());
+    }
+    return {jpair[0].asUInt(), hexToBin256(jpair[1].asString())};
+}
 
 Hash
 WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput(
-    uint32_t seq, std::string const& filename)
+    uint32_t seq, std::filesystem::path const& path)
 {
-    std::ifstream in(filename);
+    std::ifstream in(path);
     if (!in)
     {
-        throw std::runtime_error("error opening " + filename);
+        throw std::runtime_error("error opening " + path.string());
     }
     Json::Value root;
     Json::Reader rdr;
     if (!rdr.parse(in, root))
     {
-        throw std::runtime_error("failed to parse JSON input " + filename);
+        throw std::runtime_error("failed to parse JSON input " + path.string());
     }
     if (!root.isArray())
     {
-        throw std::runtime_error("expected top-level array in " + filename);
+        throw std::runtime_error("expected top-level array in " +
+                                 path.string());
     }
     for (auto const& jpair : root)
     {
         if (!jpair.isArray() || (jpair.size() != 2))
         {
             throw std::runtime_error("expecting 2-element sub-array in " +
-                                     filename);
+                                     path.string());
         }
         if (jpair[0].asUInt() == seq)
         {
@@ -54,16 +94,26 @@ WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput(
 }
 
 WriteVerifiedCheckpointHashesWork::WriteVerifiedCheckpointHashesWork(
-    Application& app, LedgerNumHashPair rangeEnd, std::string const& outputFile,
-    uint32_t nestedBatchSize, std::shared_ptr<HistoryArchive> archive)
+    Application& app, LedgerNumHashPair rangeEnd,
+    std::filesystem::path const& outputFile,
+    std::optional<std::filesystem::path> const& trustedHashFile,
+    std::optional<LedgerNumHashPair> const& latestTrustedHashPair,
+    std::optional<uint32_t> const& fromLedger, uint32_t nestedBatchSize,
+    std::shared_ptr<HistoryArchive> archive)
     : BatchWork(app, "write-verified-checkpoint-hashes")
     , mNestedBatchSize(nestedBatchSize)
     , mRangeEnd(rangeEnd)
     , mRangeEndPromise()
     , mRangeEndFuture(mRangeEndPromise.get_future().share())
     , mCurrCheckpoint(rangeEnd.first)
     , mArchive(archive)
-    , mOutputFileName(outputFile)
+    , mTrustedHashPath(trustedHashFile)
+    , mOutputPath(outputFile)
+    , mTmpDir("verify-checkpoints")
+    , mTmpOutputPath(std::filesystem::path(mTmpDir.getName()) /
+                     outputFile.filename())
+    , mLatestTrustedHashPair(latestTrustedHashPair)
+    , mFromLedger(fromLedger)
 {
     mRangeEndPromise.set_value(mRangeEnd);
     if (mArchive)
@@ -81,6 +131,14 @@ WriteVerifiedCheckpointHashesWork::~WriteVerifiedCheckpointHashesWork()
 bool
 WriteVerifiedCheckpointHashesWork::hasNext() const
 {
+    if (mFromLedger)
+    {
+        return mCurrCheckpoint >= *mFromLedger;
+    }
+    else if (mLatestTrustedHashPair)
+    {
+        return mCurrCheckpoint > mLatestTrustedHashPair->first;
+    }
     return mCurrCheckpoint != LedgerManager::GENESIS_LEDGER_SEQ;
 }
 
@@ -101,9 +159,31 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork()
                                 std::make_optional<Hash>(lclHe.hash));
     uint32_t const span = mNestedBatchSize * freq;
     uint32_t const last = mCurrCheckpoint;
-    uint32_t const first =
-        last <= span ? LedgerManager::GENESIS_LEDGER_SEQ
-                     : hm.firstLedgerInCheckpointContaining(last - span);
+    uint32_t first = last <= span
+                         ? LedgerManager::GENESIS_LEDGER_SEQ
+                         : hm.firstLedgerInCheckpointContaining(last - span);
+    // If the first ledger in the range is less than mFromLedger then the
+    // range should be constrained to start at mFromLedger, or the checkpoint
+    // immediately before it if mFromLedger is not a checkpoint boundary.
+    if (mFromLedger && first < *mFromLedger)
+    {
+        if (hm.isLastLedgerInCheckpoint(*mFromLedger))
+        {
+            first = *mFromLedger;
+        }
+        else
+        {
+            first = hm.lastLedgerBeforeCheckpointContaining(*mFromLedger);
+        }
+        releaseAssertOrThrow(first <= *mFromLedger);
+    }
+    // If the latest trusted ledger is greater than the first
+    // ledger in the range then the range should start at the trusted ledger.
+    else if (mLatestTrustedHashPair && first < mLatestTrustedHashPair->first)
+    {
+        first = mLatestTrustedHashPair->first;
+        releaseAssertOrThrow(hm.isLastLedgerInCheckpoint(first));
+    }
 
     LedgerRange const ledgerRange = LedgerRange::inclusive(first, last);
     CheckpointRange const checkpointRange(ledgerRange, hm);
@@ -138,8 +218,8 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork()
                          : mRangeEndFuture);
 
     auto currWork = std::make_shared<VerifyLedgerChainWork>(
-        mApp, *tmpDir, ledgerRange, lcl, prevTrusted, std::promise<bool>(),
-        mOutputFile);
+        mApp, *tmpDir, ledgerRange, lcl, mLatestTrustedHashPair, prevTrusted,
+        std::promise<bool>(), mOutputFile);
     auto prevWork = mPrevVerifyWork;
     auto predicate = [prevWork](Application&) {
         if (!prevWork)
@@ -168,11 +248,11 @@ WriteVerifiedCheckpointHashesWork::startOutputFile()
 {
     releaseAssert(!mOutputFile);
     auto mode = std::ios::out | std::ios::trunc;
-    mOutputFile = std::make_shared<std::ofstream>(mOutputFileName, mode);
+    mOutputFile = std::make_shared<std::ofstream>(mTmpOutputPath, mode);
     if (!*mOutputFile)
     {
         throw std::runtime_error("error opening output file " +
-                                 mOutputFileName);
+                                 mTmpOutputPath.string());
     }
     (*mOutputFile) << "[";
 }
@@ -182,13 +262,51 @@ WriteVerifiedCheckpointHashesWork::endOutputFile()
 {
     if (mOutputFile && mOutputFile->is_open())
     {
-        // Each line of output made by a VerifyLedgerChainWork has a trailing
-        // comma, and trailing commas are not a valid end of a JSON array; so we
-        // terminate the array here with an entry that does _not_ have a
-        // trailing comma (and identifies an invalid ledger number anyways).
-        (*mOutputFile) << "\n[0, \"\"]\n]\n";
+        if (mTrustedHashPath && fs::exists(mTrustedHashPath->string()))
+        {
+            // Append everything except the first line of mTrustedHashFile to
+            // mOutputFile.
+            std::ifstream trustedHashFile(*mTrustedHashPath);
+            if (trustedHashFile)
+            {
+                std::string line;
+                // Ignore the first line ("["")
+                std::getline(trustedHashFile, line);
+                // Append the rest of the lines to mOutputFile.
+                while (std::getline(trustedHashFile, line))
+                {
+                    (*mOutputFile) << "\n" << line;
+                }
+                trustedHashFile.close();
+            }
+            else
+            {
+                CLOG_WARNING(History, "failed to open trusted hash file {}",
+                             *mTrustedHashPath);
+            }
+        }
+        else
+        {
+            // Each line of output made by a VerifyLedgerChainWork has a
+            // trailing comma, and trailing commas are not a valid end of a JSON
+            // array; so we terminate the array here with an entry that does
+            // _not_ have a trailing comma (and identifies an invalid ledger
+            // number anyways).
+            (*mOutputFile) << "\n[0, \"\"]\n]\n";
+        }
         mOutputFile->close();
         mOutputFile.reset();
+
+        if (!fs::exists(mTmpOutputPath.string()))
+        {
+            CLOG_ERROR(History, "temporary output file {} does not exist",
+                       mTmpOutputPath);
+            return;
+        }
+        // The output file was written to a temporary file, so rename it to
+        // the output path provided by the user.
+        fs::durableRename(mTmpOutputPath.string(), mOutputPath.string(),
+                          mOutputPath.relative_path().string());
     }
 }