From fab9a8112fd82b14b992123abc42aea3eb032e79 Mon Sep 17 00:00:00 2001
From: "James C. Owens"
Date: Wed, 1 May 2024 17:56:18 -0400
Subject: [PATCH] Scraper greylisting machinery (Part 1)

---
 src/gridcoin/scraper/fwd.h       |   5 ++
 src/gridcoin/scraper/scraper.cpp | 120 +++++++++++++++++++++++--------
 2 files changed, 95 insertions(+), 30 deletions(-)

diff --git a/src/gridcoin/scraper/fwd.h b/src/gridcoin/scraper/fwd.h
index d91f9c8eee..19f1a123e2 100644
--- a/src/gridcoin/scraper/fwd.h
+++ b/src/gridcoin/scraper/fwd.h
@@ -159,6 +159,11 @@ struct ConvergedManifest
      */
     std::vector<std::string> vExcludedProjects;

+    //!
+    //! \brief The list of projects that have been greylisted.
+    //!
+    std::vector<std::string> vGreylistedProjects;
+
     /** Populates the part pointers map in the convergence */
     bool PopulateConvergedManifestPartPtrsMap();

diff --git a/src/gridcoin/scraper/scraper.cpp b/src/gridcoin/scraper/scraper.cpp
index 0dfcb83578..659e40e5ed 100755
--- a/src/gridcoin/scraper/scraper.cpp
+++ b/src/gridcoin/scraper/scraper.cpp
@@ -1567,7 +1567,7 @@ void Scraper(bool bSingleShot)
         uiInterface.NotifyScraperEvent(scrapereventtypes::Stats, CT_UPDATING, {});

         // Get a read-only view of the current project whitelist:
-        const WhitelistSnapshot projectWhitelist = GetWhitelist().Snapshot();
+        const WhitelistSnapshot projectWhitelist = GetWhitelist().Snapshot(GRC::ProjectEntry::ProjectFilterFlag::ALL_BUT_DELETED);

         // Delete manifest entries not on whitelist. Take a lock on cs_StructScraperFileManifest for this.
         {
@@ -3630,7 +3630,8 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi
         // At the individual (byCPIDbyProject) level the AvgRAC is the same as the RAC.
         statsentry.statsvalue.dAvgRAC = statsentry.statsvalue.dRAC;

-        // Mag is dealt with on the second pass... so is left at 0.0 on the first pass.
+        // Mag is dealt with on the second pass, so is set to 0.0 on the first pass.
+        statsentry.statsvalue.dMag = 0.0;

         statsentry.statskey.objecttype = statsobjecttype::byCPIDbyProject;
         statsentry.statskey.objectID = project + "," + cpid;
@@ -3648,20 +3649,24 @@ bool ProcessProjectStatsFromStreamByCPID(const std::string& project, boostio::fi
     // The mScraperStats here is scoped to only this project so we do not need project filtering here.
     ScraperStats::iterator entry;

-    for (auto const& entry : mScraperStats)
-    {
-        ScraperObjectStats statsentry;
+    // Statistics tracked for greylisted projects have zero project magnitude, so no need to go through
+    // and update the CPID level mags. They are all zero if project magnitude is zero.
+    if (projectmag > 0) {
+        for (auto const& entry : mScraperStats)
+        {
+            ScraperObjectStats statsentry;

-        statsentry.statskey = entry.first;
-        statsentry.statsvalue.dTC = entry.second.statsvalue.dTC;
-        statsentry.statsvalue.dRAT = entry.second.statsvalue.dRAT;
-        statsentry.statsvalue.dRAC = entry.second.statsvalue.dRAC;
-        // As per the above the individual (byCPIDbyProject) level the AvgRAC is the same as the RAC.
-        statsentry.statsvalue.dAvgRAC = entry.second.statsvalue.dAvgRAC;
-        statsentry.statsvalue.dMag = MagRound(entry.second.statsvalue.dRAC / dProjectRAC * projectmag);
+            statsentry.statskey = entry.first;
+            statsentry.statsvalue.dTC = entry.second.statsvalue.dTC;
+            statsentry.statsvalue.dRAT = entry.second.statsvalue.dRAT;
+            statsentry.statsvalue.dRAC = entry.second.statsvalue.dRAC;
+            // As per the above, at the individual (byCPIDbyProject) level the AvgRAC is the same as the RAC.
+            statsentry.statsvalue.dAvgRAC = entry.second.statsvalue.dAvgRAC;
+            statsentry.statsvalue.dMag = MagRound(entry.second.statsvalue.dRAC / dProjectRAC * projectmag);

-        // Update map entry with the magnitude.
-        mScraperStats[statsentry.statskey] = statsentry;
+            // Update map entry with the magnitude.
+            mScraperStats[statsentry.statskey] = statsentry;
+        }
     }

     // Due to rounding to MAG_ROUND, the actual total project magnitude will not be exactly projectmag,
@@ -3796,6 +3801,9 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState()
 {
     _log(logattribute::INFO, "GetScraperStatsByCurrentFileManifestState", "Beginning stats processing.");

+    // Get a read-only view of the current project greylist
+    const WhitelistSnapshot greylist = GetWhitelist().Snapshot(GRC::ProjectEntry::ProjectFilterFlag::GREYLISTED);
+
     // Enumerate the count of active projects from the file manifest. Since the manifest is
     // constructed starting with the whitelist, and then using only the current files, this
     // will always be less than or equal to the whitelist count from whitelist.
@@ -3803,10 +3811,11 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState()
     {
         LOCK(cs_StructScraperFileManifest);

-        for (auto const& entry : StructScraperFileManifest.mScraperFileManifest)
-        {
-            //
-            if (entry.second.current && !entry.second.excludefromcsmanifest) nActiveProjects++;
+        for (const auto& entry : StructScraperFileManifest.mScraperFileManifest) {
+            // Count as active if current, not marked for exclusion, and not greylisted.
+            if (entry.second.current
+                && !entry.second.excludefromcsmanifest
+                && !greylist.Contains(entry.second.project)) nActiveProjects++;
         }
     }
     double dMagnitudePerProject = NETWORK_MAGNITUDE / nActiveProjects;
@@ -3831,7 +3840,12 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState()
         _log(logattribute::INFO, "GetScraperStatsByCurrentFileManifestState", "Processing stats for project: " + project);

-        LoadProjectFileToStatsByCPID(project, file, dMagnitudePerProject, mProjectScraperStats);
+        if (!greylist.Contains(entry.second.project)) {
+            LoadProjectFileToStatsByCPID(project, file, dMagnitudePerProject, mProjectScraperStats);
+        } else {
+            // Project magnitude for a greylisted project is zero.
+            LoadProjectFileToStatsByCPID(project, file, 0.0, mProjectScraperStats);
+        }

         // Insert into overall map.
         for (auto const& entry2 : mProjectScraperStats)
         {
@@ -3868,6 +3882,9 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge
 {
     _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Beginning stats processing.");

+    // Get a read-only view of the current project greylist
+    const WhitelistSnapshot greylist = GetWhitelist().Snapshot(GRC::ProjectEntry::ProjectFilterFlag::GREYLISTED);
+
     ScraperStatsAndVerifiedBeacons stats_and_verified_beacons;

     // Enumerate the count of active projects from the dummy converged manifest. One of the parts
@@ -3897,6 +3914,14 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge
     stats_and_verified_beacons.mVerifiedMap = VerifiedBeaconMap;

     unsigned int nActiveProjects = StructConvergedManifest.ConvergedManifestPartPtrsMap.size() - exclude_parts_from_count;
+
+    // If a project part is greylisted, do not count it as an active project, even though stats have been collected.
+    for (const auto& project : StructConvergedManifest.ConvergedManifestPartPtrsMap) {
+        if (greylist.Contains(project.first)) {
+            --nActiveProjects;
+        }
+    }
+
     _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Number of active projects in converged manifest = " + ToString(nActiveProjects));

@@ -3916,7 +3941,12 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge
         _log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Processing stats for project: " + project);

-        LoadProjectObjectToStatsByCPID(project, entry->second->data, dMagnitudePerProject, mProjectScraperStats);
+        if (!greylist.Contains(project)) {
+            LoadProjectObjectToStatsByCPID(project, entry->second->data, dMagnitudePerProject, mProjectScraperStats);
+        } else {
+            // Project magnitude for a greylisted project is zero.
+            LoadProjectObjectToStatsByCPID(project, entry->second->data, 0.0, mProjectScraperStats);
+        }

         // Insert into overall map.
         for (auto const& entry2 : mProjectScraperStats)
         {
@@ -3939,6 +3969,9 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifes
 {
     _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", "Beginning stats processing.");

+    // Get a read-only view of the current project greylist
+    const WhitelistSnapshot greylist = GetWhitelist().Snapshot(GRC::ProjectEntry::ProjectFilterFlag::GREYLISTED);
+
     // Create a dummy converged manifest and fill out the dummy ConvergedManifest structure from the provided
     // manifest.
     ConvergedManifest StructDummyConvergedManifest(manifest);
@@ -3973,6 +4006,14 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifes

     unsigned int nActiveProjects = StructDummyConvergedManifest.ConvergedManifestPartPtrsMap.size() - exclude_parts_from_count;
+
+    // If a project part is greylisted, do not count it as an active project, even though stats have been collected.
+    for (const auto& project : StructDummyConvergedManifest.ConvergedManifestPartPtrsMap) {
+        if (greylist.Contains(project.first)) {
+            --nActiveProjects;
+        }
+    }
+
     _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", "Number of active projects in converged manifest = " + ToString(nActiveProjects));

@@ -3989,7 +4030,12 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifes
         _log(logattribute::INFO, "GetScraperStatsFromSingleManifest", "Processing stats for project: " + project);

-        LoadProjectObjectToStatsByCPID(project, entry->second->data, dMagnitudePerProject, mProjectScraperStats);
+        if (!greylist.Contains(project)) {
+            LoadProjectObjectToStatsByCPID(project, entry->second->data, dMagnitudePerProject, mProjectScraperStats);
+        } else {
+            // Project magnitude for a greylisted project is zero.
+            LoadProjectObjectToStatsByCPID(project, entry->second->data, 0.0, mProjectScraperStats);
+        }

         // Insert into overall map.
         stats_and_verified_beacons.mScraperStats.insert(mProjectScraperStats.begin(), mProjectScraperStats.end());
@@ -4849,7 +4895,7 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes

     // Get a read-only view of the current project whitelist to fill out the
     // excluded projects vector later on:
-    const WhitelistSnapshot projectWhitelist = GetWhitelist().Snapshot();
+    const WhitelistSnapshot projectWhitelist = GetWhitelist().Snapshot(GRC::ProjectEntry::ProjectFilterFlag::ALL_BUT_DELETED);

     if (bConvergenceSuccessful)
     {
@@ -4878,6 +4924,12 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes
         // to try and recover project by project.
         for (const auto& iProjects : projectWhitelist)
         {
+            // If the project is greylisted, push the project name to the greylist vector.
+            if (iProjects.m_status == GRC::ProjectEntryStatus::MAN_GREYLISTED
+                || iProjects.m_status == GRC::ProjectEntryStatus::AUTO_GREYLISTED) {
+                StructConvergedManifest.vGreylistedProjects.push_back(iProjects.m_name);
+            }
+
             if (StructConvergedManifest.ConvergedManifestPartPtrsMap.find(iProjects.m_name)
                 == StructConvergedManifest.ConvergedManifestPartPtrsMap.end())
             {
@@ -4961,6 +5013,12 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project

     for (const auto& iWhitelistProject : projectWhitelist)
     {
+        // If the project is greylisted, push the project name to the greylist vector for later use.
+        if (iWhitelistProject.m_status == GRC::ProjectEntryStatus::MAN_GREYLISTED
+            || iWhitelistProject.m_status == GRC::ProjectEntryStatus::AUTO_GREYLISTED) {
+            StructConvergedManifest.vGreylistedProjects.push_back(iWhitelistProject.m_name);
+        }
+
         // Do a map for unique ProjectObject times ordered by descending time then content hash. Note that for Project
         // Objects (Parts), the content hash is the object hash. We also need the consensus block here, because we are
         // "composing" the manifest by parts, so we will need to choose the latest consensus block by manifest time. This
@@ -5132,8 +5190,10 @@ bool ScraperConstructConvergedManifestByProject(const WhitelistSnapshot& project

     auto convergence_by_project_ratio = [](){ LOCK(cs_ScraperGlobals); return CONVERGENCE_BY_PROJECT_RATIO; };

-    // If we meet the rule of CONVERGENCE_BY_PROJECT_RATIO, then proceed to fill out the rest of the map.
-    if ((double)iCountSuccessfulConvergedProjects / (double)projectWhitelist.size() >= convergence_by_project_ratio())
+    // If we meet the rule of CONVERGENCE_BY_PROJECT_RATIO, then proceed to fill out the rest of the map. Note that greylisted
+    // projects are excluded from the count in the denominator, because convergence is not necessarily expected for those.
+    if ((double)iCountSuccessfulConvergedProjects /
+        (double)(projectWhitelist.size() - StructConvergedManifest.vGreylistedProjects.size()) >= convergence_by_project_ratio())
     {
         AppCacheSection mScrapers = GetScrapersCache();

@@ -6069,13 +6129,13 @@ UniValue testnewsb(const UniValue& params, bool fHelp)
     {
         LOCK(cs_ConvergedScraperStatsCache);

-            if (!ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed())
-            {
-                UniValue error(UniValue::VOBJ);
-                error.pushKV("Error:", "Wait until a convergence is formed.");
+        if (!ConvergedScraperStatsCache.NewFormatSuperblock.WellFormed())
+        {
+            UniValue error(UniValue::VOBJ);
+            error.pushKV("Error:", "Wait until a convergence is formed.");

-                return error;
-            }
+            return error;
+        }

     _log(logattribute::INFO, "testnewsb", "Size of the PastConvergences map = " + ToString(ConvergedScraperStatsCache.PastConvergences.size()));