[yul-phaser] Add ProgramCache class

This commit is contained in:
Kamil Śliwak 2020-02-26 19:28:58 +01:00
parent f89e154693
commit 8b443627e2
5 changed files with 396 additions and 0 deletions

View File

@ -154,6 +154,7 @@ set(yul_phaser_sources
yulPhaser/Phaser.cpp
yulPhaser/Population.cpp
yulPhaser/Program.cpp
yulPhaser/ProgramCache.cpp
yulPhaser/Selections.cpp
yulPhaser/SimulationRNG.cpp
@ -170,6 +171,7 @@ set(yul_phaser_sources
../tools/yulPhaser/Phaser.cpp
../tools/yulPhaser/Population.cpp
../tools/yulPhaser/Program.cpp
../tools/yulPhaser/ProgramCache.cpp
../tools/yulPhaser/Selections.cpp
../tools/yulPhaser/SimulationRNG.cpp
)

View File

@ -0,0 +1,207 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <tools/yulPhaser/ProgramCache.h>
#include <tools/yulPhaser/Chromosome.h>
#include <liblangutil/CharStream.h>
#include <libsolutil/CommonIO.h>
#include <boost/test/unit_test.hpp>
#include <string>
#include <set>
using namespace std;
using namespace solidity::util;
using namespace solidity::langutil;
using namespace solidity::yul;
namespace solidity::phaser::test
{
/// Fixture shared by all ProgramCache test cases. It loads a small sample program and creates
/// an empty cache for it.
class ProgramCacheFixture
{
protected:
	// Source of the sample program that the fixture loads and feeds to the cache.
	static constexpr char SampleSourceCode[] =
		"{\n"
		" for { let i := 0 } not(eq(i, 15)) { i := add(i, 1) }\n"
		" {\n"
		" let x := 1\n"
		" mstore(i, 2)\n"
		" }\n"
		"}\n";

	/// Applies the given abbreviated optimisation steps directly to a copy of @a _program,
	/// without going through the cache. Used to produce reference results.
	Program optimisedProgram(Program _program, string _abbreviatedOptimisationSteps) const
	{
		Chromosome chromosome(_abbreviatedOptimisationSteps);
		_program.optimise(chromosome.optimisationSteps());
		return _program;
	}

	/// Collects the keys (step sequences) of all entries currently stored in @a _programCache.
	static set<string> cachedKeys(ProgramCache const& _programCache)
	{
		set<string> keys;
		for (auto const& entry: _programCache.entries())
			keys.insert(entry.first);
		return keys;
	}

	// NOTE: The declaration order matters. Each member is initialised from the previous one.
	CharStream m_sourceStream = CharStream(SampleSourceCode, "program-cache-test");
	Program m_program = get<Program>(Program::load(m_sourceStream));
	ProgramCache m_programCache{m_program};
};
BOOST_AUTO_TEST_SUITE(Phaser)
BOOST_AUTO_TEST_SUITE(ProgramCacheTest)
BOOST_FIXTURE_TEST_CASE(optimiseProgram_should_apply_optimisation_steps_to_program, ProgramCacheFixture)
{
	// Reference result obtained without the cache.
	Program referenceProgram = optimisedProgram(m_program, "IuO");

	// Test precondition: the steps must actually change the program, otherwise the
	// comparison below would not prove anything.
	assert(toString(referenceProgram) != toString(m_program));

	Program programFromCache = m_programCache.optimiseProgram("IuO");

	BOOST_TEST(toString(programFromCache) == toString(referenceProgram));
}
BOOST_FIXTURE_TEST_CASE(optimiseProgram_should_store_programs_for_all_prefixes, ProgramCacheFixture)
{
	// Reference results for every prefix of "IuO", computed one step at a time without the cache.
	Program afterI = optimisedProgram(m_program, "I");
	Program afterIu = optimisedProgram(afterI, "u");
	Program afterIuO = optimisedProgram(afterIu, "O");

	// Test precondition: all four programs must be pairwise different so that the checks
	// below can tell the cached entries apart.
	assert(toString(m_program) != toString(afterI));
	assert(toString(m_program) != toString(afterIu));
	assert(toString(m_program) != toString(afterIuO));
	assert(toString(afterI) != toString(afterIu));
	assert(toString(afterI) != toString(afterIuO));
	assert(toString(afterIu) != toString(afterIuO));

	BOOST_REQUIRE(m_programCache.size() == 0);

	Program programFromCache = m_programCache.optimiseProgram("IuO");
	BOOST_TEST(toString(programFromCache) == toString(afterIuO));

	// REQUIRE rather than TEST: the pointers returned by find() are dereferenced below.
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "IuO"}));
	BOOST_TEST(toString(*m_programCache.find("I")) == toString(afterI));
	BOOST_TEST(toString(*m_programCache.find("Iu")) == toString(afterIu));
	BOOST_TEST(toString(*m_programCache.find("IuO")) == toString(afterIuO));
}
BOOST_FIXTURE_TEST_CASE(optimiseProgram_should_repeat_the_chromosome_requested_number_of_times, ProgramCacheFixture)
{
	// "IuO" repeated twice, written out explicitly.
	string steps = "IuOIuO";

	Program repeatedResult = m_programCache.optimiseProgram("IuO", 2);

	// A second, independent cache receives the already expanded sequence.
	ProgramCache cacheNoRepetitions(m_program);
	Program explicitResult = cacheNoRepetitions.optimiseProgram(steps);

	BOOST_TEST(toString(repeatedResult) == toString(explicitResult));

	// Both caches must hold identical programs for every prefix of the expanded sequence.
	for (size_t size = 1; size <= steps.size(); ++size)
	{
		string const prefix = steps.substr(0, size);

		BOOST_REQUIRE(m_programCache.contains(prefix));
		BOOST_REQUIRE(cacheNoRepetitions.contains(prefix));
		BOOST_TEST(
			toString(*cacheNoRepetitions.find(prefix)) ==
			toString(*m_programCache.find(prefix))
		);
	}
}
BOOST_FIXTURE_TEST_CASE(optimiseProgram_should_reuse_the_longest_prefix_and_move_it_to_the_next_round, ProgramCacheFixture)
{
	// Round number stored in the cache entry for the given step sequence.
	// Only call it for keys known to be present (checked with BOOST_REQUIRE beforehand).
	auto roundOf = [this](string const& _steps)
	{
		return m_programCache.entries().find(_steps)->second.roundNumber;
	};

	BOOST_TEST(m_programCache.currentRound() == 0);

	// Populate the cache in round 0.
	m_programCache.optimiseProgram("Iu");
	m_programCache.optimiseProgram("Ia");

	// Starting round 1 must not touch entries created in round 0.
	m_programCache.startRound(1);

	BOOST_TEST(m_programCache.currentRound() == 1);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "Ia"}));
	BOOST_TEST(roundOf("I") == 0);
	BOOST_TEST(roundOf("Iu") == 0);
	BOOST_TEST(roundOf("Ia") == 0);

	// "I" and "Iu" are prefixes of the new sequence so they get reused and moved to round 1.
	// "Ia" is not a prefix and stays in round 0.
	m_programCache.optimiseProgram("IuOI");

	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "Ia", "IuO", "IuOI"}));
	BOOST_TEST(roundOf("I") == 1);
	BOOST_TEST(roundOf("Iu") == 1);
	BOOST_TEST(roundOf("Ia") == 0);
	BOOST_TEST(roundOf("IuO") == 1);
	BOOST_TEST(roundOf("IuOI") == 1);
}
BOOST_FIXTURE_TEST_CASE(startRound_should_remove_entries_older_than_two_rounds, ProgramCacheFixture)
{
	// Round number stored in the cache entry for the given step sequence.
	// Only call it for keys known to be present (checked with BOOST_REQUIRE beforehand).
	auto roundOf = [this](string const& _steps)
	{
		return m_programCache.entries().find(_steps)->second.roundNumber;
	};

	// Round 0: the cache starts out empty.
	BOOST_TEST(m_programCache.currentRound() == 0);
	BOOST_TEST(m_programCache.size() == 0);

	m_programCache.optimiseProgram("Iu");

	BOOST_TEST(m_programCache.currentRound() == 0);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu"}));
	BOOST_TEST(roundOf("I") == 0);
	BOOST_TEST(roundOf("Iu") == 0);

	m_programCache.optimiseProgram("a");

	BOOST_TEST(m_programCache.currentRound() == 0);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "a"}));
	BOOST_TEST(roundOf("I") == 0);
	BOOST_TEST(roundOf("Iu") == 0);
	BOOST_TEST(roundOf("a") == 0);

	// Round 1: entries from round 0 are still kept.
	m_programCache.startRound(1);

	BOOST_TEST(m_programCache.currentRound() == 1);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "a"}));
	BOOST_TEST(roundOf("I") == 0);
	BOOST_TEST(roundOf("Iu") == 0);
	BOOST_TEST(roundOf("a") == 0);

	// Reusing "a" as a prefix moves it to round 1. "I" and "Iu" remain in round 0.
	m_programCache.optimiseProgram("af");

	BOOST_TEST(m_programCache.currentRound() == 1);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"I", "Iu", "a", "af"}));
	BOOST_TEST(roundOf("I") == 0);
	BOOST_TEST(roundOf("Iu") == 0);
	BOOST_TEST(roundOf("a") == 1);
	BOOST_TEST(roundOf("af") == 1);

	// Round 2: entries last used in round 0 get purged.
	m_programCache.startRound(2);

	BOOST_TEST(m_programCache.currentRound() == 2);
	BOOST_REQUIRE((cachedKeys(m_programCache) == set<string>{"a", "af"}));
	BOOST_TEST(roundOf("a") == 1);
	BOOST_TEST(roundOf("af") == 1);

	// Round 3: nothing was used in round 2 so the cache ends up empty.
	m_programCache.startRound(3);

	BOOST_TEST(m_programCache.currentRound() == 3);
	BOOST_TEST(m_programCache.size() == 0);
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
}

View File

@ -35,6 +35,8 @@ add_executable(yul-phaser
yulPhaser/PairSelections.cpp
yulPhaser/Selections.h
yulPhaser/Selections.cpp
yulPhaser/ProgramCache.h
yulPhaser/ProgramCache.cpp
yulPhaser/Program.h
yulPhaser/Program.cpp
yulPhaser/SimulationRNG.h

View File

@ -0,0 +1,94 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <tools/yulPhaser/ProgramCache.h>
#include <libyul/optimiser/Suite.h>
using namespace std;
using namespace solidity::yul;
using namespace solidity::phaser;
/// Applies a sequence of optimisation steps to the base program, reusing cached intermediate
/// results where possible.
///
/// The full sequence is @a _abbreviatedOptimisationSteps repeated @a _repetitionCount times.
/// The longest prefix of that sequence already present in the cache is used as the starting
/// point and all cached prefixes up to it are marked as used in the current round. Every
/// remaining step is applied one at a time and the result of each step is stored in the cache
/// under the corresponding prefix.
///
/// @param _abbreviatedOptimisationSteps Sequence of one-character step abbreviations.
/// @param _repetitionCount Number of times the sequence is applied. Zero results in an empty
///     sequence, i.e. an unmodified copy of the base program is returned.
/// @return The program after all the steps have been applied.
/// @throws std::out_of_range (from map::at()) if an abbreviation that needs to be applied is not
///     present in the map returned by OptimiserSuite::stepAbbreviationToNameMap().
Program ProgramCache::optimiseProgram(
	string const& _abbreviatedOptimisationSteps,
	size_t _repetitionCount
)
{
	// Start from an empty string so that a repetition count of zero really means "no steps"
	// rather than silently behaving like a repetition count of one.
	string targetOptimisations;
	for (size_t i = 0; i < _repetitionCount; ++i)
		targetOptimisations += _abbreviatedOptimisationSteps;

	// Find the longest cached prefix. Remember the iterator so that the entry does not have to
	// be looked up again below.
	size_t prefixSize = 0;
	auto longestCachedPrefix = m_entries.end();
	for (size_t i = 1; i <= targetOptimisations.size(); ++i)
	{
		auto entry = m_entries.find(targetOptimisations.substr(0, i));
		if (entry == m_entries.end())
			break;

		// Every reused prefix is moved to the current round so that startRound() keeps it.
		entry->second.roundNumber = m_currentRound;
		longestCachedPrefix = entry;
		prefixSize = i;
	}

	Program intermediateProgram = (
		longestCachedPrefix == m_entries.end() ?
		m_program :
		longestCachedPrefix->second.program
	);

	// Apply the steps that were not found in the cache, storing a copy of the program after
	// each of them.
	for (size_t i = prefixSize + 1; i <= targetOptimisations.size(); ++i)
	{
		string stepName = OptimiserSuite::stepAbbreviationToNameMap().at(targetOptimisations[i - 1]);

		intermediateProgram.optimise({stepName});

		m_entries.insert({targetOptimisations.substr(0, i), {intermediateProgram, m_currentRound}});
	}

	return intermediateProgram;
}
/// Switches the cache to round @a _roundNumber and removes all the entries whose round number
/// is lower than that of the round directly preceding the new one.
/// The new round number must be greater than the current one (checked with an assertion).
void ProgramCache::startRound(size_t _roundNumber)
{
	assert(_roundNumber > m_currentRound);
	m_currentRound = _roundNumber;

	auto entry = m_entries.begin();
	while (entry != m_entries.end())
	{
		// No entry can belong to a round that has only just started.
		assert(entry->second.roundNumber < m_currentRound);

		if (entry->second.roundNumber < m_currentRound - 1)
			// erase() returns the iterator following the removed element.
			entry = m_entries.erase(entry);
		else
			++entry;
	}
}
/// Removes all the entries from the cache and resets the round counter back to zero.
/// The base program is not affected.
void ProgramCache::clear()
{
	m_currentRound = 0;
	m_entries.clear();
}
/// Looks up the program stored under exactly the given step sequence.
/// Does not update the round number of the entry.
/// @return Pointer to the cached program or nullptr if there is no such entry.
Program const* ProgramCache::find(string const& _abbreviatedOptimisationSteps) const
{
	auto entry = m_entries.find(_abbreviatedOptimisationSteps);
	return entry != m_entries.end() ? &entry->second.program : nullptr;
}

View File

@ -0,0 +1,91 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <tools/yulPhaser/Program.h>
#include <map>
#include <string>
namespace solidity::phaser
{
/**
* Structure used by @a ProgramCache to store intermediate programs and metadata associated
* with them.
*/
struct CacheEntry
{
	/// Program obtained by applying the step sequence the entry is stored under.
	Program program;

	/// Number of the round in which the entry was created or most recently reused.
	size_t roundNumber;

	CacheEntry(Program _program, size_t _roundNumber):
		program(std::move(_program)),
		roundNumber(_roundNumber)
	{
	}
};
/**
* Class that optimises programs one step at a time which allows it to store and later reuse the
* results of the intermediate steps.
*
* The cache keeps track of the current round number and associates newly created entries with it.
* @a startRound() must be called at the beginning of a round so that entries that are too old
* can be purged. The current strategy is to store programs corresponding to all possible prefixes
* encountered in the current and the previous rounds. Entries older than that get removed to
* conserve memory.
*
* The current strategy does speed things up (about 4:1 hit:miss ratio observed in my limited
* experiments) but there's room for improvement. We could fit more useful programs in
* the cache by being more picky about which ones we choose.
*
* There is currently no way to purge entries without starting a new round. Since the programs
* take a lot of memory, this may lead to the cache eating up all the available RAM if sequences are
* long and programs large. A limiter based on entry count or total program size would be useful.
*/
class ProgramCache
{
public:
	/// Creates an empty cache that uses @a _program as the starting point for all optimisations.
	explicit ProgramCache(Program _program):
		m_program(std::move(_program)) {}

	/// Applies the given sequence of abbreviated optimisation steps to the base program,
	/// starting from the longest prefix of the sequence already present in the cache.
	/// Programs resulting from the steps that had to be applied are added to the cache and
	/// the reused ones are marked as used in the current round.
	/// @a _repetitionCount controls how many times the sequence is repeated.
	Program optimiseProgram(
		std::string const& _abbreviatedOptimisationSteps,
		size_t _repetitionCount = 1
	);

	/// Starts a new round and purges entries that were not created or reused in the round
	/// directly preceding it. @a _roundNumber must be greater than the current round number.
	void startRound(size_t _roundNumber);

	/// Removes all the entries and resets the current round number to zero.
	void clear();

	/// @returns The number of programs currently stored in the cache.
	size_t size() const { return m_entries.size(); }

	/// @returns The program cached for exactly the given step sequence or nullptr if there
	/// is none.
	Program const* find(std::string const& _abbreviatedOptimisationSteps) const;

	/// @returns true if there is an entry for exactly the given step sequence.
	bool contains(std::string const& _abbreviatedOptimisationSteps) const
	{
		return find(_abbreviatedOptimisationSteps) != nullptr;
	}

	/// Read-only access to the cached entries, keyed by step sequence.
	std::map<std::string, CacheEntry> const& entries() const { return m_entries; }

	/// The unoptimised program the cache was created with.
	Program const& program() const { return m_program; }

	/// Number of the round most recently started with @a startRound() (zero initially).
	size_t currentRound() const { return m_currentRound; }

private:
	// The best matching data structure here would be a trie of chromosome prefixes but since
	// the programs are orders of magnitude larger than the prefixes, it does not really matter.
	// A map should be good enough.
	std::map<std::string, CacheEntry> m_entries;

	Program m_program;
	size_t m_currentRound = 0;
};
}