Merge pull request #8327 from imapp-pl/yul-phaser-random-algorithm

[yul-phaser] Random algorithm
This commit is contained in:
chriseth 2020-02-26 14:44:24 +01:00 committed by GitHub
commit 426c4a2e38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 804 additions and 87 deletions

View File

@ -144,8 +144,10 @@ set(yul_phaser_sources
yulPhaser/CommonTest.cpp
yulPhaser/Chromosome.cpp
yulPhaser/FitnessMetrics.cpp
yulPhaser/GeneticAlgorithms.cpp
yulPhaser/Population.cpp
yulPhaser/Program.cpp
yulPhaser/Selections.cpp
yulPhaser/SimulationRNG.cpp
# FIXME: yul-phaser is not a library so I can't just add it to target_link_libraries().
@ -153,8 +155,10 @@ set(yul_phaser_sources
# unnecessary duplication. Create a library or find a way to reuse the list in both places.
../tools/yulPhaser/Chromosome.cpp
../tools/yulPhaser/FitnessMetrics.cpp
../tools/yulPhaser/GeneticAlgorithms.cpp
../tools/yulPhaser/Population.cpp
../tools/yulPhaser/Program.cpp
../tools/yulPhaser/Selections.cpp
../tools/yulPhaser/SimulationRNG.cpp
)
detect_stray_source_files("${yul_phaser_sources}" "yulPhaser/")

View File

@ -49,3 +49,18 @@ string phaser::test::stripWhitespace(string const& input)
regex whitespaceRegex("\\s+");
return regex_replace(input, whitespaceRegex, "");
}
/// Counts how many times @a _substring appears in @a _inputString without overlapping.
/// After each match the search resumes right behind it, so characters that already belong
/// to a counted occurrence are never reused. @a _substring must not be empty.
size_t phaser::test::countSubstringOccurrences(string const& _inputString, string const& _substring)
{
	assert(_substring.size() > 0);

	size_t occurrences = 0;
	for (
		size_t position = _inputString.find(_substring, 0);
		position != string::npos;
		position = _inputString.find(_substring, position + _substring.size())
	)
		++occurrences;

	return occurrences;
}

View File

@ -67,6 +67,10 @@ std::map<std::string, size_t> enumerateOptmisationSteps();
/// Returns the input string with all the whitespace characters (spaces, line endings, etc.) removed.
std::string stripWhitespace(std::string const& input);
/// Counts the number of times one string can be found inside another. Only non-overlapping
/// occurrences are counted.
size_t countSubstringOccurrences(std::string const& _inputString, std::string const& _substring);
// STATISTICAL UTILITIES
/// Calculates the mean value of a series of samples given in a vector.

View File

@ -83,6 +83,25 @@ BOOST_AUTO_TEST_CASE(stripWhitespace_should_remove_all_whitespace_characters_fro
BOOST_TEST(stripWhitespace(" a b \n\n c \n\t\v") == "abc");
}
// Exercises countSubstringOccurrences() on a fixed palindromic input and on degenerate inputs.
BOOST_AUTO_TEST_CASE(countSubstringOccurrences_should_count_non_overlapping_substring_occurrences_in_a_string)
{
// Substrings that are present. The "aa" case shows that overlapping matches are not counted:
// each "aaa" run contributes one occurrence rather than two.
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "a") == 6);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "aa") == 2);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "aaa") == 2);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "aaab") == 1);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "b") == 2);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "d") == 1);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "cdc") == 1);
// Substrings that never occur in the input.
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "x") == 0);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "aaaa") == 0);
BOOST_TEST(countSubstringOccurrences("aaabcdcbaaa", "dcd") == 0);
// Empty input and input shorter than the substring.
BOOST_TEST(countSubstringOccurrences("", "a") == 0);
BOOST_TEST(countSubstringOccurrences("", "aa") == 0);
BOOST_TEST(countSubstringOccurrences("a", "aa") == 0);
}
BOOST_AUTO_TEST_CASE(mean_should_calculate_statistical_mean)
{
BOOST_TEST(mean<int>({0}) == 0.0);

View File

@ -0,0 +1,139 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <test/yulPhaser/Common.h>
#include <tools/yulPhaser/FitnessMetrics.h>
#include <tools/yulPhaser/GeneticAlgorithms.h>
#include <tools/yulPhaser/Population.h>
#include <tools/yulPhaser/Program.h>
#include <liblangutil/CharStream.h>
#include <libsolutil/CommonIO.h>
#include <boost/test/unit_test.hpp>
#include <boost/test/tools/output_test_stream.hpp>
#include <vector>
using namespace std;
using namespace boost::unit_test::framework;
using namespace boost::test_tools;
using namespace solidity::langutil;
using namespace solidity::util;
namespace solidity::phaser::test
{
/// Test double for GeneticAlgorithm. A round leaves the population untouched and only
/// bumps a public counter, which lets tests observe how often run() invokes runNextRound().
class DummyAlgorithm: public GeneticAlgorithm
{
public:
	using GeneticAlgorithm::GeneticAlgorithm;

	/// Number of rounds executed so far.
	size_t m_currentRound = 0;

	void runNextRound() override
	{
		m_currentRound += 1;
	}
};
/// Shared state for the genetic algorithm tests: a fitness metric to build populations with
/// and a Boost test stream that captures whatever the algorithm prints.
class GeneticAlgorithmFixture
{
protected:
	shared_ptr<FitnessMetric> m_fitnessMetric{make_shared<ChromosomeLengthMetric>()};
	output_test_stream m_output;
};
BOOST_AUTO_TEST_SUITE(Phaser)
BOOST_AUTO_TEST_SUITE(GeneticAlgorithmsTest)
BOOST_AUTO_TEST_SUITE(GeneticAlgorithmTest)
// Verifies that run(n) invokes runNextRound() exactly n times and that consecutive run() calls
// accumulate. DummyAlgorithm counts the invocations in m_currentRound.
BOOST_FIXTURE_TEST_CASE(run_should_call_runNextRound_once_per_round, GeneticAlgorithmFixture)
{
DummyAlgorithm algorithm(Population(m_fitnessMetric), m_output);
// Nothing has run yet.
BOOST_TEST(algorithm.m_currentRound == 0);
algorithm.run(10);
BOOST_TEST(algorithm.m_currentRound == 10);
// A second call continues counting from the previous total: 10 + 3.
algorithm.run(3);
BOOST_TEST(algorithm.m_currentRound == 13);
}
// Verifies that run() writes the first individual of the population to the output stream once
// per round. DummyAlgorithm never modifies the population, so the same chromosome is searched
// for in the accumulated output after every run() call.
BOOST_FIXTURE_TEST_CASE(run_should_print_the_top_chromosome, GeneticAlgorithmFixture)
{
// run() is allowed to print more but should at least print the first one
DummyAlgorithm algorithm(
// NOTE: Chromosomes chosen so that they're not substrings of each other and are not
// words likely to appear in the output in normal circumstances.
Population(m_fitnessMetric, {Chromosome("fcCUnDve"), Chromosome("jsxIOo"), Chromosome("ighTLM")}),
m_output
);
// Constructing the algorithm must not print anything by itself.
BOOST_TEST(m_output.is_empty());
algorithm.run(1);
BOOST_TEST(countSubstringOccurrences(m_output.str(), toString(algorithm.population().individuals()[0].chromosome)) == 1);
// The stream is not cleared between calls, so after 1 + 3 rounds four occurrences are expected.
algorithm.run(3);
BOOST_TEST(countSubstringOccurrences(m_output.str(), toString(algorithm.population().individuals()[0].chromosome)) == 4);
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(RandomAlgorithmTest)
// With an elite of 50% the four length-3 chromosomes must survive while the four length-5
// ones are replaced by freshly generated chromosomes of length 1 (min == max == 1).
// NOTE(review): the expected ordering presumably relies on ChromosomeLengthMetric ranking
// shorter chromosomes as fitter, so the new length-1 individuals sort to the front - confirm.
BOOST_FIXTURE_TEST_CASE(runNextRound_should_preserve_elite_and_randomise_rest_of_population, GeneticAlgorithmFixture)
{
auto population = Population::makeRandom(m_fitnessMetric, 4, 3, 3) + Population::makeRandom(m_fitnessMetric, 4, 5, 5);
RandomAlgorithm algorithm(population, m_output, {0.5, 1, 1});
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 5, 5, 5, 5}));
algorithm.runNextRound();
BOOST_TEST((chromosomeLengths(algorithm.population()) == vector<size_t>{1, 1, 1, 1, 3, 3, 3, 3}));
}
// The replacements are generated with length 7 (min == max == 7), i.e. longer than anything in
// the initial population. The four length-3 elite chromosomes must still be kept and only the
// non-elite half may be swapped for the new individuals.
BOOST_FIXTURE_TEST_CASE(runNextRound_should_not_replace_elite_with_worse_individuals, GeneticAlgorithmFixture)
{
auto population = Population::makeRandom(m_fitnessMetric, 4, 3, 3) + Population::makeRandom(m_fitnessMetric, 4, 5, 5);
RandomAlgorithm algorithm(population, m_output, {0.5, 7, 7});
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 5, 5, 5, 5}));
algorithm.runNextRound();
BOOST_TEST((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 7, 7, 7, 7}));
}
// Boundary case: an elite pool size of 0.0 preserves nobody, so every one of the eight
// individuals is expected to be replaced by a new chromosome of length 1.
BOOST_FIXTURE_TEST_CASE(runNextRound_should_replace_all_chromosomes_if_zero_size_elite, GeneticAlgorithmFixture)
{
auto population = Population::makeRandom(m_fitnessMetric, 4, 3, 3) + Population::makeRandom(m_fitnessMetric, 4, 5, 5);
RandomAlgorithm algorithm(population, m_output, {0.0, 1, 1});
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 5, 5, 5, 5}));
algorithm.runNextRound();
BOOST_TEST((chromosomeLengths(algorithm.population()) == vector<size_t>{1, 1, 1, 1, 1, 1, 1, 1}));
}
// Boundary case: an elite pool size of 1.0 covers the whole population, so no replacements are
// generated and the chromosome lengths must be unchanged after the round.
BOOST_FIXTURE_TEST_CASE(runNextRound_should_not_replace_any_chromosomes_if_whole_population_is_the_elite, GeneticAlgorithmFixture)
{
auto population = Population::makeRandom(m_fitnessMetric, 4, 3, 3) + Population::makeRandom(m_fitnessMetric, 4, 5, 5);
RandomAlgorithm algorithm(population, m_output, {1.0, 1, 1});
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 5, 5, 5, 5}));
algorithm.runNextRound();
BOOST_TEST((chromosomeLengths(algorithm.population()) == vector<size_t>{3, 3, 3, 3, 5, 5, 5, 5}));
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
}

View File

@ -20,6 +20,7 @@
#include <tools/yulPhaser/Chromosome.h>
#include <tools/yulPhaser/Population.h>
#include <tools/yulPhaser/Program.h>
#include <tools/yulPhaser/Selections.h>
#include <libyul/optimiser/BlockFlattener.h>
#include <libyul/optimiser/SSAReverser.h>
@ -181,38 +182,6 @@ BOOST_FIXTURE_TEST_CASE(makeRandom_should_compute_fitness, PopulationFixture)
BOOST_TEST(population.individuals()[2].fitness == m_fitnessMetric->evaluate(population.individuals()[2].chromosome));
}
BOOST_FIXTURE_TEST_CASE(run_should_not_make_fitness_of_top_chromosomes_worse, PopulationFixture)
{
stringstream output;
vector<Chromosome> chromosomes = {
Chromosome(vector<string>{StructuralSimplifier::name}),
Chromosome(vector<string>{BlockFlattener::name}),
Chromosome(vector<string>{SSAReverser::name}),
Chromosome(vector<string>{UnusedPruner::name}),
Chromosome(vector<string>{StructuralSimplifier::name, BlockFlattener::name}),
};
Population population(m_fitnessMetric, chromosomes);
size_t initialTopFitness[2] = {
m_fitnessMetric->evaluate(chromosomes[0]),
m_fitnessMetric->evaluate(chromosomes[1]),
};
for (int i = 0; i < 6; ++i)
{
population.run(1, output);
BOOST_TEST(population.individuals().size() == 5);
size_t currentTopFitness[2] = {
population.individuals()[0].fitness,
population.individuals()[1].fitness,
};
BOOST_TEST(currentTopFitness[0] <= initialTopFitness[0]);
BOOST_TEST(currentTopFitness[1] <= initialTopFitness[1]);
BOOST_TEST(currentTopFitness[0] <= currentTopFitness[1]);
}
}
BOOST_FIXTURE_TEST_CASE(plus_operator_should_add_two_populations, PopulationFixture)
{
BOOST_CHECK_EQUAL(
@ -222,6 +191,41 @@ BOOST_FIXTURE_TEST_CASE(plus_operator_should_add_two_populations, PopulationFixt
);
}
// Verifies that Population::select() builds a new population from exactly the individuals
// whose indices the selection yields. For four individuals the range [0.25, 0.75) maps to
// indices 1 and 2.
BOOST_FIXTURE_TEST_CASE(select_should_return_population_containing_individuals_indicated_by_selection, PopulationFixture)
{
Population population(m_fitnessMetric, {Chromosome("a"), Chromosome("c"), Chromosome("g"), Chromosome("h")});
RangeSelection selection(0.25, 0.75);
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert(selection.materialise(population.individuals().size()) == (vector<size_t>{1, 2}));
BOOST_TEST(
population.select(selection) ==
Population(m_fitnessMetric, {population.individuals()[1].chromosome, population.individuals()[2].chromosome})
);
}
// Verifies that an index occurring more than once in the selection produces the same individual
// more than once in the result. The mosaic pattern {0, 1} with size 2.0 yields four indices for
// a population of two.
BOOST_FIXTURE_TEST_CASE(select_should_include_duplicates_if_selection_contains_duplicates, PopulationFixture)
{
Population population(m_fitnessMetric, {Chromosome("a"), Chromosome("c")});
MosaicSelection selection({0, 1}, 2.0);
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert(selection.materialise(population.individuals().size()) == (vector<size_t>{0, 1, 0, 1}));
BOOST_TEST(population.select(selection) == Population(m_fitnessMetric, {
population.individuals()[0].chromosome,
population.individuals()[1].chromosome,
population.individuals()[0].chromosome,
population.individuals()[1].chromosome,
}));
}
// Verifies that a selection yielding no indices (the empty range [0.0, 0.0)) results in a
// population without individuals.
BOOST_FIXTURE_TEST_CASE(select_should_return_empty_population_if_selection_is_empty, PopulationFixture)
{
Population population(m_fitnessMetric, {Chromosome("a"), Chromosome("c")});
RangeSelection selection(0.0, 0.0);
// Sanity check of the test data itself (plain assert, so it is compiled out under NDEBUG).
assert(selection.materialise(population.individuals().size()).empty());
BOOST_TEST(population.select(selection).individuals().empty());
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,206 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <test/yulPhaser/Common.h>
#include <tools/yulPhaser/Selections.h>
#include <tools/yulPhaser/SimulationRNG.h>
#include <libsolutil/CommonData.h>
#include <boost/test/unit_test.hpp>
#include <algorithm>
#include <vector>
using namespace std;
namespace solidity::phaser::test
{
BOOST_AUTO_TEST_SUITE(Phaser)
BOOST_AUTO_TEST_SUITE(SelectionsTest)
BOOST_AUTO_TEST_SUITE(RangeSelectionTest)
// Checks that RangeSelection turns a [start, end) pair of fractions into the matching run of
// consecutive indices. The end index is exclusive.
BOOST_AUTO_TEST_CASE(materialise)
{
// Ranges anchored at the start of the collection.
BOOST_TEST(RangeSelection(0.0, 1.0).materialise(10) == vector<size_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
BOOST_TEST(RangeSelection(0.0, 0.1).materialise(10) == vector<size_t>({0}));
BOOST_TEST(RangeSelection(0.0, 0.2).materialise(10) == vector<size_t>({0, 1}));
BOOST_TEST(RangeSelection(0.0, 0.7).materialise(10) == vector<size_t>({0, 1, 2, 3, 4, 5, 6}));
// Ranges anchored at the end of the collection.
BOOST_TEST(RangeSelection(0.9, 1.0).materialise(10) == vector<size_t>({ 9}));
BOOST_TEST(RangeSelection(0.8, 1.0).materialise(10) == vector<size_t>({ 8, 9}));
BOOST_TEST(RangeSelection(0.5, 1.0).materialise(10) == vector<size_t>({ 5, 6, 7, 8, 9}));
// Ranges in the middle of the collection.
BOOST_TEST(RangeSelection(0.3, 0.6).materialise(10) == vector<size_t>({ 3, 4, 5 }));
BOOST_TEST(RangeSelection(0.2, 0.7).materialise(10) == vector<size_t>({ 2, 3, 4, 5, 6 }));
BOOST_TEST(RangeSelection(0.4, 0.7).materialise(10) == vector<size_t>({ 4, 5, 6 }));
// Same fractions on a smaller collection: 5 * 0.4 = 2 and 5 * 0.7 = 3.5, which rounds to 4.
BOOST_TEST(RangeSelection(0.4, 0.7).materialise(5) == vector<size_t>({2, 3}));
}
// Checks that both range boundaries are rounded to the nearest index instead of being
// truncated: with ten elements a boundary moves to the next index once the fractional position
// reaches one half.
BOOST_AUTO_TEST_CASE(materialise_should_round_indices)
{
BOOST_TEST(RangeSelection(0.01, 0.99).materialise(10) == vector<size_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
BOOST_TEST(RangeSelection(0.04, 0.96).materialise(10) == vector<size_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
BOOST_TEST(RangeSelection(0.05, 0.95).materialise(10) == vector<size_t>({ 1, 2, 3, 4, 5, 6, 7, 8, 9}));
BOOST_TEST(RangeSelection(0.06, 0.94).materialise(10) == vector<size_t>({ 1, 2, 3, 4, 5, 6, 7, 8 }));
}
// For a collection of size zero every range, whatever its boundaries, must select nothing.
BOOST_AUTO_TEST_CASE(materialise_should_handle_empty_collections)
{
BOOST_TEST(RangeSelection(0.0, 0.0).materialise(0).empty());
BOOST_TEST(RangeSelection(0.0, 1.0).materialise(0).empty());
BOOST_TEST(RangeSelection(0.5, 1.0).materialise(0).empty());
BOOST_TEST(RangeSelection(0.0, 0.5).materialise(0).empty());
BOOST_TEST(RangeSelection(0.2, 0.7).materialise(0).empty());
}
// Ranges whose start and end land on the same index must select nothing, even for non-empty
// collections.
BOOST_AUTO_TEST_CASE(materialise_should_handle_empty_selection_ranges)
{
// Identical start and end fractions.
BOOST_TEST(RangeSelection(0.0, 0.0).materialise(1).empty());
BOOST_TEST(RangeSelection(1.0, 1.0).materialise(1).empty());
BOOST_TEST(RangeSelection(0.0, 0.0).materialise(100).empty());
BOOST_TEST(RangeSelection(1.0, 1.0).materialise(100).empty());
BOOST_TEST(RangeSelection(0.5, 0.5).materialise(100).empty());
// Different fractions that round to the same index for the given collection size. The same
// fractions stop being empty once the collection is large enough to tell them apart.
BOOST_TEST(RangeSelection(0.45, 0.54).materialise(10).empty());
BOOST_TEST(!RangeSelection(0.45, 0.54).materialise(100).empty());
BOOST_TEST(RangeSelection(0.045, 0.054).materialise(100).empty());
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(MosaicSelectionTest)
// Checks that MosaicSelection repeats its index pattern until the requested number of indices
// (collection size multiplied by the selection size) has been produced, cutting the pattern
// short when the count is not a multiple of the pattern length.
BOOST_AUTO_TEST_CASE(materialise)
{
// Single-element pattern.
BOOST_TEST(MosaicSelection({1}, 0.5).materialise(4) == vector<size_t>({1, 1}));
BOOST_TEST(MosaicSelection({1}, 1.0).materialise(4) == vector<size_t>({1, 1, 1, 1}));
BOOST_TEST(MosaicSelection({1}, 2.0).materialise(4) == vector<size_t>({1, 1, 1, 1, 1, 1, 1, 1}));
BOOST_TEST(MosaicSelection({1}, 1.0).materialise(2) == vector<size_t>({1, 1}));
// Two-element pattern.
BOOST_TEST(MosaicSelection({0, 1}, 0.5).materialise(4) == vector<size_t>({0, 1}));
BOOST_TEST(MosaicSelection({0, 1}, 1.0).materialise(4) == vector<size_t>({0, 1, 0, 1}));
BOOST_TEST(MosaicSelection({0, 1}, 2.0).materialise(4) == vector<size_t>({0, 1, 0, 1, 0, 1, 0, 1}));
BOOST_TEST(MosaicSelection({0, 1}, 1.0).materialise(2) == vector<size_t>({0, 1}));
// Patterns as long as the collection, and one longer than the requested count.
BOOST_TEST(MosaicSelection({3, 2, 1, 0}, 0.5).materialise(4) == vector<size_t>({3, 2}));
BOOST_TEST(MosaicSelection({3, 2, 1, 0}, 1.0).materialise(4) == vector<size_t>({3, 2, 1, 0}));
BOOST_TEST(MosaicSelection({3, 2, 1, 0}, 2.0).materialise(4) == vector<size_t>({3, 2, 1, 0, 3, 2, 1, 0}));
BOOST_TEST(MosaicSelection({1, 0, 1, 0}, 1.0).materialise(2) == vector<size_t>({1, 0}));
}
// Checks that the number of produced indices is rounded to the nearest integer:
// 5 * 0.49 = 2.45 gives two indices while 5 * 0.50 = 2.5 and 5 * 0.51 = 2.55 give three.
BOOST_AUTO_TEST_CASE(materialise_should_round_indices)
{
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 0.49).materialise(5) == vector<size_t>({4, 3}));
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 0.50).materialise(5) == vector<size_t>({4, 3, 2}));
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 0.51).materialise(5) == vector<size_t>({4, 3, 2}));
}
// For a collection of size zero no indices may be produced, regardless of pattern and size.
BOOST_AUTO_TEST_CASE(materialise_should_handle_empty_collections)
{
BOOST_TEST(MosaicSelection({1}, 1.0).materialise(0).empty());
BOOST_TEST(MosaicSelection({1, 3}, 2.0).materialise(0).empty());
BOOST_TEST(MosaicSelection({5, 4, 3, 2}, 0.5).materialise(0).empty());
}
// A selection size of 0.0 must produce no indices even when the collection is not empty.
BOOST_AUTO_TEST_CASE(materialise_should_handle_empty_selections)
{
BOOST_TEST(MosaicSelection({1}, 0.0).materialise(8).empty());
BOOST_TEST(MosaicSelection({1, 3}, 0.0).materialise(8).empty());
BOOST_TEST(MosaicSelection({5, 4, 3, 2}, 0.0).materialise(8).empty());
}
// Pattern entries that exceed the last valid index of the collection must be replaced with
// that last index (collection size - 1) rather than being passed through or dropped.
BOOST_AUTO_TEST_CASE(materialise_should_clamp_indices_at_collection_size)
{
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 1.0).materialise(4) == vector<size_t>({3, 3, 2, 1}));
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 2.0).materialise(3) == vector<size_t>({2, 2, 2, 1, 0, 2}));
// With a single element every pattern entry collapses to index 0.
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 1.0).materialise(1) == vector<size_t>({0}));
BOOST_TEST(MosaicSelection({4, 3, 2, 1, 0}, 7.0).materialise(1) == vector<size_t>({0, 0, 0, 0, 0, 0, 0}));
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(RandomSelectionTest)
// Statistical check that the indices drawn by RandomSelection look uniformly distributed over
// 0..collectionSize-1: the sample mean and the mean squared error around the expected value
// are compared with the mean (n - 1) / 2 and variance (n^2 - 1) / 12 of a discrete uniform
// distribution, each within a 10% relative tolerance.
BOOST_AUTO_TEST_CASE(materialise_should_return_random_values_with_equal_probabilities)
{
constexpr int collectionSize = 10;
// NOTE(review): this value is passed as RandomSelection's size argument, which materialise()
// multiplies by the collection size - so presumably 1000 samples are drawn, not 100. Confirm.
constexpr int selectionSize = 100;
constexpr double relativeTolerance = 0.1;
constexpr double expectedValue = (collectionSize - 1) / 2.0;
constexpr double variance = (collectionSize * collectionSize - 1) / 12.0;
// Fixed seed so that the drawn samples, and therefore the test result, are reproducible.
SimulationRNG::reset(1);
vector<size_t> samples = RandomSelection(selectionSize).materialise(collectionSize);
BOOST_TEST(abs(mean(samples) - expectedValue) < expectedValue * relativeTolerance);
BOOST_TEST(abs(meanSquaredError(samples, expectedValue) - variance) < variance * relativeTolerance);
}
// Checks that every index produced by RandomSelection can be used to address an element of the
// collection it was materialised for.
BOOST_AUTO_TEST_CASE(materialise_should_return_only_values_that_can_be_used_as_collection_indices)
{
	const size_t collectionSize = 200;
	vector<size_t> indices = RandomSelection(0.5).materialise(collectionSize);

	BOOST_TEST(indices.size() == 100);
	// Valid indices are 0 .. collectionSize - 1, so the comparison has to be strict. An index
	// equal to collectionSize would already point one past the last element.
	BOOST_TEST(all_of(indices.begin(), indices.end(), [&](auto const& index){ return index < collectionSize; }));
}
// Checks that the number of returned indices equals the collection size multiplied by the
// selection size, for sizes between 0.0 and 1.0.
BOOST_AUTO_TEST_CASE(materialise_should_return_number_of_indices_thats_a_fraction_of_collection_size)
{
BOOST_TEST(RandomSelection(0.0).materialise(10).size() == 0);
BOOST_TEST(RandomSelection(0.3).materialise(10).size() == 3);
BOOST_TEST(RandomSelection(0.5).materialise(10).size() == 5);
BOOST_TEST(RandomSelection(0.7).materialise(10).size() == 7);
BOOST_TEST(RandomSelection(1.0).materialise(10).size() == 10);
}
// Selection sizes above 1.0 are allowed: the result then holds more indices than the
// collection has elements.
BOOST_AUTO_TEST_CASE(materialise_should_support_number_of_indices_bigger_than_collection_size)
{
BOOST_TEST(RandomSelection(2.0).materialise(5).size() == 10);
BOOST_TEST(RandomSelection(1.5).materialise(10).size() == 15);
BOOST_TEST(RandomSelection(10.0).materialise(10).size() == 100);
}
// Checks that a fractional index count is rounded to the nearest integer rather than truncated.
BOOST_AUTO_TEST_CASE(materialise_should_round_the_number_of_indices_to_the_nearest_integer)
{
// 3 * 0.49 = 1.47 rounds down; 3 * 0.50 = 1.5 and 3 * 0.51 = 1.53 round up.
BOOST_TEST(RandomSelection(0.49).materialise(3).size() == 1);
BOOST_TEST(RandomSelection(0.50).materialise(3).size() == 2);
BOOST_TEST(RandomSelection(0.51).materialise(3).size() == 2);
// 3 * 1.51 = 4.53 rounds up to 5.
BOOST_TEST(RandomSelection(1.51).materialise(3).size() == 5);
// Counts far below one half round down to an empty result.
BOOST_TEST(RandomSelection(0.01).materialise(2).size() == 0);
BOOST_TEST(RandomSelection(0.01).materialise(3).size() == 0);
}
// For a collection of size zero no indices may be produced, whatever the selection size.
BOOST_AUTO_TEST_CASE(materialise_should_return_no_indices_if_collection_is_empty)
{
BOOST_TEST(RandomSelection(0.0).materialise(0).empty());
BOOST_TEST(RandomSelection(0.5).materialise(0).empty());
BOOST_TEST(RandomSelection(1.0).materialise(0).empty());
BOOST_TEST(RandomSelection(2.0).materialise(0).empty());
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
}

View File

@ -15,12 +15,16 @@ install(TARGETS solidity-upgrade DESTINATION "${CMAKE_INSTALL_BINDIR}")
add_executable(yul-phaser
yulPhaser/main.cpp
yulPhaser/GeneticAlgorithms.h
yulPhaser/GeneticAlgorithms.cpp
yulPhaser/Population.h
yulPhaser/Population.cpp
yulPhaser/FitnessMetrics.h
yulPhaser/FitnessMetrics.cpp
yulPhaser/Chromosome.h
yulPhaser/Chromosome.cpp
yulPhaser/Selections.h
yulPhaser/Selections.cpp
yulPhaser/Program.h
yulPhaser/Program.cpp
yulPhaser/SimulationRNG.h

View File

@ -0,0 +1,50 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <tools/yulPhaser/GeneticAlgorithms.h>
#include <tools/yulPhaser/Selections.h>
using namespace std;
using namespace solidity::phaser;
/// Executes the algorithm round by round. When @a _numRounds holds a value, that many rounds
/// are run; without a value the loop has no termination condition of its own. After every
/// round a header with the zero-based round number and the current population are written to
/// the output stream.
void GeneticAlgorithm::run(optional<size_t> _numRounds)
{
	size_t roundNumber = 0;
	while (!_numRounds.has_value() || roundNumber < _numRounds.value())
	{
		runNextRound();

		m_outputStream << "---------- ROUND " << roundNumber << " ----------" << endl;
		m_outputStream << m_population;

		++roundNumber;
	}
}
void RandomAlgorithm::runNextRound()
{
RangeSelection elite(0.0, m_options.elitePoolSize);
Population elitePopulation = m_population.select(elite);
size_t replacementCount = m_population.individuals().size() - elitePopulation.individuals().size();
m_population =
move(elitePopulation) +
Population::makeRandom(
m_population.fitnessMetric(),
replacementCount,
m_options.minChromosomeLength,
m_options.maxChromosomeLength
);
}

View File

@ -0,0 +1,115 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Contains an abstract base class representing a genetic algorithm and its concrete implementations.
*/
#pragma once
#include <tools/yulPhaser/Population.h>
#include <optional>
#include <ostream>
namespace solidity::phaser
{
/**
* Abstract base class for genetic algorithms.
*
* The main feature is the @a run() method that executes the algorithm, updating the internal
* population during each round and printing the results to the stream provided to the constructor.
*
* Derived classes can provide specific methods for updating the population by implementing
* the @a runNextRound() method.
*/
class GeneticAlgorithm
{
public:
/// Takes over @a _initialPopulation as the starting state. @a _outputStream is stored by
/// reference, so the stream has to stay alive for as long as the algorithm is used.
GeneticAlgorithm(Population _initialPopulation, std::ostream& _outputStream):
m_population(std::move(_initialPopulation)),
m_outputStream(_outputStream) {}
// Instances cannot be copied.
GeneticAlgorithm(GeneticAlgorithm const&) = delete;
GeneticAlgorithm& operator=(GeneticAlgorithm const&) = delete;
// Virtual so that derived algorithms are destroyed correctly through a base class pointer.
virtual ~GeneticAlgorithm() = default;
/// Read-only access to the current state of the population.
Population const& population() const { return m_population; }
/// Runs @a _numRounds rounds of the algorithm, calling @a runNextRound() once per round and
/// printing the round number and the population after each of them. When no value is given
/// the loop has no round limit.
void run(std::optional<size_t> _numRounds = std::nullopt);
/// The method that actually implements the algorithm. Should use @a m_population as input and
/// replace it with the updated state after the round.
virtual void runNextRound() = 0;
protected:
/// Current population; derived classes update it in @a runNextRound().
Population m_population;
private:
/// Destination for the per-round output written by @a run().
std::ostream& m_outputStream;
};
/**
 * Completely random genetic algorithm.
 *
 * The algorithm simply replaces the worst chromosomes with entirely new ones, generated
 * randomly and not based on any member of the current population. Only a constant proportion of the
 * chromosomes (the elite) is preserved in each round.
 *
 * Preserves the size of the population. You can use @a elitePoolSize to make the algorithm
 * generational (replacing most members in each round) or steady state (replacing only one member).
 * Both versions are equivalent in terms of the outcome but the generational one converges in a
 * smaller number of rounds while the steady state one does less work per round. This may matter
 * in case of metrics that take a long time to compute though in case of this particular
 * algorithm the same result could also be achieved by simply making the population smaller.
 */
class RandomAlgorithm: public GeneticAlgorithm
{
public:
	struct Options
	{
		double elitePoolSize;       ///< Fraction of the population (between 0.0 and 1.0) treated as the elite
		size_t minChromosomeLength; ///< Minimum length of newly generated chromosomes
		size_t maxChromosomeLength; ///< Maximum length of newly generated chromosomes

		/// Returns true if @a elitePoolSize lies within [0, 1] and the minimum chromosome length
		/// does not exceed the maximum one.
		bool isValid() const
		{
			return (
				0 <= elitePoolSize && elitePoolSize <= 1.0 &&
				minChromosomeLength <= maxChromosomeLength
			);
		}
	};

	/// Creates the algorithm with the given starting population, output stream and options.
	/// @a _options must satisfy @a Options::isValid(); this is only checked with an assertion.
	explicit RandomAlgorithm(
		Population _initialPopulation,
		std::ostream& _outputStream,
		Options const& _options
	):
		// The population is received by value, so it is handed over to the base class
		// instead of being copied a second time.
		GeneticAlgorithm(std::move(_initialPopulation), _outputStream),
		m_options(_options)
	{
		assert(_options.isValid());
	}

	/// Keeps the elite and replaces the rest of the population with random chromosomes whose
	/// lengths lie between @a minChromosomeLength and @a maxChromosomeLength.
	void runNextRound() override;

private:
	Options m_options;
};
}

View File

@ -17,6 +17,7 @@
#include <tools/yulPhaser/Population.h>
#include <tools/yulPhaser/Selections.h>
#include <libsolutil/CommonData.h>
#include <libsolutil/CommonIO.h>
@ -83,16 +84,13 @@ Population Population::makeRandom(
);
}
void Population::run(optional<size_t> _numRounds, ostream& _outputStream)
Population Population::select(Selection const& _selection) const
{
for (size_t round = 0; !_numRounds.has_value() || round < _numRounds.value(); ++round)
{
doMutation();
doSelection();
vector<Individual> selectedIndividuals;
for (size_t i: _selection.materialise(m_individuals.size()))
selectedIndividuals.emplace_back(m_individuals[i]);
_outputStream << "---------- ROUND " << round << " ----------" << endl;
_outputStream << *this;
}
return Population(m_fitnessMetric, selectedIndividuals);
}
Population operator+(Population _a, Population _b)
@ -121,35 +119,6 @@ ostream& phaser::operator<<(ostream& _stream, Population const& _population)
return _stream;
}
void Population::doMutation()
{
// TODO: Implement mutation and crossover
}
void Population::doSelection()
{
randomizeWorstChromosomes(*m_fitnessMetric, m_individuals, m_individuals.size() / 2);
m_individuals = sortedIndividuals(move(m_individuals));
}
void Population::randomizeWorstChromosomes(
FitnessMetric const& _fitnessMetric,
vector<Individual>& _individuals,
size_t _count
)
{
assert(_individuals.size() >= _count);
// ASSUMPTION: _individuals is sorted in ascending order
auto individual = _individuals.begin() + (_individuals.size() - _count);
for (; individual != _individuals.end(); ++individual)
{
auto chromosome = Chromosome::makeRandom(binomialChromosomeLength(MaxChromosomeLength));
size_t fitness = _fitnessMetric.evaluate(chromosome);
*individual = {move(chromosome), fitness};
}
}
vector<Individual> Population::chromosomesToIndividuals(
FitnessMetric const& _fitnessMetric,
vector<Chromosome> _chromosomes

View File

@ -39,6 +39,8 @@ solidity::phaser::Population operator+(solidity::phaser::Population _a, solidity
namespace solidity::phaser
{
class Selection;
/**
* Information describing the state of an individual member of the population during the course
* of the genetic algorithm.
@ -67,19 +69,19 @@ struct Individual
bool isFitter(Individual const& a, Individual const& b);
/**
* Represents a changing set of individuals undergoing a genetic algorithm.
* Each round of the algorithm involves mutating existing individuals, evaluating their fitness
* and selecting the best ones for the next round.
* Represents a snapshot of a population undergoing a genetic algorithm. Consists of a set of
* chromosomes with associated fitness values.
*
* An individual is a sequence of optimiser steps represented by a @a Chromosome instance.
* Individuals are always ordered by their fitness (based on @a _fitnessMetric and @a isFitter()).
* The fitness is computed using the metric as soon as an individual is inserted into the population.
*
* The population is immutable. Selections, mutations and crossover work by producing a new
* instance and copying the individuals.
*/
class Population
{
public:
static constexpr size_t MaxChromosomeLength = 30;
explicit Population(
std::shared_ptr<FitnessMetric const> _fitnessMetric,
std::vector<Chromosome> _chromosomes = {}
@ -101,7 +103,7 @@ public:
size_t _maxChromosomeLength
);
void run(std::optional<size_t> _numRounds, std::ostream& _outputStream);
Population select(Selection const& _selection) const;
friend Population (::operator+)(Population _a, Population _b);
std::shared_ptr<FitnessMetric const> fitnessMetric() const { return m_fitnessMetric; }
@ -120,14 +122,6 @@ private:
m_fitnessMetric(std::move(_fitnessMetric)),
m_individuals{sortedIndividuals(std::move(_individuals))} {}
void doMutation();
void doSelection();
static void randomizeWorstChromosomes(
FitnessMetric const& _fitnessMetric,
std::vector<Individual>& _individuals,
size_t _count
);
static std::vector<Individual> chromosomesToIndividuals(
FitnessMetric const& _fitnessMetric,
std::vector<Chromosome> _chromosomes

View File

@ -0,0 +1,60 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
#include <tools/yulPhaser/Selections.h>
#include <tools/yulPhaser/SimulationRNG.h>
#include <cmath>
using namespace std;
using namespace solidity::phaser;
/// Produces the consecutive indices covered by the range for a collection of @a _poolSize
/// elements. Both boundaries are obtained by scaling the stored fractions with the pool size
/// and rounding to the nearest integer; the end boundary itself is not part of the result.
vector<size_t> RangeSelection::materialise(size_t _poolSize) const
{
	auto const firstIndex = static_cast<size_t>(round(_poolSize * m_startPercent));
	auto const pastLastIndex = static_cast<size_t>(round(_poolSize * m_endPercent));

	vector<size_t> indices;
	size_t index = firstIndex;
	while (index < pastLastIndex)
	{
		indices.push_back(index);
		++index;
	}

	return indices;
}
/// Produces round(_poolSize * m_selectionSize) indices by cycling through the stored pattern.
/// Pattern entries larger than the last valid index of the collection are replaced with that
/// last index.
vector<size_t> MosaicSelection::materialise(size_t _poolSize) const
{
	auto const indexCount = static_cast<size_t>(round(_poolSize * m_selectionSize));

	vector<size_t> indices;
	for (size_t position = 0; position != indexCount; ++position)
	{
		// Wrap around so that the pattern starts over once it has been used up.
		size_t const patternPosition = position % m_pattern.size();
		indices.push_back(min(m_pattern[patternPosition], _poolSize - 1));
	}

	return indices;
}
/// Produces round(_poolSize * m_selectionSize) indices, each obtained from
/// SimulationRNG::uniformInt() called with the bounds 0 and _poolSize - 1.
vector<size_t> RandomSelection::materialise(size_t _poolSize) const
{
	auto const indexCount = static_cast<size_t>(round(_poolSize * m_selectionSize));

	vector<size_t> indices;
	while (indices.size() < indexCount)
		indices.push_back(SimulationRNG::uniformInt(0, _poolSize - 1));

	return indices;
}

View File

@ -0,0 +1,121 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Contains an abstract base class representing a selection of elements from a collection
* and its concrete implementations.
*/
#pragma once
#include <cassert>
#include <vector>
namespace solidity::phaser
{
/**
 * Interface for strategies that pick elements out of a collection.
 *
 * An object of this type describes one particular way of choosing elements from containers of
 * any size. What gets chosen is always a subset of the container, although a single element may
 * be indicated several times. Whether repeated requests for the same container yield identical
 * indices is left to the concrete implementation.
 *
 * Concrete selections implement @a materialise(), which turns the size of a collection into
 * the indices of the selected elements.
 */
class Selection
{
public:
	Selection() = default;
	virtual ~Selection() = default;

	// Selections can be neither copy-constructed nor copy-assigned.
	Selection(Selection const&) = delete;
	Selection& operator=(Selection const&) = delete;

	/// Produces the indices of the selected elements for a collection of @a _poolSize elements.
	virtual std::vector<size_t> materialise(size_t _poolSize) const = 0;
};
/**
 * Selects one contiguous run of elements from the container. Both ends of the run are given
 * relative to the size of the container, as values between 0.0 and 1.0.
 */
class RangeSelection: public Selection
{
public:
	/// @param _startPercent Relative position at which the selected run begins.
	/// @param _endPercent Relative position at which the selected run ends.
	explicit RangeSelection(double _startPercent = 0.0, double _endPercent = 1.0):
		m_startPercent{_startPercent},
		m_endPercent{_endPercent}
	{
		// The boundaries must be ordered and must both lie within [0, 1].
		assert(0 <= m_startPercent && m_startPercent <= m_endPercent && m_endPercent <= 1.0);
	}

	std::vector<size_t> materialise(size_t _poolSize) const override;

private:
	double m_startPercent;
	double m_endPercent;
};
/**
 * A selection that picks elements at fixed positions given by a repeating "pattern".
 * Pattern positions that exceed the size of the container are capped at the last available
 * position. The number of selected elements is the size of the container multiplied by
 * @a _selectionSize, rounded to the nearest integer (so nothing is selected from an empty
 * container).
 *
 * E.g. if the pattern is {0, 9} and collection size is 5, the selection will materialise into
 * {0, 4, 0, 4, 0}. If the size is 3, it will be {0, 2, 0}.
 */
class MosaicSelection: public Selection
{
public:
	/// @param _pattern Positions to cycle through. May only be empty if @a _selectionSize is zero.
	/// @param _selectionSize Non-negative multiplier applied to the container size to get the
	///     number of selected elements.
	explicit MosaicSelection(std::vector<size_t> _pattern, double _selectionSize = 1.0):
		// Qualified explicitly; this header has no using-directive and should not depend on ADL.
		m_pattern(std::move(_pattern)),
		m_selectionSize(_selectionSize)
	{
		// An empty pattern provides no positions so it is only acceptable when nothing is selected.
		assert(m_pattern.size() > 0 || _selectionSize == 0.0);
		// A negative multiplier does not describe an element count. RandomSelection enforces
		// the same precondition.
		assert(_selectionSize >= 0);
	}

	std::vector<size_t> materialise(size_t _poolSize) const override;

private:
	std::vector<size_t> m_pattern;
	double m_selectionSize;
};
/**
 * Picks elements of a container at random. The produced indices may repeat and every call to
 * @a materialise() gives a different result. The number of selected elements is the size of the
 * container multiplied by @a _selectionSize (an empty container yields nothing).
 */
class RandomSelection: public Selection
{
public:
	/// @param _selectionSize Multiplier applied to the container size. Must not be negative.
	explicit RandomSelection(double _selectionSize):
		m_selectionSize{_selectionSize}
	{
		assert(_selectionSize >= 0);
	}

	std::vector<size_t> materialise(size_t _poolSize) const override;

private:
	double m_selectionSize;
};
}

View File

@ -18,6 +18,7 @@
#include <tools/yulPhaser/Exceptions.h>
#include <tools/yulPhaser/Population.h>
#include <tools/yulPhaser/FitnessMetrics.h>
#include <tools/yulPhaser/GeneticAlgorithms.h>
#include <tools/yulPhaser/Program.h>
#include <tools/yulPhaser/SimulationRNG.h>
@ -29,7 +30,6 @@
#include <boost/program_options.hpp>
#include <iostream>
#include <functional>
#include <string>
using namespace std;
@ -71,14 +71,27 @@ CharStream loadSource(string const& _sourcePath)
/// Loads the source file at @a _sourcePath, creates a population for it and runs an algorithm on it.
/// NOTE(review): this span appears to be a rendered diff hunk in which removed and added lines
/// are interleaved without +/- markers, so it does not compile as shown. Confirm every remark
/// below against the actual file.
void runAlgorithm(string const& _sourcePath)
{
// Values shared by the population set-up and by the algorithm options further down.
constexpr size_t populationSize = 20;
constexpr size_t minChromosomeLength = 12;
constexpr size_t maxChromosomeLength = 30;
CharStream sourceCode = loadSource(_sourcePath);
// The metric is built from the program loaded from the source.
// NOTE(review): the meaning of the literal 5 is not visible here - check the ProgramSize constructor.
shared_ptr<FitnessMetric> fitnessMetric = make_shared<ProgramSize>(Program::load(sourceCode), 5);
auto population = Population::makeRandom(
fitnessMetric,
// NOTE(review): the next two lines look like the pre-change arguments (there is no comma after
// the bind(...) call); presumably they were replaced by the three arguments that follow - verify.
10,
bind(Population::binomialChromosomeLength, Population::MaxChromosomeLength)
populationSize,
minChromosomeLength,
maxChromosomeLength
);
// NOTE(review): presumably the pre-change way of running the simulation, superseded by the
// RandomAlgorithm call below - verify.
population.run(nullopt, cout);
// Builds a RandomAlgorithm from the population, cout and the options listed below, then runs it.
RandomAlgorithm(
population,
cout,
{
/* elitePoolSize = */ 1.0 / populationSize,
/* minChromosomeLength = */ minChromosomeLength,
/* maxChromosomeLength = */ maxChromosomeLength,
}
).run();
}
CommandLineParsingResult parseCommandLine(int argc, char** argv)