From e19d8d1fa376e0478f26c0daeb93e2cbfbddc55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Fri, 20 Mar 2020 18:54:10 +0100 Subject: [PATCH 1/5] [yul-phaser] GeneticAlgorithm::runNextRound(): Fix outdated docstring --- tools/yulPhaser/GeneticAlgorithms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/yulPhaser/GeneticAlgorithms.h b/tools/yulPhaser/GeneticAlgorithms.h index 96c027738..f6c809a8b 100644 --- a/tools/yulPhaser/GeneticAlgorithms.h +++ b/tools/yulPhaser/GeneticAlgorithms.h @@ -58,8 +58,8 @@ public: GeneticAlgorithm& operator=(GeneticAlgorithm const&) = delete; virtual ~GeneticAlgorithm() = default; - /// The method that actually implements the algorithm. Should use @a m_population as input and - /// replace it with the updated state after the round. + /// The method that actually implements the algorithm. Should accept the current population in + /// @a _population and return the updated one after the round. virtual Population runNextRound(Population _population) = 0; }; From 424edecd21ed56582e2b9721cd73a10717f99d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Fri, 20 Mar 2020 18:14:28 +0100 Subject: [PATCH 2/5] [yul-phaser] Phaser: List all available values of enum options in --help --- tools/yulPhaser/Phaser.cpp | 49 ++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/tools/yulPhaser/Phaser.cpp b/tools/yulPhaser/Phaser.cpp index ebfdbb897..47f988ce9 100644 --- a/tools/yulPhaser/Phaser.cpp +++ b/tools/yulPhaser/Phaser.cpp @@ -437,8 +437,15 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() ( "mode", po::value()->value_name("")->default_value(PhaserMode::RunAlgorithm), - "Mode of operation. The default is to run the algorithm but you can also tell phaser " - "to do something else with its parameters, e.g. just print the optimised programs and exit." + ( + "Mode of operation. 
The default is to run the algorithm but you can also tell phaser " + "to do something else with its parameters, e.g. just print the optimised programs and exit.\n" + "\n" + "AVAILABLE MODES:\n" + "* " + toString(PhaserMode::RunAlgorithm) + "\n" + + "* " + toString(PhaserMode::PrintOptimisedPrograms) + "\n" + + "* " + toString(PhaserMode::PrintOptimisedASTs) + ).c_str() ) ; keywordDescription.add(generalDescription); @@ -448,7 +455,14 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() ( "algorithm", po::value()->value_name("")->default_value(Algorithm::GEWEP), - "Algorithm" + ( + "Algorithm\n" + "\n" + "AVAILABLE ALGORITHMS:\n" + "* " + toString(Algorithm::GEWEP) + "\n" + + "* " + toString(Algorithm::Classic) + "\n" + + "* " + toString(Algorithm::Random) + ).c_str() ) ( "no-randomise-duplicates", @@ -470,7 +484,14 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() ( "crossover", po::value()->value_name("")->default_value(CrossoverChoice::SinglePoint), - "Type of the crossover operator to use." + ( + "Type of the crossover operator to use.\n" + "\n" + "AVAILABLE CROSSOVER OPERATORS:\n" + "* " + toString(CrossoverChoice::SinglePoint) + "\n" + + "* " + toString(CrossoverChoice::TwoPoint) + "\n" + + "* " + toString(CrossoverChoice::Uniform) + ).c_str() ) ( "uniform-crossover-swap-chance", @@ -590,13 +611,27 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() ( "metric", po::value()->value_name("")->default_value(MetricChoice::RelativeCodeSize), - "Metric used to evaluate the fitness of a chromosome." 
+ ( + "Metric used to evaluate the fitness of a chromosome.\n" + "\n" + "AVAILABLE METRICS:\n" + "* " + toString(MetricChoice::CodeSize) + "\n" + + "* " + toString(MetricChoice::RelativeCodeSize) + ).c_str() ) ( "metric-aggregator", po::value()->value_name("")->default_value(MetricAggregatorChoice::Average), - "Operator used to combine multiple fitness metric obtained by evaluating a chromosome " - "separately for each input program." + ( + "Operator used to combine multiple fitness metric obtained by evaluating a chromosome " + "separately for each input program.\n" + "\n" + "AVAILABLE METRIC AGGREGATORS:\n" + "* " + toString(MetricAggregatorChoice::Average) + "\n" + + "* " + toString(MetricAggregatorChoice::Sum) + "\n" + + "* " + toString(MetricAggregatorChoice::Maximum) + "\n" + + "* " + toString(MetricAggregatorChoice::Minimum) + ).c_str() ) ( "relative-metric-scale", From 35395a4b9c7dd9db514aa3a4d13173b82f426480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Fri, 20 Mar 2020 18:15:44 +0100 Subject: [PATCH 3/5] [yul-phaser] Phaser: Missing word in --metric-aggregator option description --- tools/yulPhaser/Phaser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/yulPhaser/Phaser.cpp b/tools/yulPhaser/Phaser.cpp index 47f988ce9..8e34a756b 100644 --- a/tools/yulPhaser/Phaser.cpp +++ b/tools/yulPhaser/Phaser.cpp @@ -623,8 +623,8 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() "metric-aggregator", po::value()->value_name("")->default_value(MetricAggregatorChoice::Average), ( - "Operator used to combine multiple fitness metric obtained by evaluating a chromosome " - "separately for each input program.\n" + "Operator used to combine multiple fitness metric values obtained by evaluating a " + "chromosome separately for each input program.\n" "\n" "AVAILABLE METRIC AGGREGATORS:\n" "* " + toString(MetricAggregatorChoice::Average) + "\n" + From 163e35dd23b228d6c620f4717ff73fa972851d0b Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Fri, 20 Mar 2020 18:44:45 +0100 Subject: [PATCH 4/5] [yul-phaser] Tweak default values according to experiment results - Long chromosomes in the initial population are better. Set minimum and maximum to 100. - The classic algorithm does not work well without elite. 50% performed better but I think it might be too large. Let's set it to 25%. - Switch to uniform crossover since this is what was used in most experiments and performed well. --- tools/yulPhaser/Phaser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/yulPhaser/Phaser.cpp b/tools/yulPhaser/Phaser.cpp index 8e34a756b..d5b3bdda7 100644 --- a/tools/yulPhaser/Phaser.cpp +++ b/tools/yulPhaser/Phaser.cpp @@ -473,17 +473,17 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() ) ( "min-chromosome-length", - po::value()->value_name("")->default_value(12), + po::value()->value_name("")->default_value(100), "Minimum length of randomly generated chromosomes." ) ( "max-chromosome-length", - po::value()->value_name("")->default_value(30), + po::value()->value_name("")->default_value(100), "Maximum length of randomly generated chromosomes." ) ( "crossover", - po::value()->value_name("")->default_value(CrossoverChoice::SinglePoint), + po::value()->value_name("")->default_value(CrossoverChoice::Uniform), ( "Type of the crossover operator to use.\n" "\n" @@ -542,7 +542,7 @@ Phaser::CommandLineDescription Phaser::buildCommandLineDescription() classicGeneticAlgorithmDescription.add_options() ( "classic-elite-pool-size", - po::value()->value_name("")->default_value(0), + po::value()->value_name("")->default_value(0.25), "Percentage of population to regenerate using mutations in each round."
) ( From ee915008bdb29249f9a1301c367d7b69804df84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Fri, 20 Mar 2020 07:47:13 +0100 Subject: [PATCH 5/5] [yul-phaser] README --- tools/yulPhaser/README.md | 91 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tools/yulPhaser/README.md diff --git a/tools/yulPhaser/README.md b/tools/yulPhaser/README.md new file mode 100644 index 000000000..abc2a0e47 --- /dev/null +++ b/tools/yulPhaser/README.md @@ -0,0 +1,91 @@ +## yul-phaser +`yul-phaser` is an internal tool for finding good sequences of [optimisation steps](/libyul/optimiser/README.md) for the Yul optimiser. + +### How it works +The space of possible solutions to this problem (usually referred to as the _phase-ordering problem_) is extremely large and there may even be no single sequence that produces optimal results for all possible programs. + +The tool uses genetic algorithms to find sequences that result in better programs than others and to iteratively refine them. +The input is a set of one or more [Yul](/docs/yul.rst) programs and each sequence is applied to all of these programs. +Optimised programs are given numeric scores according to the selected metric. + +Optimisation step sequences are presented in an abbreviated form - as strings of letters where each character represents one step. +The abbreviations are defined in [`OptimiserSuite::stepNameToAbbreviationMap()`](/libyul/optimiser/Suite.cpp#L388-L423). + +### How to use it +The application has sensible defaults for most parameters. +An invocation can be as simple as: + +``` bash +tools/yul-phaser ../test/libyul/yulOptimizerTests/fullSuite/*.yul \ + --random-population 100 +``` + +This assumes that you have a working copy of the Solidity repository and you're in the build directory within that working copy. + +Run `yul-phaser --help` for a full list of available options.
+ +#### Restarting from a previous state +`yul-phaser` can save the list of sequences found after each round: + +``` bash +tools/yul-phaser *.yul \ + --random-population 100 \ + --population-autosave /tmp/population.txt +``` + +If you stop the application, you can later use the file to continue the search from the point you left off: + +``` bash +tools/yul-phaser *.yul \ + --population-from-file /tmp/population.txt \ + --population-autosave /tmp/population.txt +``` + +#### Analysing a sequence +Apart from running the genetic algorithm, `yul-phaser` can also provide useful information about a particular sequence. + +For example, to see the value of a particular metric for a given sequence and program, run: +``` bash +tools/yul-phaser *.yul \ + --show-initial-population \ + --rounds 0 \ + --metric code-size \ + --metric-aggregator sum \ + --population <your sequence> +``` + +You can also easily see program code after being optimised using that sequence: +``` bash +tools/yul-phaser *.yul \ + --rounds 0 \ + --mode print-optimised-programs \ + --population <your sequence> +``` + +#### Using output from Solidity compiler +`yul-phaser` can process the intermediate representation produced by `solc`: + +``` bash +solc/solc <sol file> \ + --ir \ + --no-optimize-yul \ + --output-dir <output directory> +``` + +After running this command you'll find one or more .yul files in the output directory. +These files contain whole Yul objects rather than just raw Yul programs but `yul-phaser` is prepared to handle them. + +### How to choose good parameters +Choosing good parameters for a genetic algorithm is not a trivial task but phaser's defaults are generally enough to find a sequence that gives results comparable to or better than one hand-crafted by an experienced developer for a given set of programs. +The difficult part is providing a fairly representative set of input files.
+If the files you give don't need certain optimisations, the tool will find sequences that don't use these optimisations and perform badly for programs that could benefit from them. +If all the provided files greatly benefit from a specific optimisation, the sequence may not work well for programs that do not. + +We have conducted [a set of rough experiments](https://github.com/ethereum/solidity/issues/7806#issuecomment-598644491) to evaluate some combinations of parameter values. +The conclusions were used to adjust the defaults but you might still benefit from some general observations: + +1. The algorithm that performed the best was `GEWEP`. +2. Using longer sequences in the initial population yields better results. The algorithm is good at removing superfluous steps. +3. Preserving the top sequences from previous rounds improves results. The elite should contain at least a few individuals, especially when using the `classic` algorithm. +4. Don't set mutation/deletion/addition chance too high. It makes results worse because it destroys the good patterns preserved by crossover. Values around 1-5% seem to work best. +5. Keep the algorithm running for 1000 rounds or more. It usually finds good sequences faster than that but it can shorten them significantly if you let it run longer. This is especially important when starting with long sequences.