solidity/tools/yulPhaser/ProgramCache.h

/*
	This file is part of solidity.

	solidity is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	solidity is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0

#pragma once

#include <tools/yulPhaser/Program.h>

#include <libyul/optimiser/Metrics.h>

#include <map>
#include <string>

namespace solidity::phaser
{

/**
 * Structure used by @a ProgramCache to store intermediate programs and metadata associated
 * with them.
 */
struct CacheEntry
{
	Program program;
	size_t roundNumber;

	CacheEntry(Program _program, size_t _roundNumber):
		program(std::move(_program)),
		roundNumber(_roundNumber) {}
};

/**
 * Stores statistics about current cache usage.
 */
struct CacheStats
{
	/// Weights used to compute totalCodeSize.
	/// The goal here is to get a result proportional to the amount of memory taken by the AST.
	/// Each statement/expression gets 1 just for existing. We add more if it contains any extra
	/// data that won't be visited separately by ASTWalker.
	static yul::CodeWeights constexpr StorageWeights = {
		/* expressionStatementCost = */ 1,
		/* assignmentCost = */ 1,
		/* variableDeclarationCost = */ 1,
		/* functionDefinitionCost = */ 1,
		/* ifCost = */ 1,
		/* switchCost = */ 1,
		/* caseCost = */ 1,
		/* forLoopCost = */ 1,
		/* breakCost = */ 1,
		/* continueCost = */ 1,
		/* leaveCost = */ 1,
		/* blockCost = */ 1,

		/* functionCallCost = */ 1,
		/* identifierCost = */ 1,
		/* literalCost = */ 1,
	};

	size_t hits;
	size_t misses;
	size_t totalCodeSize;
	std::map<size_t, size_t> roundEntryCounts;

	CacheStats& operator+=(CacheStats const& _other);
	CacheStats operator+(CacheStats const& _other) const { return CacheStats(*this) += _other; }

	bool operator==(CacheStats const& _other) const;
	bool operator!=(CacheStats const& _other) const { return !(*this == _other); }
};

/**
 * Class that optimises programs one step at a time which allows it to store and later reuse the
 * results of the intermediate steps.
 *
 * The cache keeps track of the current round number and associates newly created entries with it.
 * @a startRound() must be called at the beginning of a round so that entries that are too old
 * can be purged. The current strategy is to store programs corresponding to all possible prefixes
 * encountered in the current and the previous rounds. Entries older than that get removed to
 * conserve memory.
 *
 * @a gatherStats() allows getting statistics useful for determining cache effectiveness.
 *
 * The current strategy does speed things up (about 4:1 hit:miss ratio observed in my limited
 * experiments) but there's room for improvement. We could fit more useful programs in
 * the cache by being more picky about which ones we choose.
 *
 * There is currently no way to purge entries without starting a new round. Since the programs
 * take a lot of memory, this may lead to the cache eating up all the available RAM if sequences are
 * long and programs large. A limiter based on entry count or total program size would be useful.
 */
class ProgramCache
{
public:
	explicit ProgramCache(Program _program):
		m_program(std::move(_program)) {}

	Program optimiseProgram(
		std::string const& _abbreviatedOptimisationSteps,
		size_t _repetitionCount = 1
	);
	void startRound(size_t _nextRoundNumber);
	void clear();

	size_t size() const { return m_entries.size(); }
	Program const* find(std::string const& _abbreviatedOptimisationSteps) const;
	bool contains(std::string const& _abbreviatedOptimisationSteps) const { return find(_abbreviatedOptimisationSteps) != nullptr; }

	CacheStats gatherStats() const;

	std::map<std::string, CacheEntry> const& entries() const { return m_entries; };
	Program const& program() const { return m_program; }
	size_t currentRound() const { return m_currentRound; }

private:
	size_t calculateTotalCachedCodeSize() const;
	std::map<size_t, size_t> countRoundEntries() const;

	// The best matching data structure here would be a trie of chromosome prefixes but since
	// the programs are orders of magnitude larger than the prefixes, it does not really matter.
	// A map should be good enough.
	std::map<std::string, CacheEntry> m_entries;

	Program m_program;
	size_t m_currentRound = 0;
	size_t m_hits = 0;
	size_t m_misses = 0;
};

}
[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`/*`
			`This file is part of solidity.`

			`solidity is free software: you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation, either version 3 of the License, or`
			`(at your option) any later version.`

			`solidity is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with solidity. If not, see <http://www.gnu.org/licenses/>.`
			`*/`
Add SPDX license identifier if not present already in source file Fixes: #9220 2020-07-17 14:54:12 +00:00			`// SPDX-License-Identifier: GPL-3.0`
[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00
			`#pragma once`

			`#include <tools/yulPhaser/Program.h>`

[yul-phaser] ProgramCache::calculateTotalCachedCodeSize(): Replace default weights with ones that better correlate with memory usage 2020-02-29 21:13:49 +00:00			`#include <libyul/optimiser/Metrics.h>`

[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`#include <map>`
			`#include <string>`

			`namespace solidity::phaser`
			`{`

			`/**`
			`* Structure used by @a ProgramCache to store intermediate programs and metadata associated`
			`* with them.`
			`*/`
			`struct CacheEntry`
			`{`
			`Program program;`
			`size_t roundNumber;`

			`CacheEntry(Program _program, size_t _roundNumber):`
			`program(std::move(_program)),`
			`roundNumber(_roundNumber) {}`
			`};`

[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`/**`
			`* Stores statistics about current cache usage.`
			`*/`
			`struct CacheStats`
			`{`
[yul-phaser] ProgramCache::calculateTotalCachedCodeSize(): Replace default weights with ones that better correlate with memory usage 2020-02-29 21:13:49 +00:00			`/// Weights used to compute totalCodeSize.`
			`/// The goal here is to get a result proportional to the amount of memory taken by the AST.`
			`/// Each statement/expression gets 1 just for existing. We add more if it contains any extra`
			`/// data that won't be visited separately by ASTWalker.`
			`static yul::CodeWeights constexpr StorageWeights = {`
			`/* expressionStatementCost = */ 1,`
			`/* assignmentCost = */ 1,`
			`/* variableDeclarationCost = */ 1,`
			`/* functionDefinitionCost = */ 1,`
			`/* ifCost = */ 1,`
			`/* switchCost = */ 1,`
			`/* caseCost = */ 1,`
			`/* forLoopCost = */ 1,`
			`/* breakCost = */ 1,`
			`/* continueCost = */ 1,`
			`/* leaveCost = */ 1,`
			`/* blockCost = */ 1,`

			`/* functionCallCost = */ 1,`
			`/* identifierCost = */ 1,`
			`/* literalCost = */ 1,`
			`};`

[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`size_t hits;`
			`size_t misses;`
			`size_t totalCodeSize;`
			`std::map<size_t, size_t> roundEntryCounts;`

			`CacheStats& operator+=(CacheStats const& _other);`
			`CacheStats operator+(CacheStats const& _other) const { return CacheStats(*this) += _other; }`

			`bool operator==(CacheStats const& _other) const;`
			`bool operator!=(CacheStats const& _other) const { return !(*this == _other); }`
			`};`

[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`/**`
			`* Class that optimises programs one step at a time which allows it to store and later reuse the`
			`* results of the intermediate steps.`
			`*`
			`* The cache keeps track of the current round number and associates newly created entries with it.`
			`* @a startRound() must be called at the beginning of a round so that entries that are too old`
			`* can be purged. The current strategy is to store programs corresponding to all possible prefixes`
			`* encountered in the current and the previous rounds. Entries older than that get removed to`
			`* conserve memory.`
			`*`
[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`* @a gatherStats() allows getting statistics useful for determining cache effectiveness.`
			`*`
[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`* The current strategy does speed things up (about 4:1 hit:miss ratio observed in my limited`
			`* experiments) but there's room for improvement. We could fit more useful programs in`
			`* the cache by being more picky about which ones we choose.`
			`*`
			`* There is currently no way to purge entries without starting a new round. Since the programs`
			`* take a lot of memory, this may lead to the cache eating up all the available RAM if sequences are`
			`* long and programs large. A limiter based on entry count or total program size would be useful.`
			`*/`
			`class ProgramCache`
			`{`
			`public:`
			`explicit ProgramCache(Program _program):`
			`m_program(std::move(_program)) {}`

			`Program optimiseProgram(`
			`std::string const& _abbreviatedOptimisationSteps,`
			`size_t _repetitionCount = 1`
			`);`
			`void startRound(size_t _nextRoundNumber);`
			`void clear();`

			`size_t size() const { return m_entries.size(); }`
			`Program const* find(std::string const& _abbreviatedOptimisationSteps) const;`
			`bool contains(std::string const& _abbreviatedOptimisationSteps) const { return find(_abbreviatedOptimisationSteps) != nullptr; }`

[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`CacheStats gatherStats() const;`

[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`std::map<std::string, CacheEntry> const& entries() const { return m_entries; };`
			`Program const& program() const { return m_program; }`
			`size_t currentRound() const { return m_currentRound; }`

			`private:`
[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`size_t calculateTotalCachedCodeSize() const;`
			`std::map<size_t, size_t> countRoundEntries() const;`

[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`// The best matching data structure here would be a trie of chromosome prefixes but since`
			`// the programs are orders of magnitude larger than the prefixes, it does not really matter.`
			`// A map should be good enough.`
			`std::map<std::string, CacheEntry> m_entries;`

			`Program m_program;`
			`size_t m_currentRound = 0;`
[yul-phaser] ProgramCache: Add ability to gather cache stats 2020-02-29 00:53:11 +00:00			`size_t m_hits = 0;`
			`size_t m_misses = 0;`
[yul-phaser] Add ProgramCache class 2020-02-26 18:28:58 +00:00			`};`

			`}`