diff --git a/docs/assembly.rst b/docs/assembly.rst index cc2ad1c4e..415bb1a1f 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -337,8 +337,6 @@ in solidity are: This feature is still a bit cumbersome to use, because the stack offset essentially changes during the call, and thus references to local variables will be wrong. -The assumed stack height can be changed by using labels: ``funreturn[-2]:`` -The number in brackets should be `m - n - 1`. .. code:: @@ -415,19 +413,23 @@ to the stack height just prior to the label. Note that you will not have to care about these things if you just use loops and assembly-level functions. +As an example how this can be done in extreme cases, please see the following. + .. code:: { let x := 8 jump(two) - one[1]: - // Now the stack height is correctly adjusted. - x := 9 + 0 // This code is unreachable but will adjust the stack height correctly + one: + x := 9 // Now x can be accessed properly. jump(three) + pop // Similar negative correction. two: 7 // push something onto the stack jump(one) three: + pop // We have to pop the manually pushed value here again. } .. note:: @@ -652,17 +654,12 @@ for e.g. returning runtime code or creating contracts. No identifier from an outer assembly is visible in a sub-assembly. If control flow passes over the end of a block, pop instructions are inserted -that match the number of local variables declared in that block, unless the -``}`` is directly preceded by an opcode that does not have a continuing control -flow path. Whenever a local variable is referenced, the code generator needs +that match the number of local variables declared in that block. +Whenever a local variable is referenced, the code generator needs to know its current relative position in the stack and thus it needs to -keep track of the current so-called stack height. -At the end of a block, this implicit stack height is always reduced by the number -of local variables whether there is a continuing control flow or not. - -This means that the stack height before and after the block should be the same. -If this is not the case, a warning is issued, -unless the last instruction in the block did not have a continuing control flow path. +keep track of the current so-called stack height. Since all local variables +are removed at the end of a block, the stack height before and after the block +should be the same. If this is not the case, a warning is issued. Why do we use higher-level constructs like ``switch``, ``for`` and functions: @@ -674,10 +671,9 @@ verification and optimization. Furthermore, if manual jumps are allowed, computing the stack height is rather complicated. The position of all local variables on the stack needs to be known, otherwise neither references to local variables nor removing local variables automatically -from the stack at the end of a block will work properly. Because of that, -every label that is preceded by an instruction that ends or diverts control flow -should be annotated with the current stack layout. This annotation is performed -automatically during the desugaring phase. +from the stack at the end of a block will work properly. The desugaring +mechanism correctly inserts operations at unreachable blocks that adjust the +stack height properly in case of jumps that do not have a continuing control flow. Example: @@ -730,17 +726,21 @@ After the desugaring phase it looks as follows:: $case1: { // the function call - we put return label and arguments on the stack - $ret1 calldataload(4) jump($fun_f) - $ret1 [r]: // a label with a [...]-annotation resets the stack height - // to "current block + number of local variables". It also - // introduces a variable, r: - // r is at top of stack, $0 is below (from enclosing block) - $ret2 0x20 jump($fun_allocate) - $ret2 [ret]: // stack here: $0, r, ret (top) + $ret1 calldataload(4) jump(f) + // This is unreachable code. Opcodes are added that mirror the + // effect of the function on the stack height: Arguments are + // removed and return values are introduced. + pop pop + let r := 0 + $ret1: // the actual return point + $ret2 0x20 jump($allocate) + pop pop let ret := 0 + $ret2: mstore(ret, r) return(ret, 0x20) // although it is useless, the jump is automatically inserted, - // since the desugaring process does not analyze control-flow + // since the desugaring process is a purely syntactic operation that + // does not analyze control-flow jump($endswitch) } $caseDefault: @@ -751,20 +751,29 @@ After the desugaring phase it looks as follows:: $endswitch: } jump($afterFunction) - $fun_allocate: + allocate: { - $start[$retpos, size]: - // output variables live in the same scope as the arguments. + // we jump over the unreachable code that introduces the function arguments + jump($start) + let $retpos := 0 let size := 0 + $start: + // output variables live in the same scope as the arguments and is + // actually allocated. let pos := 0 { pos := mload(0x40) mstore(0x40, add(pos, size)) } + // This code replaces the arguments by the return values and jumps back. swap1 pop swap1 jump + // Again unreachable code that corrects stack height. + 0 0 } - $fun_f: + f: { - start [$retpos, x]: + jump($start) + let $retpos := 0 let x := 0 + $start: let y := 0 { let i := 0 @@ -777,8 +786,9 @@ After the desugaring phase it looks as follows:: { i := add(i, 1) } jump($for_begin) $for_end: - } // Here, a pop instruction is inserted for i + } // Here, a pop instruction will be inserted for i swap1 pop swap1 jump + 0 0 } $afterFunction: stop @@ -839,7 +849,7 @@ Grammar:: IdentifierOrList = Identifier | '(' IdentifierList ')' IdentifierList = Identifier ( ',' Identifier)* AssemblyAssignment = '=:' Identifier - LabelDefinition = Identifier ( '[' ( IdentifierList? | NumberLiteral ) ']' )? ':' + LabelDefinition = Identifier ':' AssemblySwitch = 'switch' FunctionalAssemblyExpression AssemblyCase* ( 'default' ':' AssemblyBlock )? AssemblyCase = 'case' FunctionalAssemblyExpression ':' AssemblyBlock @@ -872,11 +882,14 @@ Pseudocode:: AssemblyFunctionDefinition('function' name '(' arg1, ..., argn ')' '->' ( '(' ret1, ..., retm ')' body) -> : { - $_start [$retPC, $argn, ..., arg1]: + jump($_start) + let $retPC := 0 let argn := 0 ... let arg1 := 0 + $_start: let ret1 := 0 ... let retm := 0 { desugar(body) } - swap and pop items so that only ret1, ... retn, $retPC are left on the stack - jump + swap and pop items so that only ret1, ... retm, $retPC are left on the stack + jump + 0 (1 + n times) to compensate removal of arg1, ..., argn and $retPC } AssemblyFor('for' { init } condition post body) -> { @@ -896,6 +909,7 @@ Pseudocode:: pop all local variables that are defined at the current point but not at $forI_end jump($forI_end) + 0 (as many as variables were removed above) } 'continue' -> { @@ -903,6 +917,7 @@ Pseudocode:: pop all local variables that are defined at the current point but not at $forI_continue jump($forI_continue) + 0 (as many as variables were removed above) } AssemblySwitch(switch condition cases ( default: defaultBlock )? ) -> { @@ -924,10 +939,13 @@ Pseudocode:: { // find I such that $funcallI_* does not exist $funcallI_return argn ... arg2 arg1 jump() + pop (n + 1 times) if the current context is `let (id1, ..., idm) := f(...)` -> - $funcallI_return [id1, ..., idm]: + let id1 := 0 ... let idm := 0 + $funcallI_return: else -> - $funcallI_return[m - n - 1]: + 0 (m times) + $funcallI_return: turn the functional expression that leads to the function call into a statement stream } @@ -987,16 +1005,8 @@ Pseudocode:: look up id in the syntactic stack of blocks, assert that it is a variable SWAPi where i = 1 + stack_height - stack_height_of_identifier(id) POP - LabelDefinition(name [id1, ..., idn] :) -> + LabelDefinition(name:) -> JUMPDEST - // register new local variables id1, ..., idn and adjust the stack - // height such that it matches the stack height at the beginning of - // the block plus all local variables including the just registered - // ones where idn is at the stack top. If n is zero, resets the stack - // height to just the local variables. - LabelDefinition(name [number] :) -> - JUMPDEST - // adjust stack height by the relative amount "number" (can be negative) NumberLiteral(num) -> PUSH HexLiteral(lit) ->