lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


David Given wrote:
>> In terms of Lua 5.1 bytecode this would replace GETGLOBAL+GETTABLE*
>> with a single LOADK. This ought to be *really* fast, both in the
>> interpreter and when compiled.
>
> For simple numerical stuff like trig functions, would the JIT manage to  
> then convert them to inline code sequences where appropriate? For  
> example, math.sin could theoretically become a single inline fsin  
> instruction in the right circumstances. Or will it still bounce it via  
> an external C function?

Both LJ1 and LJ2 inline math.sin and other functions -- even
without the proposed option. But they still have to do the two
table lookups. LJ2 is able to hoist these out of the loop, but
they are of course still done in the loop header.

The modification would allow both compilers to drop the table
lookups and the function dispatch check. This would go mostly
unnoticed for LJ2, but it would help to reduce the I-cache
footprint of the generated code.

Trivial example:
  local x=2; for i=1,100 do x=math.sqrt(x) end

LJ1:

[... start of loop before this ...]
--0006--  GETGLOBAL   5   -4      ; "math"
f7d217ef  BAE018EB09        mov edx,0x09eb18e0
f7d217f4  83C350            add ebx,byte +0x50
f7d217f7  E804FCFFFF        call 0xf7d21400	->GETGLOBAL

--0007--  GETTABLE    5    5   -5 ; "sqrt"
f7d217fc  8D7B50            lea edi,[ebx+0x50]
f7d217ff  BA4023EB09        mov edx,0x09eb2340
f7d21804  83C350            add ebx,byte +0x50
f7d21807  E804FCFFFF        call 0xf7d21410	->GETTABLE_KSTR

[...]
--0009--  CALL        5    2    2
f7d2181b  837B5806          cmp dword [ebx+0x58],byte +0x06
f7d2181f  7562              jnz 0xf7d21883
f7d21821  817B502023EB09    cmp dword [ebx+0x50],0x09eb2320
f7d21828  7559              jnz 0xf7d21883
f7d2182a  837B6803          cmp dword [ebx+0x68],byte +0x03
f7d2182e  7553              jnz 0xf7d21883
f7d21830  DD4360            fld qword [ebx+0x60]
f7d21833  D9FA              fsqrt                  <-------------
f7d21835  C7435803000000    mov dword [ebx+0x58],0x3
f7d2183c  DD5B50            fstp qword [ebx+0x50]
[... end of loop after this ...]

LJ2:

---- TRACE 0 start test.lua:1
0006  GGET     5   0      ; "math"
0007  TGETS    5   5   1  ; "sqrt"
0008  MOV      6   0
0009  CALL     5   2   2  ; math.sqrt
0010  MOV      0   5
0011  FORL     1 => 0006
---- TRACE 0 IR
0001 >  int SLOAD  #2    notc
0002 >  fun SLOAD  #0    notc
0003 >  fun FRAME  0002  test.lua:1
0004    tab FLOAD  test.lua:1  func.env
0005    int FLOAD  0004  tab.hmask
0006 >  int EQ     0005  +63
0007    int FLOAD  0004  tab.node
0008 >  int HREFK  0007  "math"     [slot 44]
0009 >  tab HLOAD  0008
0010    int FLOAD  0009  tab.hmask
0011 >  int EQ     0010  +31
0012    int FLOAD  0009  tab.node
0013 >  int HREFK  0012  "sqrt"     [slot 21]
0014 >  fun HLOAD  0013
0015 >  num SLOAD  #1  
0016 >  fun FRAME  0014  math.sqrt
0017  + num FPMATH 0015  sqrt
0018  + int ADD    0001  +1
0019 >  int LE     0018  +100
0020 ------ LOOP -----------
0021  + num FPMATH 0017  sqrt
0022  + int ADD    0018  +1
0023 >  int LE     0022  +100
0024    int PHI    0018  0022
0025    num PHI    0017  0021
---- TRACE 0 mcode 241
f7f58f0f  C705580107080000. mov dword [0x08070158], 0x0  // only for profiler
f7f58f19  C744240C00000000  mov dword [esp+0xc], 0x0
f7f58f21  89542410          mov [esp+0x10], edx          // BASE of Lua frame
f7f58f25  F20F107A08        movsd xmm7, [edx+0x8]        // narrowing of i
f7f58f2a  F20F2CF7          cvttsd2si esi, xmm7
f7f58f2e  F20F2AF6          cvtsi2sd xmm6, esi
f7f58f32  660F2EFE          ucomisd xmm7, xmm6
f7f58f36  0F85484E0F10      jnz ->EXIT_2
f7f58f3c  0F8A424E0F10      jpe ->EXIT_2
f7f58f42  817AF800FCD7F7    cmp dword [edx-0x8], 0xf7d7fc00
f7f58f49  0F85354E0F10      jnz ->EXIT_2
f7f58f4f  8B0508FCD7F7      mov eax, [0xf7d7fc08]        // globals
f7f58f55  83781C3F          cmp dword [eax+0x1c], +0x3f  // lookup "math"
f7f58f59  0F85254E0F10      jnz ->EXIT_2
f7f58f5f  8B4014            mov eax, [eax+0x14]
f7f58f62  83B82C040000FB    cmp dword [eax+0x42c], -0x05
f7f58f69  750A              jnz 0xf7f58f75
f7f58f6b  81B8280400006827. cmp dword [eax+0x428], 0xf7db2768
f7f58f75  0F85094E0F10      jnz ->EXIT_2
f7f58f7b  83B824040000F5    cmp dword [eax+0x424], -0x0b
f7f58f82  0F85FC4D0F10      jnz ->EXIT_2
f7f58f88  8B8020040000      mov eax, [eax+0x420]
f7f58f8e  83781C1F          cmp dword [eax+0x1c], +0x1f  // lookup "sqrt"
f7f58f92  0F85EC4D0F10      jnz ->EXIT_2
f7f58f98  8B4014            mov eax, [eax+0x14]
f7f58f9b  83B804020000FB    cmp dword [eax+0x204], -0x05
f7f58fa2  750A              jnz 0xf7f58fae
f7f58fa4  81B800020000D02B. cmp dword [eax+0x200], 0xf7db2bd0
f7f58fae  0F85D04D0F10      jnz ->EXIT_2
f7f58fb4  83B8FC010000F7    cmp dword [eax+0x1fc], -0x09
f7f58fbb  0F85C34D0F10      jnz ->EXIT_2
f7f58fc1  837A04F3          cmp dword [edx+0x4], -0x0d   // type check of x
f7f58fc5  0F87B94D0F10      ja ->EXIT_2
f7f58fcb  81B8F8010000B02B. cmp dword [eax+0x1f8], 0xf7db2bb0  // check func ID
f7f58fd5  0F85A94D0F10      jnz ->EXIT_2
f7f58fdb  F20F513A          sqrtsd xmm7, [edx]  // sqrt() once in pre-roll
f7f58fdf  83C601            add esi, +0x01
f7f58fe2  83FE64            cmp esi, +0x64
f7f58fe5  0F8F9D4D0F10      jg ->EXIT_1
->LOOP:
f7f58feb  F20F51FF          sqrtsd xmm7, xmm7   // sqrt() in loop
f7f58fef  83C601            add esi, +0x01
f7f58ff2  83FE64            cmp esi, +0x64
f7f58ff5  0F8EF0FFFFFF      jle ->LOOP
f7f58ffb  E9904D0F10        jmp ->EXIT_0
---- TRACE 0 end

--Mike