From 9bb6176c9d599417be30dc21d63c9d5e470aecf9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 27 Jul 2019 11:31:13 -0500 Subject: [PATCH 1/3] Replace the slow slotname Dict with a counter ("age")-based mechanism Co-authored-by: "Kristoffer Carlsson" --- src/breakpoints.jl | 23 +++++++++++-- src/construct.jl | 83 +++++++++++++++++++++------------------------ src/interpret.jl | 4 +-- src/types.jl | 20 ++++++----- src/utils.jl | 24 ++++++++----- test/breakpoints.jl | 18 ++++++++++ 6 files changed, 107 insertions(+), 65 deletions(-) diff --git a/src/breakpoints.jl b/src/breakpoints.jl index 91cc8e2c..6e2d4ad7 100644 --- a/src/breakpoints.jl +++ b/src/breakpoints.jl @@ -132,8 +132,27 @@ function prepare_slotfunction(framecode::FrameCode, body::Union{Symbol,Expr}) for i = 1:length(slotnames) slotname = framecode.src.slotnames[i] qslotname = QuoteNode(slotname) - getexpr = :(something($dataname.locals[$dataname.last_reference[$qslotname]])) - push!(assignments, Expr(:(=), slotname, :(haskey($dataname.last_reference, $qslotname) ? $getexpr : $default))) + list = framecode.slotnamelists[slotname] + if length(list) == 1 + maxexpr = :($dataname.last_reference[$(list[1])] > 0 ? $(list[1]) : 0) + else + maxcounter, maxidx = gensym("maxcounter"), gensym("maxidx") + maxexpr = quote + begin + $maxcounter, $maxidx = 0, 0 + for l in $list + counter = $dataname.last_reference[l] + if counter > $maxcounter + $maxcounter, $maxidx = counter, l + end + end + $maxidx + end + end + end + maxexsym = gensym("slotid") + push!(assignments, :($maxexsym = $maxexpr)) + push!(assignments, :($slotname = $maxexsym > 0 ? 
something($dataname.locals[$maxexsym]) : $default)) end if ismeth syms = sparam_syms(framecode.scope) diff --git a/src/construct.jl b/src/construct.jl index 9b707866..bfde544e 100644 --- a/src/construct.jl +++ b/src/construct.jl @@ -258,56 +258,49 @@ function prepare_call(@nospecialize(f), allargs; enter_generated = false) end function prepare_framedata(framecode, argvals::Vector{Any}, caller_will_catch_err::Bool=false) + src = framecode.src + slotnames = src.slotnames::SlotNamesType + ssavt = src.ssavaluetypes + ng, ns = isa(ssavt, Int) ? ssavt : length(ssavt::Vector{Any}), length(src.slotflags) + if length(junk) > 0 + olddata = pop!(junk) + locals, ssavalues, sparams = olddata.locals, olddata.ssavalues, olddata.sparams + exception_frames, last_reference = olddata.exception_frames, olddata.last_reference + last_exception = olddata.last_exception + callargs = olddata.callargs + resize!(locals, ns) + fill!(locals, nothing) + resize!(ssavalues, ng) + # for check_isdefined to work properly, we need sparams to start out unassigned + resize!(sparams, 0) + empty!(exception_frames) + resize!(last_reference, ns) + last_exception[] = nothing + else + locals = Vector{Union{Nothing,Some{Any}}}(nothing, ns) + ssavalues = Vector{Any}(undef, ng) + sparams = Vector{Any}(undef, 0) + exception_frames = Int[] + last_reference = Vector{Int}(undef, ns) + callargs = Any[] + last_exception = Ref{Any}(nothing) + end + fill!(last_reference, 0) if isa(framecode.scope, Method) - meth, src = framecode.scope::Method, framecode.src - slotnames = src.slotnames::SlotNamesType - ssavt = src.ssavaluetypes - ng = isa(ssavt, Int) ? 
ssavt : length(ssavt::Vector{Any}) + meth = framecode.scope::Method nargs, meth_nargs = length(argvals), Int(meth.nargs) - if length(junk) > 0 - olddata = pop!(junk) - locals, ssavalues, sparams = olddata.locals, olddata.ssavalues, olddata.sparams - exception_frames, last_reference = olddata.exception_frames, olddata.last_reference - last_exception = olddata.last_exception - callargs = olddata.callargs - resize!(locals, length(src.slotflags)) - resize!(ssavalues, ng) - # for check_isdefined to work properly, we need sparams to start out unassigned - resize!(sparams, 0) - empty!(exception_frames) - empty!(last_reference) - last_exception[] = nothing - else - locals = Vector{Union{Nothing,Some{Any}}}(undef, length(src.slotflags)) - ssavalues = Vector{Any}(undef, ng) - sparams = Vector{Any}(undef, 0) - exception_frames = Int[] - last_reference = Dict{Symbol,Int}() - callargs = Any[] - last_exception = Ref{Any}(nothing) - end - for i = 1:meth_nargs - last_reference[slotnames[i]::Symbol] = i - if meth.isva && i == meth_nargs - locals[i] = nargs < i ? Some{Any}(()) : (let i=i; Some{Any}(ntuple(k->argvals[i+k-1], nargs-i+1)); end) - break + islastva = meth.isva && nargs >= meth_nargs + for i = 1:meth_nargs-islastva + if nargs >= i + locals[i], last_reference[i] = Some{Any}(argvals[i]), 1 + else + locals[i] = Some{Any}(()) end - locals[i] = nargs >= i ? 
Some{Any}(argvals[i]) : Some{Any}(()) end - # add local variables initially undefined - for i = (meth_nargs+1):length(slotnames) - locals[i] = nothing + if islastva + locals[meth_nargs] = (let i=meth_nargs; Some{Any}(ntuple(k->argvals[i+k-1], nargs-i+1)); end) + last_reference[meth_nargs] = 1 end - else - src = framecode.src - locals = Vector{Union{Nothing,Some{Any}}}(undef, length(src.slotflags)) # src.slotflags is concretely typed, unlike slotnames - fill!(locals, nothing) - ssavalues = Vector{Any}(undef, length(src.code)) - sparams = Any[] - exception_frames = Int[] - last_reference = Dict{Symbol,Int}() - callargs = Any[] - last_exception = Ref{Any}(nothing) end FrameData(locals, ssavalues, sparams, exception_frames, last_exception, caller_will_catch_err, last_reference, callargs) end diff --git a/src/interpret.jl b/src/interpret.jl index 6e1b49e3..84531972 100644 --- a/src/interpret.jl +++ b/src/interpret.jl @@ -336,9 +336,9 @@ function do_assignment!(frame, @nospecialize(lhs), @nospecialize(rhs)) if isa(lhs, SSAValue) data.ssavalues[lhs.id] = rhs elseif isa(lhs, SlotNumber) + counter = (frame.assignment_counter += 1) data.locals[lhs.id] = Some{Any}(rhs) - slotnames = code.src.slotnames::SlotNamesType - data.last_reference[slotnames[lhs.id]::Symbol] = lhs.id + data.last_reference[lhs.id] = counter elseif isa(lhs, GlobalRef) Core.eval(lhs.mod, :($(lhs.name) = $(QuoteNode(rhs)))) elseif isa(lhs, Symbol) diff --git a/src/types.jl b/src/types.jl index c073f384..cba21732 100644 --- a/src/types.jl +++ b/src/types.jl @@ -70,6 +70,7 @@ struct FrameCode src::CodeInfo methodtables::Vector{Union{Compiled,TypeMapEntry}} # line-by-line method tables for generic-function :call Exprs breakpoints::Vector{BreakpointState} + slotnamelists::Dict{Symbol,Vector{Int}} used::BitSet generator::Bool # true if this is for the expression-generator of a @generated function end @@ -89,8 +90,13 @@ function FrameCode(scope, src::CodeInfo; generator=false, optimize=true) src.code[i] = nothing 
end end + slotnamelists = Dict{Symbol,Vector{Int}}() + for (i, sym) in enumerate(src.slotnames) + list = get(slotnamelists, sym, Int[]) + slotnamelists[sym] = push!(list, i) + end used = find_used(src) - framecode = FrameCode(scope, src, methodtables, breakpoints, used, generator) + framecode = FrameCode(scope, src, methodtables, breakpoints, slotnamelists, used, generator) if scope isa Method for bp in _breakpoints # Manual union splitting @@ -151,9 +157,7 @@ struct FrameData exception_frames::Vector{Int} last_exception::Base.RefValue{Any} caller_will_catch_err::Bool - # A vector from names to the slotnumber of that name - # for which a reference was last encountered. - last_reference::Dict{Symbol,Int} + last_reference::Vector{Int} callargs::Vector{Any} # a temporary for processing arguments of :call exprs end @@ -176,10 +180,11 @@ mutable struct Frame framecode::FrameCode framedata::FrameData pc::Int + assignment_counter::Int caller::Union{Frame,Nothing} callee::Union{Frame,Nothing} end -Frame(framecode, framedata, pc=1, caller=nothing) = Frame(framecode, framedata, pc, caller, nothing) +Frame(framecode, framedata, pc=1, caller=nothing) = Frame(framecode, framedata, pc, 1, caller, nothing) caller(frame) = frame.caller callee(frame) = frame.callee @@ -331,7 +336,7 @@ struct BreakpointSignature <: AbstractBreakpoint enabled::Ref{Bool} instances::Vector{BreakpointRef} end -same_location(bp2::BreakpointSignature, bp::BreakpointSignature) = +same_location(bp2::BreakpointSignature, bp::BreakpointSignature) = bp2.f == bp.f && bp2.sig == bp.sig && bp2.line == bp.line function Base.show(io::IO, bp::BreakpointSignature) print(io, bp.f) @@ -369,7 +374,7 @@ struct BreakpointFileLocation <: AbstractBreakpoint enabled::Ref{Bool} instances::Vector{BreakpointRef} end -same_location(bp2::BreakpointFileLocation, bp::BreakpointFileLocation) = +same_location(bp2::BreakpointFileLocation, bp::BreakpointFileLocation) = bp2.path == bp.path && bp2.abspath == bp.abspath && bp2.line == 
bp.line function Base.show(io::IO, bp::BreakpointFileLocation) print(io, bp.path, ':', bp.line) @@ -378,4 +383,3 @@ function Base.show(io::IO, bp::BreakpointFileLocation) print(io, " [disabled]") end end - diff --git a/src/utils.jl b/src/utils.jl index 03512a7c..dbd65b01 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -335,16 +335,24 @@ end Return the local variables as a vector of `Variable`[@ref]. """ function locals(frame::Frame) - vars = Variable[] + vars, var_counter = Variable[], Int[] + varlookup = Dict{Symbol,Int}() data, code = frame.framedata, frame.framecode - added = Set{Symbol}() slotnames = code.src.slotnames::SlotNamesType - for sym in slotnames - sym ∈ added && continue - idx = get(data.last_reference, sym, 0) - idx == 0 && continue - push!(vars, Variable(something(data.locals[idx]), sym, false)) - push!(added, sym) + for (sym, counter, val) in zip(slotnames, data.last_reference, data.locals) + counter == 0 && continue + var = Variable(something(val), sym, false) + idx = get(varlookup, sym, 0) + if idx > 0 + if counter > var_counter[idx] + vars[idx] = var + var_counter[idx] = counter + end + else + varlookup[sym] = length(vars)+1 + push!(vars, var) + push!(var_counter, counter) + end end if code.scope isa Method syms = sparam_syms(code.scope) diff --git a/test/breakpoints.jl b/test/breakpoints.jl index fcd7c65a..11cae71c 100644 --- a/test/breakpoints.jl +++ b/test/breakpoints.jl @@ -116,6 +116,24 @@ struct Squarer end @test !any(v->v.name == :b, var) @test filter(v->v.name == :a, var)[1].value == 2 + # Method with local scope (two slots with same name) + ln = @__LINE__ + function ftwoslots() + y = 1 + z = let y = y + y = y + 2 + rand() + end + y = y + 1 + return z + end + bp = breakpoint(@__FILE__, ln+5, :(y > 2)) + frame, bp2 = @interpret ftwoslots() + var = JuliaInterpreter.locals(leaf(frame)) + @test filter(v->v.name == :y, var)[1].value == 3 + remove(bp) + bp = breakpoint(@__FILE__, ln+8, :(y > 2)) + @test isa(@interpret(ftwoslots()), Float64) # 
Direct return @breakpoint gcd(1,1) a==5 From 83f4716f9fbdcdf382d7fcd3543b01470ef09188 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 29 Jul 2019 10:02:23 -0500 Subject: [PATCH 2/3] A new serialization format for optimizing & executing lowered IR This is a draft of a new tokenization of lowered IR. The main goal is to split off from the representation currently used in Base so that we are free to perform more significant transformations of the IR to enable more performance optimizations. This tackles the first step, serializing lowered code to the new tokenized format. Currently the only things supported are serializing and printing; execution is in draft form but never tested. --- src/JuliaInterpreter.jl | 2 + src/builtins_serializer.jl | 32 ++ src/serializer.jl | 913 +++++++++++++++++++++++++++++++++++++ src/utils.jl | 7 +- 4 files changed, 952 insertions(+), 2 deletions(-) create mode 100644 src/builtins_serializer.jl create mode 100644 src/serializer.jl diff --git a/src/JuliaInterpreter.jl b/src/JuliaInterpreter.jl index 7f83be63..065dac20 100644 --- a/src/JuliaInterpreter.jl +++ b/src/JuliaInterpreter.jl @@ -42,6 +42,8 @@ include("interpret.jl") include("builtins.jl") include("optimize.jl") include("commands.jl") +# include("serializer.jl") +# using .Serializer include("breakpoints.jl") function set_compiled_methods() diff --git a/src/builtins_serializer.jl b/src/builtins_serializer.jl new file mode 100644 index 00000000..a22a16b8 --- /dev/null +++ b/src/builtins_serializer.jl @@ -0,0 +1,32 @@ +# Builtins and intrinsics could be indexed by something smaller than Int32, +# but since the system uses Int32 let's just use FIndexT for indexing everything. 
+ +@enum BuiltinToken::FIndexT begin + tgetfield=FIndexT(1) + tegal +end + +builtin_tokens = Dict(Core.getfield => tgetfield, + Core.:(===) => tegal) +const builtin = Any[] + +intrinsic_token(f) = FIndexT(Core.bitcast(Int32, f)) +const intrinsic = Any[] + +function fill_ftables() + resize!(builtin, length(instances(BuiltinToken))) + for (f, tok) in builtin_tokens + builtin[Int(tok)] = f + end + for fname in names(Core.Intrinsics) + f = getfield(Core.Intrinsics, fname) + if f isa Core.IntrinsicFunction + tok = intrinsic_token(f)+1 + if tok > length(intrinsic) + resize!(intrinsic, tok) + end + intrinsic[tok] = f + end + end + nothing +end diff --git a/src/serializer.jl b/src/serializer.jl new file mode 100644 index 00000000..e78ce707 --- /dev/null +++ b/src/serializer.jl @@ -0,0 +1,913 @@ +module Serializer + +using Core: CodeInfo, GotoNode, MethodInstance +using Base.Meta: isexpr +using ..JuliaInterpreter: + BreakpointState, + FrameData, + get_call_framecode, + prepare_frame_caller, + finish_and_return!, + lookup_global_ref, + pc_from_spc, + compiled_calls, + find_used + +export serialize + +# Because we're not sure yet about the tradeoff (or lack thereof) +# between size and performance, we use aliases for a couple of key types +const TokenT = UInt8 # the eltype of the serialization format (UInt8 or UInt64) +const FIndexT = Int32 # what type we use to index function tables + +const max_fixed_args = 4 + +include("builtins_serializer.jl") + +### Serialization format + +# This uses a simple format, conceptually implementing a machine with the following +# properties: +# - a tape of instructions called `ser` +# - a single implicit "register" called `ans` +# - the ability to execute operations specific to a particular instruction token +# in `ser`. Executing these operations may consume future tokens. 
+# +# Operations are conceptually of 4 categories: +# - loads (which fill `ans` from a variety of sources), encoded by `load*` or literal tokens +# - stores (which put `ans` somewhere more permanent), encoded by `store*` tokens +# - calls (for which the return value is stored in `ans`) +# - control-flow +# +# The implementation of calls is allowed to store data to named local variables or lists, +# thus increasing the temporary storage beyond `ans`. +# +# The serialization of the lowered IR +# %4 = atan(@3, 2.4) +# might look something like this on the tape: +# call atan_idx methlist fixedargs 2 loadslot 3 float64 2.4 storessa 4 +# where +# - `call` is an instruction token signaling that next operation is a function call +# (in reality, there are multiple call-type tokens for intrinsics, builtins, +# generics via the interpreter, generics via ordinary compiled dispatch, +# `invokelatest`, `Core._apply`, etc.) +# - `atan_idx` is a token representing `atan`. It is encoded as an integer index +# into a table of functions (the table is maintained by the serializer) +# - `methlist` is a pointer to a local method table, a performance optimization +# used for avoiding full-blown dispatch (this also stores whether the method should +# be called via the interpreter or the compiled path) +# - `fixedargs 2` is an indication that this call should use the path optimized for a +# particular (small) number of arguments, which in this case is 2. +# An alternative is `listargs args`, which packs an arbitrary number of arguments +# into a literally-encoded `args::Vector{Any}` stored (via its pointer) in `ser`. +# - `loadslot 3` indicates that the next argument (first argument) is to be loaded +# from the slots at index 3 +# - `float64 2.4` indicates that the next argument (second argument) is a literal +# value of type `Float64` encoded in `ser` itself. 
+# - after the second argument, the function call is executed and the result is +# stored in `ans` +# - `storessa 4` indicates that `ans` should be placed in `%4`. + +@enum InterpretToken::TokenT begin + # Static data (you can think of these as `load*` tokens for literals in `ser`) + int + float64 + float32 + nothingtok + symbolptr # ptrs to Symbol... + stringptr # String + # Load tokens + loadssa + loadslot + loadparameter # static parameters + loadexception # the most recent exception + loadglobalref # GlobalRef + # Store tokens + storessa + storeslot + storeglobalref + # Call expressions + callintrinsic + callbuiltin + call # standard call + calllatest # call made by `invokelatest` + callinvoke # Core.invoke + callapply # vararg handling + callapplylatest # invokelatest with varargs + # Argument tokens + fixedargs # use the path optimized for a fixed (small) number of arguments + listargs # use the general path + # Control flow + goto + gotoifnot + enter + leave + popexception + returntok +end +# Some constants for handling blocks of tokens +const lastimmutable = stringptr +const lastload = loadglobalref +const laststore = storeglobalref +const firstcall = callintrinsic +const lastcall = callapplylatest + +# A more stripped-down alternative to reusing TypeMapEntry for our local method +mutable struct DispatchableMethod + next::Union{Nothing,DispatchableMethod} # linked-list representation + sig::Type # for speed of matching, this is a *concrete* signature. `sig <: method.sig` + meth::MethodInstance # method to be called when `sig` matches + compiled::Bool # true if the compiled version should be called +end + +mutable struct MethodListHead # linked from the serialized code. 
This allows us to swap out first item on list + next::Union{Nothing,DispatchableMethod} +end +MethodListHead() = MethodListHead(nothing) + +struct SerializedCode + src::CodeInfo + ser::Vector{TokenT} + serindex::Vector{Int} # lookup from pc -> spc + breakpoints::Vector{BreakpointState} + slotnamelists::Dict{Symbol,Vector{Int}} + generator::Bool # true if this is for the expression-generator of a @generated function + # The rest just prevent garbage collection of pointer-referenced items in `ser` + # We could alternatively encode them as indexes but that seems to involve a second + # indirection without changing their rooting, so it's not obviously advantageous. + serargs::Vector{Vector{Any}} # `args` stored in `ser` + sermeth::Vector{MethodListHead} # local method tables in `ser` + + function SerializedCode(src::CodeInfo; generator::Bool=false) + serialize!(new(src, + TokenT[], + Int[], + BreakpointState[], + Dict{Symbol,Vector{Int}}(), + generator, + Vector{Any}[], + MethodListHead[])) + end +end + +mutable struct SerializedFrame + framecode::SerializedCode + framedata::FrameData + pc::Int + spc::Int + assignment_counter::Int + caller::Union{SerializedFrame,Nothing} + callee::Union{SerializedFrame,Nothing} +end +SerializedFrame(framecode, framedata, pc=1, caller=nothing) = SerializedFrame(framecode, framedata, pc, framecode.serindex[pc], 1, caller, nothing) # all 7 fields: spc comes from the pc -> spc table, assignment_counter starts at 1 + +# Functions cannot be serialized via pointer, so we use a list/Dict pair +const functionlist = [] +const functionlookup = IdDict{Any,FIndexT}() # functionlist[functionlookup[f]] == f +function function_token(@nospecialize(f)) + id = get(functionlookup, f, zero(FIndexT)) + if iszero(id) + push!(functionlist, f) + id = functionlookup[f] = FIndexT(length(functionlist)) + end + return id +end + +# function get_f(tok, fid) +# fid = Int(fid) +# if callbuiltin_n <= tok <= callbuiltin +# return builtin[fid] +# elseif tok == callintrinsic +# return intrinsic[fid+1] +# elseif calllmt <= tok <= callapplylatest +# return functionlist[fid] +# 
else +# error("f token ", tok, " not supported") +# end +# end + +### Raw (untokenized) serialization and deserialization + +# This handles basic number types. General object types are handled by their pointers. + +""" + serialize_raw!(ser::Vector{TokenT}, val) + +Append a serialized version of `val` to `ser`. Returns `val`. +""" +function serialize_raw! end + +""" + val, newspc = deserialize_raw(T, ser::Vector{TokenT}, spc) + +Starting at index `spc` in `ser`, deserialize a value of type `T`. Return the value `val` +and the next undeserialized index `newspc`. +""" +function deserialize_raw end + +# The ::Vector{TokenT} is to prevent accidental misuse on a higher-level object +serialize_raw!(ser::Vector{TokenT}, val::TokenT) = (push!(ser, val); return val) +serialize_raw_at!(ser::Vector{TokenT}, spc, val::TokenT) = (ser[spc] = val; return spc+1) +deserialize_raw(::Type{TokenT}, ser::Vector{TokenT}, spc) = ser[spc], spc+1 + +if TokenT === UInt8 + split2(x::UInt16) = UInt8(x >> 8), UInt8(x & 0x00ff) + split2(x::UInt32) = UInt16(x >> 16), UInt16(x & 0x0000ffff) + split2(x::UInt64) = UInt32(x >> 32), UInt32(x & 0x00000000ffffffff) + join2(hi::UInt8, lo::UInt8) = UInt16(hi) << 8 + UInt16(lo) + join2(hi::UInt16, lo::UInt16) = UInt32(hi) << 16 + UInt32(lo) + join2(hi::UInt32, lo::UInt32) = UInt64(hi) << 32 + UInt64(lo) + narrow(::Type{UInt16}) = UInt8 + narrow(::Type{UInt32}) = UInt16 + narrow(::Type{UInt64}) = UInt32 + + function serialize_raw!(ser::Vector{TokenT}, val::Union{UInt16,UInt32,UInt64}) + hi, lo = split2(val) + serialize_raw!(ser, hi) + serialize_raw!(ser, lo) + return val + end + function serialize_raw_at!(ser::Vector{TokenT}, spc, val::Union{UInt16,UInt32,UInt64}) + hi, lo = split2(val) + spc = serialize_raw_at!(ser, spc, hi) + return serialize_raw_at!(ser, spc, lo) + end + + function deserialize_raw(::Type{T}, ser::Vector{TokenT}, spc) where T<:Union{UInt16,UInt32,UInt64} + Thalf = narrow(T) + hi, spc = deserialize_raw(Thalf, ser, spc) + lo, spc = 
deserialize_raw(Thalf, ser, spc) + return join2(hi, lo), spc + end +elseif TokenT === UInt64 + serialize_raw!(ser::Vector{TokenT}, val::Union{UInt8,UInt16,UInt32}) = serialize_raw!(ser, UInt64(val)) + serialize_raw_at!(ser::Vector{TokenT}, spc, val::Union{UInt8,UInt16,UInt32}) = serialize_raw_at!(ser, spc, UInt64(val)) + function deserialize_raw(::Type{T}, ser::Vector{TokenT}, spc) where T<:Union{UInt8,UInt16,UInt32} + val, spc = deserialize_raw(UInt, ser, spc) + return T(val), spc + end +end + +serialize_raw!(ser::Vector{TokenT}, val::Int32) = (serialize_raw!(ser, reinterpret(UInt32, val)); return val) +serialize_raw!(ser::Vector{TokenT}, val::Int64) = (serialize_raw!(ser, reinterpret(UInt64, val)); return val) +serialize_raw!(ser::Vector{TokenT}, val::Float32) = (serialize_raw!(ser, reinterpret(UInt32, val)); return val) +serialize_raw!(ser::Vector{TokenT}, val::Float64) = (serialize_raw!(ser, reinterpret(UInt64, val)); return val) + +serialize_raw_at!(ser::Vector{TokenT}, spc, val::Integer) = serialize_raw_at!(ser, spc, Unsigned(val)) + +serialize_rawptr!(ser::Vector{TokenT}, val) = (serialize_raw!(ser, reinterpret(UInt, pointer_from_objref(val))); return val) + +deserialize_raw(::Type{Int32}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt32, ser, spc); return reinterpret(Int32, val), spc) +deserialize_raw(::Type{Int64}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt64, ser, spc); return reinterpret(Int64, val), spc) +deserialize_raw(::Type{Float32}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt32, ser, spc); return reinterpret(Float32, val), spc) +deserialize_raw(::Type{Float64}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt64, ser, spc); return reinterpret(Float64, val), spc) + +function deserialize_rawptr(ser::Vector{TokenT}, spc) + uintp, spc = deserialize_raw(UInt, ser, spc) + return unsafe_pointer_to_objref(reinterpret(Ptr, uintp)), spc +end +function deserialize_rawptr(::Type{T}, 
ser::Vector{TokenT}, spc) where T + uintp, spc = deserialize_raw(UInt, ser, spc) + return unsafe_pointer_to_objref(reinterpret(Ptr{T}, uintp))::T, spc +end + +### Serialization of code objects + +# This uses tokens to convey type & other forms of meaning + +function serialize!(code::SerializedCode, tok::InterpretToken) + serialize_raw!(code.ser, eltype(code.ser)(tok)) + return code +end + +function serialize!(code::SerializedCode, tok::InterpretToken, val) + serialize!(code, tok) + serialize_raw!(code.ser, val) + return code +end +function serialize_ptr!(code::SerializedCode, tok::InterpretToken, val) + serialize!(code, tok) + serialize_rawptr!(code.ser, val) + return code +end + +function serialize_immutable!(code::SerializedCode, val) + if val isa Int + serialize!(code, int, val) + elseif val isa Float32 + serialize!(code, float32, val) + elseif val isa Float64 + serialize!(code, float64, val) + elseif val === nothing + serialize!(code, nothingtok) + elseif val isa Symbol + serialize_ptr!(code, symbolptr, val) + elseif val isa String + serialize_ptr!(code, stringptr, val) + else + error("static val type ", typeof(val), " not recognized") + end + return code +end + +function serialize_value_load!(code::SerializedCode, val) + if val isa GlobalRef + val = lookup_global_ref(val) + if isa(val, QuoteNode) + val = val.value + end + end + if val isa Core.SSAValue + serialize!(code, loadssa, val.id) + elseif val isa Core.SlotNumber + serialize!(code, loadslot, val.id) + elseif isexpr(val, :static_parameter) + serialize!(code, loadparameter, val.args[1]) + elseif isexpr(val, :the_exception) + serialize!(code, loadexception) + elseif isa(val, GlobalRef) + serialize_ptr!(code, loadglobalref, val) + else + serialize_immutable!(code, val) + end + return code +end + +function serialize_call!(code::SerializedCode, stmt::Expr) + f = stmt.args[1] + if isa(f, GlobalRef) + f = lookup_global_ref(f) + if isa(f, QuoteNode) + f = f.value + end + end + if f === Base.invoke + error("not 
yet implemented") + elseif f === Base.invokelatest + error("not yet implemented") + elseif f === Core._apply + error("not yet implemented") + elseif f === Core._apply_latest + error("not yet implemented") + elseif f === Core._apply_pure + error("not yet implemented") + else + nargs = length(stmt.args) - 1 + tok, ftok = + if f isa Core.IntrinsicFunction + callintrinsic, intrinsic_token(f) + elseif f isa Core.Builtin + callbuiltin, builtin_tokens[f] + else + call, function_token(f) + end + serialize!(code, tok) + serialize_raw!(code.ser, FIndexT(ftok)) + if tok ∉ (callintrinsic, callbuiltin) + mlhead = MethodListHead() + serialize_rawptr!(code.ser, mlhead) + push!(code.sermeth, mlhead) + end + if tok == callbuiltin && nargs <= max_fixed_args + serialize!(code, fixedargs, UInt8(nargs)) + else + args = Vector{Any}(undef, nargs) + serialize_ptr!(code, listargs, args) + push!(code.serargs, args) # protect args from GC (since `code.ser` references by pointer) + end + for i = 1:nargs + arg = stmt.args[i+1] + if arg isa GlobalRef + arg = lookup_global_ref(arg) + if arg isa QuoteNode + serialize_value_load!(code, arg.value) # resolved constant: emit its value as an ordinary load on `code` + else + serialize_ptr!(code, loadglobalref, arg::GlobalRef) + end + else + serialize_value_load!(code, arg) + end + end + end + return code +end + +# main ser +function serialize!(code::SerializedCode; used=find_used(code.src)) + fixserindex = Tuple{Int,Int}[] # gotos that need a serindex (forward-jumps, not known yet) + for (i, stmt) in enumerate(code.src.code) + push!(code.serindex, length(code.ser)+1) + storeto = i ∈ used ? 
Core.SSAValue(i) : nothing + if isa(stmt, Union{Int,Float64,Float32,Symbol,String,Core.SSAValue,Core.SlotNumber,GlobalRef}) + serialize_value_load!(code, stmt) + elseif isa(stmt, Expr) + head = stmt.head + if head == :static_parameter + serialize!(code, loadparameter, stmt.args[1]) + elseif head == :the_exception + serialize!(code, loadexception) + elseif head == :(=) + @assert storeto === nothing + storeto, rhs = stmt.args[1], stmt.args[2] + if isexpr(rhs, :call) + serialize_call!(code, rhs) + else + serialize_value_load!(code, rhs) + end + elseif head == :call + serialize_call!(code, stmt) + elseif head == :return + serialize!(code, returntok) + serialize_value_load!(code, stmt.args[1]) + elseif head == :gotoifnot + serialize!(code, gotoifnot) + serialize_value_load!(code, stmt.args[1]) + ln = stmt.args[2] + if ln <= length(code.serindex) + serialize_raw!(code.ser, code.serindex[ln]) + else + push!(fixserindex, (length(code.ser)+1, ln)) + serialize_raw!(code.ser, zero(eltype(code.serindex))) + end + else + error("head ", head, " not yet handled") + end + elseif stmt isa GotoNode + serialize!(code, goto) + ln = stmt.label + if ln <= length(code.serindex) + serialize_raw!(code.ser, code.serindex[ln]) + else + push!(fixserindex, (length(code.ser)+1, ln)) + serialize_raw!(code.ser, zero(eltype(code.serindex))) + end + else + error("unhandled statement ", stmt) + end + if storeto !== nothing + if isa(storeto, Core.SSAValue) + serialize!(code, storessa, storeto.id) + elseif isa(storeto, Core.SlotNumber) + serialize!(code, storeslot, storeto.id) + elseif isa(storeto, GlobalRef) + serialize_ptr!(code, storeglobalref, storeto) + else + error("unhandled storeto ", storeto) + end + end + end + # Now fix up the forward-jumping gotos + for (idx, ln) in fixserindex + serialize_raw_at!(code.ser, idx, code.serindex[ln]) + end + return code +end + +### Deserialization and executing code objects + +# A full deserializer would convert back to lowered code. 
That's not a major priority +# right now, so we focus on other things and avoid calling it deserialization +# except where it's truly applicable. + +function deserialize_token(code::SerializedCode, spc) + itok, spc = deserialize_raw(TokenT, code.ser, spc) + tok = InterpretToken(itok) + return tok, spc +end +# function deserialize_ftoken(code::SerializedCode, spc) +# return deserialize_raw(FIndexT, code.ser, spc) +# end + +function deserialize_immutable(code::SerializedCode, tok, spc) + if tok == int + val, spc = deserialize_raw(Int, code.ser, spc) + elseif tok == float64 + val, spc = deserialize_raw(Float64, code.ser, spc) + elseif tok == float32 + val, spc = deserialize_raw(Float32, code.ser, spc) + elseif tok == nothingtok + val = nothing + elseif tok == symbolptr + val, spc = deserialize_rawptr(Symbol, code.ser, spc) + elseif tok == stringptr + val, spc = deserialize_rawptr(String, code.ser, spc) + else + error("immutable token ", tok, " not recognized") # `val` is unassigned on this branch, so report the token + end + return val, spc +end + +function execute_load(frame, tok::InterpretToken, spc) + code, data = frame.framecode, frame.framedata + if tok == loadssa + val, spc = deserialize_raw(Int, code.ser, spc) + val = data.ssavalues[val] + elseif tok == loadslot + val, spc = deserialize_raw(Int, code.ser, spc) + val = something(data.locals[val]) + elseif tok == loadparameter + val, spc = deserialize_raw(Int, code.ser, spc) + val = data.sparams[val] + elseif tok == loadexception + val = data.last_exception[] + elseif tok == loadglobalref + gr, spc = deserialize_rawptr(GlobalRef, code.ser, spc) + val = getfield(gr.mod, gr.name) + else + return deserialize_immutable(code, tok, spc) + end + return val, spc +end + +function execute_load(frame, spc) + code, data = frame.framecode, frame.framedata + tok, spc = deserialize_token(code, spc) + return execute_load(frame, tok, spc) +end + +function execute_call(frame, tok::InterpretToken, spc) + code, data = frame.framecode, frame.framedata + ftok, spc = 
deserialize_raw(FIndexT, code.ser, spc) + if tok == callintrinsic || tok == callbuiltin + # no methlist for intrinsics or builtins + argtok, spc = deserialize_token(code, spc) + if argtok == fixedargs + @assert tok == callbuiltin + n, spc = deserialize_raw(UInt8, code.ser, spc) + # The advantage in fixedargs is not so much the loading but in + # avoiding runtime dispatch in the callee. But we can also save the memory + # of an array, so let's take advantage of that. + if n == 0x00 + val = call_builtin(ftok) # fall through so the shared `return val, spc` applies + elseif n == 0x01 + a, spc = execute_load(frame, spc) + val = call_builtin(ftok, a) + elseif n == 0x02 + a, spc = execute_load(frame, spc) + b, spc = execute_load(frame, spc) + val = call_builtin(ftok, a, b) + elseif n == 0x03 + a, spc = execute_load(frame, spc) + b, spc = execute_load(frame, spc) + c, spc = execute_load(frame, spc) + val = call_builtin(ftok, a, b, c) + elseif n == 0x04 + a, spc = execute_load(frame, spc) + b, spc = execute_load(frame, spc) + c, spc = execute_load(frame, spc) + d, spc = execute_load(frame, spc) + val = call_builtin(ftok, a, b, c, d) + else + error("not implemented for ", n, " arguments") + end + else + # listargs case + args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc) + for i = 1:length(args) + args[i], spc = execute_load(frame, spc) + end + if tok == callintrinsic + f = intrinsic[ftok+1] # fill_ftables stores each intrinsic at token+1 + val = ccall(:jl_f_intrinsic_call, Any, (Any, Ptr{Any}, UInt32), f, args, length(args)) + else + val = call_builtin_listargs(ftok, args) + end + end + else + mlhead, spc = deserialize_rawptr(MethodListHead, code.ser, spc) + argtok, spc = deserialize_token(code, spc) + @assert argtok == listargs + args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc) + for i = 1:length(args) + args[i], spc = execute_load(frame, spc) + end + if tok == call + # Hack: currently non-recursive + f = functionlist[Int(ftok)] + val = f(args...) 
+ else + error("call type ", tok, " not yet implemented") + end + end + return val, spc +end + +function step_ser!(@nospecialize(recurse), frame, spc::Int, istoplevel::Bool) + code, data = frame.framecode, frame.framedata + ans = Unassigned() + tok, spc = deserialize_token(code, spc) + if tok <= lastload + ans, spc = execute_load(frame, tok, spc) + elseif firstcall <= tok <= lastcall + ans, spc = execute_call(frame, tok, spc) + elseif tok == goto + newspc, _ = deserialize_raw(eltype(code.serindex), code.ser, spc) + spc = Int(newspc) + elseif tok == gotoifnot + cond, spc = execute_load(frame, spc) + newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc) + if !cond + spc = Int(newspc) + end + elseif tok == enter + error("unhandled") + elseif tok == leave + error("unhandled") + elseif tok == popexception + error("unhandled") + elseif tok == returntok + return nothing + else + error("unhandled token ", tok, " at ", spc, " (pc = ", pc_from_spc(frame, spc), ')') + end + if !isa(ans, Unassigned) && spc <= length(code.ser) + # peek at the next token without advancing + nexttok = InterpretToken(code.ser[spc]) + if nexttok == storessa + id, spc = deserialize_raw(Int, code.ser, spc) + data.ssavalues[id] = ans + elseif nexttok == storeslot + id, spc = deserialize_raw(Int, code.ser, spc) + data.locals[id] = Some{Any}(ans) + elseif nexttok == storeglobalref + gr, spc = deserialize_rawptr(GlobalRef, code.ser, spc) + Core.eval(gr.mod, :($(gr.name) = $(QuoteNode(ans)))) + end + end + return spc +end + +### Printing serialized code +# Useful for debugging + +print_token(io, tok::InterpretToken) = print(io, tok, ' ') + +function print_load_store(io, code::SerializedCode, tok::InterpretToken, spc) + print_token(io, tok) + if tok <= lastimmutable + val, spc = deserialize_immutable(code, tok, spc) + elseif tok == loadssa || tok == storessa + val, spc = deserialize_raw(Int, code.ser, spc) + elseif tok == loadslot || tok == storeslot + val, spc = deserialize_raw(Int, 
code.ser, spc) + elseif tok == loadparameter + val, spc = deserialize_raw(Int, code.ser, spc) + elseif tok == loadexception + val = nothing + elseif tok == loadglobalref || tok == storeglobalref + val, spc = deserialize_rawptr(GlobalRef, code.ser, spc) + else + error(tok, " is not a load") + end + if val !== nothing + print(io, val, ' ') + end + return spc +end + +function print_load(io, code::SerializedCode, spc) + tok, spc = deserialize_token(code, spc) + return print_load_store(io, code, tok, spc) +end + +function print_call(io, code::SerializedCode, tok::InterpretToken, spc) + print_token(io, tok) + ftok, spc = deserialize_raw(FIndexT, code.ser, spc) + if tok == callintrinsic + f = intrinsic[ftok] + elseif tok == callbuiltin + f = builtin[ftok] + else + f = functionlist[ftok] + mlhead, spc = deserialize_rawptr(MethodListHead, code.ser, spc) + end + show(io, f) + print(io, ' ') + if tok ∉ (callintrinsic, callbuiltin) + print(io, "methlist ") + end + argtok, spc = deserialize_token(code, spc) + print_token(io, argtok) + if argtok == fixedargs + n, spc = deserialize_raw(UInt8, code.ser, spc) + n = Int(n) + print(io, n, ' ') + else + args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc) + n = length(args) + print(io, " ") + end + for i = 1:n + spc = print_load(io, code, spc) + end + println() + return spc +end + +function print_serialization(io, code::SerializedCode, spc) + tok, spc = deserialize_token(code, spc) + if tok <= laststore + spc = print_load_store(io, code, tok, spc) + println() + elseif tok <= lastcall + spc = print_call(io, code, tok, spc) + else + print_token(io, tok) + if tok == goto + newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc) + println(io, newspc) + elseif tok == gotoifnot + spc = print_load(io, code, spc) + newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc) + println(io, newspc) + elseif tok == enter + error("unhandled") + elseif tok == leave + error("unhandled") + elseif tok == popexception + 
error("unhandled") + elseif tok == returntok + spc = print_load(io, code, spc) + println() + else + error("unhandled token ", tok, " at ", spc) + end + end + return spc +end + +function print_serialization(io, code::SerializedCode) + nd = ndigits(length(code.ser)) + spc = 1 + while spc <= length(code.ser) + print(io, lpad(spc, nd), ": ") + spc = print_serialization(io, code, spc) + end + return nothing +end + +# print(io, tok, ' ') +# if tok == assignprev +# lhsid, spc = ser[spc], spc+1 +# if f isa Core.IntrinsicFunction +# println(io, lhsid) +# elseif tok ∈ (storessa, assignslot) +# lhsid, spc = ser[spc], spc+1 +# print(io, tok == storessa ? " %" : " @", Int(lhsid), ' ') +# spc = print_serialization(io, mod, ser, spc) +# elseif tok == assignglobalref +# lhs, spc = deserialize_globalref(ser, spc) +# rhsid, spc = ser[spc], spc+1 +# print(io, lhs, ' ') +# spc = print_serialization(io, mod, ser, spc) +# elseif tok == assignmodsym +# lhs, spc = deserialize_symbol(ser, spc) +# rhsid, spc = ser[spc], spc+1 +# println(io, mod, '.', lhs, ' ') +# spc = print_serialization(io, mod, ser, spc) +# elseif tok == callbuiltin_n +# f, spc = get_f(tok, ser[spc]), spc+1 +# n, spc = Int(ser[spc]), spc+1 +# spc = print_call(io, f, n, mod, ser, spc) +# elseif callbuiltin <= tok <= callapplylatest +# f, spc = get_f(tok, ser[spc]), spc+1 +# args, spc = vecany(ser[spc]), spc+1 +# spc = print_call(io, f, length(args), mod, ser, spc) +# elseif tok == goto +# idx, spc = Int(ser[spc]), spc+1 +# println(io, "goto ", idx) +# elseif tok == gotoifnot +# condtok, spc = ser[spc], spc+1 +# if condtok ∈ (loadssa, loadslot) +# id, spc = Int(ser[spc]), spc+1 +# else +# error("unsupported conditional token ", tok) +# end +# idx, spc = Int(ser[spc]), spc+1 +# println(io, "gotoifnot ", condtok == loadssa ? 
" %" : " @", id, ' ', idx) +# elseif tok == enter +# error("unhandled") +# elseif tok == leave +# error("unhandled") +# elseif tok == popexception +# error("unhandled") +# elseif tok == returntok +# datatok, spc = ser[spc], spc+1 +# if datatok ∈ (loadssa, loadslot) +# id, spc = Int(ser[spc]), spc+1 +# else +# error("unsupported conditional token ", tok) +# end +# idx, spc = Int(ser[spc]), spc+1 +# println(io, "return ", datatok == loadssa ? " %" : " @", id, ' ', idx) +# else +# error("unhandled token ", tok, " at ", spc) +# end +# return spc +# end +# +# function print_serialization(io, mod, ser) +# n = length(ser) +# nd = ndigits(n) +# spc = 1 +# while spc <= n +# print(io, lpad(string(spc), nd), ": ") +# spc = print_serialization(io, mod, ser, spc) +# end +# end +# +# +# ### interpreting serialized code +# +# function step_ser!(@nospecialize(recurse), frame, spc::Int, istoplevel::Bool) +# code, data = frame.framecode, frame.framedata +# ser = code.ser +# tok, spc = InterpretToken(ser[spc]), spc+1 +# if tok < call_builtin_n +# ret, spc = deserialize_value(ser, spc, data) +# spc = assign_value!(data, code, spc, ret) +# if tok == callbuiltin_n +# f, spc = ser[spc], spc+1 +# n, spc = Int(ser[spc]), spc+1 +# ret, spc = call_builtin_n(ftok, n, ser, spc, data) +# spc = assign_value!(data, code, spc, ret) +# elseif tok == callbuiltin +# ftok, spc = ser[spc], spc+1 +# args, spc = vecany(ser[spc]), spc+1 +# for i = 1:length(args) +# args[i], spc = deserialize_value(ser, spc, data) +# end +# ret = call_builtin(fflag, args) +# spc = assign_value!(data, spc, ret) +# elseif tok == callintrinsic +# f, spc = intrinsic[ser[spc]], spc+1 +# args, spc = vecany(ser[spc]), spc+1 +# for i = 1:length(args) +# args[i], spc = deserialize_value(ser, spc, data) +# end +# ret = ccall(:jl_f_intrinsic_call, Any, (Any, Ptr{Any}, UInt32), f, args, length(args)) +# spc = assign_value!(data, spc, ret) +# elseif tok == calllmt +# f, spc = functionlist[Int(ser[spc])], spc+1 +# fargs, spc = 
vecany(ser[spc]), spc+1 +# fargs[1] = f +# for i = 1:length(args) +# fargs[i+1], spc = deserialize_value(ser, spc, data) +# end +# calleecode, lenv = get_call_framecode(fargs, calleecode, spc) +# if lenv === nothing +# if isa(calleecode, Compiled) +# return Base.invokelatest(fargs...) +# end +# return framecode # this was a Builtin +# end +# # TODO: "inline" trivial frames +# newframe = prepare_frame_caller(frame, calleecode, fargs, lenv) +# if recurse === finish_and_return! +# # Optimize this case to avoid dynamic dispatch +# ret = finish_and_return!(finish_and_return!, newframe, false) +# else +# ret = recurse(recurse, newframe, false) +# end +# spc = assign_value!(data, spc, ret) +# elseif tok == calldispatch +# f, spc = obj(ser[spc]), spc+1 +# args, spc = vecany(ser[spc]), spc+1 +# for i = 1:length(args) +# args[i], spc = deserialize_value(ser, spc, data) +# end +# ret = f(args...) +# spc = assign_value!(data, spc, ret) +# elseif tok == calllatest +# error("unhandled") +# elseif tok == callinvoke +# error("unhandled") +# elseif tok == callapply +# error("unhandled") +# elseif tok == callapplylatest +# error("unhandled") +# elseif tok == goto +# spc = Int(ser[spc]) +# elseif tok == gotoifnot +# cond, spc = deserialize_value(ser, spc, data) +# if !cond +# spc = Int(ser[spc]) +# end +# elseif tok == enter +# error("unhandled") +# elseif tok == leave +# error("unhandled") +# elseif tok == popexception +# error("unhandled") +# elseif tok == returntok +# return nothing +# else +# error("unhandled token ", tok, " at ", spc, " (pc = ", pc_from_spc(frame, spc), ')') +# end +# end + +function __init__() + fill_ftables() +end + +end diff --git a/src/utils.jl b/src/utils.jl index dbd65b01..3140859c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -72,7 +72,7 @@ function find_used(code::CodeInfo) end function scan_ssa_use!(used::BitSet, @nospecialize(stmt)) - if isa(stmt, SSAValue) + if isa(stmt, SSAValue) || isa(stmt, Core.SSAValue) push!(used, stmt.id) end iter = 
Core.Compiler.userefs(stmt) @@ -80,7 +80,7 @@ function scan_ssa_use!(used::BitSet, @nospecialize(stmt)) while iterval !== nothing useref, state = iterval val = Core.Compiler.getindex(useref) - if isa(val, SSAValue) + if isa(val, SSAValue) || isa(val, Core.SSAValue) push!(used, val.id) end iterval = Core.Compiler.iterate(iter, state) @@ -100,6 +100,9 @@ function hasarg(predicate, args) return false end +pc_from_spc(framecode::FrameCode, spc) = searchsortedfirst(framecode.serindex, spc) - 1 +pc_from_spc(frame::Frame, spc) = pc_from_spc(frame.framecode, spc) + ## Predicates is_goto_node(@nospecialize(node)) = isa(node, GotoNode) || isexpr(node, :gotoifnot) From 65588671474a2f4f626a8c92c404b5f88c3ff6f1 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 29 Jul 2019 10:42:44 -0500 Subject: [PATCH 3/3] Get the basics of stepping working --- src/serializer.jl | 39 ++++++++++++++++++++++++++------------- test/serialization.jl | 31 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 13 deletions(-) create mode 100644 test/serialization.jl diff --git a/src/serializer.jl b/src/serializer.jl index e78ce707..8bed68c0 100644 --- a/src/serializer.jl +++ b/src/serializer.jl @@ -1,17 +1,15 @@ module Serializer -using Core: CodeInfo, GotoNode, MethodInstance +using Core: CodeInfo, GotoNode, MethodInstance, SimpleVector using Base.Meta: isexpr using ..JuliaInterpreter: BreakpointState, FrameData, - get_call_framecode, - prepare_frame_caller, - finish_and_return!, + Unassigned, + find_used, lookup_global_ref, pc_from_spc, - compiled_calls, - find_used + prepare_framedata export serialize @@ -126,6 +124,7 @@ end MethodListHead() = MethodListHead(nothing) struct SerializedCode + scope::Union{Method,Module} src::CodeInfo ser::Vector{TokenT} serindex::Vector{Int} # lookup from pc -> spc @@ -138,8 +137,9 @@ struct SerializedCode serargs::Vector{Vector{Any}} # `args` stores in `ser` sermeth::Vector{MethodListHead} # local method tables in `ser` - function 
SerializedCode(src::CodeInfo; generator::Bool=false) - serialize!(new(src, + function SerializedCode(scope, src::CodeInfo; generator::Bool=false) + serialize!(new(scope, + src, TokenT[], Int[], BreakpointState[], @@ -159,7 +159,20 @@ mutable struct SerializedFrame caller::Union{SerializedFrame,Nothing} callee::Union{SerializedFrame,Nothing} end -SerializedFrame(framecode, framedata, pc=1, caller=nothing) = SerializedFrame(framecode, framedata, pc, 1, caller, nothing) +SerializedFrame(framecode, framedata, pc=1, caller=nothing) = + SerializedFrame(framecode, framedata, pc, framecode.serindex[pc], 1, caller, nothing) + +function prepare_frame(framecode::SerializedCode, args::Vector{Any}, lenv::SimpleVector, caller_will_catch_err::Bool=false) + framedata = prepare_framedata(framecode, args, caller_will_catch_err) + resize!(framedata.sparams, length(lenv)) + # Add static parameters to environment + for i = 1:length(lenv) + T = lenv[i] + isa(T, TypeVar) && continue # only fill concrete types + framedata.sparams[i] = T + end + return SerializedFrame(framecode, framedata) +end # Functions cannot be serialized via pointer, so we use a list/Dict pair const functionlist = [] @@ -622,15 +635,15 @@ function step_ser!(@nospecialize(recurse), frame, spc::Int, istoplevel::Bool) end if !isa(ans, Unassigned) && spc <= length(code.ser) # peek at the next token without advancing - nexttok = InterpretToken(code.ser[spc]) + nexttok, spctmp = deserialize_token(code, spc) if nexttok == storessa - id, spc = deserialize_raw(Int, code.ser, spc) + id, spc = deserialize_raw(Int, code.ser, spctmp) data.ssavalues[id] = ans elseif nexttok == storeslot - id, spc = deserialize_raw(Int, code.ser, spc) + id, spc = deserialize_raw(Int, code.ser, spctmp) data.locals[id] = Some{Any}(ans) elseif nexttok == storeglobalref - gr, spc = deserialize_rawptr(GlobalRef, code.ser, spc) + gr, spc = deserialize_rawptr(GlobalRef, code.ser, spctmp) Core.eval(gr.mod, :($(gr.name) = $(QuoteNode(ans)))) end end diff 
--git a/test/serialization.jl b/test/serialization.jl new file mode 100644 index 00000000..31b7ba14 --- /dev/null +++ b/test/serialization.jl @@ -0,0 +1,31 @@ +using JuliaInterpreter + +if !isdefined(Main, :Serializer) + include("../src/serializer.jl") +end + +function summer(A::AbstractArray{T}) where T + s = zero(T) + for a in A + s += a + end + return s +end + +a = [1,2,3] +method = @which summer(a) +src = @code_lowered summer(a) +code = Serializer.SerializedCode(method, src) +# Build a frame (currently this must be done manually) +argtypes = Tuple{typeof(summer), typeof(a)} +(ti, lenv) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), + argtypes, method.sig) +frame = Serializer.prepare_frame(code, [summer, a], lenv) + +Serializer.print_serialization(stdout, code) + +spc = frame.spc +spc = Serializer.step_ser!(nothing, frame, spc, false) +spc = Serializer.step_ser!(nothing, frame, spc, false) +spc = Serializer.step_ser!(nothing, frame, spc, false) +# builtins & intrinsics aren't handled yet, so going any further gives an error