diff --git a/src/JuliaInterpreter.jl b/src/JuliaInterpreter.jl
index 7f83be63..065dac20 100644
--- a/src/JuliaInterpreter.jl
+++ b/src/JuliaInterpreter.jl
@@ -42,6 +42,8 @@ include("interpret.jl")
 include("builtins.jl")
 include("optimize.jl")
 include("commands.jl")
+# include("serializer.jl")
+# using .Serializer
 include("breakpoints.jl")
 
 function set_compiled_methods()
diff --git a/src/breakpoints.jl b/src/breakpoints.jl
index 91cc8e2c..6e2d4ad7 100644
--- a/src/breakpoints.jl
+++ b/src/breakpoints.jl
@@ -132,8 +132,29 @@ function prepare_slotfunction(framecode::FrameCode, body::Union{Symbol,Expr})
     for i = 1:length(slotnames)
         slotname = framecode.src.slotnames[i]
         qslotname = QuoteNode(slotname)
-        getexpr = :(something($dataname.locals[$dataname.last_reference[$qslotname]]))
-        push!(assignments, Expr(:(=), slotname, :(haskey($dataname.last_reference, $qslotname) ? $getexpr : $default)))
+        list = framecode.slotnamelists[slotname]
+        if length(list) == 1
+            maxexpr = :($dataname.last_reference[$(list[1])] > 0 ? $(list[1]) : 0)
+        else
+            maxcounter, maxidx = gensym("maxcounter"), gensym("maxidx")
+            # The loop temporaries are gensym'd as well: the generated code shares a scope with the
+            # user-named slot variables, so a plain `counter` here would overwrite a slot of that name.
+            counter, l = gensym("counter"), gensym("l")
+            maxexpr = quote
+                begin
+                    $maxcounter, $maxidx = 0, 0
+                    for $l in $list
+                        $counter = $dataname.last_reference[$l]
+                        if $counter > $maxcounter
+                            $maxcounter, $maxidx = $counter, $l
+                        end
+                    end
+                    $maxidx
+                end
+            end
+        end
+        maxexsym = gensym("slotid")
+        push!(assignments, :($maxexsym = $maxexpr))
+        push!(assignments, :($slotname = $maxexsym > 0 ? something($dataname.locals[$maxexsym]) : $default))
     end
     if ismeth
         syms = sparam_syms(framecode.scope)
diff --git a/src/builtins_serializer.jl b/src/builtins_serializer.jl
new file mode 100644
index 00000000..a22a16b8
--- /dev/null
+++ b/src/builtins_serializer.jl
@@ -0,0 +1,32 @@
+# Builtins and intrinsics could be indexed by something smaller than Int32,
+# but since the system uses Int32 let's just use FIndexT for indexing everything.
+
+@enum BuiltinToken::FIndexT begin
+    tgetfield=FIndexT(1)
+    tegal
+end
+
+const builtin_tokens = Dict(Core.getfield => tgetfield,  # const: read by fill_ftables and serialize_call!
+                            Core.:(===) => tegal)
+const builtin = Any[]    # builtin[Int(tok)] is the builtin for BuiltinToken `tok`; filled by fill_ftables
+
+intrinsic_token(f) = FIndexT(Core.bitcast(Int32, f))
+const intrinsic = Any[]  # intrinsic[intrinsic_token(f)+1] === f (note the +1); filled by fill_ftables
+
+function fill_ftables()  # populate the `builtin` and `intrinsic` lookup tables
+    resize!(builtin, length(instances(BuiltinToken)))
+    for (f, tok) in builtin_tokens
+        builtin[Int(tok)] = f
+    end
+    for fname in names(Core.Intrinsics)
+        f = getfield(Core.Intrinsics, fname)
+        if f isa Core.IntrinsicFunction
+            tok = intrinsic_token(f)+1  # shift by one to get a valid 1-based index
+            if tok > length(intrinsic)
+                resize!(intrinsic, tok)
+            end
+            intrinsic[tok] = f
+        end
+    end
+    nothing
+end
diff --git a/src/construct.jl b/src/construct.jl
index 9b707866..bfde544e 100644
--- a/src/construct.jl
+++ b/src/construct.jl
@@ -258,56 +258,49 @@ function prepare_call(@nospecialize(f), allargs; enter_generated = false)
 end
 
 function prepare_framedata(framecode, argvals::Vector{Any}, caller_will_catch_err::Bool=false)
+    src = framecode.src
+    slotnames = src.slotnames::SlotNamesType
+    ssavt = src.ssavaluetypes
+    ng, ns = isa(ssavt, Int) ? ssavt : length(ssavt::Vector{Any}), length(src.slotflags)
+    if length(junk) > 0
+        olddata = pop!(junk)
+        locals, ssavalues, sparams = olddata.locals, olddata.ssavalues, olddata.sparams
+        exception_frames, last_reference = olddata.exception_frames, olddata.last_reference
+        last_exception = olddata.last_exception
+        callargs = olddata.callargs
+        resize!(locals, ns)
+        fill!(locals, nothing)
+        resize!(ssavalues, ng)
+        # for check_isdefined to work properly, we need sparams to start out unassigned
+        resize!(sparams, 0)
+        empty!(exception_frames)
+        resize!(last_reference, ns)
+        last_exception[] = nothing
+    else
+        locals = Vector{Union{Nothing,Some{Any}}}(nothing, ns)
+        ssavalues = Vector{Any}(undef, ng)
+        sparams = Vector{Any}(undef, 0)
+        exception_frames = Int[]
+        last_reference = Vector{Int}(undef, ns)
+        callargs = Any[]
+        last_exception = Ref{Any}(nothing)
+    end
+    fill!(last_reference, 0)
     if isa(framecode.scope, Method)
-        meth, src = framecode.scope::Method, framecode.src
-        slotnames = src.slotnames::SlotNamesType
-        ssavt = src.ssavaluetypes
-        ng = isa(ssavt, Int) ? ssavt : length(ssavt::Vector{Any})
+        meth = framecode.scope::Method
         nargs, meth_nargs = length(argvals), Int(meth.nargs)
-        if length(junk) > 0
-            olddata = pop!(junk)
-            locals, ssavalues, sparams = olddata.locals, olddata.ssavalues, olddata.sparams
-            exception_frames, last_reference = olddata.exception_frames, olddata.last_reference
-            last_exception = olddata.last_exception
-            callargs = olddata.callargs
-            resize!(locals, length(src.slotflags))
-            resize!(ssavalues, ng)
-            # for check_isdefined to work properly, we need sparams to start out unassigned
-            resize!(sparams, 0)
-            empty!(exception_frames)
-            empty!(last_reference)
-            last_exception[] = nothing
-        else
-            locals = Vector{Union{Nothing,Some{Any}}}(undef, length(src.slotflags))
-            ssavalues = Vector{Any}(undef, ng)
-            sparams = Vector{Any}(undef, 0)
-            exception_frames = Int[]
-            last_reference = Dict{Symbol,Int}()
-            callargs = Any[]
-            last_exception = Ref{Any}(nothing)
-        end
-        for i = 1:meth_nargs
-            last_reference[slotnames[i]::Symbol] = i
-            if meth.isva && i == meth_nargs
-                locals[i] = nargs < i ? Some{Any}(()) : (let i=i; Some{Any}(ntuple(k->argvals[i+k-1], nargs-i+1)); end)
-                break
+        islastva = meth.isva && nargs >= meth_nargs
+        for i = 1:meth_nargs-islastva
+            if nargs >= i
+                locals[i], last_reference[i] = Some{Any}(argvals[i]), 1
+            else
+                locals[i] = Some{Any}(())
             end
-            locals[i] = nargs >= i ? Some{Any}(argvals[i]) : Some{Any}(())
         end
-        # add local variables initially undefined
-        for i = (meth_nargs+1):length(slotnames)
-            locals[i] = nothing
+        if islastva
+            locals[meth_nargs] = (let i=meth_nargs; Some{Any}(ntuple(k->argvals[i+k-1], nargs-i+1)); end)
+            last_reference[meth_nargs] = 1
         end
-    else
-        src = framecode.src
-        locals = Vector{Union{Nothing,Some{Any}}}(undef, length(src.slotflags)) # src.slotflags is concretely typed, unlike slotnames
-        fill!(locals, nothing)
-        ssavalues = Vector{Any}(undef, length(src.code))
-        sparams = Any[]
-        exception_frames = Int[]
-        last_reference = Dict{Symbol,Int}()
-        callargs = Any[]
-        last_exception = Ref{Any}(nothing)
     end
     FrameData(locals, ssavalues, sparams, exception_frames, last_exception, caller_will_catch_err, last_reference, callargs)
 end
diff --git a/src/interpret.jl b/src/interpret.jl
index 6e1b49e3..84531972 100644
--- a/src/interpret.jl
+++ b/src/interpret.jl
@@ -336,9 +336,9 @@ function do_assignment!(frame, @nospecialize(lhs), @nospecialize(rhs))
     if isa(lhs, SSAValue)
         data.ssavalues[lhs.id] = rhs
     elseif isa(lhs, SlotNumber)
+        counter = (frame.assignment_counter += 1)
         data.locals[lhs.id] = Some{Any}(rhs)
-        slotnames = code.src.slotnames::SlotNamesType
-        data.last_reference[slotnames[lhs.id]::Symbol] = lhs.id
+        data.last_reference[lhs.id] = counter
     elseif isa(lhs, GlobalRef)
         Core.eval(lhs.mod, :($(lhs.name) = $(QuoteNode(rhs))))
     elseif isa(lhs, Symbol)
diff --git a/src/serializer.jl b/src/serializer.jl
new file mode 100644
index 00000000..8bed68c0
--- /dev/null
+++ b/src/serializer.jl
@@ -0,0 +1,926 @@
+module Serializer
+
+using Core: CodeInfo, GotoNode, MethodInstance, SimpleVector
+using Base.Meta: isexpr
+using ..JuliaInterpreter:
+    BreakpointState,
+    FrameData,
+    Unassigned,
+    find_used,
+    lookup_global_ref,
+    pc_from_spc,
+    prepare_framedata
+
+export serialize
+
+# Because we're not sure yet about the tradeoff (or lack thereof)
+# between size and performance, we use aliases for a couple of key types
+const TokenT = UInt8    # the eltype of the serialization format (UInt8 or UInt64)
+const FIndexT = Int32   # what type we use to index function tables
+
+# Calls with at most this many arguments may be encoded with `fixedargs` instead of `listargs`
+const max_fixed_args = 4
+
+include("builtins_serializer.jl")
+
+### Serialization format
+
+# This uses a simple format, conceptually implementing a machine with the following
+# properties:
+# - a tape of instructions called `ser`
+# - a single implicit "register" called `ans`
+# - the ability to execute operations specific to a particular instruction token
+#   in `ser`. Executing these operations may consume future tokens.
+#
+# Operations are conceptually of 4 categories:
+# - loads (which fill `ans` from a variety of sources), encoded by `load*` or literal tokens
+# - stores (which put `ans` somewhere more permanent), encoded by `store*` tokens
+# - calls (for which the return value is stored in `ans`)
+# - control-flow
+#
+# The implementation of calls is allowed to store data to named local variables or lists,
+# thus increasing the temporary storage beyond `ans`.
+#
+# The serialization of the lowered IR
+#     %4 = atan(@3, 2.4)
+# might look something like this on the tape:
+#     call atan_idx methlist fixedargs 2 loadslot 3 float64 2.4 storessa 4
+# where
+# - `call` is an instruction token signaling that next operation is a function call
+#   (in reality, there are multiple call-type tokens for intrinsics, builtins,
+#   generics via the interpreter, generics via ordinary compiled dispatch,
+#   `invokelatest`, `Core._apply`, etc.)
+# - `atan_idx` is a token representing `atan`. It is encoded as an integer index
+#   into a table of functions (the table is maintained by the serializer)
+# - `methlist` is a pointer to a local method table, a performance optimization
+#   used for avoiding full-blown dispatch (this also stores whether the method should
+#   be called via the interpreter or the compiled path)
+# - `fixedargs 2` is an indication that this call should use the path optimized for a
+#   particular (small) number of arguments, which in this case is 2.
+#   An alternative is `listargs args`, which packs an arbitrary number of arguments
+#   into a literally-encoded `args::Vector{Any}` stored (via its pointer) in `ser`.
+# - `loadslot 3` indicates that the next argument (first argument) is to be loaded
+#   from the slots at index 3
+# - `float64 2.4` indicates that the next argument (second argument) is a literal
+#   value of type `Float64` encoded in `ser` itself.
+# - after the second argument, the function call is executed and the result is
+#   stored in `ans`
+# - `storessa 4` indicates that `ans` should be placed in `%4`.
+#
+# NOTE: the example above is schematic. As currently written, `serialize_call!` emits
+# `fixedargs` only for `callbuiltin`; every other call type is encoded with `listargs`.
+
+# NOTE: the declaration order of the tokens is significant. The `last*`/`first*` constants
+# defined after the enum delimit contiguous ranges that are tested with `<=`.
+@enum InterpretToken::TokenT begin
+    # Static data (you can think of these as `load*` tokens for literals in `ser`)
+    int
+    float64
+    float32
+    nothingtok
+    symbolptr        # ptrs to Symbol...
+    stringptr        #         String
+    # Load tokens
+    loadssa
+    loadslot
+    loadparameter    # static parameters
+    loadexception    # the most recent exception
+    loadglobalref    # GlobalRef
+    # Store tokens
+    storessa
+    storeslot
+    storeglobalref
+    # Call expressions
+    callintrinsic
+    callbuiltin
+    call             # standard call
+    calllatest       # call made by `invokelatest`
+    callinvoke       # Core.invoke
+    callapply        # vararg handling
+    callapplylatest  # invokelatest with varargs
+    # Argument tokens
+    fixedargs        # use the path optimized for a fixed (small) number of arguments
+    listargs         # use the general path
+    # Control flow
+    goto
+    gotoifnot
+    enter
+    leave
+    popexception
+    returntok
+end
+# Some constants for handling blocks of tokens
+const lastimmutable = stringptr     # tokens <= this are literals stored in `ser`
+const lastload = loadglobalref      # tokens <= this are literals or loads
+const laststore = storeglobalref    # tokens <= this are literals, loads, or stores
+const firstcall = callintrinsic     # firstcall <= tok <= lastcall identifies a call token
+const lastcall = callapplylatest
+
+# A more stripped-down alternative to reusing TypeMapEntry for our local method
+mutable struct DispatchableMethod
+    next::Union{Nothing,DispatchableMethod}  # linked-list representation
+    sig::Type             # for speed of matching, this is a *concrete* signature. `sig <: method.sig`
+    meth::MethodInstance  # method to be called when `sig` matches
+    compiled::Bool        # true if the compiled version should be called
+end
+
+mutable struct MethodListHead  # linked from the serialized code. This allows us to swap out first item on list
+    next::Union{Nothing,DispatchableMethod}
+end
+# An empty method list
+MethodListHead() = MethodListHead(nothing)
+
+# The serialized form of one `CodeInfo`. The inner constructor creates empty buffers and
+# immediately fills them by calling `serialize!` on the new object.
+struct SerializedCode
+    scope::Union{Method,Module}
+    src::CodeInfo
+    ser::Vector{TokenT}      # the instruction tape
+    serindex::Vector{Int}    # lookup from pc -> spc
+    breakpoints::Vector{BreakpointState}
+    slotnamelists::Dict{Symbol,Vector{Int}}
+    generator::Bool          # true if this is for the expression-generator of a @generated function
+    # The rest just prevent garbage collection of pointer-referenced items in `ser`
+    # We could alternatively encode them as indexes but that seems to involve a second
+    # indirection without changing their rooting, so it's not obviously advantageous.
+    serargs::Vector{Vector{Any}}     # `args` stored in `ser`
+    sermeth::Vector{MethodListHead}  # local method tables in `ser`
+
+    function SerializedCode(scope, src::CodeInfo; generator::Bool=false)
+        serialize!(new(scope,
+                       src,
+                       TokenT[],
+                       Int[],
+                       BreakpointState[],
+                       Dict{Symbol,Vector{Int}}(),
+                       generator,
+                       Vector{Any}[],
+                       MethodListHead[]))
+    end
+end
+
+# Execution state for one `SerializedCode`
+mutable struct SerializedFrame
+    framecode::SerializedCode
+    framedata::FrameData
+    pc::Int                  # index into `framecode.src.code`
+    spc::Int                 # index into `framecode.ser`
+    assignment_counter::Int
+    caller::Union{SerializedFrame,Nothing}
+    callee::Union{SerializedFrame,Nothing}
+end
+# Convenience constructor: `spc` is looked up from `serindex[pc]`, the assignment counter
+# starts at 1, and there is no callee.
+SerializedFrame(framecode, framedata, pc=1, caller=nothing) =
+    SerializedFrame(framecode, framedata, pc, framecode.serindex[pc], 1, caller, nothing)
+
+# Build a `SerializedFrame` for `framecode` called with `args`, with static parameters `lenv`.
+function prepare_frame(framecode::SerializedCode, args::Vector{Any}, lenv::SimpleVector, caller_will_catch_err::Bool=false)
+    framedata = prepare_framedata(framecode, args, caller_will_catch_err)
+    resize!(framedata.sparams, length(lenv))
+    # Add static parameters to environment
+    for i = 1:length(lenv)
+        T = lenv[i]
+        isa(T, TypeVar) && continue  # only fill concrete types; a TypeVar entry is left unassigned
+        framedata.sparams[i] = T
+    end
+    return SerializedFrame(framecode, framedata)
+end
+
+# Functions cannot be serialized via pointer, so we use a list/Dict pair
+const functionlist = []
+const functionlookup = IdDict{Any,FIndexT}()   # functionlist[functionlookup[f]] == f
+
+"""
+    id = function_token(f)
+
+Return the index of `f` in `functionlist`. The first time `f` is seen it is appended to
+`functionlist` and recorded in `functionlookup`. Valid indices start at 1, so
+`zero(FIndexT)` serves as the "not found" sentinel.
+"""
+function function_token(@nospecialize(f))
+    id = get(functionlookup, f, zero(FIndexT))
+    if iszero(id)
+        push!(functionlist, f)
+        id = functionlookup[f] = FIndexT(length(functionlist))
+    end
+    return id
+end
+
+### Raw (untokenized) serialization and deserialization
+
+# This handles basic number types. General object types are handled by their pointers.
+
+"""
+    serialize_raw!(ser::Vector{TokenT}, val)
+
+Append a serialized version of `val` to `ser`. Returns `val`.
+"""
+function serialize_raw! end
+
+"""
+    val, newspc = deserialize_raw(T, ser::Vector{TokenT}, spc)
+
+Starting at index `spc` in `ser`, deserialize a value of type `T`. Return the value `val`
+and the next undeserialized index `newspc`.
+"""
+function deserialize_raw end
+
+"""
+    newspc = serialize_raw_at!(ser::Vector{TokenT}, spc, val)
+
+Overwrite `ser` starting at index `spc` with the serialized form of `val` (used to patch
+forward-jump targets once they are known). Return the index just past the written data.
+"""
+function serialize_raw_at! end
+
+# The ::Vector{TokenT} is to prevent accidental misuse on a higher-level object
+serialize_raw!(ser::Vector{TokenT}, val::TokenT) = (push!(ser, val); return val)
+serialize_raw_at!(ser::Vector{TokenT}, spc, val::TokenT) = (ser[spc] = val; return spc+1)
+deserialize_raw(::Type{TokenT}, ser::Vector{TokenT}, spc) = ser[spc], spc+1
+
+if TokenT === UInt8
+    # Wider unsigned integers are written big-endian by recursively splitting them in half
+    split2(x::UInt16) = UInt8(x >> 8), UInt8(x & 0x00ff)
+    split2(x::UInt32) = UInt16(x >> 16), UInt16(x & 0x0000ffff)
+    split2(x::UInt64) = UInt32(x >> 32), UInt32(x & 0x00000000ffffffff)
+    # NB: in Julia `<<` binds more tightly than `+`, so these are `(hi << n) + lo`
+    join2(hi::UInt8, lo::UInt8) = UInt16(hi) << 8 + UInt16(lo)
+    join2(hi::UInt16, lo::UInt16) = UInt32(hi) << 16 + UInt32(lo)
+    join2(hi::UInt32, lo::UInt32) = UInt64(hi) << 32 + UInt64(lo)
+    narrow(::Type{UInt16}) = UInt8
+    narrow(::Type{UInt32}) = UInt16
+    narrow(::Type{UInt64}) = UInt32
+
+    function serialize_raw!(ser::Vector{TokenT}, val::Union{UInt16,UInt32,UInt64})
+        hi, lo = split2(val)
+        serialize_raw!(ser, hi)
+        serialize_raw!(ser, lo)
+        return val
+    end
+    function serialize_raw_at!(ser::Vector{TokenT}, spc, val::Union{UInt16,UInt32,UInt64})
+        hi, lo = split2(val)
+        spc = serialize_raw_at!(ser, spc, hi)
+        return serialize_raw_at!(ser, spc, lo)
+    end
+
+    function deserialize_raw(::Type{T}, ser::Vector{TokenT}, spc) where T<:Union{UInt16,UInt32,UInt64}
+        Thalf = narrow(T)
+        hi, spc = deserialize_raw(Thalf, ser, spc)
+        lo, spc = deserialize_raw(Thalf, ser, spc)
+        return join2(hi, lo), spc
+    end
+elseif TokenT === UInt64
+    # Every narrower unsigned integer occupies one full token
+    serialize_raw!(ser::Vector{TokenT}, val::Union{UInt8,UInt16,UInt32}) = serialize_raw!(ser, UInt64(val))
+    serialize_raw_at!(ser::Vector{TokenT}, spc, val::Union{UInt8,UInt16,UInt32}) = serialize_raw_at!(ser, spc, UInt64(val))
+    function deserialize_raw(::Type{T}, ser::Vector{TokenT}, spc) where T<:Union{UInt8,UInt16,UInt32}
+        # Read one token. `TokenT` rather than `UInt`, which is only 32 bits wide on 32-bit systems.
+        val, spc = deserialize_raw(TokenT, ser, spc)
+        return T(val), spc
+    end
+end
+
+# Signed integers and floats are stored as the bit pattern of the same-width unsigned integer
+serialize_raw!(ser::Vector{TokenT}, val::Int32) = (serialize_raw!(ser, reinterpret(UInt32, val)); return val)
+serialize_raw!(ser::Vector{TokenT}, val::Int64) = (serialize_raw!(ser, reinterpret(UInt64, val)); return val)
+serialize_raw!(ser::Vector{TokenT}, val::Float32) = (serialize_raw!(ser, reinterpret(UInt32, val)); return val)
+serialize_raw!(ser::Vector{TokenT}, val::Float64) = (serialize_raw!(ser, reinterpret(UInt64, val)); return val)
+
+# `unsigned` reinterprets the bits (it does not range-check), matching `serialize_raw!` above
+# and the `reinterpret`-based `deserialize_raw` methods below.
+serialize_raw_at!(ser::Vector{TokenT}, spc, val::Integer) = serialize_raw_at!(ser, spc, unsigned(val))
+
+# Store the address of `val`. The caller is responsible for keeping `val` rooted for as long
+# as `ser` is alive (see `SerializedCode.serargs`/`sermeth`).
+serialize_rawptr!(ser::Vector{TokenT}, val) = (serialize_raw!(ser, reinterpret(UInt, pointer_from_objref(val))); return val)
+
+deserialize_raw(::Type{Int32}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt32, ser, spc); return reinterpret(Int32, val), spc)
+deserialize_raw(::Type{Int64}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt64, ser, spc); return reinterpret(Int64, val), spc)
+deserialize_raw(::Type{Float32}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt32, ser, spc); return reinterpret(Float32, val), spc)
+deserialize_raw(::Type{Float64}, ser::Vector{TokenT}, spc) = ((val, spc) = deserialize_raw(UInt64, ser, spc); return reinterpret(Float64, val), spc)
+
+# Recover the object whose address was written by `serialize_rawptr!`.
+# Returns `(object, newspc)`; the typed method also asserts the object's type.
+function deserialize_rawptr(ser::Vector{TokenT}, spc)
+    uintp, spc = deserialize_raw(UInt, ser, spc)
+    return unsafe_pointer_to_objref(reinterpret(Ptr, uintp)), spc
+end
+function deserialize_rawptr(::Type{T}, ser::Vector{TokenT}, spc) where T
+    uintp, spc = deserialize_raw(UInt, ser, spc)
+    return unsafe_pointer_to_objref(reinterpret(Ptr{T}, uintp))::T, spc
+end
+
+### Serialization of code objects
+
+# This uses tokens to convey type & other forms of meaning
+
+# Append the bare token `tok` to the tape. Returns `code`.
+function serialize!(code::SerializedCode, tok::InterpretToken)
+    serialize_raw!(code.ser, eltype(code.ser)(tok))
+    return code
+end
+
+# Append `tok` followed by the raw encoding of `val`. Returns `code`.
+function serialize!(code::SerializedCode, tok::InterpretToken, val)
+    serialize!(code, tok)
+    serialize_raw!(code.ser, val)
+    return code
+end
+# Append `tok` followed by the address of `val`. Returns `code`.
+function serialize_ptr!(code::SerializedCode, tok::InterpretToken, val)
+    serialize!(code, tok)
+    serialize_rawptr!(code.ser, val)
+    return code
+end
+
+# Append a literal value; throws for value types that have no literal token.
+function serialize_immutable!(code::SerializedCode, val)
+    if val isa Int
+        serialize!(code, int, val)
+    elseif val isa Float32
+        serialize!(code, float32, val)
+    elseif val isa Float64
+        serialize!(code, float64, val)
+    elseif val === nothing
+        serialize!(code, nothingtok)
+    elseif val isa Symbol
+        serialize_ptr!(code, symbolptr, val)
+    elseif val isa String
+        serialize_ptr!(code, stringptr, val)
+    else
+        error("static val type ", typeof(val), " not recognized")
+    end
+    return code
+end
+
+# Append the instruction(s) that load `val` into `ans`. A `GlobalRef` is first passed through
+# `lookup_global_ref`, and a `QuoteNode` result is unwrapped to its value.
+function serialize_value_load!(code::SerializedCode, val)
+    if val isa GlobalRef
+        val = lookup_global_ref(val)
+        if isa(val, QuoteNode)
+            val = val.value
+        end
+    end
+    if val isa Core.SSAValue
+        serialize!(code, loadssa, val.id)
+    elseif val isa Core.SlotNumber
+        serialize!(code, loadslot, val.id)
+    elseif isexpr(val, :static_parameter)
+        serialize!(code, loadparameter, val.args[1])
+    elseif isexpr(val, :the_exception)
+        serialize!(code, loadexception)
+    elseif isa(val, GlobalRef)
+        serialize_ptr!(code, loadglobalref, val)
+    else
+        serialize_immutable!(code, val)
+    end
+    return code
+end
+
+# Append the encoding of the `:call` expression `stmt`:
+#     calltoken ftoken [methlist] (fixedargs n | listargs args) arg1 arg2 ...
+# `methlist` is present only for non-builtin, non-intrinsic calls.
+function serialize_call!(code::SerializedCode, stmt::Expr)
+    f = stmt.args[1]
+    if isa(f, GlobalRef)
+        f = lookup_global_ref(f)
+        if isa(f, QuoteNode)
+            f = f.value
+        end
+    end
+    if f === Base.invoke
+        error("not yet implemented")
+    elseif f === Base.invokelatest
+        error("not yet implemented")
+    elseif f === Core._apply
+        error("not yet implemented")
+    elseif f === Core._apply_latest
+        error("not yet implemented")
+    elseif f === Core._apply_pure
+        error("not yet implemented")
+    else
+        nargs = length(stmt.args) - 1
+        tok, ftok =
+            if f isa Core.IntrinsicFunction
+                callintrinsic, intrinsic_token(f)
+            elseif f isa Core.Builtin
+                callbuiltin, builtin_tokens[f]
+            else
+                call, function_token(f)
+            end
+        serialize!(code, tok)
+        serialize_raw!(code.ser, FIndexT(ftok))
+        if tok ∉ (callintrinsic, callbuiltin)
+            mlhead = MethodListHead()
+            serialize_rawptr!(code.ser, mlhead)
+            push!(code.sermeth, mlhead)  # protect mlhead from GC (since `code.ser` references by pointer)
+        end
+        if tok == callbuiltin && nargs <= max_fixed_args
+            serialize!(code, fixedargs, UInt8(nargs))
+        else
+            args = Vector{Any}(undef, nargs)
+            serialize_ptr!(code, listargs, args)
+            push!(code.serargs, args)   # protect args from GC (since `code.ser` references by pointer)
+        end
+        for i = 1:nargs
+            arg = stmt.args[i+1]
+            if arg isa GlobalRef
+                arg = lookup_global_ref(arg)
+                if arg isa QuoteNode
+                    # was `serialize_value_load!(ser, ...)`, but no `ser` exists in this scope
+                    serialize_value_load!(code, arg.value)
+                else
+                    serialize_ptr!(code, loadglobalref, arg::GlobalRef)
+                end
+            else
+                serialize_value_load!(code, arg)
+            end
+        end
+    end
+    return code
+end
+
+# main ser
+# Serialize every statement of `code.src`, recording in `code.serindex` the tape position at
+# which each statement starts. `used` is the set of statement indices whose SSA value is
+# referenced elsewhere; only those get a trailing `storessa`.
+function serialize!(code::SerializedCode; used=find_used(code.src))
+    fixserindex = Tuple{Int,Int}[]  # gotos that need a serindex (forward-jumps, not known yet)
+    for (i, stmt) in enumerate(code.src.code)
+        push!(code.serindex, length(code.ser)+1)
+        storeto = i ∈ used ? Core.SSAValue(i) : nothing
+        if isa(stmt, Union{Int,Float64,Float32,Symbol,String,Core.SSAValue,Core.SlotNumber,GlobalRef})
+            serialize_value_load!(code, stmt)
+        elseif isa(stmt, Expr)
+            head = stmt.head
+            if head == :static_parameter
+                serialize!(code, loadparameter, stmt.args[1])
+            elseif head == :the_exception
+                serialize!(code, loadexception)
+            elseif head == :(=)
+                @assert storeto === nothing
+                storeto, rhs = stmt.args[1], stmt.args[2]
+                if isexpr(rhs, :call)
+                    serialize_call!(code, rhs)
+                else
+                    serialize_value_load!(code, rhs)
+                end
+            elseif head == :call
+                serialize_call!(code, stmt)
+            elseif head == :return
+                serialize!(code, returntok)
+                serialize_value_load!(code, stmt.args[1])
+            elseif head == :gotoifnot
+                serialize!(code, gotoifnot)
+                serialize_value_load!(code, stmt.args[1])
+                ln = stmt.args[2]
+                if ln <= length(code.serindex)
+                    serialize_raw!(code.ser, code.serindex[ln])
+                else
+                    # forward jump: reserve space now and patch in the target below
+                    push!(fixserindex, (length(code.ser)+1, ln))
+                    serialize_raw!(code.ser, zero(eltype(code.serindex)))
+                end
+            else
+                error("head ", head, " not yet handled")
+            end
+        elseif stmt isa GotoNode
+            serialize!(code, goto)
+            ln = stmt.label
+            if ln <= length(code.serindex)
+                serialize_raw!(code.ser, code.serindex[ln])
+            else
+                # forward jump: reserve space now and patch in the target below
+                push!(fixserindex, (length(code.ser)+1, ln))
+                serialize_raw!(code.ser, zero(eltype(code.serindex)))
+            end
+        else
+            error("unhandled statement ", stmt)
+        end
+        if storeto !== nothing
+            if isa(storeto, Core.SSAValue)
+                serialize!(code, storessa, storeto.id)
+            elseif isa(storeto, Core.SlotNumber)
+                serialize!(code, storeslot, storeto.id)
+            elseif isa(storeto, GlobalRef)
+                serialize_ptr!(code, storeglobalref, storeto)
+            else
+                error("unhandled storeto ", storeto)
+            end
+        end
+    end
+    # Now fix up the forward-jumping gotos
+    for (idx, ln) in fixserindex
+        serialize_raw_at!(code.ser, idx, code.serindex[ln])
+    end
+    return code
+end
+
+### Deserialization and executing code objects
+
+# A full deserializer would convert back to lowered code. That's not a major priority
+# right now, so we focus on other things and avoid calling it deserialization
+# except where it's truly applicable.
+
+# Read the instruction token at `spc`. Returns `(tok, newspc)`.
+function deserialize_token(code::SerializedCode, spc)
+    itok, spc = deserialize_raw(TokenT, code.ser, spc)
+    tok = InterpretToken(itok)
+    return tok, spc
+end
+
+# Decode the literal announced by `tok` (which must be one of the tokens up to
+# `lastimmutable`), starting at `spc`. Returns `(val, newspc)`.
+function deserialize_immutable(code::SerializedCode, tok, spc)
+    if tok == int
+        val, spc = deserialize_raw(Int, code.ser, spc)
+    elseif tok == float64
+        val, spc = deserialize_raw(Float64, code.ser, spc)
+    elseif tok == float32
+        val, spc = deserialize_raw(Float32, code.ser, spc)
+    elseif tok == nothingtok
+        val = nothing
+    elseif tok == symbolptr
+        val, spc = deserialize_rawptr(Symbol, code.ser, spc)
+    elseif tok == stringptr
+        val, spc = deserialize_rawptr(String, code.ser, spc)
+    else
+        # `val` is not assigned on this path, so report the offending token instead
+        error("immutable token ", tok, " not recognized")
+    end
+    return val, spc
+end
+
+# Execute the load (or literal) announced by `tok`, whose operands start at `spc`.
+# Returns `(val, newspc)`.
+function execute_load(frame, tok::InterpretToken, spc)
+    code, data = frame.framecode, frame.framedata
+    if tok == loadssa
+        val, spc = deserialize_raw(Int, code.ser, spc)
+        val = data.ssavalues[val]
+    elseif tok == loadslot
+        val, spc = deserialize_raw(Int, code.ser, spc)
+        val = something(data.locals[val])
+    elseif tok == loadparameter
+        val, spc = deserialize_raw(Int, code.ser, spc)
+        val = data.sparams[val]
+    elseif tok == loadexception
+        val = data.last_exception[]
+    elseif tok == loadglobalref
+        gr, spc = deserialize_rawptr(GlobalRef, code.ser, spc)
+        val = getfield(gr.mod, gr.name)
+    else
+        return deserialize_immutable(code, tok, spc)
+    end
+    return val, spc
+end
+
+# As above, but first read the load token itself from position `spc`.
+function execute_load(frame, spc)
+    code, data = frame.framecode, frame.framedata
+    tok, spc = deserialize_token(code, spc)
+    return execute_load(frame, tok, spc)
+end
+
+# Execute the call announced by `tok`; `spc` points just past the call token.
+# Returns `(val, newspc)` on every path.
+# NOTE(review): `call_builtin` and `call_builtin_listargs` are not defined anywhere in this
+# file as far as can be seen here; they need to be supplied before builtin calls can run.
+function execute_call(frame, tok::InterpretToken, spc)
+    code, data = frame.framecode, frame.framedata
+    ftok, spc = deserialize_raw(FIndexT, code.ser, spc)
+    if tok == callintrinsic || tok == callbuiltin
+        # no methlist for intrinsics or builtins
+        argtok, spc = deserialize_token(code, spc)
+        if argtok == fixedargs
+            @assert tok == callbuiltin
+            n, spc = deserialize_raw(UInt8, code.ser, spc)
+            # The advantage in fixedargs is not so much the loading but in
+            # avoiding runtime dispatch in the callee. But we can also save the memory
+            # of an array, so let's take advantage of that.
+            if n == 0x00
+                # assign rather than `return`, so that this path also yields `(val, spc)`
+                val = call_builtin(ftok)
+            elseif n == 0x01
+                a, spc = execute_load(frame, spc)
+                val = call_builtin(ftok, a)
+            elseif n == 0x02
+                a, spc = execute_load(frame, spc)
+                b, spc = execute_load(frame, spc)
+                val = call_builtin(ftok, a, b)
+            elseif n == 0x03
+                a, spc = execute_load(frame, spc)
+                b, spc = execute_load(frame, spc)
+                c, spc = execute_load(frame, spc)
+                val = call_builtin(ftok, a, b, c)
+            elseif n == 0x04
+                a, spc = execute_load(frame, spc)
+                b, spc = execute_load(frame, spc)
+                c, spc = execute_load(frame, spc)
+                d, spc = execute_load(frame, spc)
+                val = call_builtin(ftok, a, b, c, d)
+            else
+                error("not implemented for ", n, " arguments")
+            end
+        else
+            # listargs case
+            args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc)
+            for i = 1:length(args)
+                args[i], spc = execute_load(frame, spc)
+            end
+            if tok == callintrinsic
+                # the tape holds `intrinsic_token(f)`; `fill_ftables` stores `f` at that value + 1
+                f = intrinsic[ftok+1]
+                val = ccall(:jl_f_intrinsic_call, Any, (Any, Ptr{Any}, UInt32), f, args, length(args))
+            else
+                val = call_builtin_listargs(ftok, args)
+            end
+        end
+    else
+        mlhead, spc = deserialize_rawptr(MethodListHead, code.ser, spc)  # not used yet
+        argtok, spc = deserialize_token(code, spc)
+        @assert argtok == listargs
+        args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc)
+        for i = 1:length(args)
+            args[i], spc = execute_load(frame, spc)
+        end
+        if tok == call
+            # Hack: currently non-recursive
+            f = functionlist[Int(ftok)]
+            val = f(args...)
+        else
+            error("call type ", tok, " not yet implemented")
+        end
+    end
+    return val, spc
+end
+
+# Execute the instruction at `spc`, plus the store that follows it (if any).
+# Returns the position of the next instruction, or `nothing` when a `returntok` is reached.
+function step_ser!(@nospecialize(recurse), frame, spc::Int, istoplevel::Bool)
+    code, data = frame.framecode, frame.framedata
+    ans = Unassigned()
+    tok, spc = deserialize_token(code, spc)
+    if tok <= lastload
+        ans, spc = execute_load(frame, tok, spc)
+    elseif firstcall <= tok <= lastcall
+        ans, spc = execute_call(frame, tok, spc)
+    elseif tok == goto
+        newspc, _ = deserialize_raw(eltype(code.serindex), code.ser, spc)
+        spc = Int(newspc)
+    elseif tok == gotoifnot
+        cond, spc = execute_load(frame, spc)
+        newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc)
+        if !cond
+            spc = Int(newspc)
+        end
+    elseif tok == enter
+        error("unhandled")
+    elseif tok == leave
+        error("unhandled")
+    elseif tok == popexception
+        error("unhandled")
+    elseif tok == returntok
+        return nothing
+    else
+        error("unhandled token ", tok, " at ", spc, " (pc = ", pc_from_spc(frame, spc), ')')
+    end
+    if !isa(ans, Unassigned) && spc <= length(code.ser)
+        # peek at the next token without advancing
+        nexttok, spctmp = deserialize_token(code, spc)
+        if nexttok == storessa
+            id, spc = deserialize_raw(Int, code.ser, spctmp)
+            data.ssavalues[id] = ans
+        elseif nexttok == storeslot
+            id, spc = deserialize_raw(Int, code.ser, spctmp)
+            data.locals[id] = Some{Any}(ans)
+            # Mirror `do_assignment!`: record the assignment order, so that code consulting
+            # `last_reference` sees this slot as assigned.
+            data.last_reference[id] = (frame.assignment_counter += 1)
+        elseif nexttok == storeglobalref
+            gr, spc = deserialize_rawptr(GlobalRef, code.ser, spctmp)
+            Core.eval(gr.mod, :($(gr.name) = $(QuoteNode(ans))))
+        end
+    end
+    return spc
+end
+
+### Printing serialized code
+# Useful for debugging
+
+print_token(io, tok::InterpretToken) = print(io, tok, ' ')
+
+# Print the literal/load/store announced by `tok`, together with its operand (if any).
+# Returns the position just past the operand.
+function print_load_store(io, code::SerializedCode, tok::InterpretToken, spc)
+    print_token(io, tok)
+    if tok <= lastimmutable
+        val, spc = deserialize_immutable(code, tok, spc)
+    elseif tok == loadssa || tok == storessa
+        val, spc = deserialize_raw(Int, code.ser, spc)
+    elseif tok == loadslot || tok == storeslot
+        val, spc = deserialize_raw(Int, code.ser, spc)
+    elseif tok == loadparameter
+        val, spc = deserialize_raw(Int, code.ser, spc)
+    elseif tok == loadexception
+        val = nothing
+    elseif tok == loadglobalref || tok == storeglobalref
+        val, spc = deserialize_rawptr(GlobalRef, code.ser, spc)
+    else
+        error(tok, " is not a load")
+    end
+    if val !== nothing
+        print(io, val, ' ')
+    end
+    return spc
+end
+
+# Read a token at `spc` and print it as a load. Returns the next position.
+function print_load(io, code::SerializedCode, spc)
+    tok, spc = deserialize_token(code, spc)
+    return print_load_store(io, code, tok, spc)
+end
+
+# Print the call announced by `tok` and all of its arguments on one line of `io`.
+# Returns the position just past the last argument.
+function print_call(io, code::SerializedCode, tok::InterpretToken, spc)
+    print_token(io, tok)
+    ftok, spc = deserialize_raw(FIndexT, code.ser, spc)
+    if tok == callintrinsic
+        # the tape holds `intrinsic_token(f)`; `fill_ftables` stores `f` at that value + 1
+        f = intrinsic[ftok+1]
+    elseif tok == callbuiltin
+        f = builtin[ftok]
+    else
+        f = functionlist[ftok]
+        mlhead, spc = deserialize_rawptr(MethodListHead, code.ser, spc)  # skipped; not printed
+    end
+    show(io, f)
+    print(io, ' ')
+    if tok ∉ (callintrinsic, callbuiltin)
+        print(io, "methlist ")
+    end
+    argtok, spc = deserialize_token(code, spc)
+    print_token(io, argtok)
+    if argtok == fixedargs
+        n, spc = deserialize_raw(UInt8, code.ser, spc)
+        n = Int(n)
+        print(io, n, ' ')
+    else
+        args, spc = deserialize_rawptr(Vector{Any}, code.ser, spc)
+        n = length(args)
+        print(io, " ")
+    end
+    for i = 1:n
+        spc = print_load(io, code, spc)
+    end
+    println(io)  # terminate the line on `io` itself, not on stdout
+    return spc
+end
+
+# Print the single instruction starting at `spc`. Returns the position of the next one.
+function print_serialization(io, code::SerializedCode, spc)
+    tok, spc = deserialize_token(code, spc)
+    if tok <= laststore
+        spc = print_load_store(io, code, tok, spc)
+        println(io)
+    elseif tok <= lastcall
+        spc = print_call(io, code, tok, spc)
+    else
+        print_token(io, tok)
+        if tok == goto
+            newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc)
+            println(io, newspc)
+        elseif tok == gotoifnot
+            spc = print_load(io, code, spc)
+            newspc, spc = deserialize_raw(eltype(code.serindex), code.ser, spc)
+            println(io, newspc)
+        elseif tok == enter
+            error("unhandled")
+        elseif tok == leave
+            error("unhandled")
+        elseif tok == popexception
+            error("unhandled")
+        elseif tok == returntok
+            spc = print_load(io, code, spc)
+            println(io)
+        else
+            error("unhandled token ", tok, " at ", spc)
+        end
+    end
+    return spc
+end
+
+# Print the whole tape, one instruction per line, each prefixed by its position in `code.ser`.
+function print_serialization(io, code::SerializedCode)
+    nd = ndigits(length(code.ser))
+    spc = 1
+    while spc <= length(code.ser)
+        print(io, lpad(spc, nd), ": ")
+        spc = print_serialization(io, code, spc)
+    end
+    return nothing
+end
+
+# Fill the builtin/intrinsic lookup tables when the module is loaded
+function __init__()
+    fill_ftables()
+end
+
+end
diff --git a/src/types.jl b/src/types.jl
index c073f384..cba21732 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -70,6 +70,7 @@ struct FrameCode
     src::CodeInfo
     methodtables::Vector{Union{Compiled,TypeMapEntry}} # line-by-line method tables for generic-function :call Exprs
     breakpoints::Vector{BreakpointState}
+
slotnamelists::Dict{Symbol,Vector{Int}} used::BitSet generator::Bool # true if this is for the expression-generator of a @generated function end @@ -89,8 +90,13 @@ function FrameCode(scope, src::CodeInfo; generator=false, optimize=true) src.code[i] = nothing end end + slotnamelists = Dict{Symbol,Vector{Int}}() + for (i, sym) in enumerate(src.slotnames) + list = get(slotnamelists, sym, Int[]) + slotnamelists[sym] = push!(list, i) + end used = find_used(src) - framecode = FrameCode(scope, src, methodtables, breakpoints, used, generator) + framecode = FrameCode(scope, src, methodtables, breakpoints, slotnamelists, used, generator) if scope isa Method for bp in _breakpoints # Manual union splitting @@ -151,9 +157,7 @@ struct FrameData exception_frames::Vector{Int} last_exception::Base.RefValue{Any} caller_will_catch_err::Bool - # A vector from names to the slotnumber of that name - # for which a reference was last encountered. - last_reference::Dict{Symbol,Int} + last_reference::Vector{Int} callargs::Vector{Any} # a temporary for processing arguments of :call exprs end @@ -176,10 +180,11 @@ mutable struct Frame framecode::FrameCode framedata::FrameData pc::Int + assignment_counter::Int caller::Union{Frame,Nothing} callee::Union{Frame,Nothing} end -Frame(framecode, framedata, pc=1, caller=nothing) = Frame(framecode, framedata, pc, caller, nothing) +Frame(framecode, framedata, pc=1, caller=nothing) = Frame(framecode, framedata, pc, 1, caller, nothing) caller(frame) = frame.caller callee(frame) = frame.callee @@ -331,7 +336,7 @@ struct BreakpointSignature <: AbstractBreakpoint enabled::Ref{Bool} instances::Vector{BreakpointRef} end -same_location(bp2::BreakpointSignature, bp::BreakpointSignature) = +same_location(bp2::BreakpointSignature, bp::BreakpointSignature) = bp2.f == bp.f && bp2.sig == bp.sig && bp2.line == bp.line function Base.show(io::IO, bp::BreakpointSignature) print(io, bp.f) @@ -369,7 +374,7 @@ struct BreakpointFileLocation <: AbstractBreakpoint 
enabled::Ref{Bool} instances::Vector{BreakpointRef} end -same_location(bp2::BreakpointFileLocation, bp::BreakpointFileLocation) = +same_location(bp2::BreakpointFileLocation, bp::BreakpointFileLocation) = bp2.path == bp.path && bp2.abspath == bp.abspath && bp2.line == bp.line function Base.show(io::IO, bp::BreakpointFileLocation) print(io, bp.path, ':', bp.line) @@ -378,4 +383,3 @@ function Base.show(io::IO, bp::BreakpointFileLocation) print(io, " [disabled]") end end - diff --git a/src/utils.jl b/src/utils.jl index 03512a7c..3140859c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -72,7 +72,7 @@ function find_used(code::CodeInfo) end function scan_ssa_use!(used::BitSet, @nospecialize(stmt)) - if isa(stmt, SSAValue) + if isa(stmt, SSAValue) || isa(stmt, Core.SSAValue) push!(used, stmt.id) end iter = Core.Compiler.userefs(stmt) @@ -80,7 +80,7 @@ function scan_ssa_use!(used::BitSet, @nospecialize(stmt)) while iterval !== nothing useref, state = iterval val = Core.Compiler.getindex(useref) - if isa(val, SSAValue) + if isa(val, SSAValue) || isa(val, Core.SSAValue) push!(used, val.id) end iterval = Core.Compiler.iterate(iter, state) @@ -100,6 +100,9 @@ function hasarg(predicate, args) return false end +pc_from_spc(framecode::FrameCode, spc) = searchsortedfirst(framecode.serindex, spc) - 1 +pc_from_spc(frame::Frame, spc) = pc_from_spc(frame.framecode, spc) + ## Predicates is_goto_node(@nospecialize(node)) = isa(node, GotoNode) || isexpr(node, :gotoifnot) @@ -335,16 +338,24 @@ end Return the local variables as a vector of `Variable`[@ref]. 
""" function locals(frame::Frame) - vars = Variable[] + vars, var_counter = Variable[], Int[] + varlookup = Dict{Symbol,Int}() data, code = frame.framedata, frame.framecode - added = Set{Symbol}() slotnames = code.src.slotnames::SlotNamesType - for sym in slotnames - sym ∈ added && continue - idx = get(data.last_reference, sym, 0) - idx == 0 && continue - push!(vars, Variable(something(data.locals[idx]), sym, false)) - push!(added, sym) + for (sym, counter, val) in zip(slotnames, data.last_reference, data.locals) + counter == 0 && continue + var = Variable(something(val), sym, false) + idx = get(varlookup, sym, 0) + if idx > 0 + if counter > var_counter[idx] + vars[idx] = var + var_counter[idx] = counter + end + else + varlookup[sym] = length(vars)+1 + push!(vars, var) + push!(var_counter, counter) + end end if code.scope isa Method syms = sparam_syms(code.scope) diff --git a/test/breakpoints.jl b/test/breakpoints.jl index fcd7c65a..11cae71c 100644 --- a/test/breakpoints.jl +++ b/test/breakpoints.jl @@ -116,6 +116,24 @@ struct Squarer end @test !any(v->v.name == :b, var) @test filter(v->v.name == :a, var)[1].value == 2 + # Method with local scope (two slots with same name) + ln = @__LINE__ + function ftwoslots() + y = 1 + z = let y = y + y = y + 2 + rand() + end + y = y + 1 + return z + end + bp = breakpoint(@__FILE__, ln+5, :(y > 2)) + frame, bp2 = @interpret ftwoslots() + var = JuliaInterpreter.locals(leaf(frame)) + @test filter(v->v.name == :y, var)[1].value == 3 + remove(bp) + bp = breakpoint(@__FILE__, ln+8, :(y > 2)) + @test isa(@interpret(ftwoslots()), Float64) # Direct return @breakpoint gcd(1,1) a==5 diff --git a/test/serialization.jl b/test/serialization.jl new file mode 100644 index 00000000..31b7ba14 --- /dev/null +++ b/test/serialization.jl @@ -0,0 +1,31 @@ +using JuliaInterpreter + +if !isdefined(Main, :Serializer) + include("../src/serializer.jl") +end + +function summer(A::AbstractArray{T}) where T + s = zero(T) + for a in A + s += a + end + return 
s +end + +a = [1,2,3] +method = @which summer(a) +src = @code_lowered summer(a) +code = Serializer.SerializedCode(method, src) +# Build a frame (currently this must be done manually) +argtypes = Tuple{typeof(summer), typeof(a)} +(ti, lenv) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), + argtypes, method.sig) +frame = Serializer.prepare_frame(code, [summer, a], lenv) + +Serializer.print_serialization(stdout, code) + +spc = frame.spc +spc = Serializer.step_ser!(nothing, frame, spc, false) +spc = Serializer.step_ser!(nothing, frame, spc, false) +spc = Serializer.step_ser!(nothing, frame, spc, false) +# builtins & intrinsics aren't handled yet, so going any further gives an error