Skip to content

Commit

Permalink
Merge pull request #17 from skelsec/speedup
Browse files Browse the repository at this point in the history
Speedup
  • Loading branch information
skelsec authored Mar 27, 2021
2 parents 9d50d3b + 714d122 commit 96d6b64
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 73 deletions.
2 changes: 1 addition & 1 deletion minidump/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__version__ = "0.0.15"
__version__ = "0.0.16"
__banner__ = \
"""
# minidump %s
Expand Down
85 changes: 63 additions & 22 deletions minidump/aminidumpreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,26 @@
from .common_structs import *
from .streams.SystemInfoStream import PROCESSOR_ARCHITECTURE

class AMinidumpBufferedMemorySegment:
def __init__(self):
self.start_address = None
self.end_address = None

class VirtualSegment:
    """
    One cached window of a buffered memory segment.

    start / end: bounds of the window (the buffered segment uses offsets
        relative to its own beginning when it creates these objects)
    start_file_address: position in the dump file where the window begins
    data: bytes of the window; None until the owner assigns the bytes it read
    """

    def __init__(self, start, end, start_file_address):
        self.start, self.end = start, end
        self.start_file_address = start_file_address

        # filled in by whoever creates the window, after reading from the file
        self.data = None

    def inrange(self, start, end):
        """
        Returns True when the requested [start, end] span lies completely
        inside this window (both bounds inclusive).
        """
        return start >= self.start and self.end >= end

async def load(self, memory_segment, file_handle):
class AMinidumpBufferedMemorySegment:
def __init__(self, memory_segment, chunksize = 10*1024):
self.start_address = memory_segment.start_virtual_address
self.end_address = memory_segment.end_virtual_address
await file_handle.seek(memory_segment.start_file_address)
self.data = await file_handle.read(memory_segment.size)
self.total_size = memory_segment.end_virtual_address - memory_segment.start_virtual_address
self.start_file_address = memory_segment.start_file_address
self.chunksize = chunksize
self.chunks = []

def inrange(self, position):
    """
    Returns True when the virtual address `position` falls inside this
    segment. Both start_address and end_address count as inside.
    """
    if position < self.start_address:
        return False
    return position <= self.end_address
Expand All @@ -28,10 +37,43 @@ def remaining_len(self, position):
return None
return self.end_address - position

async def find(self, file_handle, pattern, startpos = 0):
    """
    Searches for a pattern in the whole memory segment.

    file_handle: async file object of the dump, handed over to self.read
    pattern: bytes to look for
    startpos: segment-relative offset where the search begins (default 0,
        so callers that only pass a pattern keep working)

    Returns the segment-relative offset of the first match at or after
    startpos, or -1 when there is no match.
    """
    # Ask for the complete segment explicitly. Passing -1 as the end would be
    # used as a slice bound by read() and silently drop the last byte (or
    # return only a single chunk on big segments).
    data = await self.read(file_handle, 0, self.total_size)
    return data.find(pattern, startpos)

async def read(self, file_handle, start, end):
    """
    Reads bytes of this segment, caching what was read in self.chunks.

    file_handle: async file object of the dump (needs seek and read)
    start: segment-relative offset of the first byte
    end: segment-relative offset one past the last byte, or None to read
        from start up to the end of the segment (this path is not cached)

    Returns the requested bytes.
    """
    if end is None:
        await file_handle.seek(self.start_file_address + start)
        # The remaining length is measured inside the segment. Virtual
        # addresses and file offsets must not be mixed here.
        return await file_handle.read(max(0, self.total_size - start))

    # serve from an already cached window when one covers the request
    for chunk in self.chunks:
        if chunk.inrange(start, end):
            return chunk.data[start - chunk.start: end - chunk.start]

    if self.total_size <= 2*self.chunksize:
        # small segment: cache it in one piece
        vs = VirtualSegment(0, self.total_size, self.start_file_address)
    else:
        # read at least one chunk, but never past the end of the segment
        chunksize = max(end - start, self.chunksize)
        chunksize = max(0, min(chunksize, self.total_size - start))
        vs = VirtualSegment(start, start + chunksize, self.start_file_address + start)

    await file_handle.seek(vs.start_file_address)
    vs.data = await file_handle.read(vs.end - vs.start)
    self.chunks.append(vs)

    return vs.data[start - vs.start: end - vs.start]

class AMinidumpBufferedReader:
def __init__(self, reader):
def __init__(self, reader, segment_chunk_size = 10*1024):
self.reader = reader
self.memory_segments = []
self.segment_chunk_size = segment_chunk_size

self.current_segment = None
self.current_position = None
Expand All @@ -50,8 +92,7 @@ async def _select_segment(self, requested_position):
# not in cache, check if it's present in memory space. if yes then create a new buffered memory object, and copy data
for memory_segment in self.reader.memory_segments:
if memory_segment.inrange(requested_position):
newsegment = AMinidumpBufferedMemorySegment()
await newsegment.load(memory_segment, self.reader.file_handle)
newsegment = AMinidumpBufferedMemorySegment(memory_segment, chunksize=self.segment_chunk_size)
self.memory_segments.append(newsegment)
self.current_segment = newsegment
self.current_position = requested_position
Expand Down Expand Up @@ -118,7 +159,7 @@ async def peek(self, length):
t = self.current_position + length
if not self.current_segment.inrange(t):
raise Exception('Would read over segment boundaries!')
return self.current_segment.data[self.current_position - self.current_segment.start_address :t - self.current_segment.start_address]
return await self.current_segment.read(self.reader.file_handle, self.current_position - self.current_segment.start_address , t - self.current_segment.start_address)

async def read(self, size = -1):
"""
Expand All @@ -133,15 +174,15 @@ async def read(self, size = -1):

old_new_pos = self.current_position
self.current_position = self.current_segment.end_address
return self.current_segment.data[old_new_pos - self.current_segment.start_address:]
return await self.current_segment.read(self.reader.file_handle, old_new_pos - self.current_segment.start_address, None)

t = self.current_position + size
if not self.current_segment.inrange(t):
raise Exception('Would read over segment boundaries!')

old_new_pos = self.current_position
self.current_position = t
return self.current_segment.data[old_new_pos - self.current_segment.start_address :t - self.current_segment.start_address]
return await self.current_segment.read(self.reader.file_handle, old_new_pos - self.current_segment.start_address, t - self.current_segment.start_address)

async def read_int(self):
"""
Expand Down Expand Up @@ -173,7 +214,7 @@ async def find(self, pattern):
"""
Searches for a pattern in the current memory segment
"""
pos = self.current_segment.data.find(pattern)
pos = await self.current_segment.find(self.reader.file_handle, pattern)
if pos == -1:
return -1
return pos + self.current_position
Expand All @@ -185,7 +226,7 @@ async def find_all(self, pattern):
pos = []
last_found = -1
while True:
last_found = self.current_segment.data.find(pattern, last_found + 1)
last_found = await self.current_segment.find(self.reader.file_handle, pattern, last_found + 1)
if last_found == -1:
break
pos.append(last_found + self.current_segment.start_address)
Expand Down Expand Up @@ -227,7 +268,7 @@ async def get_ptr_with_offset(self, pos):
return await self.read_uint()

async def find_in_module(self, module_name, pattern, find_first = False, reverse_order = False):
t = await self.reader.search_module(module_name, pattern, find_first = find_first, reverse_order = reverse_order)
t = await self.reader.search_module(module_name, pattern, find_first = find_first, reverse_order = reverse_order,chunksize = self.segment_chunk_size)
return t


Expand Down Expand Up @@ -262,32 +303,32 @@ def __init__(self, minidumpfile):
else:
raise Exception('Unknown processor architecture %s! Please fix and submit PR!' % self.sysinfo.ProcessorArchitecture)

def get_buffered_reader(self):
return AMinidumpBufferedReader(self)
def get_buffered_reader(self, segment_chunk_size = 10*1024):
return AMinidumpBufferedReader(self, segment_chunk_size = segment_chunk_size)

def get_module_by_name(self, module_name):
    """
    Looks up a loaded module by (part of) its file name.

    Only the file-name part of each module's path is compared, and a
    substring match is enough. Returns the first matching entry of
    self.modules, or None when nothing matches.
    """
    candidates = (mod for mod in self.modules if module_name in ntpath.basename(mod.name))
    return next(candidates, None)

async def search_module(self, module_name, pattern, find_first = False, reverse_order = False):
async def search_module(self, module_name, pattern, find_first = False, reverse_order = False, chunksize = 10*1024):
mod = self.get_module_by_name(module_name)
if mod is None:
raise Exception('Could not find module! %s' % module_name)
needles = []
for ms in self.memory_segments:
if mod.baseaddress <= ms.start_virtual_address < mod.endaddress:
needles += await ms.asearch(pattern, self.file_handle, find_first = find_first)
needles += await ms.asearch(pattern, self.file_handle, find_first = find_first, chunksize = chunksize)
if len(needles) > 0 and find_first is True:
return needles

return needles

async def search(self, pattern, find_first = False):
async def search(self, pattern, find_first = False, chunksize = 10*1024):
t = []
for ms in self.memory_segments:
t += await ms.asearch(pattern, self.file_handle)
t += await ms.asearch(pattern, self.file_handle, find_first = find_first, chunksize = chunksize)

return t

Expand Down
98 changes: 70 additions & 28 deletions minidump/common_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,46 +155,88 @@ async def aread(self, virtual_address, size, file_handler):
await file_handler.seek(pos, 0)
return data

def search(self, pattern, file_handler, find_first = False, chunksize = 50*1024):
    """
    Searches for a byte pattern inside this memory segment.

    pattern: bytes to look for
    file_handler: seekable binary file object of the dump. Its position is
        restored before returning, also when reading fails.
    find_first: when True the segment is read chunk by chunk and the search
        stops at the first hit; otherwise the whole segment is read and
        every hit (overlapping ones included) is reported
    chunksize: number of bytes read per step when find_first is True

    Returns a list with the virtual address of each match (at most one entry
    when find_first is True, an empty list when nothing was found).
    """
    if len(pattern) > self.size:
        return []

    fl = []
    pos = file_handler.tell()
    try:
        file_handler.seek(self.start_file_address, 0)

        if find_first is True:
            if chunksize <= 0:
                # a non-positive chunk size would never make progress
                chunksize = self.size
            # a match may straddle two chunks, so re-scan this many old bytes
            overlap = max(len(pattern) - 1, 0)
            data = b''
            while len(data) < self.size:
                block = file_handler.read(min(chunksize, self.size - len(data)))
                if not block:
                    # file ends before the segment does, stop instead of spinning
                    break
                scan_from = max(0, len(data) - overlap)
                data += block
                marker = data.find(pattern, scan_from)
                if marker != -1:
                    fl.append(self.start_virtual_address + marker)
                    break
        else:
            data = file_handler.read(self.size)
            # searching with a moving start offset keeps every marker relative
            # to the segment start, so no running offset needs to be tracked
            marker = data.find(pattern)
            while marker != -1:
                fl.append(self.start_virtual_address + marker)
                marker = data.find(pattern, marker + 1)
    finally:
        file_handler.seek(pos, 0)

    return fl

async def asearch(self, pattern, file_handler, find_first = False, chunksize = 50*1024):
    """
    Async variant of the pattern search inside this memory segment.

    pattern: bytes to look for
    file_handler: async file object of the dump (seek and read are awaited).
        Its position is restored before returning, also when reading fails.
    find_first: when True the segment is read chunk by chunk and the search
        stops at the first hit; otherwise the whole segment is read and
        every hit (overlapping ones included) is reported
    chunksize: number of bytes read per step when find_first is True

    Returns a list with the virtual address of each match (at most one entry
    when find_first is True, an empty list when nothing was found).
    """
    if len(pattern) > self.size:
        return []

    fl = []
    # NOTE(review): tell() is called without await, as before - confirm that
    # the file objects used with this method expose a synchronous tell()
    pos = file_handler.tell()
    try:
        await file_handler.seek(self.start_file_address, 0)

        if find_first is True:
            if chunksize <= 0:
                # a non-positive chunk size would never make progress
                chunksize = self.size
            # a match may straddle two chunks, so re-scan this many old bytes
            overlap = max(len(pattern) - 1, 0)
            data = b''
            while len(data) < self.size:
                block = await file_handler.read(min(chunksize, self.size - len(data)))
                if not block:
                    # file ends before the segment does, stop instead of spinning
                    break
                scan_from = max(0, len(data) - overlap)
                data += block
                marker = data.find(pattern, scan_from)
                if marker != -1:
                    fl.append(self.start_virtual_address + marker)
                    break
        else:
            data = await file_handler.read(self.size)
            # searching with a moving start offset keeps every marker relative
            # to the segment start, so no running offset needs to be tracked
            marker = data.find(pattern)
            while marker != -1:
                fl.append(self.start_virtual_address + marker)
                marker = data.find(pattern, marker + 1)
    finally:
        await file_handler.seek(pos, 0)

    return fl


Expand Down
Loading

0 comments on commit 96d6b64

Please sign in to comment.