Updates to properly handle @Profile decorators for CPU time attributi…

…on (#637) * Updates to properly handle @Profile decorators. * Added some additional smoketests * Added more error printing * Corrected checking * Corrected checking * Update doc, remove disabled code. * Fixed smoketest --------- Co-authored-by: Sam Stern <[email protected]>
plasma-umass · Jul 14, 2023 · cadb8f4 · cadb8f4
1 parent 101ad2b
commit cadb8f4
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 16 deletions.
diff --git a/.github/workflows/test-smoketests.yml b/.github/workflows/test-smoketests.yml
@@ -47,3 +47,6 @@ jobs:
       # Note: test/smoketest.py only handles single JSON, rather than multiple in sequence.
     - name: profile-interval smoke test
       run: python -m scalene --cli --profile-interval=2 test/testme.py
+
+    - name: decorator smoke test
+      run: python test/smoketest_profile_decorator.py
diff --git a/scalene/scalene_json.py b/scalene/scalene_json.py
@@ -107,7 +107,27 @@ def output_profile_line(
         """Print at most one line of the profile (true == printed one)."""
 
         if not force_print and not profile_this_code(fname, line_no):
-            return {}
+            return {
+                "lineno": line_no,
+                "line": line,
+                "n_core_utilization" : 0,
+                "n_cpu_percent_c": 0,
+                "n_cpu_percent_python": 0,
+                "n_sys_percent": 0,
+                "n_gpu_percent": 0,
+                "n_gpu_avg_memory_mb": 0,
+                "n_gpu_peak_memory_mb": 0,
+                "n_peak_mb": 0,
+                "n_growth_mb": 0,
+                "n_avg_mb": 0,
+                "n_mallocs": 0,
+                "n_malloc_mb": 0,
+                "n_usage_fraction": 0, 
+                "n_python_fraction": 0,
+                "n_copy_mb_s": 0,
+                "memory_samples": [],
+            }
+
         # Prepare output values.
         n_cpu_samples_c = stats.cpu_samples_c[fname][line_no]
         # Correct for negative CPU sample counts. This can happen
@@ -219,7 +239,7 @@ def output_profiles(
         python_alias_dir: Path,
         program_path: Path,
         profile_memory: bool = True,
-        reduced_profile: bool = False,
+        reduced_profile: bool = False
     ) -> Dict[str, Any]:
         """Write the profile out."""
         # Get the children's stats, if any.
@@ -302,7 +322,8 @@ def output_profiles(
             fname = Filename(fname)
             try:
                 percent_cpu_time = (
-                    100 * stats.cpu_samples[fname] / stats.total_cpu_samples
+                    100 * stats.cpu_samples[fname] / stats.elapsed_time
+                    # 100 * stats.cpu_samples[fname] / stats.total_cpu_samples
                 )
             except ZeroDivisionError:
                 percent_cpu_time = 0

diff --git a/scalene/scalene_profiler.py b/scalene/scalene_profiler.py
@@ -417,9 +417,6 @@ def invalidate_lines_python(
             # Different line: stop tracing this frame.
             frame.f_trace = None
             frame.f_trace_lines = False
-            # If we are not in a file we should be tracing, return.
-            # if not Scalene.should_trace(ff):
-            #     return None
             if Scalene.on_stack(frame, fname, lineno):
                 # We are still on the same line, but somewhere up the stack
                 # (since we returned when it was the same line in this
@@ -480,6 +477,14 @@ def profile(func: Any) -> Any:
         Scalene.__files_to_profile.add(func.__code__.co_filename)
         Scalene.__functions_to_profile[func.__code__.co_filename].add(func)
 
+        if Scalene.__args.memory:
+            from scalene import pywhere  # type: ignore
+            pywhere.register_files_to_profile(
+                list(Scalene.__files_to_profile),
+                Scalene.__program_path,
+                Scalene.__args.profile_all,
+            )
+
         return func
 
     @staticmethod
@@ -584,8 +589,7 @@ def malloc_signal_handler(
         found_frame = False
         f = this_frame
         while f:
-            if Scalene.should_trace(f.f_code.co_filename):
-                found_frame = True
+            if found_frame := Scalene.should_trace(f.f_code.co_filename, f.f_code.co_name):
                 break
             f = cast(FrameType, f.f_back)
         if not found_frame:
@@ -934,7 +938,7 @@ def output_profile() -> bool:
                 Scalene.__program_being_profiled,
                 Scalene.__stats,
                 Scalene.__pid,
-                lambda x,y: True if Scalene.__args.web else Scalene.profile_this_code,
+                Scalene.profile_this_code,
                 Scalene.__python_alias_dir,
                 Scalene.__program_path,
                 profile_memory=Scalene.__args.memory,
@@ -972,7 +976,7 @@ def output_profile() -> bool:
                 column_width,
                 Scalene.__stats,
                 Scalene.__pid,
-                lambda x,y: True if Scalene.__args.web else Scalene.profile_this_code,
+                Scalene.profile_this_code,
                 Scalene.__python_alias_dir,
                 Scalene.__program_path,
                 profile_memory=Scalene.__args.memory,
@@ -1005,7 +1009,7 @@ def add_stack(frame: FrameType) -> None:
         stk = HashableList()
         f = frame
         while f:
-            if Scalene.should_trace(f.f_code.co_filename):
+            if Scalene.should_trace(f.f_code.co_filename, f.f_code.co_name):
                 stk.insert(0, (f.f_code.co_filename, f.f_lineno))
             f = f.f_back
         Scalene.__stats.stacks[stk] += 1
@@ -1227,14 +1231,16 @@ def compute_frames_to_record() -> List[Tuple[FrameType, int, FrameType]]:
             if not frame:
                 continue
             fname = frame.f_code.co_filename
+            func = frame.f_code.co_name
             # Record samples only for files we care about.
             if not fname:
                 # 'eval/compile' gives no f_code.co_filename.  We have
                 # to look back into the outer frame in order to check
                 # the co_filename.
                 back = cast(FrameType, frame.f_back)
                 fname = Filename(back.f_code.co_filename)
-            while not Scalene.should_trace(fname):
+                func = back.f_code.co_name
+            while not Scalene.should_trace(fname, func):
                 # Walk the stack backwards until we hit a frame that
                 # IS one we should trace (if there is one).  i.e., if
                 # it's in the code being profiled, and it is just
@@ -1245,6 +1251,7 @@ def compute_frames_to_record() -> List[Tuple[FrameType, int, FrameType]]:
                     break
                 if frame:
                     fname = frame.f_code.co_filename
+                    func = frame.f_code.co_name
             if frame:
                 new_frames.append((frame, tident, orig_frame))
         del frames[:]
@@ -1267,7 +1274,7 @@ def enter_function_meta(
                     return
         except Exception:
             return
-        if not Scalene.should_trace(f.f_code.co_filename):
+        if not Scalene.should_trace(f.f_code.co_filename, f.f_code.co_name):
             return
 
         fn_name = Filename(f.f_code.co_name)
@@ -1595,13 +1602,18 @@ def memcpy_sigqueue_processor(
 
     @staticmethod
     @functools.lru_cache(None)
-    def should_trace(filename: str) -> bool:
-        """Return true if the filename is one we should trace."""
+    def should_trace(filename: str, func : str) -> bool:
+        """Return true if we should trace this filename and function."""
         if not filename:
             return False
         if os.path.join("scalene", "scalene") in filename:
             # Don't profile the profiler.
             return False
+        if Scalene.__functions_to_profile:
+            if filename in Scalene.__functions_to_profile:
+                if func in { fn.__code__.co_name for fn in Scalene.__functions_to_profile[filename] }:
+                    return True
+            return False
         # Don't profile the Python libraries, unless overridden by --profile-all
         if not Scalene.__args.profile_all:
             try:

diff --git a/test/profile_annotation_test.py b/test/profile_annotation_test.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+import numpy as np
+
+#import math
+
+# from numpy import linalg as LA
+
+arr = [i for i in range(1,1000)]
+
+@profile
+def doit1(x):
+#    x = [i*i for i in range(1,1000)][0]
+    y = 1
+#    w, v = LA.eig(np.diag(arr)) # (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)))
+    x = [i*i for i in range(0,100000)][99999]
+    y1 = [i*i for i in range(0,200000)][199999]
+    z1 = [i for i in range(0,300000)][299999]
+    z = x * y
+#    z = np.multiply(x, y)
+    return z
+
+def doit2(x):
+    i = 0
+#    zarr = [math.cos(13) for i in range(1,100000)]
+#    z = zarr[0]
+    z = 0.1
+    while i < 100000:
+#        z = math.cos(13)
+#        z = np.multiply(x,x)
+#        z = np.multiply(z,z)
+#        z = np.multiply(z,z)
+        z = z * z
+        z = x * x
+        z = z * z
+        z = z * z
+        i += 1
+    return z
+
+@profile
+def doit3(x):
+    for i in range(1000000):
+        z = x + 1
+        z = x + 1
+        z = x + 1
+        z = x + z
+        z = x + z
+#    z = np.cos(x)
+    return z
+
+def stuff():
+#    y = np.random.randint(1, 100, size=50000000)[49999999]
+    x = 1.01
+    for i in range(1,3):
+        # print(i)
+        for j in range(1,3):
+            x = doit1(x)
+            x = doit2(x)
+            x = doit3(x)
+            x = 1.01
+    return x
+
+import sys
+# print("TESTME")
+# print(sys.argv)
+stuff()
+
diff --git a/test/smoketest.py b/test/smoketest.py
@@ -7,7 +7,7 @@ def smoketest(fname, rest):
     proc = subprocess.run( [sys.executable, "-m", "scalene", "--cli", "--json", "--outfile", "/dev/stderr", *rest, fname] ,capture_output=True)
     if proc.returncode != 0:
         print("Exited with a non-zero code:", proc.returncode)
-        print("Stdout:", sys.stdout.decode('utf-8'))
+        print("Stdout:", proc.stdout.decode('utf-8'))
         exit(proc.returncode)
     # stdout = proc.stdout.decode('utf-8')
     stderr = proc.stderr.decode('utf-8')

diff --git a/test/smoketest_profile_decorator.py b/test/smoketest_profile_decorator.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+import subprocess
+import sys
+import json
+
+def smoketest(fname):
+    proc = subprocess.run( [sys.executable, "-m", "scalene", "--cli", "--json", "--outfile", "/dev/stderr", fname] ,capture_output=True)
+    if proc.returncode != 0:
+        print("Exited with a non-zero code:", proc.returncode)
+        print("Stdout:", proc.stdout.decode('utf-8'))
+        print("Stderr:", proc.stderr.decode('utf-8'))
+
+        exit(proc.returncode)
+
+    stderr = proc.stderr.decode('utf-8')
+    try:
+        scalene_json = json.loads(stderr)
+    except json.JSONDecodeError:
+        print("Invalid JSON", stderr)
+        exit(1)
+    if len(scalene_json) == 0:
+        print("No JSON output")
+        exit(1)
+    files = scalene_json['files']
+    if not len(files) > 0:
+        print("No files found in output")
+        exit(1)
+    _fname = list(files.keys())[0]
+    function_list = files[_fname]['functions']
+    exit_code = 0
+
+    # if 'doit1' not in function_dict:
+    expected_functions = ['doit1', 'doit3']
+    unexpected_functions = ['doit2']
+    for fn_name in expected_functions:
+        if not any(fn_name in f['line'] for f in function_list):
+            print(f"Expected function '{fn_name}' not returned")
+            exit_code = 1
+    for fn_name in unexpected_functions:
+        if any(fn_name in f['line'] for f in function_list):
+            print(f"Unexpected function '{fn_name}' returned")
+            exit_code = 1
+    if exit_code != 0:
+        print(function_list)
+        exit(exit_code)
+
+if __name__ == '__main__':
+    smoketest('test/profile_annotation_test.py')