Profiling patch
authorSverre Rabbelier <srabbelier@gmail.com>
Thu, 27 Aug 2009 20:34:43 -0700
changeset 2832 2a0a7e081caf
parent 2831 a7ed56911653
child 2833 5aa891de5aa5
Profiling patch
app/app.yaml.template
app/app_profiler/__init__.py
app/app_profiler/app_profiler.py
app/app_profiler/ppstats.py
app/profiler.py
app/settings.py
scripts/build.sh
--- a/app/app.yaml.template	Fri Aug 28 19:41:28 2009 +0530
+++ b/app/app.yaml.template	Thu Aug 27 20:34:43 2009 -0700
@@ -55,6 +55,9 @@
   static_dir: shell/static
   expiration: 1d
 
+- url: /profiler/.*
+  script: app_profiler/app_profiler.py
+
 - url: /.*
   script: main.py
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/app_profiler.py	Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,240 @@
+import cProfile
+import ppstats
+
+from google.appengine.ext import webapp
+from google.appengine.api import memcache
+import google.appengine.ext.webapp.util
+
+import os.path
+import logging
+import re
+import random
+import string
+import zlib
+
+mc_client = memcache.Client()
+
+alphanumeric = string.letters + string.digits
+
+global_profiler = None
+
+class GAEProfiler(object):
+    _save_every = 10
+
+    def __init__(self):
+        self.is_profiling = False
+        self._profiler = None
+        self.num_requests = 0
+        self.requests_profiled = 0
+        self.request_regex = None
+        self.profile_key = ''.join([random.choice(alphanumeric) for x in range(4)])
+
+    def start_profiling(self, request_regex=None, num_requests=0):
+        "start profiling with this object, setting # of requests and filter"
+        if self.is_profiling:
+            return
+
+        self.is_profiling = True
+        if self._profiler is None:
+            self._profiler = cProfile.Profile()
+        self.num_requests = num_requests
+        if request_regex:
+            self.request_regex = re.compile(request_regex)
+
+    def stop_profiling(self):
+        self.is_profiling = False
+
+    def resume_profiling(self):
+        self.is_profiling = True
+    
+    def has_profiler(self):
+        return self._profiler is not None
+
+    def get_pstats(self):
+        "return a ppstats object from current profile data"
+        gae_base_dir = '/'.join(webapp.__file__.split('/')[:-5])
+        sys_base_dir = '/'.join(logging.__file__.split('/')[:-2])
+
+        stats = ppstats.Stats(self._profiler)
+        stats.hide_directory(gae_base_dir, 'GAEHome')
+        stats.hide_directory(sys_base_dir, 'SysHome')
+        stats.strip_dirs()
+        return stats
+
+    def runcall(self, func, *args, **kwargs):
+        "profile one call, incrementing requests_profiled and maybe saving stats"
+        self.requests_profiled += 1
+        if self._profiler:
+            ret = self._profiler.runcall(func, *args, **kwargs)
+        else:
+            ret = func(*args, **kwargs)
+        
+#        if (self.requests_profiled % self._save_every) == 0 or \
+#                self.requests_profiled == self.num_requests:
+#            self.save_pstats_to_memcache()
+        self.save_pstats_to_memcache()
+        return ret
+
+    def should_profile_request(self):
+        "check for # of requests profiled and that SCRIPT_NAME matches regex"
+        env = dict(os.environ)
+        script_name = env.get('SCRIPT_NAME', '')
+        logging.info(script_name)
+        
+        if self.num_requests and self.requests_profiled >= self.num_requests:
+            return False
+
+        if self.request_regex and not self.request_regex.search(script_name):
+            return False
+
+        return True
+
+    def save_pstats_to_memcache(self):
+        "save stats from profiler object to memcache"
+        ps = self.get_pstats()
+        output = ps.dump_stats_pickle()
+        compressed_data = zlib.compress(output, 3)
+        cache_key = cache_key_for_profile(self.profile_key)
+        mc_client.set(cache_key, compressed_data)
+        logging.info("Saved pstats to memcache with key %s" % cache_key)
+
+
+
+def get_global_profiler():
+    global global_profiler
+    if not global_profiler:
+        global_profiler = GAEProfiler()
+
+    return global_profiler
+    
+def cache_key_for_profile(profile_key):
+    "generate a memcache key"
+    return "ProfileData.%s" % profile_key
+
+def load_pstats_from_memcache(profile_key):
+    "retrieve ppstats object"
+    mc_data = mc_client.get(cache_key_for_profile(profile_key))
+    if not mc_data:
+        return None
+
+    return ppstats.from_gz(mc_data)
+
+def get_stats_from_global_or_request(request_obj):
+    "get pstats for a key, or the global pstats"
+    key = request_obj.get('key', '')
+    if key:
+        return load_pstats_from_memcache(key)
+    else:
+        gp = get_global_profiler()
+        if not gp.has_profiler():
+            return None
+        return gp.get_pstats()
+
+class show_profile(webapp.RequestHandler):
+    def get(self):
+        ps = get_stats_from_global_or_request(self.request)
+        if not ps:
+            self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+            return
+
+        ps.set_output(self.response.out)
+        sort = self.request.get('sort', 'time')
+        ps.sort_stats(sort)
+        self.response.out.write("<body><html><pre>\n")
+        ps.print_stats(30)
+        self.response.out.write("</pre></html></body>")
+
+class download_profile_data(webapp.RequestHandler):
+    def get(self):
+        ps = get_stats_from_global_or_request(self.request)
+        if not ps:
+            self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+            return            
+
+        output = ps.dump_stats_pickle()
+
+        self.response.headers['Content-Type'] = 'application/octet-stream'
+
+        self.response.out.write(output)
+
+class show_profiler_status(webapp.RequestHandler):
+    def get(self):
+        gp = get_global_profiler()
+        if not gp.has_profiler:
+            self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+            return
+
+        self.response.out.write("<html><body>")
+        self.response.out.write("<b>Currently profiling:</b> %s<br>" % gp.is_profiling)
+        self.response.out.write("<b>Profile Key</b>: %s<br>" % gp.profile_key)
+        self.response.out.write("<b>Requests profiled so far:</b> %s<br>" % gp.requests_profiled)
+        self.response.out.write("<b>Requests to profile:</b> %s<br>" % gp.num_requests)
+        self.response.out.write("<b>Request regex:</b> %s<br>" % gp.request_regex)
+        self.response.out.write("</body></html>")
+
+class start_profiler(webapp.RequestHandler):
+    def get(self):
+        gp = get_global_profiler()
+        gp.start_profiling()
+        self.response.headers['Content-Type'] = "text/plain"
+        self.response.out.write("Started profiling (key: %s).\n" % gp.profile_key)
+        self.response.out.write("Retrieve saved results at <a href='/profiler/show?key=%(key)s'>/profiler/show?key=%(key)s).\n" % {'key':gp.profile_key})
+
+class stop_profiler(webapp.RequestHandler):
+    def get(self):
+        gp = get_global_profiler()
+        gp.stop_profiling()
+        self.request.out.write("Content-Type: text/plain\n\n")
+        self.request.out.write("done.")
+
+class save_profile_data(webapp.RequestHandler):
+    def get(self):
+        gp = get_global_profiler()
+        
+
+def _add_our_endpoints(application):
+    "insert our URLs into the application map"
+    url_mapping = [(regex.pattern, handler) for (regex, handler) in application._url_mapping]
+    return webapp.WSGIApplication(url_mapping, debug=True)
+
+#
+#  wrapper to for webapp applications
+#
+def run_wsgi_app(application):
+    "proxy webapp.util's call to profile when needed"
+    gp = get_global_profiler()
+    if gp.is_profiling and gp.should_profile_request():
+        return gp.runcall(google.appengine.ext.webapp.util.run_wsgi_app, *(application,))
+    else:
+        return google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+#
+# middleware for django applications
+#
+
+class ProfileMiddleware(object):
+    def __init__(self):
+        self.profiler = None
+
+    def process_request(self, request):
+        self.profiler = get_global_profiler()
+
+    def process_view(self, request, callback, callback_args, callback_kwargs):
+        if self.profiler.is_profiling:
+            return self.profiler.runcall(callback, request, *callback_args, **callback_kwargs)
+
+application = webapp.WSGIApplication(
+    [('/profiler/start', start_profiler),
+     ('/profiler/stop', stop_profiler),
+     ('/profiler/show', show_profile),
+     ('/profiler/download', download_profile_data),
+     ('/profiler/status', show_profiler_status),
+     ],
+    debug=True)
+
+    
+def main():
+    google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/ppstats.py	Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,107 @@
+import pstats
+import cPickle
+import zlib
+
+class PickleStats(object):
+    def __init__(self, stats):
+        self.stats = stats
+
+    def create_stats(self):
+        "pstats.Stats checks for the existence of this method to see if it can load from an object"
+        pass
+
+def from_file(fileobj):
+    "load ppstats from an open file object"
+    stats = cPickle.load(fileobj)
+    ps = PickleStats(stats)
+    return Stats(ps)
+
+def from_filename(filename):
+    "load ppstats from a filename"
+    fileobj = open(filename, 'rb')
+    return from_file(fileobj)
+
+def from_gz_file(fileobj):
+    "load ppstats from an open file containing gzipped data"
+    data = fileobj.read()
+    stats = cPickle.loads(zlib.decompress(data))
+    ps = PickleStats(stats)
+    return Stats(ps)
+
+def from_gz_filename(filename):
+    "load ppstats from a file containing gzipped data, by filename"
+    fileobj = open(filename, 'rb')
+    return from_gz_file(fileobj)
+
+def from_gz(gz_string):
+    "load ppstats from a string of gzipped data"
+    return Stats(PickleStats(cPickle.loads(zlib.decompress(gz_string))))
+
+def from_stats(stats):
+    "load ppstats from a stats object" 
+    return Stats(PickleStats(stats))
+
+def from_string(stats_string):
+    return Stats(PickleStats(cPickle.loads(stats_string)))
+
+class Stats(pstats.Stats):
+    def __init__(self, *args, **kwargs):
+        pstats.Stats.__init__(self, *args)
+        self.replace_dirs = {}
+
+    def set_output(self, stream):
+        "redirect output of print_stats to the file object <stream>"
+        self.stream = stream
+
+    def hide_directory(self, dirname, replacement=''):
+        "replace occurences of <dirname> in filenames with <replacement>"
+        self.replace_dirs[dirname] = replacement
+
+    def func_strip_path(self, func_name):
+        "take a filename, line, name tuple and mangle appropiately"
+        filename, line, name = func_name
+
+        for dirname in self.replace_dirs:
+            filename = filename.replace(dirname, self.replace_dirs[dirname])
+
+        return filename, line, name
+
+    def strip_dirs(self):
+        "strip irrelevant/redundant directories from filenames in profile data"
+        func_std_string = pstats.func_std_string
+
+        oldstats = self.stats
+        self.stats = newstats = {}
+        max_name_len = 0
+        for func, (cc, nc, tt, ct, callers) in oldstats.iteritems():
+            newfunc = self.func_strip_path(func)
+            if len(func_std_string(newfunc)) > max_name_len:
+                max_name_len = len(func_std_string(newfunc))
+            newcallers = {}
+            for func2, caller in callers.iteritems():
+                newcallers[self.func_strip_path(func2)] = caller
+
+            if newfunc in newstats:
+                newstats[newfunc] = add_func_stats(
+                                        newstats[newfunc],
+                                        (cc, nc, tt, ct, newcallers))
+            else:
+                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
+        old_top = self.top_level
+        self.top_level = new_top = {}
+        for func in old_top:
+            new_top[self.func_strip_path(func)] = None
+
+        self.max_name_len = max_name_len
+
+        self.fcn_list = None
+        self.all_callees = None
+        return self
+
+    def dump_stats_pickle(self):
+        "return a string containing picked stats information (dump_stats uses marshall)"
+        return cPickle.dumps(self.stats)
+
+    def load_stats_pickle(self, pickle_string):
+        "load from string returned by dump_stats_pickle"
+        return self.load_stats(PickleStats(cPickle.load(pickle_string)))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/app/profiler.py	Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,8 @@
+import gae_django
+import app_profiler.app_profiler
+
+def main():
+   app_profiler.app_profiler.main()
+
+if __name__ == '__main__':
+   main()
--- a/app/settings.py	Fri Aug 28 19:41:28 2009 +0530
+++ b/app/settings.py	Thu Aug 27 20:34:43 2009 -0700
@@ -86,6 +86,7 @@
 )
 
 MIDDLEWARE_CLASSES = (
+    'app_profiler.app_profiler.ProfileMiddleware',    
 #    'django.middleware.common.CommonMiddleware',
 #    'django.contrib.sessions.middleware.SessionMiddleware',
 #    'django.contrib.auth.middleware.AuthenticationMiddleware',
--- a/scripts/build.sh	Fri Aug 28 19:41:28 2009 +0530
+++ b/scripts/build.sh	Thu Aug 27 20:34:43 2009 -0700
@@ -10,9 +10,10 @@
 
 DEFAULT_APP_BUILD=../build
 DEFAULT_APP_FOLDER="../app"
-DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py shell.py urls.py gae_django.py"
+DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py
+shell.py urls.py gae_django.py profiler.py"
 DEFAULT_APP_DIRS="soc ghop gsoc feedparser python25src reflistprop jquery \
-    ranklist shell json htmlsanitizer taggable gviz"
+    ranklist shell json htmlsanitizer taggable gviz app_profiler"
 DEFAULT_ZIP_FILES="tiny_mce.zip"
 
 APP_BUILD=${APP_BUILD:-"${DEFAULT_APP_BUILD}"}