--- a/app/app.yaml.template Fri Aug 28 19:41:28 2009 +0530
+++ b/app/app.yaml.template Thu Aug 27 20:34:43 2009 -0700
@@ -55,6 +55,9 @@
static_dir: shell/static
expiration: 1d
+- url: /profiler/.*
+ script: app_profiler/app_profiler.py
+
- url: /.*
script: main.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/app_profiler.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,240 @@
+import cProfile
+import ppstats
+
+from google.appengine.ext import webapp
+from google.appengine.api import memcache
+import google.appengine.ext.webapp.util
+
+import os.path
+import logging
+import re
+import random
+import string
+import zlib
+
+mc_client = memcache.Client()
+
+alphanumeric = string.letters + string.digits
+
+global_profiler = None
+
+class GAEProfiler(object):
+ _save_every = 10
+
+ def __init__(self):
+ self.is_profiling = False
+ self._profiler = None
+ self.num_requests = 0
+ self.requests_profiled = 0
+ self.request_regex = None
+ self.profile_key = ''.join([random.choice(alphanumeric) for x in range(4)])
+
+ def start_profiling(self, request_regex=None, num_requests=0):
+ "start profiling with this object, setting # of requests and filter"
+ if self.is_profiling:
+ return
+
+ self.is_profiling = True
+ if self._profiler is None:
+ self._profiler = cProfile.Profile()
+ self.num_requests = num_requests
+ if request_regex:
+ self.request_regex = re.compile(request_regex)
+
+ def stop_profiling(self):
+ self.is_profiling = False
+
+ def resume_profiling(self):
+ self.is_profiling = True
+
+ def has_profiler(self):
+ return self._profiler is not None
+
+ def get_pstats(self):
+ "return a ppstats object from current profile data"
+ gae_base_dir = '/'.join(webapp.__file__.split('/')[:-5])
+ sys_base_dir = '/'.join(logging.__file__.split('/')[:-2])
+
+ stats = ppstats.Stats(self._profiler)
+ stats.hide_directory(gae_base_dir, 'GAEHome')
+ stats.hide_directory(sys_base_dir, 'SysHome')
+ stats.strip_dirs()
+ return stats
+
+ def runcall(self, func, *args, **kwargs):
+ "profile one call, incrementing requests_profiled and maybe saving stats"
+ self.requests_profiled += 1
+ if self._profiler:
+ ret = self._profiler.runcall(func, *args, **kwargs)
+ else:
+ ret = func(*args, **kwargs)
+
+# if (self.requests_profiled % self._save_every) == 0 or \
+# self.requests_profiled == self.num_requests:
+# self.save_pstats_to_memcache()
+ self.save_pstats_to_memcache()
+ return ret
+
+ def should_profile_request(self):
+ "check for # of requests profiled and that SCRIPT_NAME matches regex"
+ env = dict(os.environ)
+ script_name = env.get('SCRIPT_NAME', '')
+ logging.info(script_name)
+
+ if self.num_requests and self.requests_profiled >= self.num_requests:
+ return False
+
+ if self.request_regex and not self.request_regex.search(script_name):
+ return False
+
+ return True
+
+ def save_pstats_to_memcache(self):
+ "save stats from profiler object to memcache"
+ ps = self.get_pstats()
+ output = ps.dump_stats_pickle()
+ compressed_data = zlib.compress(output, 3)
+ cache_key = cache_key_for_profile(self.profile_key)
+ mc_client.set(cache_key, compressed_data)
+ logging.info("Saved pstats to memcache with key %s" % cache_key)
+
+
+
+def get_global_profiler():
+ global global_profiler
+ if not global_profiler:
+ global_profiler = GAEProfiler()
+
+ return global_profiler
+
+def cache_key_for_profile(profile_key):
+ "generate a memcache key"
+ return "ProfileData.%s" % profile_key
+
+def load_pstats_from_memcache(profile_key):
+ "retrieve ppstats object"
+ mc_data = mc_client.get(cache_key_for_profile(profile_key))
+ if not mc_data:
+ return None
+
+ return ppstats.from_gz(mc_data)
+
+def get_stats_from_global_or_request(request_obj):
+ "get pstats for a key, or the global pstats"
+ key = request_obj.get('key', '')
+ if key:
+ return load_pstats_from_memcache(key)
+ else:
+ gp = get_global_profiler()
+ if not gp.has_profiler():
+ return None
+ return gp.get_pstats()
+
+class show_profile(webapp.RequestHandler):
+ def get(self):
+ ps = get_stats_from_global_or_request(self.request)
+ if not ps:
+ self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+ return
+
+ ps.set_output(self.response.out)
+ sort = self.request.get('sort', 'time')
+ ps.sort_stats(sort)
+ self.response.out.write("<body><html><pre>\n")
+ ps.print_stats(30)
+ self.response.out.write("</pre></html></body>")
+
+class download_profile_data(webapp.RequestHandler):
+ def get(self):
+ ps = get_stats_from_global_or_request(self.request)
+ if not ps:
+ self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+ return
+
+ output = ps.dump_stats_pickle()
+
+ self.response.headers['Content-Type'] = 'application/octet-stream'
+
+ self.response.out.write(output)
+
+class show_profiler_status(webapp.RequestHandler):
+ def get(self):
+ gp = get_global_profiler()
+ if not gp.has_profiler:
+ self.response.out.write("<body><html><h3>No profiler.</h3><html></body>")
+ return
+
+ self.response.out.write("<html><body>")
+ self.response.out.write("<b>Currently profiling:</b> %s<br>" % gp.is_profiling)
+ self.response.out.write("<b>Profile Key</b>: %s<br>" % gp.profile_key)
+ self.response.out.write("<b>Requests profiled so far:</b> %s<br>" % gp.requests_profiled)
+ self.response.out.write("<b>Requests to profile:</b> %s<br>" % gp.num_requests)
+ self.response.out.write("<b>Request regex:</b> %s<br>" % gp.request_regex)
+ self.response.out.write("</body></html>")
+
+class start_profiler(webapp.RequestHandler):
+ def get(self):
+ gp = get_global_profiler()
+ gp.start_profiling()
+ self.response.headers['Content-Type'] = "text/plain"
+ self.response.out.write("Started profiling (key: %s).\n" % gp.profile_key)
+ self.response.out.write("Retrieve saved results at <a href='/profiler/show?key=%(key)s'>/profiler/show?key=%(key)s).\n" % {'key':gp.profile_key})
+
+class stop_profiler(webapp.RequestHandler):
+ def get(self):
+ gp = get_global_profiler()
+ gp.stop_profiling()
+ self.request.out.write("Content-Type: text/plain\n\n")
+ self.request.out.write("done.")
+
+class save_profile_data(webapp.RequestHandler):
+ def get(self):
+ gp = get_global_profiler()
+
+
+def _add_our_endpoints(application):
+ "insert our URLs into the application map"
+ url_mapping = [(regex.pattern, handler) for (regex, handler) in application._url_mapping]
+ return webapp.WSGIApplication(url_mapping, debug=True)
+
+#
+# wrapper to for webapp applications
+#
+def run_wsgi_app(application):
+ "proxy webapp.util's call to profile when needed"
+ gp = get_global_profiler()
+ if gp.is_profiling and gp.should_profile_request():
+ return gp.runcall(google.appengine.ext.webapp.util.run_wsgi_app, *(application,))
+ else:
+ return google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+#
+# middleware for django applications
+#
+
+class ProfileMiddleware(object):
+ def __init__(self):
+ self.profiler = None
+
+ def process_request(self, request):
+ self.profiler = get_global_profiler()
+
+ def process_view(self, request, callback, callback_args, callback_kwargs):
+ if self.profiler.is_profiling:
+ return self.profiler.runcall(callback, request, *callback_args, **callback_kwargs)
+
+application = webapp.WSGIApplication(
+ [('/profiler/start', start_profiler),
+ ('/profiler/stop', stop_profiler),
+ ('/profiler/show', show_profile),
+ ('/profiler/download', download_profile_data),
+ ('/profiler/status', show_profiler_status),
+ ],
+ debug=True)
+
+
+def main():
+ google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+if __name__ == '__main__':
+ main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/ppstats.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,107 @@
+import pstats
+import cPickle
+import zlib
+
+class PickleStats(object):
+ def __init__(self, stats):
+ self.stats = stats
+
+ def create_stats(self):
+ "pstats.Stats checks for the existence of this method to see if it can load from an object"
+ pass
+
+def from_file(fileobj):
+ "load ppstats from an open file object"
+ stats = cPickle.load(fileobj)
+ ps = PickleStats(stats)
+ return Stats(ps)
+
+def from_filename(filename):
+ "load ppstats from a filename"
+ fileobj = open(filename, 'rb')
+ return from_file(fileobj)
+
+def from_gz_file(fileobj):
+ "load ppstats from an open file containing gzipped data"
+ data = fileobj.read()
+ stats = cPickle.loads(zlib.decompress(data))
+ ps = PickleStats(stats)
+ return Stats(ps)
+
+def from_gz_filename(filename):
+ "load ppstats from a file containing gzipped data, by filename"
+ fileobj = open(filename, 'rb')
+ return from_gz_file(fileobj)
+
+def from_gz(gz_string):
+ "load ppstats from a string of gzipped data"
+ return Stats(PickleStats(cPickle.loads(zlib.decompress(gz_string))))
+
+def from_stats(stats):
+ "load ppstats from a stats object"
+ return Stats(PickleStats(stats))
+
+def from_string(stats_string):
+ return Stats(PickleStats(cPickle.loads(stats_string)))
+
+class Stats(pstats.Stats):
+ def __init__(self, *args, **kwargs):
+ pstats.Stats.__init__(self, *args)
+ self.replace_dirs = {}
+
+ def set_output(self, stream):
+ "redirect output of print_stats to the file object <stream>"
+ self.stream = stream
+
+ def hide_directory(self, dirname, replacement=''):
+ "replace occurences of <dirname> in filenames with <replacement>"
+ self.replace_dirs[dirname] = replacement
+
+ def func_strip_path(self, func_name):
+ "take a filename, line, name tuple and mangle appropiately"
+ filename, line, name = func_name
+
+ for dirname in self.replace_dirs:
+ filename = filename.replace(dirname, self.replace_dirs[dirname])
+
+ return filename, line, name
+
+ def strip_dirs(self):
+ "strip irrelevant/redundant directories from filenames in profile data"
+ func_std_string = pstats.func_std_string
+
+ oldstats = self.stats
+ self.stats = newstats = {}
+ max_name_len = 0
+ for func, (cc, nc, tt, ct, callers) in oldstats.iteritems():
+ newfunc = self.func_strip_path(func)
+ if len(func_std_string(newfunc)) > max_name_len:
+ max_name_len = len(func_std_string(newfunc))
+ newcallers = {}
+ for func2, caller in callers.iteritems():
+ newcallers[self.func_strip_path(func2)] = caller
+
+ if newfunc in newstats:
+ newstats[newfunc] = add_func_stats(
+ newstats[newfunc],
+ (cc, nc, tt, ct, newcallers))
+ else:
+ newstats[newfunc] = (cc, nc, tt, ct, newcallers)
+ old_top = self.top_level
+ self.top_level = new_top = {}
+ for func in old_top:
+ new_top[self.func_strip_path(func)] = None
+
+ self.max_name_len = max_name_len
+
+ self.fcn_list = None
+ self.all_callees = None
+ return self
+
+ def dump_stats_pickle(self):
+ "return a string containing picked stats information (dump_stats uses marshall)"
+ return cPickle.dumps(self.stats)
+
+ def load_stats_pickle(self, pickle_string):
+ "load from string returned by dump_stats_pickle"
+ return self.load_stats(PickleStats(cPickle.load(pickle_string)))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/profiler.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,8 @@
+import gae_django
+import app_profiler.app_profiler
+
+def main():
+ app_profiler.app_profiler.main()
+
+if __name__ == '__main__':
+ main()
--- a/app/settings.py Fri Aug 28 19:41:28 2009 +0530
+++ b/app/settings.py Thu Aug 27 20:34:43 2009 -0700
@@ -86,6 +86,7 @@
)
MIDDLEWARE_CLASSES = (
+ 'app_profiler.app_profiler.ProfileMiddleware',
# 'django.middleware.common.CommonMiddleware',
# 'django.contrib.sessions.middleware.SessionMiddleware',
# 'django.contrib.auth.middleware.AuthenticationMiddleware',
--- a/scripts/build.sh Fri Aug 28 19:41:28 2009 +0530
+++ b/scripts/build.sh Thu Aug 27 20:34:43 2009 -0700
@@ -10,9 +10,10 @@
DEFAULT_APP_BUILD=../build
DEFAULT_APP_FOLDER="../app"
-DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py shell.py urls.py gae_django.py"
+DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py
+shell.py urls.py gae_django.py profiler.py"
DEFAULT_APP_DIRS="soc ghop gsoc feedparser python25src reflistprop jquery \
- ranklist shell json htmlsanitizer taggable gviz"
+ ranklist shell json htmlsanitizer taggable gviz app_profiler"
DEFAULT_ZIP_FILES="tiny_mce.zip"
APP_BUILD=${APP_BUILD:-"${DEFAULT_APP_BUILD}"}