# HG changeset patch
# User Sverre Rabbelier
# Date 1251430483 25200
# Node ID 2a0a7e081caf2735ef437861f6825f6cb4f6ca5e
# Parent a7ed56911653949ac5e17301cdff383771aa7fb8
Profiling patch

diff -r a7ed56911653 -r 2a0a7e081caf app/app.yaml.template
--- a/app/app.yaml.template Fri Aug 28 19:41:28 2009 +0530
+++ b/app/app.yaml.template Thu Aug 27 20:34:43 2009 -0700
@@ -55,6 +55,9 @@
   static_dir: shell/static
   expiration: 1d
 
+- url: /profiler/.*
+  script: app_profiler/app_profiler.py
+
 - url: /.*
   script: main.py
 
diff -r a7ed56911653 -r 2a0a7e081caf app/app_profiler/__init__.py
diff -r a7ed56911653 -r 2a0a7e081caf app/app_profiler/app_profiler.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/app_profiler.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,258 @@
+import cProfile
+import ppstats
+
+from google.appengine.ext import webapp
+from google.appengine.api import memcache
+import google.appengine.ext.webapp.util
+
+import os.path
+import logging
+import re
+import random
+import string
+import zlib
+
+mc_client = memcache.Client()
+
+alphanumeric = string.letters + string.digits
+
+global_profiler = None
+
+class GAEProfiler(object):
+    "wraps a cProfile.Profile and counts the calls run through it"
+    # save interval for the throttling that is disabled in runcall()
+    _save_every = 10
+
+    def __init__(self):
+        self.is_profiling = False
+        self._profiler = None
+        self.num_requests = 0
+        self.requests_profiled = 0
+        self.request_regex = None
+        # random 4-character tag; part of the memcache key for saved stats
+        self.profile_key = ''.join([random.choice(alphanumeric) for x in range(4)])
+
+    def start_profiling(self, request_regex=None, num_requests=0):
+        "start profiling with this object, setting # of requests and filter"
+        if self.is_profiling:
+            return
+
+        self.is_profiling = True
+        if self._profiler is None:
+            self._profiler = cProfile.Profile()
+        self.num_requests = num_requests
+        if request_regex:
+            self.request_regex = re.compile(request_regex)
+
+    def stop_profiling(self):
+        "stop profiling; the data collected so far is kept"
+        self.is_profiling = False
+
+    def resume_profiling(self):
+        "turn profiling back on without resetting anything"
+        self.is_profiling = True
+
+    def has_profiler(self):
+        "return True once a cProfile.Profile has been created"
+        return self._profiler is not None
+
+    def get_pstats(self):
+        "return a ppstats object from current profile data"
+        gae_base_dir = '/'.join(webapp.__file__.split('/')[:-5])
+        sys_base_dir = '/'.join(logging.__file__.split('/')[:-2])
+
+        stats = ppstats.Stats(self._profiler)
+        stats.hide_directory(gae_base_dir, 'GAEHome')
+        stats.hide_directory(sys_base_dir, 'SysHome')
+        stats.strip_dirs()
+        return stats
+
+    def runcall(self, func, *args, **kwargs):
+        "profile one call, incrementing requests_profiled and saving stats"
+        self.requests_profiled += 1
+        if self._profiler:
+            ret = self._profiler.runcall(func, *args, **kwargs)
+        else:
+            ret = func(*args, **kwargs)
+
+        # NOTE: stats are saved after every call; saving only every
+        # _save_every calls (or on the last request) was disabled
+        self.save_pstats_to_memcache()
+        return ret
+
+    def should_profile_request(self):
+        "check for # of requests profiled and that SCRIPT_NAME matches regex"
+        env = dict(os.environ)
+        script_name = env.get('SCRIPT_NAME', '')
+        logging.info(script_name)
+
+        if self.num_requests and self.requests_profiled >= self.num_requests:
+            return False
+
+        if self.request_regex and not self.request_regex.search(script_name):
+            return False
+
+        return True
+
+    def save_pstats_to_memcache(self):
+        "save stats from profiler object to memcache"
+        ps = self.get_pstats()
+        output = ps.dump_stats_pickle()
+        compressed_data = zlib.compress(output, 3)
+        cache_key = cache_key_for_profile(self.profile_key)
+        mc_client.set(cache_key, compressed_data)
+        logging.info("Saved pstats to memcache with key %s" % cache_key)
+
+
+def get_global_profiler():
+    "return the module-wide GAEProfiler, creating it on first use"
+    global global_profiler
+    if not global_profiler:
+        global_profiler = GAEProfiler()
+
+    return global_profiler
+
+def cache_key_for_profile(profile_key):
+    "generate a memcache key"
+    return "ProfileData.%s" % profile_key
+
+def load_pstats_from_memcache(profile_key):
+    "retrieve ppstats object"
+    mc_data = mc_client.get(cache_key_for_profile(profile_key))
+    if not mc_data:
+        return None
+
+    # NOTE(review): from_gz unpickles this blob and profile_key can come
+    # from the request; only trust data this module stored itself
+    return ppstats.from_gz(mc_data)
+
+def get_stats_from_global_or_request(request_obj):
+    "get pstats for a key, or the global pstats"
+    key = request_obj.get('key', '')
+    if key:
+        return load_pstats_from_memcache(key)
+    else:
+        gp = get_global_profiler()
+        if not gp.has_profiler():
+            return None
+        return gp.get_pstats()
+
+class show_profile(webapp.RequestHandler):
+    "print the first 30 entries of the selected stats, sorted by ?sort="
+    def get(self):
+        ps = get_stats_from_global_or_request(self.request)
+        if not ps:
+            self.response.out.write("<html><body>No profiler.</body></html>")
+            return
+
+        ps.set_output(self.response.out)
+        sort = self.request.get('sort', 'time')
+        ps.sort_stats(sort)
+        self.response.out.write("<pre>\n")
+        ps.print_stats(30)
+        self.response.out.write("</pre>")
+
+class download_profile_data(webapp.RequestHandler):
+    "send the pickled stats of the selected profile as a binary download"
+    def get(self):
+        ps = get_stats_from_global_or_request(self.request)
+        if not ps:
+            self.response.out.write("<html><body>No profiler.</body></html>")
+            return
+
+        output = ps.dump_stats_pickle()
+
+        self.response.headers['Content-Type'] = 'application/octet-stream'
+
+        self.response.out.write(output)
+
+class show_profiler_status(webapp.RequestHandler):
+    "show the state of the global profiler"
+    def get(self):
+        gp = get_global_profiler()
+        # has_profiler is a method: it must be called, the bare attribute
+        # is always true and this branch could never run
+        if not gp.has_profiler():
+            self.response.out.write("<html><body>No profiler.</body></html>")
+            return
+
+        self.response.out.write("<html><body>")
+        self.response.out.write("Currently profiling: %s<br>" % gp.is_profiling)
+        self.response.out.write("Profile Key: %s<br>" % gp.profile_key)
+        self.response.out.write("Requests profiled so far: %s<br>" % gp.requests_profiled)
+        self.response.out.write("Requests to profile: %s<br>" % gp.num_requests)
+        self.response.out.write("Request regex: %s<br>" % gp.request_regex)
+        self.response.out.write("</body></html>")
+
+class start_profiler(webapp.RequestHandler):
+    "start the global profiler and report the key of its saved stats"
+    def get(self):
+        gp = get_global_profiler()
+        gp.start_profiling()
+        self.response.headers['Content-Type'] = "text/plain"
+        self.response.out.write("Started profiling (key: %s).\n" % gp.profile_key)
+        self.response.out.write("Retrieve saved results at /profiler/show?key=%(key)s.\n" % {'key':gp.profile_key})
+
+class stop_profiler(webapp.RequestHandler):
+    "stop the global profiler"
+    def get(self):
+        gp = get_global_profiler()
+        gp.stop_profiling()
+        # output goes through self.response (as in start_profiler), with
+        # the content type set as a header instead of written to the body
+        self.response.headers['Content-Type'] = "text/plain"
+        self.response.out.write("done.")
+
+class save_profile_data(webapp.RequestHandler):
+    "placeholder: fetches the global profiler but does not save anything"
+    def get(self):
+        gp = get_global_profiler()
+
+
+def _add_our_endpoints(application):
+    "rebuild the application from its URL map; adds no profiler URLs yet"
+    url_mapping = [(regex.pattern, handler) for (regex, handler) in application._url_mapping]
+    return webapp.WSGIApplication(url_mapping, debug=True)
+
+#
+# wrapper for webapp applications
+#
+def run_wsgi_app(application):
+    "proxy webapp.util's call to profile when needed"
+    gp = get_global_profiler()
+    if gp.is_profiling and gp.should_profile_request():
+        return gp.runcall(google.appengine.ext.webapp.util.run_wsgi_app, *(application,))
+    else:
+        return google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+#
+# middleware for django applications
+#
+
+class ProfileMiddleware(object):
+    "django middleware that runs views under the global profiler"
+    def __init__(self):
+        self.profiler = None
+
+    def process_request(self, request):
+        self.profiler = get_global_profiler()
+
+    def process_view(self, request, callback, callback_args, callback_kwargs):
+        if self.profiler.is_profiling:
+            return self.profiler.runcall(callback, request, *callback_args, **callback_kwargs)
+
+application = webapp.WSGIApplication(
+    [('/profiler/start', start_profiler),
+     ('/profiler/stop', stop_profiler),
+     ('/profiler/show', show_profile),
+     ('/profiler/download', download_profile_data),
+     ('/profiler/status', show_profiler_status),
+     ],
+    debug=True)
+
+
+def main():
+    google.appengine.ext.webapp.util.run_wsgi_app(application)
+
+if __name__ == '__main__':
+    main()
diff -r a7ed56911653 -r 2a0a7e081caf app/app_profiler/ppstats.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/app_profiler/ppstats.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,120 @@
+import pstats
+import cPickle
+import zlib
+
+class PickleStats(object):
+    "adapter that lets pstats.Stats load an already-unpickled stats dict"
+    def __init__(self, stats):
+        self.stats = stats
+
+    def create_stats(self):
+        "pstats.Stats checks for the existence of this method to see if it can load from an object"
+        pass
+
+def from_file(fileobj):
+    "load ppstats from an open file object"
+    stats = cPickle.load(fileobj)
+    ps = PickleStats(stats)
+    return Stats(ps)
+
+def from_filename(filename):
+    "load ppstats from a filename"
+    fileobj = open(filename, 'rb')
+    try:
+        return from_file(fileobj)
+    finally:
+        # close the handle even when unpickling fails
+        fileobj.close()
+
+def from_gz_file(fileobj):
+    "load ppstats from an open file containing zlib-compressed data"
+    data = fileobj.read()
+    stats = cPickle.loads(zlib.decompress(data))
+    ps = PickleStats(stats)
+    return Stats(ps)
+
+def from_gz_filename(filename):
+    "load ppstats from a file containing zlib-compressed data, by filename"
+    fileobj = open(filename, 'rb')
+    try:
+        return from_gz_file(fileobj)
+    finally:
+        fileobj.close()
+
+def from_gz(gz_string):
+    "load ppstats from a string of zlib-compressed data"
+    return Stats(PickleStats(cPickle.loads(zlib.decompress(gz_string))))
+
+def from_stats(stats):
+    "load ppstats from a stats object"
+    return Stats(PickleStats(stats))
+
+def from_string(stats_string):
+    "load ppstats from a string of pickled stats"
+    return Stats(PickleStats(cPickle.loads(stats_string)))
+
+class Stats(pstats.Stats):
+    "pstats.Stats that can rewrite directory names and pickle its data"
+    def __init__(self, *args, **kwargs):
+        # pass keyword arguments on instead of silently dropping them
+        pstats.Stats.__init__(self, *args, **kwargs)
+        self.replace_dirs = {}
+
+    def set_output(self, stream):
+        "redirect output of print_stats to the file object <stream>"
+        self.stream = stream
+
+    def hide_directory(self, dirname, replacement=''):
+        "replace occurrences of <dirname> in filenames with <replacement>"
+        self.replace_dirs[dirname] = replacement
+
+    def func_strip_path(self, func_name):
+        "take a (filename, line, name) tuple and mangle appropriately"
+        filename, line, name = func_name
+
+        for dirname in self.replace_dirs:
+            filename = filename.replace(dirname, self.replace_dirs[dirname])
+
+        return filename, line, name
+
+    def strip_dirs(self):
+        "strip irrelevant/redundant directories from filenames in profile data"
+        func_std_string = pstats.func_std_string
+
+        oldstats = self.stats
+        self.stats = newstats = {}
+        max_name_len = 0
+        for func, (cc, nc, tt, ct, callers) in oldstats.iteritems():
+            newfunc = self.func_strip_path(func)
+            if len(func_std_string(newfunc)) > max_name_len:
+                max_name_len = len(func_std_string(newfunc))
+            newcallers = {}
+            for func2, caller in callers.iteritems():
+                newcallers[self.func_strip_path(func2)] = caller
+
+            if newfunc in newstats:
+                # add_func_stats is defined in pstats, not in this module
+                newstats[newfunc] = pstats.add_func_stats(
+                    newstats[newfunc],
+                    (cc, nc, tt, ct, newcallers))
+            else:
+                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
+        old_top = self.top_level
+        self.top_level = new_top = {}
+        for func in old_top:
+            new_top[self.func_strip_path(func)] = None
+
+        self.max_name_len = max_name_len
+
+        self.fcn_list = None
+        self.all_callees = None
+        return self
+
+    def dump_stats_pickle(self):
+        "return a string containing pickled stats information (dump_stats uses marshal)"
+        return cPickle.dumps(self.stats)
+
+    def load_stats_pickle(self, pickle_string):
+        "load from string returned by dump_stats_pickle"
+        # loads() takes the string itself; load() needs a file object
+        return self.load_stats(PickleStats(cPickle.loads(pickle_string)))
diff -r a7ed56911653 -r 2a0a7e081caf app/profiler.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/profiler.py Thu Aug 27 20:34:43 2009 -0700
@@ -0,0 +1,8 @@
+import gae_django
+import app_profiler.app_profiler
+
+def main():
+    app_profiler.app_profiler.main()
+
+if __name__ == '__main__':
+    main()
diff -r a7ed56911653 -r 2a0a7e081caf app/settings.py
--- a/app/settings.py Fri Aug 28 19:41:28 2009 +0530
+++ b/app/settings.py Thu Aug 27 20:34:43 2009 -0700
@@ -86,6 +86,7 @@
 )
 
 MIDDLEWARE_CLASSES = (
+    'app_profiler.app_profiler.ProfileMiddleware',
 #    'django.middleware.common.CommonMiddleware',
 #    'django.contrib.sessions.middleware.SessionMiddleware',
 #    'django.contrib.auth.middleware.AuthenticationMiddleware',
diff -r a7ed56911653 -r 2a0a7e081caf scripts/build.sh
--- a/scripts/build.sh Fri Aug 28 19:41:28 2009 +0530
+++ b/scripts/build.sh Thu Aug 27 20:34:43 2009 -0700
@@ -10,9 +10,10 @@
 
 DEFAULT_APP_BUILD=../build
 DEFAULT_APP_FOLDER="../app"
-DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py shell.py urls.py gae_django.py"
+DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml queue.yaml main.py settings.py
+shell.py urls.py gae_django.py profiler.py"
 DEFAULT_APP_DIRS="soc ghop gsoc feedparser python25src reflistprop jquery \
-    ranklist shell json htmlsanitizer taggable gviz"
+    ranklist shell json htmlsanitizer taggable gviz app_profiler"
 DEFAULT_ZIP_FILES="tiny_mce.zip"
 
 APP_BUILD=${APP_BUILD:-"${DEFAULT_APP_BUILD}"}