app/django/contrib/gis/utils/geoip.py
changeset 323 ff1a9aa48cfd
equal deleted inserted replaced
322:6641e941ef1e 323:ff1a9aa48cfd
       
     1 """
       
     2  This module houses the GeoIP object, a ctypes wrapper for the MaxMind GeoIP(R)
       
     3  C API (http://www.maxmind.com/app/c).  This is an alternative to the GPL
       
     4  licensed Python GeoIP interface provided by MaxMind.
       
     5 
       
     6  GeoIP(R) is a registered trademark of MaxMind, LLC of Boston, Massachusetts.
       
     7 
       
     8  For IP-based geolocation, this module requires the GeoLite Country and City
       
     9  datasets, in binary format (CSV will not work!).  The datasets may be 
       
    10  downloaded from MaxMind at http://www.maxmind.com/download/geoip/database/.
       
    11  Grab GeoIP.dat.gz and GeoLiteCity.dat.gz, and unzip them in the directory
       
    12  corresponding to settings.GEOIP_PATH.  See the GeoIP docstring and examples
       
    13  below for more details.
       
    14 
       
    15  TODO: Verify compatibility with Windows.
       
    16 
       
    17  Example:
       
    18 
       
    19  >>> from django.contrib.gis.utils import GeoIP
       
    20  >>> g = GeoIP()
       
    21  >>> g.country('google.com')
       
    22  {'country_code': 'US', 'country_name': 'United States'}
       
    23  >>> g.city('72.14.207.99')
       
    24  {'area_code': 650,
       
    25  'city': 'Mountain View',
       
    26  'country_code': 'US',
       
    27  'country_code3': 'USA',
       
    28  'country_name': 'United States',
       
    29  'dma_code': 807,
       
    30  'latitude': 37.419200897216797,
       
    31  'longitude': -122.05740356445312,
       
    32  'postal_code': '94043',
       
    33  'region': 'CA'}
       
    34  >>> g.lat_lon('salon.com')
       
    35  (37.789798736572266, -122.39420318603516)
       
    36  >>> g.lon_lat('uh.edu')
       
    37  (-95.415199279785156, 29.77549934387207) 
       
    38  >>> g.geos('24.124.1.80').wkt
       
    39  'POINT (-95.2087020874023438 39.0392990112304688)'
       
    40 """
       
    41 import os, re
       
    42 from ctypes import c_char_p, c_float, c_int, Structure, CDLL, POINTER
       
    43 from ctypes.util import find_library
       
    44 from django.conf import settings
       
    45 if not settings._target: settings.configure()
       
    46 
       
    47 # Creating the settings dictionary with any settings, if needed.
       
    48 GEOIP_SETTINGS = dict((key, getattr(settings, key)) 
       
    49                       for key in ('GEOIP_PATH', 'GEOIP_LIBRARY_PATH', 'GEOIP_COUNTRY', 'GEOIP_CITY')
       
    50                       if hasattr(settings, key))
       
    51 lib_path = GEOIP_SETTINGS.get('GEOIP_LIBRARY_PATH', None)
       
    52 
       
    53 # GeoIP Exception class.
       
    54 class GeoIPException(Exception): pass
       
    55 
       
    56 # The shared library for the GeoIP C API.  May be downloaded
       
    57 #  from http://www.maxmind.com/download/geoip/api/c/
       
    58 if lib_path:
       
    59     lib_name = None
       
    60 else:
       
    61     # TODO: Is this really the library name for Windows?
       
    62     lib_name = 'GeoIP'
       
    63 
       
    64 # Getting the path to the GeoIP library.
       
    65 if lib_name: lib_path = find_library(lib_name)
       
    66 if lib_path is None: raise GeoIPException('Could not find the GeoIP library (tried "%s"). '
       
    67                                           'Try setting GEOIP_LIBRARY_PATH in your settings.' % lib_name)
       
    68 lgeoip = CDLL(lib_path)
       
    69 
       
    70 # Regular expressions for recognizing IP addresses and the GeoIP
       
    71 # free database editions.
       
    72 ipregex = re.compile(r'^(?P<w>\d\d?\d?)\.(?P<x>\d\d?\d?)\.(?P<y>\d\d?\d?)\.(?P<z>\d\d?\d?)$')
       
    73 free_regex = re.compile(r'^GEO-\d{3}FREE')
       
    74 lite_regex = re.compile(r'^GEO-\d{3}LITE')
       
    75 
       
    76 #### GeoIP C Structure definitions ####
       
    77 class GeoIPRecord(Structure):
       
    78     _fields_ = [('country_code', c_char_p),
       
    79                 ('country_code3', c_char_p),
       
    80                 ('country_name', c_char_p),
       
    81                 ('region', c_char_p),
       
    82                 ('city', c_char_p),
       
    83                 ('postal_code', c_char_p),
       
    84                 ('latitude', c_float),
       
    85                 ('longitude', c_float),
       
    86                 ('dma_code', c_int),
       
    87                 ('area_code', c_int),
       
    88                 ]
       
    89 class GeoIPTag(Structure): pass
       
    90 
       
    91 #### ctypes function prototypes ####
       
    92 RECTYPE = POINTER(GeoIPRecord)
       
    93 DBTYPE = POINTER(GeoIPTag)
       
    94 
       
    95 # For retrieving records by name or address.
       
    96 def record_output(func):
       
    97     func.restype = RECTYPE
       
    98     return func
       
    99 rec_by_addr = record_output(lgeoip.GeoIP_record_by_addr)
       
   100 rec_by_name = record_output(lgeoip.GeoIP_record_by_name)
       
   101 
       
   102 # For opening up GeoIP databases.
       
   103 geoip_open = lgeoip.GeoIP_open
       
   104 geoip_open.restype = DBTYPE
       
   105 
       
   106 # String output routines.
       
   107 def string_output(func):
       
   108     func.restype = c_char_p
       
   109     return func
       
   110 geoip_dbinfo = string_output(lgeoip.GeoIP_database_info)
       
   111 cntry_code_by_addr = string_output(lgeoip.GeoIP_country_code_by_addr)
       
   112 cntry_code_by_name = string_output(lgeoip.GeoIP_country_code_by_name)
       
   113 cntry_name_by_addr = string_output(lgeoip.GeoIP_country_name_by_addr)
       
   114 cntry_name_by_name = string_output(lgeoip.GeoIP_country_name_by_name)
       
   115 
       
   116 #### GeoIP class ####
       
   117 class GeoIP(object):
       
   118     # The flags for GeoIP memory caching.
       
   119     # GEOIP_STANDARD - read database from filesystem, uses least memory.
       
   120     #
       
   121     # GEOIP_MEMORY_CACHE - load database into memory, faster performance
       
   122     #        but uses more memory
       
   123     #
       
   124     # GEOIP_CHECK_CACHE - check for updated database.  If database has been updated,
       
   125     #        reload filehandle and/or memory cache.
       
   126     #
       
   127     # GEOIP_INDEX_CACHE - just cache
       
   128     #        the most frequently accessed index portion of the database, resulting
       
   129     #        in faster lookups than GEOIP_STANDARD, but less memory usage than
       
   130     #        GEOIP_MEMORY_CACHE - useful for larger databases such as
       
   131     #        GeoIP Organization and GeoIP City.  Note, for GeoIP Country, Region
       
   132     #        and Netspeed databases, GEOIP_INDEX_CACHE is equivalent to GEOIP_MEMORY_CACHE
       
   133     #
       
   134     GEOIP_STANDARD = 0
       
   135     GEOIP_MEMORY_CACHE = 1
       
   136     GEOIP_CHECK_CACHE = 2
       
   137     GEOIP_INDEX_CACHE = 4
       
   138     cache_options = dict((opt, None) for opt in (0, 1, 2, 4))
       
   139 
       
   140     def __init__(self, path=None, cache=0, country=None, city=None):
       
   141         """
       
   142         Initializes the GeoIP object, no parameters are required to use default
       
   143         settings.  Keyword arguments may be passed in to customize the locations
       
   144         of the GeoIP data sets.
       
   145 
       
   146         * path: Base directory to where GeoIP data is located or the full path
       
   147             to where the city or country data files (*.dat) are located.
       
   148             Assumes that both the city and country data sets are located in
       
   149             this directory; overrides the GEOIP_PATH settings attribute.
       
   150 
       
   151         * cache: The cache settings when opening up the GeoIP datasets,
       
   152             and may be an integer in (0, 1, 2, 4) corresponding to
       
   153             the GEOIP_STANDARD, GEOIP_MEMORY_CACHE, GEOIP_CHECK_CACHE,
       
   154             and GEOIP_INDEX_CACHE `GeoIPOptions` C API settings,
       
   155             respectively.  Defaults to 0, meaning that the data is read
       
   156             from the disk.
       
   157 
       
   158         * country: The name of the GeoIP country data file.  Defaults to
       
   159             'GeoIP.dat'; overrides the GEOIP_COUNTRY settings attribute.
       
   160 
       
   161         * city: The name of the GeoIP city data file.  Defaults to
       
   162             'GeoLiteCity.dat'; overrides the GEOIP_CITY settings attribute.
       
   163         """
       
   164         # Checking the given cache option.
       
   165         if cache in self.cache_options:
       
   166             self._cache = self.cache_options[cache]
       
   167         else:
       
   168             raise GeoIPException('Invalid caching option: %s' % cache)
       
   169 
       
   170         # Getting the GeoIP data path.
       
   171         if not path:
       
   172             path = GEOIP_SETTINGS.get('GEOIP_PATH', None)
       
   173             if not path: raise GeoIPException('GeoIP path must be provided via parameter or the GEOIP_PATH setting.')
       
   174         if not isinstance(path, basestring):
       
   175             raise TypeError('Invalid path type: %s' % type(path).__name__)
       
   176 
       
   177         cntry_ptr, city_ptr = (None, None)
       
   178         if os.path.isdir(path):
       
   179             # Getting the country and city files using the settings
       
   180             # dictionary.  If no settings are provided, default names
       
   181             # are assigned.
       
   182             country = os.path.join(path, country or GEOIP_SETTINGS.get('GEOIP_COUNTRY', 'GeoIP.dat'))
       
   183             city = os.path.join(path, city or GEOIP_SETTINGS.get('GEOIP_CITY', 'GeoLiteCity.dat'))
       
   184         elif os.path.isfile(path):
       
   185             # Otherwise, some detective work will be needed to figure
       
   186             # out whether the given database path is for the GeoIP country
       
   187             # or city databases.
       
   188             ptr = geoip_open(path, cache)
       
   189             info = geoip_dbinfo(ptr)
       
   190             if lite_regex.match(info):
       
   191                 # GeoLite City database.
       
   192                 city, city_ptr = path, ptr
       
   193             elif free_regex.match(info):
       
   194                 # GeoIP Country database.
       
   195                 country, cntry_ptr = path, ptr
       
   196             else:
       
   197                 raise GeoIPException('Unable to recognize database edition: %s' % info)
       
   198         else:
       
   199             raise GeoIPException('GeoIP path must be a valid file or directory.')
       
   200         
       
   201         # `_init_db` does the dirty work.
       
   202         self._init_db(country, cache, '_country', cntry_ptr)
       
   203         self._init_db(city, cache, '_city', city_ptr)
       
   204 
       
   205     def _init_db(self, db_file, cache, attname, ptr=None):
       
   206         "Helper routine for setting GeoIP ctypes database properties."
       
   207         if ptr:
       
   208             # Pointer already retrieved.
       
   209             pass
       
   210         elif os.path.isfile(db_file or ''):
       
   211             ptr = geoip_open(db_file, cache)
       
   212         setattr(self, attname, ptr)
       
   213         setattr(self, '%s_file' % attname, db_file)
       
   214 
       
   215     def _check_query(self, query, country=False, city=False, city_or_country=False):
       
   216         "Helper routine for checking the query and database availability."
       
   217         # Making sure a string was passed in for the query.
       
   218         if not isinstance(query, basestring):
       
   219             raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__)
       
   220 
       
   221         # Extra checks for the existence of country and city databases.
       
   222         if city_or_country and self._country is None and self._city is None:
       
   223             raise GeoIPException('Invalid GeoIP country and city data files.')
       
   224         elif country and self._country is None:
       
   225             raise GeoIPException('Invalid GeoIP country data file: %s' % self._country_file)
       
   226         elif city and self._city is None:
       
   227             raise GeoIPException('Invalid GeoIP city data file: %s' % self._city_file)
       
   228 
       
   229     def city(self, query):
       
   230         """
       
   231         Returns a dictionary of city information for the given IP address or
       
   232         Fully Qualified Domain Name (FQDN).  Some information in the dictionary
       
   233         may be undefined (None).
       
   234         """
       
   235         self._check_query(query, city=True)
       
   236         if ipregex.match(query):
       
   237             # If an IP address was passed in
       
   238             ptr = rec_by_addr(self._city, c_char_p(query))
       
   239         else:
       
   240             # If a FQDN was passed in.
       
   241             ptr = rec_by_name(self._city, c_char_p(query))
       
   242 
       
   243         # Checking the pointer to the C structure, if valid pull out elements
       
   244         # into a dicionary and return.
       
   245         if bool(ptr):
       
   246             record = ptr.contents
       
   247             return dict((tup[0], getattr(record, tup[0])) for tup in record._fields_)
       
   248         else:
       
   249             return None
       
   250     
       
   251     def country_code(self, query):
       
   252         "Returns the country code for the given IP Address or FQDN."
       
   253         self._check_query(query, city_or_country=True)
       
   254         if self._country:
       
   255             if ipregex.match(query): return cntry_code_by_addr(self._country, query)
       
   256             else: return cntry_code_by_name(self._country, query)
       
   257         else:
       
   258             return self.city(query)['country_code']
       
   259 
       
   260     def country_name(self, query):
       
   261         "Returns the country name for the given IP Address or FQDN."
       
   262         self._check_query(query, city_or_country=True)
       
   263         if self._country:
       
   264             if ipregex.match(query): return cntry_name_by_addr(self._country, query)
       
   265             else: return cntry_name_by_name(self._country, query)
       
   266         else:
       
   267             return self.city(query)['country_name']
       
   268 
       
   269     def country(self, query):
       
   270         """
       
   271         Returns a dictonary with with the country code and name when given an 
       
   272         IP address or a Fully Qualified Domain Name (FQDN).  For example, both
       
   273         '24.124.1.80' and 'djangoproject.com' are valid parameters.
       
   274         """
       
   275         # Returning the country code and name
       
   276         return {'country_code' : self.country_code(query), 
       
   277                 'country_name' : self.country_name(query),
       
   278                 }
       
   279 
       
   280     #### Coordinate retrieval routines ####
       
   281     def coords(self, query, ordering=('longitude', 'latitude')):
       
   282         cdict = self.city(query)
       
   283         if cdict is None: return None
       
   284         else: return tuple(cdict[o] for o in ordering)
       
   285 
       
   286     def lon_lat(self, query):
       
   287         "Returns a tuple of the (longitude, latitude) for the given query."
       
   288         return self.coords(query)
       
   289 
       
   290     def lat_lon(self, query):
       
   291         "Returns a tuple of the (latitude, longitude) for the given query."
       
   292         return self.coords(query, ('latitude', 'longitude'))
       
   293 
       
   294     def geos(self, query):
       
   295         "Returns a GEOS Point object for the given query."
       
   296         ll = self.lon_lat(query)
       
   297         if ll:
       
   298             from django.contrib.gis.geos import Point
       
   299             return Point(ll, srid=4326)
       
   300         else:
       
   301             return None
       
   302 
       
   303     #### GeoIP Database Information Routines ####
       
   304     def country_info(self):
       
   305         "Returns information about the GeoIP country database."
       
   306         if self._country is None:
       
   307             ci = 'No GeoIP Country data in "%s"' % self._country_file
       
   308         else:
       
   309             ci = geoip_dbinfo(self._country)
       
   310         return ci
       
   311     country_info = property(country_info)
       
   312 
       
   313     def city_info(self):
       
   314         "Retuns information about the GeoIP city database."
       
   315         if self._city is None:
       
   316             ci = 'No GeoIP City data in "%s"' % self._city_file
       
   317         else:
       
   318             ci = geoip_dbinfo(self._city)
       
   319         return ci
       
   320     city_info = property(city_info)
       
   321         
       
   322     def info(self):
       
   323         "Returns information about all GeoIP databases in use."
       
   324         return 'Country:\n\t%s\nCity:\n\t%s' % (self.country_info, self.city_info)
       
   325     info = property(info)
       
   326 
       
   327     #### Methods for compatibility w/the GeoIP-Python API. ####
       
   328     @classmethod
       
   329     def open(cls, full_path, cache):
       
   330         return GeoIP(full_path, cache)
       
   331 
       
   332     def _rec_by_arg(self, arg):
       
   333         if self._city:
       
   334             return self.city(arg)
       
   335         else:
       
   336             return self.country(arg)
       
   337     region_by_addr = city
       
   338     region_by_name = city
       
   339     record_by_addr = _rec_by_arg
       
   340     record_by_name = _rec_by_arg
       
   341     country_code_by_addr = country_code
       
   342     country_code_by_name = country_code
       
   343     country_name_by_addr = country_name
       
   344     country_name_by_name = country_name