|
1 """ |
|
2 This module houses the GeoIP object, a ctypes wrapper for the MaxMind GeoIP(R) |
|
3 C API (http://www.maxmind.com/app/c). This is an alternative to the GPL |
|
4 licensed Python GeoIP interface provided by MaxMind. |
|
5 |
|
6 GeoIP(R) is a registered trademark of MaxMind, LLC of Boston, Massachusetts. |
|
7 |
|
8 For IP-based geolocation, this module requires the GeoLite Country and City |
|
9 datasets, in binary format (CSV will not work!). The datasets may be |
|
10 downloaded from MaxMind at http://www.maxmind.com/download/geoip/database/. |
|
11 Grab GeoIP.dat.gz and GeoLiteCity.dat.gz, and unzip them in the directory |
|
12 corresponding to settings.GEOIP_PATH. See the GeoIP docstring and examples |
|
13 below for more details. |
|
14 |
|
15 TODO: Verify compatibility with Windows. |
|
16 |
|
17 Example: |
|
18 |
|
19 >>> from django.contrib.gis.utils import GeoIP |
|
20 >>> g = GeoIP() |
|
21 >>> g.country('google.com') |
|
22 {'country_code': 'US', 'country_name': 'United States'} |
|
23 >>> g.city('72.14.207.99') |
|
24 {'area_code': 650, |
|
25 'city': 'Mountain View', |
|
26 'country_code': 'US', |
|
27 'country_code3': 'USA', |
|
28 'country_name': 'United States', |
|
29 'dma_code': 807, |
|
30 'latitude': 37.419200897216797, |
|
31 'longitude': -122.05740356445312, |
|
32 'postal_code': '94043', |
|
33 'region': 'CA'} |
|
34 >>> g.lat_lon('salon.com') |
|
35 (37.789798736572266, -122.39420318603516) |
|
36 >>> g.lon_lat('uh.edu') |
|
37 (-95.415199279785156, 29.77549934387207) |
|
38 >>> g.geos('24.124.1.80').wkt |
|
39 'POINT (-95.2087020874023438 39.0392990112304688)' |
|
40 """ |
|
41 import os, re |
|
42 from ctypes import c_char_p, c_float, c_int, Structure, CDLL, POINTER |
|
43 from ctypes.util import find_library |
|
44 from django.conf import settings |
|
# Configure Django with default settings when nothing has been set up yet,
# so the attribute lookups on `settings` below work.
if not settings._target:
    settings.configure()

# Snapshot of the GeoIP-related settings.  Only the names that are actually
# present on `settings` end up in the dictionary, so `.get()` with a default
# can be used everywhere else in this module.
GEOIP_SETTINGS = dict(
    (name, getattr(settings, name))
    for name in filter(
        lambda candidate: hasattr(settings, candidate),
        ('GEOIP_PATH', 'GEOIP_LIBRARY_PATH', 'GEOIP_COUNTRY', 'GEOIP_CITY'),
    )
)

# Optional explicit location of the GeoIP shared library (None when unset).
lib_path = GEOIP_SETTINGS.get('GEOIP_LIBRARY_PATH')
|
52 |
|
53 # GeoIP Exception class. |
|
class GeoIPException(Exception):
    """Error raised by this module for GeoIP library and data file problems."""
|
55 |
|
# The shared library for the GeoIP C API.  May be downloaded
# from http://www.maxmind.com/download/geoip/api/c/
#
# An explicit GEOIP_LIBRARY_PATH wins; only when it is missing (or empty) is
# the library searched for by name.
lib_name = None
if not lib_path:
    # TODO: Is this really the library name for Windows?
    lib_name = 'GeoIP'
    # Getting the path to the GeoIP library.
    lib_path = find_library(lib_name)

if lib_path is None:
    raise GeoIPException('Could not find the GeoIP library (tried "%s"). '
                         'Try setting GEOIP_LIBRARY_PATH in your settings.' % lib_name)

# Handle to the loaded GeoIP C library; all prototypes below hang off it.
lgeoip = CDLL(lib_path)
|
69 |
|
# Regular expressions for recognizing IP addresses and the GeoIP
# free database editions.
#
# `ipregex` matches dotted-quad strings: four groups of one to three digits,
# captured as `w`, `x`, `y` and `z`.  It does not check that each group is
# within 0-255.
ipregex = re.compile(r'^(?P<w>\d\d?\d?)\.(?P<x>\d\d?\d?)\.(?P<y>\d\d?\d?)\.(?P<z>\d\d?\d?)$')
# `free_regex` and `lite_regex` are matched against the database info string
# when a single data file is given: a "FREE" edition is treated as the
# country database, a "LITE" edition as the city database.
free_regex = re.compile(r'^GEO-\d{3}FREE')
lite_regex = re.compile(r'^GEO-\d{3}LITE')
|
75 |
|
76 #### GeoIP C Structure definitions #### |
|
class GeoIPRecord(Structure):
    """
    ctypes description of the record structure that the record lookup
    routines return a pointer to.

    ctypes lays the members out in declaration order, so the order of the
    entries in `_fields_` is significant.
    """
    _fields_ = [
        ('country_code', c_char_p),
        ('country_code3', c_char_p),
        ('country_name', c_char_p),
        ('region', c_char_p),
        ('city', c_char_p),
        ('postal_code', c_char_p),
        ('latitude', c_float),
        ('longitude', c_float),
        ('dma_code', c_int),
        ('area_code', c_int),
    ]
|
class GeoIPTag(Structure):
    """
    Opaque structure standing in for a GeoIP database handle.  No fields are
    declared; it is only ever used through `POINTER(GeoIPTag)`.
    """
|
90 |
|
91 #### ctypes function prototypes #### |
|
# Pointer types used as return types in the prototypes below: RECTYPE for
# the record lookup routines, DBTYPE for the database handle returned when
# a data file is opened.
RECTYPE = POINTER(GeoIPRecord)
DBTYPE = POINTER(GeoIPTag)
|
94 |
|
95 # For retrieving records by name or address. |
|
def record_output(func):
    """
    Declares that the given ctypes function returns a pointer to a
    `GeoIPRecord` structure, and hands the same function object back.
    """
    setattr(func, 'restype', RECTYPE)
    return func
|
# Record lookups, by dotted-quad IP address and by host name respectively.
# Both return a RECTYPE pointer.
rec_by_addr = record_output(lgeoip.GeoIP_record_by_addr)
rec_by_name = record_output(lgeoip.GeoIP_record_by_name)
|
101 |
|
# For opening up GeoIP databases.  Called in this module as
# `geoip_open(path, cache_flag)`; the result is a DBTYPE pointer.
geoip_open = lgeoip.GeoIP_open
geoip_open.restype = DBTYPE
|
105 |
|
106 # String output routines. |
|
def string_output(func):
    """
    Declares that the given ctypes function returns a C string
    (`c_char_p`), and hands the same function object back.
    """
    setattr(func, 'restype', c_char_p)
    return func
|
# Prototypes of the routines that return C strings: the database info
# string, and the country code / country name lookups by IP address
# (`*_by_addr`) and by host name (`*_by_name`).
geoip_dbinfo = string_output(lgeoip.GeoIP_database_info)
cntry_code_by_addr = string_output(lgeoip.GeoIP_country_code_by_addr)
cntry_code_by_name = string_output(lgeoip.GeoIP_country_code_by_name)
cntry_name_by_addr = string_output(lgeoip.GeoIP_country_name_by_addr)
cntry_name_by_name = string_output(lgeoip.GeoIP_country_name_by_name)
|
115 |
|
116 #### GeoIP class #### |
|
class GeoIP(object):
    """
    Front end to the GeoIP country and city databases.

    An instance holds up to two database handles (`_country` and `_city`)
    together with the file names they were opened from (`_country_file` and
    `_city_file`).  A handle attribute is `None` whenever the corresponding
    database is not available.
    """
    # The flags for GeoIP memory caching.
    # GEOIP_STANDARD - read database from filesystem, uses least memory.
    #
    # GEOIP_MEMORY_CACHE - load database into memory, faster performance
    #        but uses more memory
    #
    # GEOIP_CHECK_CACHE - check for updated database.  If database has been
    #        updated, reload filehandle and/or memory cache.
    #
    # GEOIP_INDEX_CACHE - just cache the most frequently accessed index
    #        portion of the database, resulting in faster lookups than
    #        GEOIP_STANDARD, but less memory usage than GEOIP_MEMORY_CACHE -
    #        useful for larger databases such as GeoIP Organization and
    #        GeoIP City.  Note, for GeoIP Country, Region and Netspeed
    #        databases, GEOIP_INDEX_CACHE is equivalent to GEOIP_MEMORY_CACHE
    #
    GEOIP_STANDARD = 0
    GEOIP_MEMORY_CACHE = 1
    GEOIP_CHECK_CACHE = 2
    GEOIP_INDEX_CACHE = 4
    # Only the keys matter: they are the accepted values for `cache`.
    cache_options = dict((opt, None) for opt in (0, 1, 2, 4))

    def __init__(self, path=None, cache=0, country=None, city=None):
        """
        Initializes the GeoIP object, no parameters are required to use default
        settings.  Keyword arguments may be passed in to customize the locations
        of the GeoIP data sets.

        * path: Base directory to where GeoIP data is located or the full path
            to where the city or country data files (*.dat) are located.
            Assumes that both the city and country data sets are located in
            this directory; overrides the GEOIP_PATH settings attribute.

        * cache: The cache settings when opening up the GeoIP datasets,
            and may be an integer in (0, 1, 2, 4) corresponding to
            the GEOIP_STANDARD, GEOIP_MEMORY_CACHE, GEOIP_CHECK_CACHE,
            and GEOIP_INDEX_CACHE `GeoIPOptions` C API settings,
            respectively.  Defaults to 0, meaning that the data is read
            from the disk.

        * country: The name of the GeoIP country data file.  Defaults to
            'GeoIP.dat'; overrides the GEOIP_COUNTRY settings attribute.

        * city: The name of the GeoIP city data file.  Defaults to
            'GeoLiteCity.dat'; overrides the GEOIP_CITY settings attribute.

        Raises GeoIPException for an invalid cache option, a missing or
        invalid path, or a data file that cannot be opened or recognized;
        raises TypeError when the path is not a string.
        """
        # Checking the given cache option.
        if cache not in self.cache_options:
            raise GeoIPException('Invalid caching option: %s' % cache)
        # Keep the requested flag itself; the values stored in
        # `cache_options` are placeholders and carry no information.
        self._cache = cache

        # Getting the GeoIP data path.
        if not path:
            path = GEOIP_SETTINGS.get('GEOIP_PATH', None)
            if not path:
                raise GeoIPException('GeoIP path must be provided via parameter or the GEOIP_PATH setting.')
        if not isinstance(path, basestring):
            raise TypeError('Invalid path type: %s' % type(path).__name__)

        cntry_ptr, city_ptr = (None, None)
        if os.path.isdir(path):
            # Getting the country and city files using the settings
            # dictionary.  If no settings are provided, default names
            # are assigned.
            country = os.path.join(path, country or GEOIP_SETTINGS.get('GEOIP_COUNTRY', 'GeoIP.dat'))
            city = os.path.join(path, city or GEOIP_SETTINGS.get('GEOIP_CITY', 'GeoLiteCity.dat'))
        elif os.path.isfile(path):
            # Otherwise, some detective work will be needed to figure
            # out whether the given database path is for the GeoIP country
            # or city databases.
            ptr = geoip_open(path, cache)
            if not ptr:
                # A NULL handle means the library could not open the file;
                # report that instead of passing NULL on to the C API.
                raise GeoIPException('Unable to open GeoIP database: %s' % path)
            # The info string may come back as None (NULL); an empty string
            # then falls through to the "unrecognized edition" error below.
            info = geoip_dbinfo(ptr) or ''
            if lite_regex.match(info):
                # GeoLite City database.
                city, city_ptr = path, ptr
            elif free_regex.match(info):
                # GeoIP Country database.
                country, cntry_ptr = path, ptr
            else:
                raise GeoIPException('Unable to recognize database edition: %s' % info)
        else:
            raise GeoIPException('GeoIP path must be a valid file or directory.')

        # `_init_db` does the dirty work.
        self._init_db(country, cache, '_country', cntry_ptr)
        self._init_db(city, cache, '_city', city_ptr)

    def _init_db(self, db_file, cache, attname, ptr=None):
        """
        Helper routine for setting GeoIP ctypes database properties.

        Stores the database handle under `attname` and the file name under
        `<attname>_file`.  When no usable handle is passed in, the file is
        opened if it exists.  The stored handle is always either a valid
        (non-NULL) pointer or None, so that `is None` checks elsewhere in
        the class are reliable.
        """
        if not ptr and os.path.isfile(db_file or ''):
            ptr = geoip_open(db_file, cache)
        # A NULL ctypes pointer is falsy but is not None; normalize it.
        setattr(self, attname, ptr or None)
        setattr(self, '%s_file' % attname, db_file)

    def _check_query(self, query, country=False, city=False, city_or_country=False):
        """
        Helper routine for checking the query and database availability.

        Raises TypeError when the query is not a string, and GeoIPException
        when the database(s) requested through the keyword flags are not
        available.
        """
        # Making sure a string was passed in for the query.
        if not isinstance(query, basestring):
            raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__)

        # Extra checks for the existence of country and city databases.
        if city_or_country and self._country is None and self._city is None:
            raise GeoIPException('Invalid GeoIP country and city data files.')
        elif country and self._country is None:
            raise GeoIPException('Invalid GeoIP country data file: %s' % self._country_file)
        elif city and self._city is None:
            raise GeoIPException('Invalid GeoIP city data file: %s' % self._city_file)

    def city(self, query):
        """
        Returns a dictionary of city information for the given IP address or
        Fully Qualified Domain Name (FQDN).  Some information in the dictionary
        may be undefined (None).  Returns None when the lookup yields no
        record.
        """
        self._check_query(query, city=True)
        if ipregex.match(query):
            # If an IP address was passed in
            ptr = rec_by_addr(self._city, c_char_p(query))
        else:
            # If a FQDN was passed in.
            ptr = rec_by_name(self._city, c_char_p(query))

        # Checking the pointer to the C structure, if valid pull out elements
        # into a dictionary and return.
        if not ptr:
            return None
        try:
            record = ptr.contents
            return dict((field[0], getattr(record, field[0])) for field in record._fields_)
        finally:
            # The record is allocated by the C library for every lookup and
            # has to be released with GeoIPRecord_delete; the field values
            # have already been copied into Python objects above.
            lgeoip.GeoIPRecord_delete(ptr)

    def _city_field(self, query, field):
        "Returns one entry of the city record for the query, or None if there is no record."
        record = self.city(query)
        if record is None:
            return None
        return record[field]

    def country_code(self, query):
        """
        Returns the country code for the given IP Address or FQDN.  The
        country database is used when available, otherwise the value is
        taken from the city record (None if no record is found).
        """
        self._check_query(query, city_or_country=True)
        if self._country:
            if ipregex.match(query):
                return cntry_code_by_addr(self._country, query)
            return cntry_code_by_name(self._country, query)
        return self._city_field(query, 'country_code')

    def country_name(self, query):
        """
        Returns the country name for the given IP Address or FQDN.  The
        country database is used when available, otherwise the value is
        taken from the city record (None if no record is found).
        """
        self._check_query(query, city_or_country=True)
        if self._country:
            if ipregex.match(query):
                return cntry_name_by_addr(self._country, query)
            return cntry_name_by_name(self._country, query)
        return self._city_field(query, 'country_name')

    def country(self, query):
        """
        Returns a dictionary with the country code and name when given an
        IP address or a Fully Qualified Domain Name (FQDN).  For example, both
        '24.124.1.80' and 'djangoproject.com' are valid parameters.
        """
        # Returning the country code and name
        return {'country_code' : self.country_code(query),
                'country_name' : self.country_name(query),
                }

    #### Coordinate retrieval routines ####
    def coords(self, query, ordering=('longitude', 'latitude')):
        """
        Returns a tuple with the city record values named in `ordering`
        (longitude, latitude by default), or None if no record is found.
        """
        cdict = self.city(query)
        if cdict is None:
            return None
        return tuple(cdict[o] for o in ordering)

    def lon_lat(self, query):
        "Returns a tuple of the (longitude, latitude) for the given query."
        return self.coords(query)

    def lat_lon(self, query):
        "Returns a tuple of the (latitude, longitude) for the given query."
        return self.coords(query, ('latitude', 'longitude'))

    def geos(self, query):
        "Returns a GEOS Point object for the given query, or None if no record is found."
        ll = self.lon_lat(query)
        if ll:
            # Imported here so that GEOS is only needed when this is called.
            from django.contrib.gis.geos import Point
            return Point(ll, srid=4326)
        else:
            return None

    #### GeoIP Database Information Routines ####
    @property
    def country_info(self):
        "Returns information about the GeoIP country database."
        if self._country is None:
            return 'No GeoIP Country data in "%s"' % self._country_file
        return geoip_dbinfo(self._country)

    @property
    def city_info(self):
        "Returns information about the GeoIP city database."
        if self._city is None:
            return 'No GeoIP City data in "%s"' % self._city_file
        return geoip_dbinfo(self._city)

    @property
    def info(self):
        "Returns information about all GeoIP databases in use."
        return 'Country:\n\t%s\nCity:\n\t%s' % (self.country_info, self.city_info)

    #### Methods for compatibility w/the GeoIP-Python API. ####
    @classmethod
    def open(cls, full_path, cache):
        "Alternate constructor taking the data path and the cache option."
        return cls(full_path, cache)

    def _rec_by_arg(self, arg):
        "Returns the city record when a city database is loaded, the country dictionary otherwise."
        if self._city:
            return self.city(arg)
        else:
            return self.country(arg)
    region_by_addr = city
    region_by_name = city
    record_by_addr = _rec_by_arg
    record_by_name = _rec_by_arg
    country_code_by_addr = country_code
    country_code_by_name = country_code
    country_name_by_addr = country_name
    country_name_by_name = country_name