app/scripts/fetch_data.py
author Madhusudan.C.S <madhusudancs@gmail.com>
Sun, 09 Aug 2009 12:40:14 +0530
changeset 20 327b3f0b73bb
permissions -rw-r--r--
Added data fetching scripts.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
20
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     1
"""Module to fetch data.
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     2
"""
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     3
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     4
# Contact details of the people who wrote this module.
__authors__ = [
  '"Madhusudan.C.S" <madhusudancs@gmail.com>',
]
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     7
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     8
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     9
import json
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    10
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    11
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    12
def fetch_state_code(file_name):
  """Print a JSON mapping of state codes to state names.

  Every non-blank line of the input file must hold a state name and its
  code separated by a single tab character. Surrounding whitespace is
  removed from both fields. The resulting dictionary, keyed by code, is
  written to standard output as JSON with an indent of 4.

  Args:
    file_name: path of the tab-separated file to read.

  Raises:
    ValueError: if a non-blank line does not split into exactly two
        tab-separated fields.
  """

  states = {}
  # The context manager closes the file even when a line fails to parse.
  with open(file_name) as fh:
    for line in fh:
      if not line.strip():
        # Tolerate blank lines, e.g. an empty line at the end of the file.
        continue
      name, code = line.split('\t')
      states[code.strip()] = name.strip()
  # Single-argument print() behaves the same on Python 2 and Python 3.
  print(json.dumps(states, indent=4))
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    24
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    25
def fetch_district_code(file_name, write_file):
  """Extract district codes from a text listing and save them as JSON.

  The input file is processed line by line:
    * blank lines and lines starting with 'See ' or 'Code' are ignored;
    * a line starting with '[edit]' opens a new state section; the state
      code is the last whitespace-separated word of that line with any
      surrounding parentheses removed;
    * any other line is a tab-separated district row whose first field is
      the district code and whose second field is the district name.

  Each district is stored under the state code immediately followed by the
  district code. Keys longer than four characters are also echoed to
  standard output (NOTE(review): presumably to flag rows that need a manual
  look -- confirm). The mapping is written to write_file as JSON with an
  indent of 6.

  Args:
    file_name: path of the text file to parse.
    write_file: path of the JSON file to create or overwrite.

  Raises:
    ValueError: if a district row appears before any '[edit]' line.
    IndexError: if a district row contains no tab, or an '[edit]' line
        carries no text after the marker.
  """

  districts = {}
  state = None  # Stays None until the first '[edit]' section line is seen.

  # The context manager closes the input even when a row fails to parse.
  with open(file_name) as fh:
    for line in fh:
      if not line.strip() or line.startswith(('See ', 'Code')):
        continue
      if line.startswith('[edit]'):
        state = line[6:].split()[-1].strip('()')
        continue
      if state is None:
        # Without a section header there is no state code to prefix with.
        raise ValueError(
            'district row found before any [edit] state line: %r' % line)
      fields = line.split('\t')
      code = '%s%s' % (state, fields[0].strip())
      name = fields[1].strip()
      districts[code] = name
      if len(code) > 4:
        print('%s %s' % (code, name))

  with open(write_file, 'w') as out:
    out.write(json.dumps(districts, indent=6))
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    48
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    49
if __name__ == '__main__':
  # Command-line entry point: the first argument selects the function to
  # run, the remaining arguments are the file paths handed to it.
  import sys

  usage = (
      'usage: fetch_data.py fetch_state_code <file_name>\n'
      '       fetch_data.py fetch_district_code <file_name> <write_file>')
  args = sys.argv[1:]
  if len(args) >= 2 and args[0] == 'fetch_state_code':
    fetch_state_code(args[1])
  elif len(args) >= 3 and args[0] == 'fetch_district_code':
    fetch_district_code(args[1], args[2])
  else:
    # Missing arguments or an unknown command: print the usage text to
    # stderr and exit with a non-zero status instead of an IndexError.
    sys.exit(usage)