"""Print the ids of the ``<p id="...">`` lines found in a file.

Usage: python <script> <input-file>

Every line of the input file that starts with ``<p id=`` contributes the
text found between its first pair of double quotes. The collected values
are printed as a single Python list.
"""
import sys


def extract_pids(lines):
    """Return the first double-quoted field of every ``<p id=`` line.

    Args:
        lines: iterable of text lines (for example an open file object).

    Returns:
        A list of strings in input order, one per line that starts with
        ``<p id=`` and contains at least one double quote.
    """
    pid_list = []
    for line in lines:
        if not line.startswith('<p id='):
            continue
        # Only the text between the first and second quote is needed, so
        # stop splitting after two cuts.
        fields = line.split('"', 2)
        if len(fields) < 2:
            # No double quote on the line (e.g. <p id='x'>): skip it
            # rather than fail with IndexError.
            continue
        pid_list.append(fields[1])
    return pid_list


def main(argv):
    """Read the file named by ``argv[1]`` and print the list of ids.

    Args:
        argv: command-line argument list; ``argv[1]`` is the input path.

    Returns:
        0 on success, 2 when the input path is missing.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s <input-file>\n' % argv[0])
        return 2
    # The context manager closes the file even if reading fails.
    with open(argv[1], 'r') as f:
        pid_list = extract_pids(f)
    # A parenthesised single argument prints the same on Python 2 and 3.
    print(pid_list)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))