# Collect the id value from every line of abc.html that begins with a
# `<p id=` tag, then print the collected list.
pid_list = []

# `with` guarantees the file handle is closed, even if reading fails.
with open('abc.html', 'r') as f:
    # Iterate the file object directly so lines are streamed instead of
    # being loaded into memory all at once.
    for line in f:
        if not line.startswith('<p id='):
            continue
        # The id value is the text after the first double quote, up to the
        # next double quote (or the end of the line if there is none).
        parts = line.split('"')
        # A matching line with no double quote at all (e.g. `<p id=foo>`)
        # is skipped rather than raising IndexError.
        if len(parts) > 1:
            pid_list.append(parts[1])

# The parenthesized form prints the same list under Python 2 and Python 3.
print(pid_list)