#coding:utf-8 import requests from bs4 import BeautifulSoup import sys,os import django reload(sys) sys.setdefaultencoding('utf8') sys.path.append('/mnt/bzyifeng/src') os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' django.setup() import common.models as cm def get_divsites(url,splice=None): data = [] html = requests.get(url).text bs4node = BeautifulSoup(html) divsites = bs4node.find("div",class_="divsites") if not splice: for a in divsites.find_all("a"): name = a.text url = a.attrs["href"] ename = url.split("//")[-1].split(".")[0] if name != u"全国": data.append({"name":name,"ename":ename,"url":url}) else: for a in divsites.find_all("a")[splice:]: name = a.text url = a.attrs["href"] ename = url.split("//")[-1].split(".")[0] if name != u"全国": data.append({"name":name,"ename":ename,"url":url}) return data def main(): provinces = get_divsites("http://www.yinongtao.com/") for item in provinces: name = item["name"] url = item["url"] ename = item["ename"] obj,flag = cm.City.objects.get_or_create(name=name,ename=ename,parent_id=None) if name in [u"北京市",u"天津市",u"上海市",u"重庆市"]: citys = get_divsites(url,1) #区 for iitem in citys: _url = iitem["url"] _name = iitem["name"] _ename = iitem["ename"] oobj,flag = cm.City.objects.get_or_create(name=_name,ename=_ename,parent_id=obj.id) try: areas = get_divsites(_url,2) for iiitem in areas: __url = iiitem["url"] __name = iiitem["name"] __ename = iiitem["ename"] ooobj,flag = cm.City.objects.get_or_create(name=__name,ename=__ename,parent_id=oobj.id) except Exception as e: print e print name,_name,2222222222222222 pass else: pass #citys = get_divsites(url) ##区 #for iitem in citys: # _url = iitem["url"] # _name = iitem["name"] # _ename = iitem["ename"] # oobj,flag = cm.City.objects.get_or_create(name=_name,ename=_ename,parent_id=obj.id) # try: # areas = get_divsites(_url,2) # for iiitem in areas: # __url = iiitem["url"] # __name = iiitem["name"] # __ename = iiitem["ename"] # ooobj,flag = cm.City.objects.get_or_create(name=__name,ename=__ename,parent_id=oobj.id) # except Exception as e: # print e # print name,_name,2222222222222222 # pass item["children"] = citys import pprint #pprint.pprint(provinces) if __name__ == "__main__": main()