| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
# coding: utf-8
# Standard library.
import os
import sys

# Third-party.
import django
import requests
from bs4 import BeautifulSoup

# Python 2 only: reload() brings back sys.setdefaultencoding so the process
# default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Make the project sources importable and point Django at its settings module
# before initialising it.
sys.path.append('/mnt/bzyifeng/src')
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
django.setup()

# Imported only after django.setup() has run, so it stays below the bootstrap
# code instead of joining the imports at the top.
import common.models as cm
def get_divsites(url, splice=None, timeout=30):
    """Scrape the links listed in a page's ``<div class="divsites">`` block.

    Downloads ``url``, finds the first ``div`` with class ``divsites`` and
    converts every ``<a>`` inside it into a dict. The link whose text is
    u"全国" is left out of the result.

    Args:
        url: Address of the page to download.
        splice: Number of leading ``<a>`` tags to skip. Any falsy value
            (``None``, ``0``) keeps all of them.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; without
            one a stalled server would block the call indefinitely.

    Returns:
        A list of dicts, one per kept link, with the keys ``"name"`` (the link
        text), ``"ename"`` (the part of the href between ``//`` and the first
        ``.``) and ``"url"`` (the raw href).

    Raises:
        AttributeError: the page contains no ``div.divsites`` element.
        KeyError: one of the links has no ``href`` attribute.
        requests.exceptions.RequestException: the download failed or timed out.
    """
    response = requests.get(url, timeout=timeout)
    # NOTE: no parser is named, so BeautifulSoup chooses among the parsers
    # installed on the machine; left as-is to keep the parse result unchanged.
    soup = BeautifulSoup(response.text)
    container = soup.find("div", class_="divsites")

    # A slice start of None keeps every element, so a single loop covers both
    # the "skip nothing" and the "skip the first N links" cases.
    anchors = container.find_all("a")[splice or None:]

    data = []
    for anchor in anchors:
        name = anchor.text
        href = anchor.attrs["href"]
        if name == u"全国":
            continue
        # e.g. "http://foo.example.com/" -> "foo"
        ename = href.split("//")[-1].split(".")[0]
        data.append({"name": name, "ename": ename, "url": href})
    return data
def main():
    """Crawl the site's region links and store them as ``cm.City`` rows.

    Every link found on the home page is stored as a top-level row
    (``parent_id=None``). For the four entries named in ``municipalities`` the
    crawl goes two levels deeper: the links on the entry's own page (first
    link skipped) are stored as its children, and the links on each child's
    page (first two links skipped) are stored as that child's children. All
    other top-level entries are stored but not crawled further.

    Each top-level dict also receives a ``"children"`` key holding the list
    scraped for it, or an empty list when nothing was crawled.

    A failure while processing one child's sub-page is printed and the crawl
    moves on to the next child; failures elsewhere propagate to the caller.
    """
    municipalities = (u"北京市", u"天津市", u"上海市", u"重庆市")

    provinces = get_divsites("http://www.yinongtao.com/")
    for item in provinces:
        name = item["name"]
        province = cm.City.objects.get_or_create(
            name=name, ename=item["ename"], parent_id=None)[0]

        # Start from an empty list for every entry: previously `citys` was only
        # assigned in the municipality branch, so other entries either raised
        # NameError or were given the previous municipality's list.
        citys = []
        if name in municipalities:
            citys = get_divsites(item["url"], 1)
            # Districts
            for city_item in citys:
                city_name = city_item["name"]
                city = cm.City.objects.get_or_create(
                    name=city_name,
                    ename=city_item["ename"],
                    parent_id=province.id)[0]
                try:
                    for area_item in get_divsites(city_item["url"], 2):
                        cm.City.objects.get_or_create(
                            name=area_item["name"],
                            ename=area_item["ename"],
                            parent_id=city.id)
                except Exception as e:
                    # Best effort: report which child failed and keep going.
                    print(e)
                    print("%s %s %s" % (name, city_name, 2222222222222222))

        item["children"] = citys
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|