# get_city.py (3.1 KB)
  1. #coding:utf-8
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import sys,os
  5. import django
  6. reload(sys)
  7. sys.setdefaultencoding('utf8')
  8. sys.path.append('/mnt/bzyifeng/src')
  9. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
  10. django.setup()
  11. import common.models as cm
  12. def get_divsites(url,splice=None):
  13. data = []
  14. html = requests.get(url).text
  15. bs4node = BeautifulSoup(html)
  16. divsites = bs4node.find("div",class_="divsites")
  17. if not splice:
  18. for a in divsites.find_all("a"):
  19. name = a.text
  20. url = a.attrs["href"]
  21. ename = url.split("//")[-1].split(".")[0]
  22. if name != u"全国":
  23. data.append({"name":name,"ename":ename,"url":url})
  24. else:
  25. for a in divsites.find_all("a")[splice:]:
  26. name = a.text
  27. url = a.attrs["href"]
  28. ename = url.split("//")[-1].split(".")[0]
  29. if name != u"全国":
  30. data.append({"name":name,"ename":ename,"url":url})
  31. return data
  32. def main():
  33. provinces = get_divsites("http://www.yinongtao.com/")
  34. for item in provinces:
  35. name = item["name"]
  36. url = item["url"]
  37. ename = item["ename"]
  38. obj,flag = cm.City.objects.get_or_create(name=name,ename=ename,parent_id=None)
  39. if name in [u"北京市",u"天津市",u"上海市",u"重庆市"]:
  40. citys = get_divsites(url,1)
  41. #区
  42. for iitem in citys:
  43. _url = iitem["url"]
  44. _name = iitem["name"]
  45. _ename = iitem["ename"]
  46. oobj,flag = cm.City.objects.get_or_create(name=_name,ename=_ename,parent_id=obj.id)
  47. try:
  48. areas = get_divsites(_url,2)
  49. for iiitem in areas:
  50. __url = iiitem["url"]
  51. __name = iiitem["name"]
  52. __ename = iiitem["ename"]
  53. ooobj,flag = cm.City.objects.get_or_create(name=__name,ename=__ename,parent_id=oobj.id)
  54. except Exception as e:
  55. print e
  56. print name,_name,2222222222222222
  57. pass
  58. else:
  59. pass
  60. #citys = get_divsites(url)
  61. ##区
  62. #for iitem in citys:
  63. # _url = iitem["url"]
  64. # _name = iitem["name"]
  65. # _ename = iitem["ename"]
  66. # oobj,flag = cm.City.objects.get_or_create(name=_name,ename=_ename,parent_id=obj.id)
  67. # try:
  68. # areas = get_divsites(_url,2)
  69. # for iiitem in areas:
  70. # __url = iiitem["url"]
  71. # __name = iiitem["name"]
  72. # __ename = iiitem["ename"]
  73. # ooobj,flag = cm.City.objects.get_or_create(name=__name,ename=__ename,parent_id=oobj.id)
  74. # except Exception as e:
  75. # print e
  76. # print name,_name,2222222222222222
  77. # pass
  78. item["children"] = citys
  79. import pprint
  80. #pprint.pprint(provinces)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()