__main__.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. import requests
  2. import json
  3. from bs4 import BeautifulSoup
  4. def main():
  5. # print('[{}] Hello world!'.format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
  6. # str = '188888'
  7. # n = len(str)
  8. # new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
  9. # print(new_str)
  10. result = []
  11. n = 290
  12. for i in range(1, n):
  13. page = i
  14. text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
  15. res = text.text
  16. soup = BeautifulSoup(res, features="html.parser")
  17. arr = soup.find_all(name = "table")[1].find_all(name = "tr")
  18. for i in arr[2:]:
  19. t = i.find_all(name = "td")
  20. href = t[0].find(name = "a").attrs.get("href")
  21. if href != None:
  22. sub_text = requests.get("https://airportcode.bmcx.com" + href)
  23. sub_res = sub_text.text
  24. sub_soup = BeautifulSoup(sub_res, features="html.parser")
  25. sub_arr = sub_soup.find_all(name = "table")[1].find_all(name = "tr")
  26. item = {}
  27. for j in sub_arr[2:]:
  28. sub_t = j.find_all(name = "td")
  29. item[sub_t[0].string] = sub_t[1].string
  30. result.append(item)
  31. s = json.dumps(result, ensure_ascii=False)
  32. # print(s)
  33. with open('json.json', 'w') as f:
  34. f.write(s)
  35. # 程序入口
  36. if __name__ == '__main__':
  37. main()