__main__.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import re
  2. import requests
  3. import json
  4. from bs4 import BeautifulSoup
  5. def main():
  6. # print('[{}] Hello world!'.format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
  7. # str = '188888'
  8. # n = len(str)
  9. # new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
  10. # print(new_str)
  11. # 抓取机场三字码等信息
  12. pattern = re.compile(r'-*\d+\.\d+')
  13. result = []
  14. n = 290
  15. for i in range(1, n):
  16. page = i
  17. try:
  18. text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
  19. res = text.text
  20. html = BeautifulSoup(res, features="html.parser")
  21. tabs = html.find_all(name = "table")
  22. if len(tabs) > 1:
  23. arr = tabs[1].find_all(name = "tr")
  24. if len(arr) > 2:
  25. for tr in arr[2:]:
  26. href = tr.find(name = "a").attrs.get("href")
  27. if href != None:
  28. try:
  29. sub_text = requests.get("https://airportcode.bmcx.com" + href)
  30. sub_res = sub_text.text
  31. sub_html = BeautifulSoup(sub_res, features="html.parser")
  32. sub_tabs = sub_html.find_all(name = "table")
  33. if len(sub_tabs) > 1:
  34. sub_arr = sub_tabs[1].find_all(name = "tr")
  35. item = {}
  36. for sub_tr in sub_arr:
  37. sub_t = sub_tr.find_all(name = "td")
  38. item[sub_t[0].string] = sub_t[1].string
  39. flag = ""
  40. swap = ""
  41. if not pattern.match(item["纬度"]):
  42. flag = "纬度"
  43. swap = item["纬度"]
  44. if not pattern.match(item["经度"]):
  45. flag = "经度"
  46. swap = item["经度"]
  47. if flag != "":
  48. for k, v in item.items():
  49. if k != "纬度" and k != "经度" and pattern.match(v):
  50. item[flag] = v
  51. item[k] = swap
  52. break
  53. result.append(item)
  54. except Exception as e:
  55. print(f"Exception in sub process: {e=}, {type(e)=}")
  56. except Exception as ex:
  57. print(f"Exception in main loop process: {ex=}, {type(ex)=}")
  58. s = json.dumps(result, ensure_ascii=False)
  59. # print(s)
  60. with open('json.json', 'w') as f:
  61. f.write(s)
  62. # 程序入口
  63. if __name__ == '__main__':
  64. main()