123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- import re
- import requests
- import json
- from bs4 import BeautifulSoup
# Matches text that starts with an optionally negative decimal number,
# which is what a latitude/longitude cell is expected to contain.
_COORD_PATTERN = re.compile(r'-*\d+\.\d+')
_LATITUDE_KEY = "纬度"
_LONGITUDE_KEY = "经度"
# Listing pages are numbered from 1; this is the exclusive upper bound.
_PAGE_STOP = 290
# Seconds to wait on any single HTTP request before giving up, so one
# stalled connection cannot hang the whole crawl.
_REQUEST_TIMEOUT = 30


def _looks_numeric(value):
    """Return True when *value* is a string that starts with a decimal number.

    ``None`` (which BeautifulSoup's ``.string`` yields for cells with nested
    markup) is treated as "not numeric" instead of raising ``TypeError``.
    """
    return isinstance(value, str) and _COORD_PATTERN.match(value) is not None


def _fetch_tables(url):
    """Download *url* and return every ``<table>`` element in the page."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, features="html.parser")
    return soup.find_all(name="table")


def _repair_coordinates(item):
    """Swap a misplaced coordinate value back into its field, in place.

    If the latitude or longitude field does not hold a number, the first
    other field whose value looks numeric is assumed to hold the displaced
    coordinate, and the two values are exchanged.

    Raises:
        KeyError: if *item* lacks the latitude or longitude key.
    """
    misplaced_key = None
    misplaced_value = None
    # NOTE(review): when BOTH coordinates are non-numeric only the longitude
    # is repaired (the later check overrides the earlier one). This mirrors
    # the long-standing behaviour; confirm whether both should be fixed.
    for key in (_LATITUDE_KEY, _LONGITUDE_KEY):
        if not _looks_numeric(item[key]):
            misplaced_key = key
            misplaced_value = item[key]
    if misplaced_key is None:
        return

    for key, value in item.items():
        if key in (_LATITUDE_KEY, _LONGITUDE_KEY):
            continue
        if _looks_numeric(value):
            # Only values of existing keys are replaced, so the dict size is
            # unchanged and we stop iterating immediately afterwards.
            item[misplaced_key] = value
            item[key] = misplaced_value
            break


def _parse_detail(href):
    """Fetch one airport detail page and return its fields as a dict.

    Each row of the page's second table contributes one ``label -> value``
    entry. Returns ``None`` when the page has fewer than two tables.
    """
    tables = _fetch_tables("https://airportcode.bmcx.com" + href)
    if len(tables) <= 1:
        return None

    item = {}
    for row in tables[1].find_all(name="tr"):
        cells = row.find_all(name="td")
        if len(cells) < 2:
            # A row without a label/value pair carries no field; skip it
            # rather than losing the whole record to an IndexError.
            continue
        item[cells[0].string] = cells[1].string
    _repair_coordinates(item)
    return item


def main():
    """Scrape airport three-letter codes and related info into ``json.json``.

    Walks the numbered listing pages, follows the link in each listing row
    to the airport's detail page, collects the detail fields, and finally
    writes all records as a single JSON array (UTF-8, non-ASCII preserved).
    Failures are reported on stdout and do not stop the crawl: an error on a
    detail page skips that airport, an error on a listing page skips the
    remainder of that page.
    """
    result = []
    for page in range(1, _PAGE_STOP):
        try:
            tables = _fetch_tables(
                "https://airportcode.bmcx.com/" + str(page) + "__airportcode/"
            )
            if len(tables) <= 1:
                continue
            # The first two rows of the listing table are skipped
            # (presumably header rows -- confirm against the site markup).
            for row in tables[1].find_all(name="tr")[2:]:
                link = row.find(name="a")
                if link is None:
                    # A row without a link must not abort the whole page.
                    continue
                href = link.attrs.get("href")
                if href is None:
                    continue
                try:
                    item = _parse_detail(href)
                except Exception as e:
                    print(f"Exception in sub process: {e=}, {type(e)=}")
                else:
                    if item is not None:
                        result.append(item)
        except Exception as ex:
            print(f"Exception in main loop process: {ex=}, {type(ex)=}")

    s = json.dumps(result, ensure_ascii=False)
    # ensure_ascii=False leaves Chinese text unescaped, so the file encoding
    # must be pinned; the locale default cannot encode it on every platform.
    with open('json.json', 'w', encoding='utf-8') as f:
        f.write(s)
# Program entry point: run the scraper only when executed as a script.
if __name__ == "__main__":
    main()
|