"""Scrape airport details from airportcode.bmcx.com and save them as JSON."""
import json
import re
from typing import Optional

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://airportcode.bmcx.com"
# Number of listing pages to walk, starting from page 1.
PAGE_COUNT = 1
OUTPUT_PATH = 'json.json'
# Seconds to wait for the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 30

# Field names used by the site for latitude and longitude.
LATITUDE_KEY = "纬度"
LONGITUDE_KEY = "经度"
COORDINATE_KEYS = (LATITUDE_KEY, LONGITUDE_KEY)
# Prefix match for a decimal number with optional leading minus signs.
COORDINATE_PATTERN = re.compile(r'-*\d+\.\d+')


def _fetch_soup(url: str) -> BeautifulSoup:
    """Download *url* and return its parsed HTML."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, features="html.parser")


def _second_table_rows(soup: BeautifulSoup) -> Optional[list]:
    """Return the <tr> rows of the page's second table, or None if absent."""
    tables = soup.find_all(name="table")
    if len(tables) > 1:
        return tables[1].find_all(name="tr")
    return None


def _looks_like_coordinate(value) -> bool:
    """Return True if *value* is a string starting with a decimal number.

    Non-string values (e.g. the None that BeautifulSoup's ``.string`` yields
    for an empty or multi-child cell) are never coordinates.
    """
    return isinstance(value, str) and COORDINATE_PATTERN.match(value) is not None


def _repair_coordinates(item: dict) -> None:
    """Swap a malformed coordinate with a numeric value from another field.

    If the latitude or longitude field does not look numeric, the first
    non-coordinate field whose value does look numeric is exchanged with it.
    The dict is modified in place.

    Raises:
        KeyError: if the latitude or longitude field is missing.
    """
    misplaced_key = ""
    for key in COORDINATE_KEYS:
        if not _looks_like_coordinate(item[key]):
            # NOTE(review): when both coordinates are malformed only the
            # last one (longitude) is repaired; this mirrors the original
            # logic -- confirm whether both should be fixed.
            misplaced_key = key
    if not misplaced_key:
        return

    misplaced_value = item[misplaced_key]
    for key, value in item.items():
        if key not in COORDINATE_KEYS and _looks_like_coordinate(value):
            # Only values of existing keys change, so mutating while
            # iterating is safe; we stop right after the swap anyway.
            item[misplaced_key] = value
            item[key] = misplaced_value
            break


def _scrape_airport(href: str) -> Optional[dict]:
    """Fetch one airport detail page and return its fields as a dict.

    Each row of the page's second table contributes one entry: first cell
    text as the key, second cell text as the value.

    Returns:
        The field dict, or None when the page has no second table.

    Raises:
        KeyError: if the page lacks a latitude or longitude field.
        requests.RequestException: if the download fails or times out.
    """
    rows = _second_table_rows(_fetch_soup(BASE_URL + href))
    if rows is None:
        return None

    item = {}
    for row in rows:
        cells = row.find_all(name="td")
        if len(cells) < 2:
            # Rows without a key/value pair carry no data for the record.
            continue
        item[cells[0].string] = cells[1].string
    _repair_coordinates(item)
    return item


def main():
    """Scrape airport three-letter codes and related details into json.json.

    Walks the listing pages, follows the first link of every data row to the
    airport's detail page, and writes all collected records as one JSON array.
    Failures are reported on stdout and skipped so that one bad page or
    record does not abort the whole run.
    """
    result = []
    for page in range(1, PAGE_COUNT + 1):
        try:
            listing_rows = _second_table_rows(
                _fetch_soup(f"{BASE_URL}/{page}__airportcode/")
            ) or []
        except Exception as ex:  # best-effort scraper: report and move on
            print(f"Exception in main loop process: {ex=}, {type(ex)=}")
            continue

        # The first two rows are skipped (presumably header rows -- verify
        # against the live page layout).
        for row in listing_rows[2:]:
            link = row.find(name="a")
            if link is None:
                # A row without a link must not abort the rest of the page.
                continue
            href = link.attrs.get("href")
            if href is None:
                continue
            try:
                item = _scrape_airport(href)
            except Exception as e:  # best-effort scraper: report and move on
                print(f"Exception in sub process: {e=}, {type(e)=}")
                continue
            if item is not None:
                result.append(item)

    s = json.dumps(result, ensure_ascii=False)
    # ensure_ascii=False emits raw Chinese text, so the file must be UTF-8
    # regardless of the platform's default encoding.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(s)


# Program entry point
if __name__ == '__main__':
    main()