import json
import os
import re
import time

from bs4 import BeautifulSoup
import requests

# NOTE(security): this key is committed to source control. It is kept only as
# a fallback so existing runs keep working; rotate it and supply the real key
# through the GOOGLE_GEOCODING_API_KEY environment variable (or the `api_key`
# argument of req_geocode) instead.
_FALLBACK_GEOCODE_KEY = 'AIzaSyD0OfQuI1qV-VgTbVS8253RuU7Kt3ohtFo'

GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
AIRPORT_SITE = "https://airportcode.bmcx.com"

# Seconds to wait for any single HTTP request before giving up, so one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Field classifiers used to detect values that landed in the wrong column.
# NOTE(review): PATTERN_NUMBER needs at least two digits, so a single-digit
# value such as "5" is not treated as a number -- confirm this is intended.
PATTERN_NUMBER = re.compile(r'^-*\d+\.*\d+$')
PATTERN_IATA = re.compile(r'^[A-Z]{3}$')
PATTERN_COUNTRY_CODE = re.compile(r'^[A-Z]{2}$')
PATTERN_ICAO = re.compile(r'^[A-Z]{4}$')
PATTERN_LATIN_NAME = re.compile(r'^[A-Za-z\s,]+$')
PATTERN_CHINESE_NAME = re.compile(r'^[\u4e00-\u9fa5]+$')

LAT_KEY = "纬度"
LON_KEY = "经度"
IATA_KEY = "机场三字码"

# (target field, pattern the stray value must match, extra precondition).
# Applied in order; for each rule the latitude cell is tried before the
# longitude cell. A value is moved only while the target field is still empty.
_RELOCATION_RULES = (
    ("国家(地区)代码", PATTERN_COUNTRY_CODE, lambda it: True),
    ("机场四字码", PATTERN_ICAO, lambda it: True),
    ("机场名称(英文)", PATTERN_LATIN_NAME,
     lambda it: it["城市名(英文)"] != ''),
    ("城市名(英文)", PATTERN_LATIN_NAME,
     lambda it: it["机场名称(英文)"] != ''),
    ("城市名", PATTERN_LATIN_NAME,
     lambda it: it["机场名称(英文)"] != '' and it["城市名(英文)"] != ''),
    ("国家(地区)名称", PATTERN_CHINESE_NAME, lambda it: True),
    ("城市名", PATTERN_CHINESE_NAME, lambda it: True),
)


# Refresh data from the Google Geocoding API based on the airport IATA code.
def req_geocode(api_key=None):
    """Enrich every airport in json2.json with geocoding data; write json3.json.

    For each airport that has a three-letter code, the Geocoding API is
    queried once per language ('en', 'zh') and the first result's
    ``address_components`` is stored under ``geo_<lang>``.

    Args:
        api_key: Google API key. When omitted, the GOOGLE_GEOCODING_API_KEY
            environment variable is used, then the built-in fallback key.

    Failures for a single airport are printed and do not stop the run.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_GEOCODING_API_KEY',
                                 _FALLBACK_GEOCODE_KEY)

    # The data contains Chinese text written with ensure_ascii=False, so the
    # encoding must be explicit rather than the platform default.
    with open("json2.json", "r", encoding="utf-8") as f:
        airports = json.load(f)

    langs = ['en', 'zh']
    processed = 0
    for item in airports:
        try:
            if not item[IATA_KEY]:
                continue
            address = '{} Airport,{},{}'.format(
                item[IATA_KEY], item['城市名(英文)'], item['国家(地区)代码'])
            for lang in langs:
                # Let requests percent-encode the query so that scraped text
                # containing '&', '#', spaces, etc. cannot corrupt the URL.
                response = requests.get(
                    GEOCODE_URL,
                    params={'address': address, 'language': lang,
                            'key': api_key},
                    timeout=REQUEST_TIMEOUT,
                )
                results = json.loads(response.text).get('results')
                if results:
                    geo = results[0].get('address_components')
                    if geo is not None:
                        item['geo_{}'.format(lang)] = geo
            print(item)
            processed += 1
            # Crude rate limiting: pause one second after every third airport.
            if processed % 3 == 0:
                time.sleep(1)
        except Exception as e:
            print(f"Exception in geocode process: {e=}, {type(e)=}")

    with open('json3.json', 'w', encoding="utf-8") as f:
        f.write(json.dumps(airports, ensure_ascii=False))


def _parse_detail_page(html_text):
    """Return the label -> value dict from an airport detail page, or None.

    The data lives in the second <table> of the page, one field per row.
    None is returned when the page has fewer than two tables.
    """
    soup = BeautifulSoup(html_text, features="html.parser")
    tables = soup.find_all(name="table")
    if len(tables) <= 1:
        return None

    item = {}
    for row in tables[1].find_all(name="tr"):
        cells = row.find_all(name="td")
        if len(cells) < 2:
            # Not a label/value row; skip it rather than lose the airport.
            continue
        value = cells[1].string
        # `.string` is None for an empty cell (or one with nested markup).
        # The repair rules compare against '' and feed values to re.match,
        # so normalise to a plain string.
        item[cells[0].string] = '' if value is None else str(value)
    return item


def _swap_misplaced(item, bad_key, pattern, excluded):
    """Swap item[bad_key] with the first other field whose value fits pattern."""
    stray = item[bad_key]
    for key, value in item.items():
        if key not in excluded and pattern.match(value):
            # Only values are reassigned (no keys added or removed), which is
            # safe while iterating the dict.
            item[bad_key] = value
            item[key] = stray
            break


def _repair_fields(item):
    """Fix, in place, values the source site put into the wrong column."""
    # 1. A coordinate cell holding a non-number: swap it with whichever other
    #    field holds a number. If both coordinates are bad, only longitude is
    #    repaired (the later check wins).
    bad_coord = None
    for coord in (LAT_KEY, LON_KEY):
        if item[coord] != '' and not PATTERN_NUMBER.match(item[coord]):
            bad_coord = coord
    if bad_coord is not None:
        _swap_misplaced(item, bad_coord, PATTERN_NUMBER, (LAT_KEY, LON_KEY))

    # 2. Same idea for the three-letter code.
    if item[IATA_KEY] == '' or not PATTERN_IATA.match(item[IATA_KEY]):
        _swap_misplaced(item, IATA_KEY, PATTERN_IATA, (IATA_KEY,))
    item[IATA_KEY] = item[IATA_KEY].upper()

    # 3. Text left in a coordinate cell is moved to the first empty field it
    #    plausibly belongs to.
    for target, pattern, precondition in _RELOCATION_RULES:
        for coord in (LAT_KEY, LON_KEY):
            if (item[target] == '' and precondition(item)
                    and pattern.match(item[coord])):
                item[target] = item[coord]
                item[coord] = ''


def _report_invalid(path):
    """Re-read the saved file and print codes of records that still look wrong."""
    with open(path, "r", encoding="utf-8") as f:
        airports = json.load(f)

    bad_count = 0
    for item in airports:
        for coord in (LAT_KEY, LON_KEY):
            if item[coord] != '' and not PATTERN_NUMBER.match(item[coord]):
                print(item[IATA_KEY])
                bad_count += 1
        if item[IATA_KEY] != '' and not PATTERN_IATA.match(item[IATA_KEY]):
            bad_count += 1
    if bad_count > 0:
        # The count is an int; it must be converted before concatenation.
        print('Sth wrong ' + str(bad_count))


# Scrape airport three-letter codes and related information.
def get_info(page_stop=290):
    """Scrape airportcode.bmcx.com into json2.json, then validate the result.

    Args:
        page_stop: Exclusive upper bound of the listing pages to visit;
            pages 1 .. page_stop - 1 are fetched.

    Each listing page links to per-airport detail pages. Every detail page is
    parsed, repaired with _repair_fields, and collected. Errors on a detail
    page skip that airport; errors on a listing page skip the rest of that
    page. Both are printed and the crawl continues.
    """
    result = []
    for page in range(1, page_stop):
        try:
            listing = requests.get(
                AIRPORT_SITE + "/" + str(page) + "__airportcode/",
                timeout=REQUEST_TIMEOUT,
            )
            soup = BeautifulSoup(listing.text, features="html.parser")
            tables = soup.find_all(name="table")
            if len(tables) <= 1:
                continue
            # The first two rows of the listing table are skipped
            # (presumably header rows -- TODO confirm against the site).
            for row in tables[1].find_all(name="tr")[2:]:
                anchor = row.find(name="a")
                if anchor is None:
                    # A row without a link must not abort the whole page.
                    continue
                href = anchor.attrs.get("href")
                if href is None:
                    continue
                try:
                    detail = requests.get(AIRPORT_SITE + href,
                                          timeout=REQUEST_TIMEOUT)
                    item = _parse_detail_page(detail.text)
                    if item is not None:
                        _repair_fields(item)
                        result.append(item)
                except Exception as e:
                    print(f"Exception in sub process: {e=}, {type(e)=}")
        except Exception as ex:
            print(f"Exception in main loop process: {ex=}, {type(ex)=}")

    with open('json2.json', 'w', encoding="utf-8") as f:
        f.write(json.dumps(result, ensure_ascii=False))

    # Validate the results.
    _report_invalid("json2.json")