123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- import json
- import re
- import time
- from bs4 import BeautifulSoup
- import requests
# For each airport's three-letter code, re-fetch its address data from the Google Geocoding API.
def req_geocode():
    """Add Google Geocoding address components to the scraped airport records.

    Reads the list of airport records from ``json2.json``.  For every record
    whose ``机场三字码`` (airport three-letter code) is not empty, the Google
    Geocoding API is queried once per language (``en`` and ``zh``) with the
    address ``"<code> Airport,<English city name>,<country code>"``.  When the
    response contains results, the first result's ``address_components`` is
    stored on the record under ``geo_<lang>``.

    The complete list (including records that were skipped or failed) is then
    written to ``json3.json``.  An error while processing one record is
    printed and does not stop the run.
    """
    # NOTE(review): this API key is hard-coded in source.  It is kept here so
    # the function keeps working unchanged, but it should be moved to
    # configuration outside version control and rotated.
    api_url = 'https://maps.googleapis.com/maps/api/geocode/json'
    api_key = 'AIzaSyD0OfQuI1qV-VgTbVS8253RuU7Kt3ohtFo'
    request_timeout = 30  # seconds; without it a stalled connection blocks forever
    langs = ('en', 'zh')

    # The file contains non-ASCII text, so the encoding must not depend on
    # the platform's default locale.
    with open("json2.json", "r", encoding="utf-8") as f:
        records = json.load(f)

    processed = 0
    for item in records:
        try:
            if item['机场三字码'] == '':
                continue

            for lang in langs:
                address = '{} Airport,{},{}'.format(
                    item['机场三字码'],
                    item['城市名(英文)'],
                    item['国家(地区)代码'],
                )
                # Passing the query through ``params`` lets requests
                # URL-encode the values, so names containing '&', '#', '+'
                # or spaces cannot corrupt the query string.
                response = requests.get(
                    api_url,
                    params={'address': address, 'language': lang, 'key': api_key},
                    timeout=request_timeout,
                )
                results = json.loads(response.text).get('results')
                if results:
                    components = results[0].get('address_components')
                    if components is not None:
                        item['geo_{}'.format(lang)] = components

            print(item)
            processed += 1
            # Pause for a second after every third processed record to
            # throttle the request rate.
            if processed % 3 == 0:
                time.sleep(1)
        except Exception as e:
            # Best effort: report the failure and carry on with the next record.
            print(f"Exception in geocode process: {e=}, {type(e)=}")

    with open('json3.json', 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False)
# Scrape the airports' three-letter codes and related information.
# Format patterns used to recognise values that ended up in the wrong field.
# NOTE(review): the coordinate pattern is kept exactly as originally written;
# it needs at least two digits (so "5" does not match) and tolerates repeated
# '-' or '.' characters.  Tighten it only after checking the scraped data.
_COORD_RE = re.compile(r'^-*\d+\.*\d+$')
_CODE3_RE = re.compile(r'^[A-Z]{3}$')
_CODE2_RE = re.compile(r'^[A-Z]{2}$')
_CODE4_RE = re.compile(r'^[A-Z]{4}$')
_LATIN_TEXT_RE = re.compile(r'^[A-Za-z\s,]+$')
_CJK_TEXT_RE = re.compile(r'^[\u4e00-\u9fa5]+$')

_COORD_FIELDS = ("纬度", "经度")  # latitude, longitude
_REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

# Relocation rules, applied in order.  Each rule is
# (target field, pattern, fields that must already be non-empty): when the
# target is empty and a coordinate field holds a value matching the pattern,
# that value is moved into the target and the coordinate field is cleared.
_RELOCATION_RULES = (
    ("国家(地区)代码", _CODE2_RE, ()),
    ("机场四字码", _CODE4_RE, ()),
    ("机场名称(英文)", _LATIN_TEXT_RE, ("城市名(英文)",)),
    ("城市名(英文)", _LATIN_TEXT_RE, ("机场名称(英文)",)),
    ("城市名", _LATIN_TEXT_RE, ("机场名称(英文)", "城市名(英文)")),
    ("国家(地区)名称", _CJK_TEXT_RE, ()),
    ("城市名", _CJK_TEXT_RE, ()),
)


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with ``html.parser``."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, features="html.parser")


def _parse_detail_table(soup):
    """Read the second table of a detail page into a ``{label: value}`` dict.

    Returns ``None`` when the page has fewer than two tables.
    """
    tables = soup.find_all(name="table")
    if len(tables) <= 1:
        return None
    item = {}
    for row in tables[1].find_all(name="tr"):
        cells = row.find_all(name="td")
        if len(cells) < 2:
            # Not a label/value row; previously this raised IndexError and
            # discarded the whole record.
            continue
        value = cells[1].string
        # ``.string`` is None for an empty cell; the rest of the pipeline
        # compares against '' and regex-matches the value, so normalise it.
        item[cells[0].string] = '' if value is None else value
    return item


def _swap_in_first_match(item, target, pattern, displaced, skip):
    """Swap ``item[target]`` with the first other field matching *pattern*.

    The matching value is stored in ``item[target]`` and *displaced* is
    written to the field it came from.  Fields listed in *skip* are ignored.
    Nothing changes when no field matches.
    """
    for key, value in item.items():
        if key not in skip and pattern.match(value):
            item[target] = value
            item[key] = displaced
            break


def _repair_record(item):
    """Fix, in place, values that were scraped into the wrong field."""
    # Coordinates: if latitude and/or longitude holds something that is not a
    # number, swap it with the first non-coordinate field that does look like
    # one.  When both are invalid only the longitude is repaired here.
    invalid_coord = None
    for field in _COORD_FIELDS:
        if item[field] != '' and not _COORD_RE.match(item[field]):
            invalid_coord = field
    if invalid_coord is not None:
        _swap_in_first_match(
            item, invalid_coord, _COORD_RE, item[invalid_coord], _COORD_FIELDS
        )

    # Three-letter code: if empty or malformed, swap it with the first other
    # field that looks like a three-letter code.
    code = item["机场三字码"]
    if code == '' or not _CODE3_RE.match(code):
        _swap_in_first_match(item, "机场三字码", _CODE3_RE, code, ("机场三字码",))
    item["机场三字码"] = item["机场三字码"].upper()

    # Whatever non-numeric text is still sitting in a coordinate field is
    # moved to the first empty field whose expected format it matches.
    for target, pattern, required in _RELOCATION_RULES:
        for source in _COORD_FIELDS:
            if (
                item[target] == ''
                and all(item[name] != '' for name in required)
                and pattern.match(item[source])
            ):
                item[target] = item[source]
                item[source] = ''


def get_info():
    """Scrape airport records from airportcode.bmcx.com into ``json2.json``.

    Walks listing pages 1 to 289.  On each page, every row after the first
    two of the second table is expected to link to an airport detail page;
    that page's second table is read as label/value pairs, misplaced values
    are repaired, and the record is collected.  All records are written to
    ``json2.json`` as a JSON list.

    The file is then read back and checked: every non-empty latitude,
    longitude and three-letter code that does not match its expected format
    is counted (the code of records with a bad coordinate is printed), and a
    summary line is printed when the count is non-zero.

    Errors on a listing page or a detail page are printed and skipped.
    """
    records = []
    n = 290  # exclusive upper bound of the listing page numbers
    for page in range(1, n):
        try:
            listing = _fetch_soup(
                "https://airportcode.bmcx.com/" + str(page) + "__airportcode/"
            )
            tables = listing.find_all(name="table")
            if len(tables) <= 1:
                continue
            # The first two rows of the listing table are skipped.
            for row in tables[1].find_all(name="tr")[2:]:
                link = row.find(name="a")
                # A row without a link used to raise AttributeError here and
                # abort the rest of the page; now only that row is skipped.
                href = link.attrs.get("href") if link is not None else None
                if href is None:
                    continue
                try:
                    item = _parse_detail_table(
                        _fetch_soup("https://airportcode.bmcx.com" + href)
                    )
                    if item is None:
                        continue
                    _repair_record(item)
                    records.append(item)
                except Exception as e:
                    # Best effort: drop this airport and keep going.
                    print(f"Exception in sub process: {e=}, {type(e)=}")
        except Exception as ex:
            print(f"Exception in main loop process: {ex=}, {type(ex)=}")

    # The data contains non-ASCII text, so the encoding is fixed to UTF-8
    # rather than left to the platform default.
    with open('json2.json', 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False)

    # Result validation: re-read what was written and count malformed values.
    with open("json2.json", "r", encoding="utf-8") as f:
        saved = json.load(f)
    problems = 0
    for item in saved:
        for field in _COORD_FIELDS:
            if item[field] != '' and not _COORD_RE.match(item[field]):
                print(item["机场三字码"])
                problems += 1
        if item["机场三字码"] != '' and not _CODE3_RE.match(item["机场三字码"]):
            problems += 1

    if problems > 0:
        # The count must be converted explicitly; 'str' + int raises TypeError.
        print('Sth wrong ' + str(problems))
- # s = json.dumps(arr, ensure_ascii=False)
- # with open('json2.json', 'w') as f:
- # f.write(s)
|