|
@@ -1,3 +1,4 @@
|
|
|
|
+import re
|
|
import requests
|
|
import requests
|
|
import json
|
|
import json
|
|
from bs4 import BeautifulSoup
|
|
from bs4 import BeautifulSoup
|
|
@@ -9,27 +10,54 @@ def main():
|
|
# new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
|
|
# new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
|
|
# print(new_str)
|
|
# print(new_str)
|
|
|
|
|
|
|
|
+ # Scrape airport three-letter codes and related details
|
|
|
|
+ pattern = re.compile(r'-*\d+\.\d+')
|
|
result = []
|
|
result = []
|
|
- n = 290
|
|
|
|
|
|
+ n = 2
|
|
for i in range(1, n):
|
|
for i in range(1, n):
|
|
page = i
|
|
page = i
|
|
- text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
|
|
|
|
- res = text.text
|
|
|
|
- soup = BeautifulSoup(res, features="html.parser")
|
|
|
|
- arr = soup.find_all(name = "table")[1].find_all(name = "tr")
|
|
|
|
- for i in arr[2:]:
|
|
|
|
- t = i.find_all(name = "td")
|
|
|
|
- href = t[0].find(name = "a").attrs.get("href")
|
|
|
|
- if href != None:
|
|
|
|
- sub_text = requests.get("https://airportcode.bmcx.com" + href)
|
|
|
|
- sub_res = sub_text.text
|
|
|
|
- sub_soup = BeautifulSoup(sub_res, features="html.parser")
|
|
|
|
- sub_arr = sub_soup.find_all(name = "table")[1].find_all(name = "tr")
|
|
|
|
- item = {}
|
|
|
|
- for j in sub_arr[2:]:
|
|
|
|
- sub_t = j.find_all(name = "td")
|
|
|
|
- item[sub_t[0].string] = sub_t[1].string
|
|
|
|
- result.append(item)
|
|
|
|
|
|
+ try:
|
|
|
|
+ text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
|
|
|
|
+ res = text.text
|
|
|
|
+ html = BeautifulSoup(res, features="html.parser")
|
|
|
|
+ tabs = html.find_all(name = "table")
|
|
|
|
+ if len(tabs) > 1:
|
|
|
|
+ arr = tabs[1].find_all(name = "tr")
|
|
|
|
+ if len(arr) > 2:
|
|
|
|
+ for tr in arr[2:]:
|
|
|
|
+ href = tr.find(name = "a").attrs.get("href")
|
|
|
|
+ if href != None:
|
|
|
|
+ try:
|
|
|
|
+ sub_text = requests.get("https://airportcode.bmcx.com" + href)
|
|
|
|
+ sub_res = sub_text.text
|
|
|
|
+ sub_html = BeautifulSoup(sub_res, features="html.parser")
|
|
|
|
+ sub_tabs = sub_html.find_all(name = "table")
|
|
|
|
+ if len(sub_tabs) > 1:
|
|
|
|
+ sub_arr = sub_tabs[1].find_all(name = "tr")
|
|
|
|
+ item = {}
|
|
|
|
+ for sub_tr in sub_arr:
|
|
|
|
+ sub_t = sub_tr.find_all(name = "td")
|
|
|
|
+ item[sub_t[0].string] = sub_t[1].string
|
|
|
|
+ flag = ""
|
|
|
|
+ swap = ""
|
|
|
|
+ if not pattern.match(item["纬度"]):
|
|
|
|
+ flag = "纬度"
|
|
|
|
+ swap = item["纬度"]
|
|
|
|
+ if not pattern.match(item["经度"]):
|
|
|
|
+ flag = "经度"
|
|
|
|
+ swap = item["经度"]
|
|
|
|
+ if flag != "":
|
|
|
|
+ for k, v in item.items():
|
|
|
|
+ if k != "纬度" and k != "经度" and pattern.match(v):
|
|
|
|
+ item[flag] = v
|
|
|
|
+ item[k] = swap
|
|
|
|
+ break
|
|
|
|
+ result.append(item)
|
|
|
|
+ except Exception as e:
|
|
|
|
+ print(f"Exception in sub process: {e=}, {type(e)=}")
|
|
|
|
+ except Exception as ex:
|
|
|
|
+ print(f"Exception in main loop process: {ex=}, {type(ex)=}")
|
|
|
|
+
|
|
s = json.dumps(result, ensure_ascii=False)
|
|
s = json.dumps(result, ensure_ascii=False)
|
|
# print(s)
|
|
# print(s)
|
|
with open('json.json', 'w') as f:
|
|
with open('json.json', 'w') as f:
|