郑毅 3 місяців тому
батько
коміт
ec056f5a9c
2 змінених файлів з 110 додано та 24 видалено
  1. 46 18
      __main__.py
  2. 64 6
      json.json

+ 46 - 18
__main__.py

@@ -1,3 +1,4 @@
+import re
 import requests
 import json
 from bs4 import BeautifulSoup
@@ -9,27 +10,54 @@ def main():
     # new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
     # print(new_str)
 
+    # 抓取机场三字码等信息
+    pattern = re.compile(r'-*\d+\.\d+')
     result = []
-    n = 290
+    n = 2
     for i in range(1, n):
         page = i
-        text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
-        res = text.text
-        soup = BeautifulSoup(res, features="html.parser")
-        arr = soup.find_all(name = "table")[1].find_all(name = "tr")
-        for i in arr[2:]:
-            t = i.find_all(name = "td")
-            href = t[0].find(name = "a").attrs.get("href")
-            if href != None:
-                sub_text = requests.get("https://airportcode.bmcx.com" + href)
-                sub_res = sub_text.text
-                sub_soup = BeautifulSoup(sub_res, features="html.parser")
-                sub_arr = sub_soup.find_all(name = "table")[1].find_all(name = "tr")
-                item = {}
-                for j in sub_arr[2:]:
-                    sub_t = j.find_all(name = "td")
-                    item[sub_t[0].string] = sub_t[1].string
-                result.append(item)
+        try:
+            text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
+            res = text.text
+            html = BeautifulSoup(res, features="html.parser")
+            tabs = html.find_all(name = "table")
+            if len(tabs) > 1:
+                arr = tabs[1].find_all(name = "tr")
+                if len(arr) > 2:
+                    for tr in arr[2:]:
+                        href = tr.find(name = "a").attrs.get("href")
+                        if href != None:
+                            try:
+                                sub_text = requests.get("https://airportcode.bmcx.com" + href)
+                                sub_res = sub_text.text
+                                sub_html = BeautifulSoup(sub_res, features="html.parser")
+                                sub_tabs = sub_html.find_all(name = "table")
+                                if len(sub_tabs) > 1:
+                                    sub_arr = sub_tabs[1].find_all(name = "tr")
+                                    item = {}
+                                    for sub_tr in sub_arr:
+                                        sub_t = sub_tr.find_all(name = "td")
+                                        item[sub_t[0].string] = sub_t[1].string
+                                    flag = ""
+                                    swap = ""
+                                    if not pattern.match(item["纬度"]):
+                                        flag = "纬度"
+                                        swap = item["纬度"]
+                                    if not pattern.match(item["经度"]):
+                                        flag = "经度"
+                                        swap = item["经度"]
+                                    if flag != "":
+                                        for k, v in item.items():
+                                            if k != "纬度" and k != "经度" and pattern.match(v):
+                                                item[flag] = v
+                                                item[k] = swap
+                                                break
+                                    result.append(item)
+                            except Exception as e:
+                                print(f"Exception in sub process: {e=}, {type(e)=}")
+        except Exception as ex:
+            print(f"Exception in main loop process: {ex=}, {type(ex)=}")
+
     s = json.dumps(result, ensure_ascii=False)
     # print(s)
     with open('json.json', 'w') as f:

+ 64 - 6
json.json

@@ -1,5 +1,7 @@
 [
     {
+        "城市名": "济南",
+        "城市名(英文)": "JINAN",
         "机场三字码": "TNA",
         "机场四字码": "ZSJN",
         "机场名称": "济南遥墙国际机场",
@@ -10,6 +12,8 @@
         "经度": "116.9667"
     },
     {
+        "城市名": "汕头",
+        "城市名(英文)": "SHANTOU",
         "机场三字码": "SWA",
         "机场四字码": "ZGOW",
         "机场名称": "汕头外砂国际机场",
@@ -20,6 +24,8 @@
         "经度": "116.7631244659424"
     },
     {
+        "城市名": "义乌",
+        "城市名(英文)": "YIWU",
         "机场三字码": "YIW",
         "机场四字码": "ZSYW",
         "机场名称": "义乌",
@@ -30,6 +36,8 @@
         "经度": "120.0500"
     },
     {
+        "城市名": "安庆",
+        "城市名(英文)": "ANQING",
         "机场三字码": "AQG",
         "机场四字码": "ZSAQ",
         "机场名称": "安庆",
@@ -40,6 +48,8 @@
         "经度": "117.0333"
     },
     {
+        "城市名": "西双版纳",
+        "城市名(英文)": "JINGHONG",
         "机场三字码": "JHG",
         "机场四字码": "ZPJH",
         "机场名称": "景洪",
@@ -50,6 +60,8 @@
         "经度": "100.8000"
     },
     {
+        "城市名": "通辽",
+        "城市名(英文)": "TONGLIAO",
         "机场三字码": "TGO",
         "机场四字码": "ZBTL",
         "机场名称": "通辽",
@@ -60,6 +72,8 @@
         "经度": "122.4028"
     },
     {
+        "城市名": "吉隆坡",
+        "城市名(英文)": "KUALALUMPUR",
         "机场三字码": "KUL",
         "机场四字码": "WMKK",
         "机场名称": "吉隆坡",
@@ -70,6 +84,8 @@
         "经度": "101.707405"
     },
     {
+        "城市名": "西昌",
+        "城市名(英文)": "XICHANG",
         "机场三字码": "XIC",
         "机场四字码": "ZUXC",
         "机场名称": "西昌青山机场",
@@ -80,6 +96,8 @@
         "经度": "102.2167"
     },
     {
+        "城市名": "CHICAGO",
+        "城市名(英文)": "芝加哥",
         "机场三字码": "CHI",
         "机场四字码": null,
         "机场名称": "芝加哥",
@@ -90,6 +108,8 @@
         "经度": "-87.9111"
     },
     {
+        "城市名": "西雅图",
+        "城市名(英文)": "SEATTLE",
         "机场三字码": "SEA",
         "机场四字码": "KSEA",
         "机场名称": "西雅图",
@@ -100,6 +120,8 @@
         "经度": "-122.30869"
     },
     {
+        "城市名": "惠灵顿",
+        "城市名(英文)": "WLG",
         "机场三字码": "WELLINGTON",
         "机场四字码": "NZWN",
         "机场名称": "惠灵顿",
@@ -110,6 +132,8 @@
         "经度": "174.812660"
     },
     {
+        "城市名": "伊斯兰堡",
+        "城市名(英文)": "OPRN",
         "机场三字码": "ISB",
         "机场四字码": "ISLAMABAD",
         "机场名称": "伊斯兰堡",
@@ -120,6 +144,8 @@
         "经度": "73.0987585"
     },
     {
+        "城市名": "日内瓦",
+        "城市名(英文)": "日内瓦",
         "机场三字码": "GVA",
         "机场四字码": "LSGG",
         "机场名称": "GENEVA",
@@ -130,6 +156,8 @@
         "经度": "6.10216256"
     },
     {
+        "城市名": "柏林",
+        "城市名(英文)": "Schonenfeld Airport",
         "机场三字码": "SXF",
         "机场四字码": "EDDB",
         "机场名称": "柏林",
@@ -140,6 +168,8 @@
         "经度": "13.5183387"
     },
     {
+        "城市名": "冲绳",
+        "城市名(英文)": "JP",
         "机场三字码": "OKA",
         "机场四字码": "ROAH",
         "机场名称": "冲绳",
@@ -150,6 +180,8 @@
         "经度": "127.650756"
     },
     {
+        "城市名": "新泻",
+        "城市名(英文)": "日本",
         "机场三字码": "KIJ",
         "机场四字码": "RJSN",
         "机场名称": "新泻",
@@ -160,16 +192,20 @@
         "经度": "139.1167"
     },
     {
+        "城市名": "加尔各答",
+        "城市名(英文)": "CALCUTTA",
         "机场三字码": "CCU",
         "机场四字码": "VECC",
         "机场名称": "加尔各达苏巴斯德拉国际机场",
         "机场名称(英文)": "Netaji Subhash Chandra Bose Intl Airport",
         "国家(地区)代码": "IN",
         "国家(地区)名称": "印度",
-        "纬度": "CALCUTTA",
+        "纬度": "22.653889",
         "经度": "88.446387"
     },
     {
+        "城市名": "北京南苑",
+        "城市名(英文)": "NANYUAN",
         "机场三字码": "NAY",
         "机场四字码": "ZBNY",
         "机场名称": "北京南苑机场",
@@ -177,9 +213,11 @@
         "国家(地区)代码": "CN",
         "国家(地区)名称": "中国",
         "纬度": "39.784400",
-        "经度": "NANYUAN"
+        "经度": "116.386070"
     },
     {
+        "城市名": "SXJ",
+        "城市名(英文)": "SHANSHAN",
         "机场三字码": "膳善",
         "机场四字码": "ZWSS",
         "机场名称": null,
@@ -190,6 +228,8 @@
         "经度": "90.2333"
     },
     {
+        "城市名": "柏林",
+        "城市名(英文)": "TXL",
         "机场三字码": "BERLIN",
         "机场四字码": null,
         "机场名称": "柏林",
@@ -200,6 +240,8 @@
         "经度": "13.2921197"
     },
     {
+        "城市名": "蒙彼利埃",
+        "城市名(英文)": "MONTPELLIER",
         "机场三字码": "MPL",
         "机场四字码": null,
         "机场名称": "蒙彼里埃",
@@ -210,6 +252,8 @@
         "经度": "3.95868589"
     },
     {
+        "城市名": "纽伦堡",
+        "城市名(英文)": "NUREMBERG",
         "机场三字码": null,
         "机场四字码": "NUE",
         "机场名称": "纽伦堡",
@@ -220,6 +264,8 @@
         "经度": "11.0787720"
     },
     {
+        "城市名": "瓦伦西亚",
+        "城市名(英文)": "VALENCIA",
         "机场三字码": "巴伦西亚",
         "机场四字码": null,
         "机场名称": "VLC",
@@ -230,6 +276,8 @@
         "经度": "-0.4727126"
     },
     {
+        "城市名": "哈拉雷",
+        "城市名(英文)": "HARARE",
         "机场三字码": "HARARE, ZIMBABWE HARARE",
         "机场四字码": null,
         "机场名称": "哈拉雷",
@@ -240,6 +288,8 @@
         "经度": "31.0000"
     },
     {
+        "城市名": "克拉克",
+        "城市名(英文)": "CLARKAIRBASE",
         "机场三字码": "PH",
         "机场四字码": null,
         "机场名称": "克拉克",
@@ -250,6 +300,8 @@
         "经度": "120.546050"
     },
     {
+        "城市名": "阿拜昂环礁",
+        "城市名(英文)": "ABAIANGATOLL",
         "机场三字码": "基里巴斯",
         "机场四字码": null,
         "机场名称": "阿拜昂环礁",
@@ -260,26 +312,32 @@
         "经度": "172.8333"
     },
     {
-        "机场三字码": "16.7608139",
+        "城市名": "阿卡普尔科",
+        "城市名(英文)": "ACAPULCO",
+        "机场三字码": "ACA",
         "机场四字码": null,
         "机场名称": "阿卡普尔科",
         "机场名称(英文)": "Acapulco-General Alvarez Intl Airport",
         "国家(地区)代码": "MX",
         "国家(地区)名称": "墨西哥",
-        "纬度": "ACA",
+        "纬度": "16.7608139",
         "经度": "-99.754485"
     },
     {
-        "机场三字码": "45.2000",
+        "城市名": "亚丁",
+        "城市名(英文)": "ADEN",
+        "机场三字码": "ADE",
         "机场四字码": null,
         "机场名称": "亚丁",
         "机场名称(英文)": "ADEN",
         "国家(地区)代码": "YE",
         "国家(地区)名称": "也门",
         "纬度": "12.7500",
-        "经度": "ADE"
+        "经度": "45.2000"
     },
     {
+        "城市名": null,
+        "城市名(英文)": "AEKGODANG",
         "机场三字码": "AEG",
         "机场四字码": "AEKGODANG",
         "机场名称": null,