Browse Source

update script

郑毅 3 months ago
parent
commit
2176857587
3 changed files with 186 additions and 142 deletions
  1. 2 142
      __main__.py
  2. 0 0
      airport_codes/__init__.py
  3. 184 0
      airport_codes/get_info.py

+ 2 - 142
__main__.py

@@ -1,7 +1,4 @@
-import re
-import requests
-import json
-from bs4 import BeautifulSoup
+from airport_codes.get_info import req_geocode
 
 
 def main():
 def main():
     # print('[{}] Hello world!'.format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
     # print('[{}] Hello world!'.format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
@@ -10,144 +7,7 @@ def main():
     # new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
     # new_str = '****' if len(str) <= 4 else str[0:int(n/3)] + '****' + str[int(n*2/3):]
     # print(new_str)
     # print(new_str)
 
 
-    # 抓取机场三字码等信息
-    pattern1 = re.compile(r'^-*\d+\.*\d+$')
-    pattern2 = re.compile(r'^[A-Z]{3}$')
-    pattern3 = re.compile(r'^[A-Z]{2}$')
-    pattern4 = re.compile(r'^[A-Z]{4}$')
-    pattern5 = re.compile(r'^[A-Za-z\s,]+$')
-    pattern6 = re.compile(r'^[\u4e00-\u9fa5]+$')
-    result = []
-    n = 290
-    for i in range(1, n):
-        page = i
-        try:
-            text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
-            res = text.text
-            html = BeautifulSoup(res, features="html.parser")
-            tabs = html.find_all(name = "table")
-            if len(tabs) > 1:
-                arr = tabs[1].find_all(name = "tr")
-                if len(arr) > 2:
-                    for tr in arr[2:]:
-                        href = tr.find(name = "a").attrs.get("href")
-                        if href != None:
-                            try:
-                                sub_text = requests.get("https://airportcode.bmcx.com" + href)
-                                sub_res = sub_text.text
-                                sub_html = BeautifulSoup(sub_res, features="html.parser")
-                                sub_tabs = sub_html.find_all(name = "table")
-                                if len(sub_tabs) > 1:
-                                    sub_arr = sub_tabs[1].find_all(name = "tr")
-                                    item = {}
-                                    for sub_tr in sub_arr:
-                                        sub_t = sub_tr.find_all(name = "td")
-                                        item[sub_t[0].string] = sub_t[1].string
-                                    flag = False
-                                    name = ""
-                                    swap = ""
-                                    if item["纬度"] != '' and not pattern1.match(item["纬度"]):
-                                        name = "纬度"
-                                        swap = item["纬度"]
-                                        flag = True
-                                    if item["经度"] != '' and not pattern1.match(item["经度"]):
-                                        name = "经度"
-                                        swap = item["经度"]
-                                        flag = True
-                                    if flag:
-                                        for k, v in item.items():
-                                            if k != "纬度" and k != "经度" and pattern1.match(v):
-                                                item[name] = v
-                                                item[k] = swap
-                                                break
-                                    flag = False        
-                                    swap = ""
-                                    if item["机场三字码"] == '' or not pattern2.match(item["机场三字码"]):
-                                        swap = item["机场三字码"]
-                                        flag = True
-                                    if flag:
-                                        for k, v in item.items():
-                                            if k != "机场三字码" and pattern2.match(v):
-                                                item["机场三字码"] = v
-                                                item[k] = swap
-                                                break
-                                    item["机场三字码"] = item["机场三字码"].upper()
-                                    if item["国家(地区)代码"] == '' and pattern3.match(item["纬度"]):
-                                        item["国家(地区)代码"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["国家(地区)代码"] == '' and pattern3.match(item["经度"]):
-                                        item["国家(地区)代码"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["机场四字码"] == '' and pattern4.match(item["纬度"]):
-                                        item["机场四字码"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["机场四字码"] == '' and pattern4.match(item["经度"]):
-                                        item["机场四字码"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["机场名称(英文)"] == '' and item["城市名(英文)"] != '' and pattern5.match(item["纬度"]):
-                                        item["机场名称(英文)"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["机场名称(英文)"] == '' and item["城市名(英文)"] != '' and pattern5.match(item["经度"]):
-                                        item["机场名称(英文)"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] == '' and pattern5.match(item["纬度"]):
-                                        item["城市名(英文)"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] == '' and pattern5.match(item["经度"]):
-                                        item["城市名(英文)"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] != '' and item["城市名"] == '' and pattern5.match(item["纬度"]):
-                                        item["城市名"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] != '' and item["城市名"] == '' and pattern5.match(item["经度"]):
-                                        item["城市名"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["国家(地区)名称"] == '' and pattern6.match(item["纬度"]):
-                                        item["国家(地区)名称"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["国家(地区)名称"] == '' and pattern6.match(item["经度"]):
-                                        item["国家(地区)名称"] = item["经度"]
-                                        item["经度"] = ''
-                                    if item["城市名"] == '' and pattern6.match(item["纬度"]):
-                                        item["城市名"] = item["纬度"]
-                                        item["纬度"] = ''
-                                    if item["城市名"] == '' and pattern6.match(item["经度"]):
-                                        item["城市名"] = item["经度"]
-                                        item["经度"] = ''
-                                    result.append(item)
-                            except Exception as e:
-                                print(f"Exception in sub process: {e=}, {type(e)=}")
-        except Exception as ex:
-            print(f"Exception in main loop process: {ex=}, {type(ex)=}")
-
-    s = json.dumps(result, ensure_ascii=False)
-    # print(s)
-    with open('json2.json', 'w') as f:
-        f.write(s)
-
-    # 结果校验
-    arr = []
-    with open("json2.json", "r") as f:
-        arr = json.load(f)
-
-    ct = 0
-    for item in arr:
-        swap = ""
-        if item["纬度"] != '' and not pattern1.match(item["纬度"]):
-            print(item["机场三字码"])
-            ct += 1
-        if item["经度"] != '' and not pattern1.match(item["经度"]):
-            print(item["机场三字码"])
-            ct += 1
-        if item["机场三字码"] != '' and not pattern2.match(item["机场三字码"]):
-            ct += 1
-    
-    if ct > 0:
-        print('Sth wrong ' + ct)
-
-    # s = json.dumps(arr, ensure_ascii=False)
-    # with open('json2.json', 'w') as f:
-    #     f.write(s)
+    req_geocode();
 
 
 
 
 # 程序入口
 # 程序入口

+ 0 - 0
airport_codes/__init__.py


+ 184 - 0
airport_codes/get_info.py

@@ -0,0 +1,184 @@
+import json
+import re
+import time
+
+from bs4 import BeautifulSoup
+import requests
+
+# 根据机场三字码,重新从 Google Geocoding API 刷数据
+def req_geocode():
+    # arr = [{"机场三字码":"SZX"}]
+    arr = []
+    with open("json2.json", "r") as f:
+        arr = json.load(f)
+
+    langs = ['en', 'zh']
+
+    i = 0
+    for item in arr:
+        try:
+            if item['机场三字码'] == '':
+                continue
+            
+            for lang in langs:
+                uri = 'https://maps.googleapis.com/maps/api/geocode/json?address={}%20Airport%2C{}%2C{}&language={}&key=AIzaSyD0OfQuI1qV-VgTbVS8253RuU7Kt3ohtFo'.format(item['机场三字码'], item['城市名(英文)'], item['国家(地区)代码'], lang)
+                text = requests.get(uri).text
+                res = json.loads(text).get('results')
+                if res != None and len(res) > 0:
+                    geo = res[0].get('address_components')
+                    if geo != None:
+                        item['geo_{}'.format(lang)] = geo
+            
+            s = json.dumps(item, ensure_ascii=False)
+            print(item)
+
+            i += 1
+            if i % 3 == 0:
+                time.sleep(1)
+        except Exception as e:
+            print(f"Exception in geocode process: {e=}, {type(e)=}")
+
+    s = json.dumps(arr, ensure_ascii=False)
+    # print(s)
+    with open('json3.json', 'w') as f:
+        f.write(s)
+
+# 抓取机场三字码等信息
+def get_info():
+    pattern1 = re.compile(r'^-*\d+\.*\d+$')
+    pattern2 = re.compile(r'^[A-Z]{3}$')
+    pattern3 = re.compile(r'^[A-Z]{2}$')
+    pattern4 = re.compile(r'^[A-Z]{4}$')
+    pattern5 = re.compile(r'^[A-Za-z\s,]+$')
+    pattern6 = re.compile(r'^[\u4e00-\u9fa5]+$')
+    result = []
+    n = 290
+    for i in range(1, n):
+        page = i
+        try:
+            text = requests.get("https://airportcode.bmcx.com/" + str(page) + "__airportcode/")
+            res = text.text
+            html = BeautifulSoup(res, features="html.parser")
+            tabs = html.find_all(name = "table")
+            if len(tabs) > 1:
+                arr = tabs[1].find_all(name = "tr")
+                if len(arr) > 2:
+                    for tr in arr[2:]:
+                        href = tr.find(name = "a").attrs.get("href")
+                        if href != None:
+                            try:
+                                sub_text = requests.get("https://airportcode.bmcx.com" + href)
+                                sub_res = sub_text.text
+                                sub_html = BeautifulSoup(sub_res, features="html.parser")
+                                sub_tabs = sub_html.find_all(name = "table")
+                                if len(sub_tabs) > 1:
+                                    sub_arr = sub_tabs[1].find_all(name = "tr")
+                                    item = {}
+                                    for sub_tr in sub_arr:
+                                        sub_t = sub_tr.find_all(name = "td")
+                                        item[sub_t[0].string] = sub_t[1].string
+                                    flag = False
+                                    name = ""
+                                    swap = ""
+                                    if item["纬度"] != '' and not pattern1.match(item["纬度"]):
+                                        name = "纬度"
+                                        swap = item["纬度"]
+                                        flag = True
+                                    if item["经度"] != '' and not pattern1.match(item["经度"]):
+                                        name = "经度"
+                                        swap = item["经度"]
+                                        flag = True
+                                    if flag:
+                                        for k, v in item.items():
+                                            if k != "纬度" and k != "经度" and pattern1.match(v):
+                                                item[name] = v
+                                                item[k] = swap
+                                                break
+                                    flag = False        
+                                    swap = ""
+                                    if item["机场三字码"] == '' or not pattern2.match(item["机场三字码"]):
+                                        swap = item["机场三字码"]
+                                        flag = True
+                                    if flag:
+                                        for k, v in item.items():
+                                            if k != "机场三字码" and pattern2.match(v):
+                                                item["机场三字码"] = v
+                                                item[k] = swap
+                                                break
+                                    item["机场三字码"] = item["机场三字码"].upper()
+                                    if item["国家(地区)代码"] == '' and pattern3.match(item["纬度"]):
+                                        item["国家(地区)代码"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["国家(地区)代码"] == '' and pattern3.match(item["经度"]):
+                                        item["国家(地区)代码"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["机场四字码"] == '' and pattern4.match(item["纬度"]):
+                                        item["机场四字码"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["机场四字码"] == '' and pattern4.match(item["经度"]):
+                                        item["机场四字码"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["机场名称(英文)"] == '' and item["城市名(英文)"] != '' and pattern5.match(item["纬度"]):
+                                        item["机场名称(英文)"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["机场名称(英文)"] == '' and item["城市名(英文)"] != '' and pattern5.match(item["经度"]):
+                                        item["机场名称(英文)"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] == '' and pattern5.match(item["纬度"]):
+                                        item["城市名(英文)"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] == '' and pattern5.match(item["经度"]):
+                                        item["城市名(英文)"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] != '' and item["城市名"] == '' and pattern5.match(item["纬度"]):
+                                        item["城市名"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["机场名称(英文)"] != '' and item["城市名(英文)"] != '' and item["城市名"] == '' and pattern5.match(item["经度"]):
+                                        item["城市名"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["国家(地区)名称"] == '' and pattern6.match(item["纬度"]):
+                                        item["国家(地区)名称"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["国家(地区)名称"] == '' and pattern6.match(item["经度"]):
+                                        item["国家(地区)名称"] = item["经度"]
+                                        item["经度"] = ''
+                                    if item["城市名"] == '' and pattern6.match(item["纬度"]):
+                                        item["城市名"] = item["纬度"]
+                                        item["纬度"] = ''
+                                    if item["城市名"] == '' and pattern6.match(item["经度"]):
+                                        item["城市名"] = item["经度"]
+                                        item["经度"] = ''
+                                    result.append(item)
+                            except Exception as e:
+                                print(f"Exception in sub process: {e=}, {type(e)=}")
+        except Exception as ex:
+            print(f"Exception in main loop process: {ex=}, {type(ex)=}")
+
+    s = json.dumps(result, ensure_ascii=False)
+    # print(s)
+    with open('json2.json', 'w') as f:
+        f.write(s)
+
+    # 结果校验
+    arr = []
+    with open("json2.json", "r") as f:
+        arr = json.load(f)
+
+    ct = 0
+    for item in arr:
+        swap = ""
+        if item["纬度"] != '' and not pattern1.match(item["纬度"]):
+            print(item["机场三字码"])
+            ct += 1
+        if item["经度"] != '' and not pattern1.match(item["经度"]):
+            print(item["机场三字码"])
+            ct += 1
+        if item["机场三字码"] != '' and not pattern2.match(item["机场三字码"]):
+            ct += 1
+    
+    if ct > 0:
+        print('Sth wrong ' + ct)
+
+    # s = json.dumps(arr, ensure_ascii=False)
+    # with open('json2.json', 'w') as f:
+    #     f.write(s)