|
@@ -1,8 +1,39 @@
|
|
-import time
|
|
|
|
-
|
|
|
|
|
|
+import requests
|
|
|
|
+import json
|
|
|
|
+from bs4 import BeautifulSoup
|
|
|
|
|
|
def main(page_limit=290, output_path='json.json'):
    """Scrape airport records from airportcode.bmcx.com and save them as JSON.

    Walks the paginated listing (pages ``1`` .. ``page_limit - 1``), follows
    the link in the first cell of every listing row to its detail page, and
    turns each detail table into a ``{label: value}`` dict. All dicts are
    written to *output_path* as one JSON array.

    Args:
        page_limit: Exclusive upper bound of the listing page numbers.
        output_path: Path of the JSON file to write.

    Raises:
        requests.RequestException: If a page cannot be fetched (including
            a timeout).
        OSError: If the output file cannot be written.
    """
    base_url = "https://airportcode.bmcx.com"
    result = []

    for page in range(1, page_limit):
        listing_url = "{}/{}__airportcode/".format(base_url, page)
        for row in _data_rows(listing_url):
            cells = row.find_all(name="td")
            # Rows without cells or without a link have no detail page.
            link = cells[0].find(name="a") if cells else None
            href = link.attrs.get("href") if link is not None else None
            if href is None:
                continue

            item = {}
            for detail_row in _data_rows(base_url + href):
                detail_cells = detail_row.find_all(name="td")
                if len(detail_cells) < 2:
                    # Not a label/value row (e.g. a spanning header cell).
                    continue
                item[detail_cells[0].string] = detail_cells[1].string
            result.append(item)

    # Serialize before opening the file so a serialization failure cannot
    # leave a truncated output file behind.
    s = json.dumps(result, ensure_ascii=False)
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned instead of depending on the platform's default locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(s)


def _data_rows(url, timeout=30):
    """Return the rows of the second <table> at *url*, minus its first two rows.

    Returns an empty list when the page has fewer than two tables, so an
    unexpected page is skipped rather than aborting the whole scrape.
    """
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, features="html.parser")
    tables = soup.find_all(name="table")
    if len(tables) < 2:
        return []
    return tables[1].find_all(name="tr")[2:]
|
|
|
|
|
|
|
|
|
|
# 程序入口
|
|
# 程序入口
|