1234567891011121314151617181920212223242526272829303132333435363738394041 |
- import requests
- import json
- from bs4 import BeautifulSoup
_BASE_URL = "https://airportcode.bmcx.com"
# Seconds to wait on each HTTP request; without a timeout requests can block
# forever on a stalled connection.
_REQUEST_TIMEOUT = 30


def _fetch_rows(url: str) -> list:
    """Download *url* and return the rows of its second table, minus the first two."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, features="html.parser")
    # Both the listing and the detail pages are read from the second <table>;
    # its first two rows are skipped (presumably headers -- verify against
    # the live markup).
    return soup.find_all(name="table")[1].find_all(name="tr")[2:]


def _scrape_airport(href: str) -> dict:
    """Fetch one detail page and map each row's first cell text to its second."""
    item = {}
    for row in _fetch_rows(_BASE_URL + href):
        cells = row.find_all(name="td")
        if len(cells) < 2:
            # Not a key/value row; nothing to record.
            continue
        item[cells[0].string] = cells[1].string
    return item


def main(page_stop: int = 290, output_path: str = "json.json") -> None:
    """Scrape the paginated listing and dump every linked detail page as JSON.

    Listing pages 1 .. ``page_stop - 1`` are fetched in order. For every
    listing row whose first cell contains a link with an ``href``, the linked
    detail page is fetched and converted into a dict. The collected dicts are
    written to ``output_path`` as a single JSON array.

    Args:
        page_stop: Exclusive upper bound of the listing page numbers.
        output_path: File that receives the JSON text (overwritten).

    Raises:
        requests.RequestException: If a page cannot be downloaded in time.
        IndexError: If a page has fewer than two tables.
    """
    result = []
    for page in range(1, page_stop):
        listing_url = _BASE_URL + "/" + str(page) + "__airportcode/"
        for row in _fetch_rows(listing_url):
            cells = row.find_all(name="td")
            link = cells[0].find(name="a") if cells else None
            href = link.attrs.get("href") if link is not None else None
            if href is None:
                # Row has no cells, no link, or a link without a target.
                continue
            result.append(_scrape_airport(href))

    # ensure_ascii=False keeps non-ASCII text readable, so the file must be
    # written as UTF-8 explicitly instead of relying on the locale encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(result, ensure_ascii=False))
# Program entry point: call main() only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|