by_request.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import json
  4. import math
  5. import numbers
  6. import re
  7. import time
  8. from urllib.parse import quote
  9. import requests
  10. from bs4 import BeautifulSoup
  11. import pandas as pd
  12. from sqlalchemy.ext.declarative import declarative_base
  13. from sqlalchemy import Column, String, Text, BigInteger, Integer, Float, Boolean, JSON, create_engine
  14. from sqlalchemy.orm import sessionmaker
# Captures the text inside <strong> that follows a "Brand<br>" label
# (applied to newline-stripped prettified HTML in get_hotels_rooms_ext).
BRAND_PATTERN = re.compile(r'>\s*Brand\s*<br\s*\/*>\s*<strong>\s*([^<>]+)\s*</strong>')
# Captures (numeric venue id, slug) from a "/venue/<id>/<slug>/" link.
HOTEL_LINK_PATTERN = re.compile(r'/venue/(\d+)/([^/]+)/')
# Captures (distance, unit, duration, place name) from text shaped like
# "<distance> <unit> (<duration> <unit> / <name>)". Only integer digits are
# matched for the distance and the duration.
DISTANCE_PATTERN = re.compile(r'(\d+)\s*(\w+)\s*\((\d+)\s*\w+\s*\/\s*([^)]+)\)')
# Encoding applied to HTTP responses and to every CSV file read or written.
ENCODING = 'utf-8'
  19. def test():
  20. # get_hotels()
  21. # get_hotels_rooms()
  22. # get_hotels_rooms_ext()
  23. # insert_to_mysql()
  24. # get_loc()
  25. convert_geo()
  26. # insert_saas_to_mysql()
  27. pass
  28. def get_hotels():
  29. items = []
  30. for i in range(1, 241):
  31. try:
  32. resp = requests.get('https://www.1000meetings.com/en/area/1/world/p{}'.format(i))
  33. resp.encoding = ENCODING
  34. doc = BeautifulSoup(resp.text, 'html.parser')
  35. for li in doc.select('#venuelist li'):
  36. item = {}
  37. a_arr = li.select('.venueheader .line1 .name a')
  38. if len(a_arr) == 0:
  39. continue
  40. a = a_arr[0]
  41. item['hotel_name'] = rep(a.string)
  42. item['link'] = quote(a['href'].strip())
  43. g = HOTEL_LINK_PATTERN.findall(item['link'])
  44. if len(g) == 0 or len(g[0]) < 1:
  45. continue
  46. item['hotel_id'] = g[0][0]
  47. item['slug'] = quote(g[0][1].strip())
  48. area_arr = li.select('.venueheader .line1 .area a')
  49. area_arr_len = len(area_arr)
  50. if area_arr_len == 1:
  51. item['country'] = area_arr[0].string.strip().strip('[').strip(']')
  52. item['city'] = item['country']
  53. elif area_arr_len == 2:
  54. item['country'] = area_arr[0].string.strip().strip('[').strip(']')
  55. item['city'] = area_arr[1].string.strip().strip('[').strip(']')
  56. else:
  57. item['country'] = area_arr[0].string.strip().strip('[').strip(']')
  58. item['province'] = area_arr[1].string.strip().strip('[').strip(']')
  59. if item['province'] == 'Hong Kong':
  60. item['city'] = item['province']
  61. item['district'] = area_arr[2].string.strip().strip('[').strip(']')
  62. else:
  63. item['city'] = area_arr[2].string.strip().strip('[').strip(']')
  64. items.append(item)
  65. except Exception as e:
  66. print('get_hotels() failed. ', e)
  67. # print(items)
  68. df = pd.DataFrame(items)
  69. df.to_csv('hotels.csv', encoding=ENCODING, index=False, header=True)
  70. def get_hotels_rooms():
  71. hotels = pd.read_csv('hotels.csv', encoding=ENCODING).to_dict('records')
  72. items = []
  73. for h in hotels:
  74. try:
  75. if h['link'] == '':
  76. continue
  77. resp = requests.get('https://www.1000meetings.com{}'.format(h['link']))
  78. resp.encoding = ENCODING
  79. doc = BeautifulSoup(resp.text, 'html.parser')
  80. for li in doc.select('#frame-header #line2-left li'):
  81. text = li.get_text().strip()
  82. if text.startswith('Address:'):
  83. text = text.lstrip('Address:')
  84. h['address'] = rep(text)
  85. elif text.startswith('Telephone:'):
  86. text = text.lstrip('Telephone:').strip().replace('\n','')
  87. text = re.sub(r'\s+', ' ', re.sub(r'[\t\(\)())]+', ' ', text)).replace('-', '').replace('ext.', '-').replace('、', '/').replace(',', '').replace('中国', '86').replace('China', '86').strip()
  88. if ' ' in text:
  89. idx = text.index(' ')
  90. code = re.sub('\++', '+', text[0:idx])
  91. number = text[idx:].replace(' ', '').replace('+', '').strip()
  92. text = '{} {}'.format(code, number).strip()
  93. if re.match(r'^0+$', text) or text == 'nan':
  94. text = ''
  95. h['phone'] = text
  96. for div in doc.select('.MainBaseInfo .contactInfo .Line'):
  97. text = div.get_text().strip()
  98. if text.startswith('Web:') and len(div.select('a')) > 0:
  99. h['site_url'] = div.select('a')[0]['href'].strip() if div.select('a') else ''
  100. floor = ''
  101. rooms = []
  102. for ul in doc.select('#meeting-room-chart ul'):
  103. room = {}
  104. if 'title' in ul['class']:
  105. continue
  106. elif 'floor' in ul['class']:
  107. li = ul.select('li.floorname')
  108. if len(li) > 0:
  109. floor = li[0].string
  110. elif 'rooms' in ul['class']:
  111. li = ul.select('li')
  112. if len(li) > 0:
  113. room['floor'] = rep(floor)
  114. room['name'] = rep(li[0].string)
  115. room['area'] = repq(li[2].string)
  116. room['height'] = repq(li[3].string)
  117. room['theatre'] = repq(li[4].string)
  118. room['classroom'] = repq(li[5].string)
  119. room['u_shape'] = repq(li[6].string)
  120. room['banquet'] = repq(li[7].string)
  121. room['cocktail'] = repq(li[8].string)
  122. room['boardroom'] = repq(li[9].string)
  123. rooms.append(room)
  124. if len(rooms) > 0:
  125. h['rooms'] = rooms
  126. except Exception as e:
  127. print('get_hotels_rooms() failed. ', e)
  128. items.append(h)
  129. # print(items)
  130. df = pd.DataFrame(items)
  131. df.to_csv('hotels_rooms.csv', encoding=ENCODING, index=False, header=True)
  132. def get_hotels_rooms_ext():
  133. hotels = pd.read_csv('hotels_rooms.csv', encoding=ENCODING).to_dict('records')
  134. items = []
  135. for h in hotels:
  136. try:
  137. if h['link'] == '':
  138. continue
  139. resp = requests.get('https://www.1000meetings.com.sg/meeting-destination/{}/{}/'.format(h['hotel_id'], h['slug']))
  140. resp.encoding = ENCODING
  141. doc = BeautifulSoup(resp.text, 'html.parser')
  142. overview = doc.select('.hotel-details')
  143. if len(overview) > 0:
  144. name = ''
  145. for tag in overview[0].find_all(True):
  146. if tag.name == 'h4':
  147. name = tag.get_text()
  148. elif tag.name == 'p' and name == 'Hotel presentation':
  149. h['hotel_presentation'] = rep(tag.prettify())
  150. name = ''
  151. elif tag.name == 'p' and name == 'Meeting Facilities':
  152. h['meeting_facilities'] = rep(tag.prettify())
  153. name = ''
  154. images = doc.select('.portfolio-list')
  155. if len(images) > 0:
  156. img_arr = []
  157. for img in images[0].select('img'):
  158. img_arr.append({'origin_name':quote(img['src'].strip().split('/')[-1])})
  159. h['images'] = img_arr
  160. details = doc.select('table.hoteldetails tr')
  161. room_quantity = []
  162. meeting_room_quantity = []
  163. restaurant_remark = []
  164. meeting_room_facilities = {}
  165. distances = []
  166. topic = ''
  167. if len(details) > 0:
  168. for tr in details:
  169. if tr.find('th') and tr.find('th').find('h5'):
  170. topic = tr.find('th').find('h5').text.strip()
  171. elif tr.find('td') and tr.find('td').find('h5') and len(tr.select('td.type-info')) > 0:
  172. title = tr.find('td').find('h5').text.strip()
  173. value = tr.select('td.type-info')[0].text.strip()
  174. if topic == 'Room accommodation':
  175. if title == 'Total rooms':
  176. h['total_rooms'] = value
  177. elif title == 'Free wifi':
  178. h['hotel_free_wifi'] = '1' if value == 'Yes' else '0'
  179. else:
  180. room_quantity.append({'name':rep(title), 'quantity': repq(value)})
  181. elif topic == 'Meeting space':
  182. if title == 'Meeting rooms':
  183. h['total_meeting_rooms'] = value
  184. elif title == 'Max surface':
  185. h['meeting_room_max_sqm'] = value.split(' ')[0]
  186. elif title == 'Max height':
  187. h['meeting_room_max_height'] = value.split(' ')[0]
  188. elif title == 'Projector resolution':
  189. h['projector_resolution'] = rep(value)
  190. elif title == 'Natural light':
  191. meeting_room_facilities['natural_light'] = '1' if value == 'Yes' else '0'
  192. elif title == 'Pillarless':
  193. meeting_room_facilities['pillarless'] = '1' if value == 'Yes' else '0'
  194. elif title == 'Outdoor space':
  195. meeting_room_facilities['outdoor'] = '1' if value == 'Yes' else '0'
  196. elif title == 'LED screen on-site':
  197. meeting_room_facilities['led_screen'] = '1' if value == 'Yes' else '0'
  198. elif title == 'Free wifi':
  199. meeting_room_facilities['wifi'] = '1' if value == 'Yes' else '0'
  200. elif title == 'VIP rooms':
  201. meeting_room_facilities['wifi'] = '1' if value == 'Yes' else '0'
  202. else:
  203. meeting_room_quantity.append({'name':rep(title), 'quantity': repq(value)})
  204. elif topic == 'Restaurants':
  205. if title == 'Num restaurants':
  206. h['total_restaurants'] = value
  207. else:
  208. restaurant_remark.append({'name':rep(title), 'remark': rep(value)})
  209. elif topic == 'Transportation':
  210. if title in ['Distance to airport', 'Distance to airport 2', 'Distance to train station', 'Distance to train station 2', 'Subway station', 'Subway station 2']:
  211. g = DISTANCE_PATTERN.findall(value)
  212. if len(g) == 0 or len(g[0]) < 4:
  213. continue
  214. name = rep(g[0][3].replace('- line', '').replace('-', '').replace('N/A', ''))
  215. distance = g[0][0]
  216. time = g[0][2]
  217. if len(name) == 0 or distance == '0' or time == '0':
  218. continue
  219. distances.append({'name': name, 'distance': distance + ('000' if g[0][1] == 'km' else ''), 'time': time})
  220. if len(room_quantity) > 0:
  221. h['room_quantity'] = room_quantity
  222. if len(meeting_room_quantity) > 0:
  223. h['meeting_room_quantity'] = meeting_room_quantity
  224. if len(meeting_room_facilities) > 0:
  225. h['meeting_room_facilities'] = meeting_room_facilities
  226. if len(restaurant_remark) > 0:
  227. h['restaurant_remark'] = restaurant_remark
  228. if len(distances) > 0:
  229. h['distances'] = distances
  230. mds = doc.select('.slider-venue-display .col-md-3')
  231. if len(mds) > 0:
  232. for div in mds:
  233. html = re.sub(r'\n', '', div.prettify())
  234. g = BRAND_PATTERN.findall(html)
  235. if len(g) > 0:
  236. h['brand'] = rep(g[0])
  237. break
  238. h['attachments'] = ['https://www8.1000meetings.com/download/venue/pdf/{}/'.format(h['hotel_id'])]
  239. except Exception as e:
  240. print('get_hotels_rooms_ext() failed. ', e)
  241. items.append(h)
  242. # print(items)
  243. df = pd.DataFrame(items)
  244. df.to_csv('hotels_rooms_ext.csv', encoding=ENCODING, index=False, header=True)
  245. def repq(input):
  246. if not input:
  247. return ''
  248. input = input.strip()
  249. return '0' if input == '-' or input == '–' else input
  250. def rep(input):
  251. if not input:
  252. return ''
  253. return re.sub(r'\s+', ' ', input.replace('!', '!').replace('、', '/').replace('?', '?').replace(';', ';').replace('(', '(').replace(')', ')').replace(':', ':').replace('。', '.').replace(',', ',').replace('–', '-').replace('√', '-').replace('’', '\'').replace('‘', '\'').replace(' ', ' ').replace('“', '"').replace('”', '"')).strip().strip('/').strip()
  254. get_int = lambda v: 0 if not v or (isinstance(v, numbers.Number) and math.isnan(v)) else int(v)
  255. get_float = lambda v: 0 if not v or (isinstance(v, numbers.Number) and math.isnan(v)) else float(v)
  256. get_str = lambda v: '' if not v or (isinstance(v, numbers.Number) and math.isnan(v)) else '{}'.format(v)
  257. get_json = lambda v: '' if not v or (isinstance(v, numbers.Number) and math.isnan(v)) else json.dumps(v)
  258. get_bool = lambda v: v and not (isinstance(v, numbers.Number) and math.isnan(v)) and int(v) > 0
# Number of processed rows between two session.commit() calls in the loaders below.
commit_buff_size = 500
# Session factory bound to the MySQL database ``test``.
# NOTE(review): user, password, host and port are hard-coded here; consider
# reading them from configuration or the environment.
session_maker = sessionmaker(bind=create_engine('mysql+pymysql://{}:{}@{}:{}/test'.format('root', '123456', 'localhost', 3306),
    echo=False, # log what the engine does at run time, including the SQL built by the ORM
    max_overflow=0, # maximum number of connections created beyond the pool size
    pool_size=5, # connection pool size
    pool_timeout=300, # longest wait for a free pooled connection before an error is raised
    pool_recycle=3600, # age after which a pooled connection is recycled (reset)
    pool_pre_ping=True # ping the connection before each use
))
  268. def insert_to_mysql():
  269. hotels = pd.read_csv('hotels_rooms_ext.csv', encoding=ENCODING).to_dict('records')
  270. with session_maker() as session:
  271. i = 0
  272. length = len(hotels)
  273. for h in hotels:
  274. i += 1
  275. if not h['hotel_id'] or math.isnan(h['hotel_id']) or h['hotel_id'] <= 0:
  276. continue
  277. try:
  278. hotel = Hotel(
  279. hotel_id = h['hotel_id'],
  280. hotel_name = get_str(h['hotel_name']),
  281. link = get_str(h['link']),
  282. slug = get_str(h['slug']),
  283. country = get_str(h['country']),
  284. province = get_str(h['province']),
  285. city = get_str(h['city']),
  286. district = get_str(h['district']),
  287. address = get_str(h['address']),
  288. phone = get_str(h['phone']),
  289. site_url = get_str(h['site_url']),
  290. rooms = get_json(h['rooms']),
  291. hotel_presentation = get_str(h['hotel_presentation']),
  292. meeting_facilities = get_str(h['meeting_facilities']),
  293. images = get_json(h['images']),
  294. total_rooms = get_int(h['total_rooms']),
  295. hotel_free_wifi = get_bool(h['hotel_free_wifi']),
  296. total_meeting_rooms = get_int(h['total_meeting_rooms']),
  297. meeting_room_max_sqm = get_float(h['meeting_room_max_sqm']),
  298. meeting_room_max_height = get_float(h['meeting_room_max_height']),
  299. projector_resolution = get_str(h['projector_resolution']),
  300. total_restaurants = get_int(h['total_restaurants']),
  301. room_quantity = get_json(h['room_quantity']),
  302. meeting_room_quantity = get_json(h['meeting_room_quantity']),
  303. meeting_room_facilities = get_json(h['meeting_room_facilities']),
  304. restaurant_remark = get_json(h['restaurant_remark']),
  305. distances = get_json(h['distances']),
  306. brand = get_str(h['brand']),
  307. attachments = get_json(h['attachments']),
  308. # lon = get_float(h['lon']),
  309. # lat = get_float(h['lat']),
  310. # s_country = get_str(h['s_country']),
  311. # s_province = get_str(h['s_province']),
  312. # s_city = get_str(h['s_city']),
  313. # s_name = get_str(h['s_name']),
  314. # s_address = get_str(h['s_address']),
  315. # s_phone = get_str(h['s_phone']),
  316. # hop_id = get_int(h['hop_id']),
  317. # s_id = get_int(h['s_id']),
  318. # s_lon = get_float(h['s_lon']),
  319. # s_lat = get_float(h['s_lat']),
  320. # s_loc_type = get_int(h['s_loc_type'])
  321. )
  322. session.add(hotel)
  323. if i % commit_buff_size == 0 or i >= length:
  324. session.commit()
  325. except Exception as e:
  326. print('insert_to_mysql() failed. ', e)
  327. def insert_saas_to_mysql():
  328. hotels = pd.read_csv('saas_hotels.csv', encoding=ENCODING).to_dict('records')
  329. with session_maker() as session:
  330. i = 0
  331. length = len(hotels)
  332. for h in hotels:
  333. i += 1
  334. if not h['id'] or math.isnan(h['id']) or h['id'] <= 0:
  335. continue
  336. try:
  337. hotel = SaaSHotel(
  338. id = h['id'],
  339. hop_id = get_int(h['hop_id']),
  340. name = get_str(h['name']),
  341. en_name = get_str(h['en_name']),
  342. phone = get_str(h['phone']),
  343. address = get_str(h['address']),
  344. en_address = get_str(h['en_address']),
  345. lon = get_float(h['lon']),
  346. lat = get_float(h['lat']),
  347. loc_type = get_int(h['loc_type']),
  348. country = get_str(h['country']),
  349. province = get_str(h['province']),
  350. city = get_str(h['city'])
  351. )
  352. session.add(hotel)
  353. if i % commit_buff_size == 0 or i >= length:
  354. session.commit()
  355. except Exception as e:
  356. print('insert_saas_to_mysql() failed. ', e)
  357. def get_loc():
  358. pp1 = re.compile(r'^\+*0*0*[86-]*(\d{3,4})(\d{8})$')
  359. pp2 = re.compile(r'^\+*0*0*[86-]*(\d{3,4})(\d{8})-(.+)$')
  360. with session_maker() as session:
  361. hotels = session.query(Hotel).filter(Hotel.lon==None).all()
  362. i = 0
  363. length = len(hotels)
  364. for h in hotels:
  365. i += 1
  366. try:
  367. h.phone = re.sub(r'\s+', '-', h.phone)
  368. if h.country == 'China' and h.province not in ['Taiwan', 'Hong Kong', 'Macau']:
  369. g = pp1.findall(h.phone)
  370. if len(g) > 0 and len(g[0]) > 1:
  371. h.phone = '+86-{}-{}'.format(g[0][0], g[0][1])
  372. g = pp2.findall(h.phone)
  373. if len(g) > 0 and len(g[0]) > 2:
  374. h.phone = '+86-{}-{}-{}'.format(g[0][0], g[0][1], g[0][2])
  375. str = quote('{} {}'.format(h.hotel_name, h.address))
  376. uri = 'https://maps.googleapis.com/maps/api/geocode/json?address={}&language=en&key=AIzaSyD0OfQuI1qV-VgTbVS8253RuU7Kt3ohtFo'.format(str)
  377. text = requests.get(uri).text
  378. res = json.loads(text).get('results')
  379. if res and len(res) > 0:
  380. geo = res[0]
  381. if geo and geo.get('geometry') and geo.get('geometry').get('location') and geo.get('geometry').get('location').get('lat') and geo.get('geometry').get('location').get('lng'):
  382. h.lon = float(geo.get('geometry').get('location').get('lng'))
  383. h.lat = float(geo.get('geometry').get('location').get('lat'))
  384. if i % 50 == 0:
  385. time.sleep(1)
  386. if i % commit_buff_size == 0 or i >= length:
  387. session.commit()
  388. except Exception as e:
  389. print('get_loc() failed. ', e)
  390. def convert_geo():
  391. with session_maker() as session:
  392. hotels = session.query(Hotel).filter((Hotel.s_loc_type == 1 or Hotel.s_loc_type == 3) and Hotel.s_lon.is_not(None) and Hotel.s_lat.is_not(None)).all()
  393. i = 0
  394. length = len(hotels)
  395. for h in hotels:
  396. i += 1
  397. try:
  398. if h.s_loc_type == 3:
  399. h.s_lon, h.s_lat = gcj02_to_wgs84(h.s_lon, h.s_lat)
  400. h.s_loc_type = 2
  401. elif h.s_loc_type == 1:
  402. h.s_lon, h.s_lat = bd09_to_wgs84(h.s_lon, h.s_lat)
  403. h.s_loc_type = 2
  404. if i % commit_buff_size == 0 or i >= length:
  405. session.commit()
  406. except Exception as e:
  407. print('convert_geo() failed. ', e)
  408. x_pi = 3.14159265358979324 * 3000.0 / 180.0
  409. pi = 3.1415926535897932384626 # π
  410. a = 6378245.0 # 长半轴
  411. ee = 0.00669342162296594323 # 偏心率平方
  412. '''
  413. 输入(经度,维度)
  414. '''
  415. def bd09_to_gcj02(bd_lon, bd_lat):
  416. """
  417. 百度坐标系(BD-09)转火星坐标系(GCJ-02)
  418. 百度——>谷歌、高德
  419. :param bd_lat:百度坐标纬度
  420. :param bd_lon:百度坐标经度
  421. :return:转换后的坐标列表形式
  422. """
  423. x = bd_lon - 0.0065
  424. y = bd_lat - 0.006
  425. z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
  426. theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
  427. gg_lng = z * math.cos(theta)
  428. gg_lat = z * math.sin(theta)
  429. return [gg_lng, gg_lat]
  430. def gcj02_to_wgs84(lng, lat):
  431. """
  432. GCJ02(火星坐标系)转GPS84
  433. :param lng:火星坐标系的经度
  434. :param lat:火星坐标系纬度
  435. :return:
  436. """
  437. if out_of_china(lng, lat):
  438. return [lng, lat]
  439. dlat = _transformlat(lng - 105.0, lat - 35.0)
  440. dlng = _transformlng(lng - 105.0, lat - 35.0)
  441. radlat = lat / 180.0 * pi
  442. magic = math.sin(radlat)
  443. magic = 1 - ee * magic * magic
  444. sqrtmagic = math.sqrt(magic)
  445. dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
  446. dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
  447. mglat = lat + dlat
  448. mglng = lng + dlng
  449. return [lng * 2 - mglng, lat * 2 - mglat]
  450. def bd09_to_wgs84(bd_lon, bd_lat):
  451. lon, lat = bd09_to_gcj02(bd_lon, bd_lat)
  452. return gcj02_to_wgs84(lon, lat)
  453. def bd09_to_wgs84(bd_lon, bd_lat):
  454. lon, lat = bd09_to_gcj02(bd_lon, bd_lat)
  455. return gcj02_to_wgs84(lon, lat)
  456. def gcj02_to_bd09(lng, lat):
  457. """
  458. 火星坐标系(GCJ-02)转百度坐标系(BD-09) 谷歌、高德——>百度
  459. :param lng:火星坐标经度
  460. :param lat:火星坐标纬度
  461. :return:
  462. """
  463. z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
  464. theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
  465. bd_lng = z * math.cos(theta) + 0.0065
  466. bd_lat = z * math.sin(theta) + 0.006
  467. return [bd_lng, bd_lat]
  468. def wgs84_to_gcj02(lng, lat):
  469. """
  470. WGS84转GCJ02(火星坐标系)
  471. :param lng:WGS84坐标系的经度
  472. :param lat:WGS84坐标系的纬度
  473. :return:
  474. """
  475. if out_of_china(lng, lat): # 判断是否在国内
  476. return [lng, lat]
  477. dlat = _transformlat(lng - 105.0, lat - 35.0)
  478. dlng = _transformlng(lng - 105.0, lat - 35.0)
  479. radlat = lat / 180.0 * pi
  480. magic = math.sin(radlat)
  481. magic = 1 - ee * magic * magic
  482. sqrtmagic = math.sqrt(magic)
  483. dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
  484. dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
  485. mglat = lat + dlat
  486. mglng = lng + dlng
  487. return [mglng, mglat]
  488. def wgs84_to_bd09(lon, lat):
  489. lon, lat = wgs84_to_gcj02(lon, lat)
  490. return gcj02_to_bd09(lon, lat)
  491. def out_of_china(lng, lat):
  492. """
  493. 判断是否在国内,不在国内不做偏移
  494. :param lng:
  495. :param lat:
  496. :return:
  497. """
  498. return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55)
  499. def _transformlng(lng, lat):
  500. ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
  501. ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
  502. ret += (20.0 * math.sin(lng * pi) + 40.0 * math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
  503. ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 * math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
  504. return ret
  505. def _transformlat(lng, lat):
  506. ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
  507. ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
  508. ret += (20.0 * math.sin(lat * pi) + 40.0 * math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
  509. ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 * math.sin(lat * pi / 30.0)) * 2.0 / 3.0
  510. return ret
# Declarative base shared by the ORM models below.
Base = declarative_base()
class Hotel(Base):
    """ORM mapping for the ``hotels`` table (one row per scraped venue).

    ``insert_to_mysql`` fills the scraped columns, ``get_loc`` fills
    ``lon``/``lat`` and rewrites ``phone``, and ``convert_geo`` rewrites
    ``s_lon``/``s_lat``/``s_loc_type``.
    """
    __tablename__ = 'hotels'
    # Identity and listing data.
    hotel_id = Column(Integer, primary_key=True)
    hotel_name = Column(String)
    link = Column(String)
    slug = Column(String)
    country = Column(String)
    province = Column(String)
    city = Column(String)
    district = Column(String)
    # Contact details from the venue page.
    address = Column(String)
    phone = Column(String)
    site_url = Column(String)
    # Meeting rooms (JSON).
    rooms = Column(JSON)
    # Destination-page details.
    hotel_presentation = Column(Text)
    meeting_facilities = Column(Text)
    images = Column(JSON)
    total_rooms = Column(Integer)
    hotel_free_wifi = Column(Boolean)
    total_meeting_rooms = Column(Integer)
    meeting_room_max_sqm = Column(Float)
    meeting_room_max_height = Column(Float)
    projector_resolution = Column(String)
    total_restaurants = Column(Integer)
    room_quantity = Column(JSON)
    meeting_room_quantity = Column(JSON)
    meeting_room_facilities = Column(JSON)
    restaurant_remark = Column(JSON)
    distances = Column(JSON)
    brand = Column(String)
    attachments = Column(JSON)
    # Geocoded position written by get_loc().
    lon = Column(Float)
    lat = Column(Float)
    # ``s_``-prefixed columns: presumably data of the matched SaaS hotel
    # record -- TODO confirm; nothing in this file populates them.
    s_country = Column(String)
    s_province = Column(String)
    s_city = Column(String)
    s_name = Column(String)
    s_address = Column(String)
    s_phone = Column(String)
    hop_id = Column(Integer)
    s_id = Column(BigInteger)
    s_lon = Column(Float)
    s_lat = Column(Float)
    # Coordinate system of s_lon/s_lat as used by convert_geo():
    # 1 is converted with bd09_to_wgs84, 3 with gcj02_to_wgs84, and
    # converted rows are set to 2.
    s_loc_type = Column(Integer)
class SaaSHotel(Base):
    """ORM mapping for the ``saas_hotel`` table, loaded from
    ``saas_hotels.csv`` by ``insert_saas_to_mysql``."""
    __tablename__ = 'saas_hotel'
    id = Column(BigInteger, primary_key=True)
    hop_id = Column(Integer)
    # Name and address, each with an ``en_``-prefixed variant
    # (presumably the English version -- TODO confirm).
    name = Column(String)
    en_name = Column(String)
    phone = Column(String)
    address = Column(String)
    en_address = Column(String)
    # Position and its coordinate-system code (codes presumably match
    # Hotel.s_loc_type -- TODO confirm).
    lon = Column(Float)
    lat = Column(Float)
    loc_type = Column(Integer)
    country = Column(String)
    province = Column(String)
    city = Column(String)