# Module metadata: author of this script.
__author__ = 'LeoKim'
from
bs4
import
BeautifulSoup
import
pymysql
import
urllib.request, urllib.parse, http.cookiejar
from
urllib
import
parse
import
pymysql
import
json
import
re
# Shared MySQL connection and cursor used by the rest of this script.
# NOTE(review): the credentials are hard-coded here; consider moving them
# to configuration outside the source file.
DB_SETTINGS = dict(
    host='localhost',
    user='root',
    passwd='superhero',
    db='python_test',
    port=3306,
    charset='utf8',
)
conn = pymysql.connect(**DB_SETTINGS)
cur = conn.cursor()
def _extract_average_cost(raw):
    """Return the first run of digits in *raw* as an int, or 0 if there is none."""
    if not raw:
        return 0
    digits = re.findall(r'\d+', str(raw))
    return int(digits[0]) if digits else 0


def getstore(village_id, geohash, latitude, longitude, limit):
    """Fetch restaurants for one location and insert them into `store`.

    Requests the ele.me restaurant listing for the given position, prints
    the main fields of every returned shop, and inserts one row per shop
    through the module-level cursor ``cur``.

    Args:
        village_id: Id of the village row the shops are attached to.
        geohash: Geohash sent as the ``geohash`` query parameter.
        latitude: Latitude sent as the ``latitude`` query parameter.
        longitude: Longitude sent as the ``longitude`` query parameter.
        limit: Value sent as the ``limit`` query parameter.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
        KeyError: If a shop entry lacks one of the required fields.

    Note:
        This function does not commit; the caller owns the transaction.
    """
    key = {
        'geohash': geohash,
        'latitude': latitude,
        'longitude': longitude,
        'limit': limit,
    }
    # The base URL already ends with a query parameter, so the extra
    # parameters must be joined with '&'. Without it the geohash is
    # swallowed into the value of `terminal`.
    url = (
        'https://mainsite-restapi.ele.me/shopping/restaurants?extras%5B%5D=activities&offset=0&terminal=web'
        + '&'
        + parse.urlencode(key)
    )

    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36'),
        ('Cookie', 'ubt_ssid=but8xnmtkpfrbvypd9z3hxaa5i8ugmj0_2017-04-29; _utrace=edd9bb6de13caed667d2cf273d73fc0a_2017-04-29'),
    ]
    # Kept for parity with the previous behaviour: the opener is installed
    # process-wide, so later urlopen() calls also use these headers.
    urllib.request.install_opener(opener)

    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(url) as response:
        html_bytes = response.read()

    # The body is parsed as JSON, so decode and load it directly. Routing
    # it through an HTML parser first can rewrite characters such as '&'
    # or '<' inside string values and corrupt the payload.
    jsonData = json.loads(html_bytes.decode('utf-8'))

    # The statement is constant, so build it once outside the loop.
    sql = "INSERT INTO `store` (`shop_id`,`village_id`,`name`,`address`,`recent_order_num`,`order_lead_time`,`float_delivery_fee`, `average_cost`, `rating`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    for data in jsonData:
        shop_id = data['id']
        name = data['name']
        address = data['address']
        recent_order_num = data['recent_order_num']
        order_lead_time = data['order_lead_time']
        float_delivery_fee = data['float_delivery_fee']
        rating = data['rating']
        # `average_cost` is optional and free-form text; fall back to 0
        # when it is missing, empty, or contains no digits.
        average_cost = _extract_average_cost(data.get('average_cost'))

        print(shop_id)
        print(village_id)
        print(name)
        print(recent_order_num)
        print(address)
        print(order_lead_time)
        print(float_delivery_fee)
        print(average_cost)
        print(rating)
        print('---------------------------------------------')

        cur.execute(sql, (shop_id, village_id, name, address, recent_order_num, order_lead_time, float_delivery_fee, average_cost, rating))
# Driver: crawl the restaurants around every village that has a geohash.
sql = "SELECT id,name,geohash,latitude,longitude FROM `village` where id >482 and geohash is not null"
try:
    cur.execute(sql)
    data = cur.fetchall()
    for d in data:
        # Column order follows the SELECT above; index 1 (name) is unused.
        village_id = d[0]
        geohash = d[2]
        latitude = d[3]
        longitude = d[4]
        getstore(village_id, geohash, latitude, longitude, 30)
        # PyMySQL does not autocommit by default. Commit after each village
        # so the inserted rows are persisted and completed work survives a
        # later failure.
        conn.commit()
finally:
    # Always release the cursor and connection, even if a request or
    # query raised.
    cur.close()
    conn.close()