# The CSV export takes about 30 minutes, but the import finishes almost
# instantly (about 10 seconds) because it doesn't need to open each
# individual page.
import pymysql
from bs4 import BeautifulSoup
import requests
import datetime
# Record the wall-clock start time so the total run time can be printed at the end.
start_time = datetime.datetime.now()
print(f'start:{start_time}')

# Open the connection to the local MySQL "kabuka" database.
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='パスワード',
    database='kabuka',
)

# Today's date as YYYYMMDD selects the folder holding today's exported CSVs.
today = datetime.date.today().strftime('%Y%m%d')
fol = 'フォルダパス' + today + '/'
def importcsv(code):
    """Bulk-load one security's CSV file into kabuka.kabuka.

    code: security code; the file loaded is <fol>/<code>.csv.
    Uses the module-level `conn` connection and `fol` folder path, and
    commits on success.
    """
    # Pass the file path as a bound parameter instead of concatenating it
    # into the SQL text: pymysql emits it as a quoted string literal, which
    # is exactly what LOAD DATA INFILE expects, and avoids quoting/injection
    # problems with odd characters in the path.
    sql = 'LOAD DATA INFILE %s INTO TABLE kabuka.kabuka FIELDS TERMINATED BY ","'
    # str() so an integer code from the DB doesn't break the concatenation.
    path = fol + str(code) + '.csv'
    # Context manager guarantees the cursor is closed even if execute raises.
    with conn.cursor() as cur:
        cur.execute(sql, (path,))
    conn.commit()
def main():
    """Load every distinct security code's CSV file into the kabuka table.

    Reads the code list from the meigara table, imports one CSV per code
    via importcsv(), then closes the connection and prints the elapsed time.
    """
    # NOTE(review): the original also fetched https://kabuoji3.com/stock/
    # with requests/BeautifulSoup and parsed its pager, but never used any
    # of it (`values`, `soup`, `all_li` were all dead) — leftover from the
    # CSV-export script, removed here.

    # Materialize the code list first so the cursor is closed before we
    # start issuing LOAD DATA statements on the same connection.
    with conn.cursor() as cur:
        cur.execute('SELECT DISTINCT code FROM meigara')
        codes = [row[0] for row in cur]

    for code in codes:
        importcsv(code)
        print(code)

    conn.close()
    end_time = datetime.datetime.now()
    print('end:' + str(end_time))
    print(end_time - start_time)


if __name__ == "__main__":
    main()