目標
選手の成績推移が見たい。
選手情報のCSV化
コード
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Download the batter and pitcher member lists of every team in ``teams``
# from baseball.yahoo.co.jp and write them out as one CSV file per team,
# role and day.  The code runs unchanged on Python 2 and Python 3.
import contextlib
import datetime
import io
import os
import re
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

# Timestamp written into every CSV row, and the date used in file names.
time = str(datetime.datetime.today())
day = str(datetime.date.today())

# First line of each CSV file, keyed by the ``case`` given to player_parser.
_CSV_HEADERS = {
    "batter": "date,team,PlayerNo,name,daritu,shiaisuu,dasekisuu,dasuu,anda,niruida,sanruida,homerun,ruidasuu,daten,tokuten,sanshin,fourball,deadball,gida,giseifurai,tourui,syuturuiritu,tyoudaritu,tokutenken,syouriten,heisatu,sissaku",
    "pitcher": "date,team,PlayerNo,name,bougyoritu,shiaisuu,kantou,mushittenshouri,mushikyuu,win,lose,hold,holdpoint,save,shouritu,toukyuukaisuu,hianda,hihonruida,datusanshin,fourball,deadball,boutou,bo-ku,shiten,jisekiten",
}

_TABLE_ROW_RE = re.compile(r"<tr.*?</tr>")
_TAG_RE = re.compile(r"<.*?>")
_BARE_DECIMAL_RE = re.compile(r",(\.)(\d+)")
# Only rows with enough comma-separated columns are treated as player rows.
_PLAYER_ROW_RE = re.compile(r".+?,.+?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*?,.*")


def _to_text(value):
    """Return *value* as text, decoding UTF-8 bytes when necessary."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def _echo(text):
    """Print one line of text.

    Python 2 gets explicit UTF-8 bytes so that non-ASCII output does not
    fail when stdout has no encoding (for example when piped).
    """
    if str is bytes and not isinstance(text, str):
        text = text.encode("utf-8")
    print(text)


###### URL access ######
def httpGetter(url):
    """Fetch *url* and return the raw response body.

    Returns None (after printing a failure message) when the URL is
    malformed or the request fails, instead of letting the error escape.
    """
    try:
        # closing() works for the response objects of both Python versions.
        with contextlib.closing(urlopen(url)) as response:
            return response.read()
    except (IOError, ValueError):
        _echo(u"アクセス失敗")
        return None


###### CSV output ######
def fileExport(data, filename):
    """Write every item of *data* as one line of the UTF-8 file *filename*.

    The parent directory is created when it does not exist yet.
    """
    directory = os.path.dirname(filename)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)
    with io.open(filename, "w", encoding="utf-8") as out:
        out.writelines(_to_text(line) + "\n" for line in data)


###### text cleanup ######
def html_cleaner(data):
    """Turn the ``<tr>`` rows of an HTML page into comma-separated strings.

    *data* may be bytes (decoded as UTF-8) or text.  Newlines are removed,
    every ``</td>`` becomes a comma and all remaining tags are stripped.
    Returns a list of text rows; empty or missing input gives ``[]``.
    """
    if not data:
        return []
    if isinstance(data, bytes):
        # Decode once at the boundary; everything after this is text.
        data = data.decode("utf-8", "replace")
    rows = _TABLE_ROW_RE.findall(data.replace("\n", ""))
    return [_TAG_RE.sub("", row.replace("</td>", ",")) for row in rows]


def player_parser(case, player_data, team=None, timestamp=None):
    """Build the CSV lines (header first) for one team's player rows.

    case        -- "batter" or "pitcher"; selects the header line.
    player_data -- rows as produced by html_cleaner.
    team        -- value of the ``team`` column.  When omitted, a
                   module-level ``team_no`` is used if one exists, which is
                   what the original global-based version relied on.
    timestamp   -- value of the ``date`` column; defaults to the module
                   level ``time``.

    Raises ValueError for an unknown *case*.
    """
    if case not in _CSV_HEADERS:
        raise ValueError("unknown case: %r" % (case,))
    if team is None:
        team = globals().get("team_no", "")
    if timestamp is None:
        timestamp = time
    prefix = _to_text(timestamp) + "," + _to_text(team) + ","

    player = [_CSV_HEADERS[case]]
    for row in player_data:
        row = _to_text(row)
        row = _BARE_DECIMAL_RE.sub(r",0.\2", row)  # ",.300" -> ",0.300"
        # NOTE(review): this rewrites every hyphen, including any inside a
        # player name; presumably "-" is the site's "no value" marker.
        row = row.replace("-", "0")
        row = row.replace(" 1/3", ".333")  # "11 1/3" -> "11.333"
        row = row.replace(" 2/3", ".666")
        if _PLAYER_ROW_RE.match(row):
            player.append(prefix + row)
    return player


teams = [u"001_読売ジャイアンツ", u"002_東京ヤクルトスワローズ", u"003_横浜DeNAベイスターズ", u"004_中日ドラゴンズ", u"005_阪神タイガース", u"006_広島東洋カープ",
         u"007_埼玉西武ライオンズ", u"008_北海道日本ハムファイターズ", u"009_千葉ロッテマリーンズ", u"376_東北楽天ゴールデンイーグルス", u"011_オリックス・バファローズ", u"012_福岡ソフトバンクホークス"]


def main():
    """Fetch, convert, save and print the stats of every team in ``teams``."""
    for team_no in teams:
        team_code = team_no[0:3]
        base_url = ("https://baseball.yahoo.co.jp/npb/teams/"
                    + str(int(team_code)) + "/memberlist?type=")
        batter_data = html_cleaner(httpGetter(url=base_url + "b"))
        pitcher_data = html_cleaner(httpGetter(url=base_url + "p"))

        # Batter data
        exp = player_parser("batter", batter_data, team=team_no)
        fileExport(exp, "data_batter/" + team_code + "_" + day + "_batter.csv")
        for line in exp:
            _echo(line)

        # Pitcher data
        exp = player_parser("pitcher", pitcher_data, team=team_no)
        fileExport(exp, "data_pitcher/" + team_code + "_" + day + "_pitcher.csv")
        for line in exp:
            _echo(line)


if __name__ == "__main__":
    main()
アウトプット
2017-03-21 17:14:19.120000,003:横浜DeNAベイスターズ,25,筒香 嘉智,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2017-03-21 17:14:19.120000,012:福岡ソフトバンクホークス,18,松坂 大輔,4.76,3,0,0,0,0,0,0,0,0,0,11.333,4,1,7,10,2,2,0,8,6,