Вы находитесь на странице: 1 из 5

#Author : K.K.D De Silva
#DIT : IT09113374
#Batch : SE, Week End
"""Print a cricketer's batting/fielding and bowling averages.

The script downloads one ESPNcricinfo player profile page, walks it line by
line, and prints the player's full name followed by two small tables
(batting and fielding, then bowling) with one row each for Tests, ODIs and
T20 internationals.

The page is parsed with a line-oriented state machine rather than an HTML
parser: marker strings switch the current table / row, and regular
expressions pull header captions and numeric cells out of the lines that
follow.
"""

import re
import urllib.request

PLAYER_URL = "http://www.espncricinfo.com/srilanka/content/player/49209.html"

# Only this many leading numeric cells are kept per row, matching the six
# header captions that are captured for each table.
VALUES_PER_ROW = 6

# (label as it appears in the page's row-label cell, label used in the report)
FORMATS = (("Tests", "Tests"), ("ODIs", "ODIs"), ("T20Is", "T20s"))

# The line after this marker carries the player's full name in a <span>.
NAME_MARKER = "<b>Full name</b>"

# Marker lines that open each averages table, and the report title for each.
TABLE_MARKERS = (
    ("batting",
     '<span class="ciPhotoWidgetLink">Batting and fielding averages</span>'),
    ("bowling",
     '<span class="ciPhotoWidgetLink">Bowling averages</span>'),
)
TABLE_TITLES = (
    ("batting", "Batting and fielding averages"),
    ("bowling", "Bowling averages"),
)

# Row-label cell that starts the data cells of one format.
ROW_MARKER = '<td class="left" nowrap="nowrap"><b>{}</b></td>'

NAME_RE = re.compile(r"<span>([\w\s.-]+)</span>")
# A number (optionally with decimals) immediately before a closing </td>.
VALUE_RE = re.compile(r"([0-9]+[.0-9]*)</td>")


def _header_re(attributes):
    """Compile a pattern capturing the caption of a <th> with *attributes*."""
    return re.compile(r"<th " + re.escape(attributes) + r">([\w\s.-]+)</th>")


# Header cells are recognised by their exact attribute text, in page order.
HEADER_RES = {
    "batting": tuple(_header_re(attributes) for attributes in (
        'title="matches played" nowrap="nowrap"',
        'title="innings batted" nowrap="nowrap"',
        'title="not outs" nowrap="nowrap"',
        'title="runs scored" nowrap="nowrap"',
        'title="highest inns score" class="padAst" nowrap="nowrap"',
        'title="batting average" nowrap="nowrap"',
    )),
    "bowling": tuple(_header_re(attributes) for attributes in (
        'title="matches played" nowrap="nowrap"',
        'title="innings bowled in" nowrap="nowrap"',
        'title="balls bowled" nowrap="nowrap"',
        'title="runs conceded" nowrap="nowrap"',
        'title="wickets taken" nowrap="nowrap"',
        'title="bowling average" nowrap="nowrap"',
    )),
}


def _empty_table():
    """Return the empty result structure for one averages table."""
    return {
        "headers": [],
        "rows": {page_label: [] for page_label, _ in FORMATS},
    }


def parse_player_page(lines):
    """Extract the name, header captions and row values from page lines.

    Args:
        lines: iterable of already-decoded text lines of the profile page.

    Returns:
        A dict with the keys ``"name"`` (str, empty when not found),
        ``"batting"`` and ``"bowling"``.  Each table is a dict with
        ``"headers"`` (captions in the order found) and ``"rows"`` (mapping
        ``"Tests"`` / ``"ODIs"`` / ``"T20Is"`` to at most ``VALUES_PER_ROW``
        numeric strings; an empty list when the row is absent).
    """
    stats = {"name": "", "batting": _empty_table(), "bowling": _empty_table()}
    table = None             # key of the table currently being read
    row = None               # page label of the row currently being read
    headers_open = False     # still expecting header cells for `table`
    name_on_next_line = False

    for line in lines:
        # A trailing '*' marks a not-out score; drop it so the cell is numeric.
        line = line.replace("*", "")

        if name_on_next_line:
            name_on_next_line = False
            found = NAME_RE.search(line)
            if found:
                stats["name"] = found.group(1)

        if table is not None:
            if headers_open:
                patterns = HEADER_RES[table]
                for pattern in patterns:
                    found = pattern.search(line)
                    if found:
                        stats[table]["headers"].append(found.group(1))
                        # The average column is the last caption we want.
                        if pattern is patterns[-1]:
                            headers_open = False
            if row is not None:
                # NOTE(review): the first six numeric cells of a row are
                # assumed to line up with the six captured captions; cells
                # that are not plain numbers are skipped -- confirm against
                # the live page markup.
                values = stats[table]["rows"][row]
                found = VALUE_RE.search(line)
                if found and len(values) < VALUES_PER_ROW:
                    values.append(found.group(1))

        # Markers are examined after extraction so that they only influence
        # the lines that follow them.
        if NAME_MARKER in line:
            name_on_next_line = True
        for key, marker in TABLE_MARKERS:
            if marker in line:
                # Each table tracks its own row; nothing carries over from
                # the previous table.
                table, row, headers_open = key, None, True
        for page_label, _ in FORMATS:
            if table is not None and ROW_MARKER.format(page_label) in line:
                row = page_label

    return stats


def _pad(cells):
    """Return exactly VALUES_PER_ROW cells, filling gaps with '-'."""
    kept = list(cells[:VALUES_PER_ROW])
    return kept + ["-"] * (VALUES_PER_ROW - len(kept))


def _format_row(label, values):
    """Format one report row: leading columns as int, the last as float."""
    cells = []
    for index, text in enumerate(values[:VALUES_PER_ROW]):
        if index == VALUES_PER_ROW - 1:
            cells.append(str(float(text)))
        else:
            cells.append(str(int(text)))
    return "\t".join([label] + _pad(cells))


def format_report(stats):
    """Return the report as a list of strings, one per ``print`` call.

    Rows or captions missing from *stats* are shown as '-' instead of
    raising ``IndexError``.
    """
    report = ["Name : {}".format(stats["name"])]
    for key, title in TABLE_TITLES:
        table = stats[key]
        report.append("\n{}\n".format(title))
        report.append("\t" + "\t".join(_pad(table["headers"])))
        for page_label, print_label in FORMATS:
            report.append(_format_row(print_label, table["rows"][page_label]))
    return report


def fetch_lines(url):
    """Download *url* and return its lines decoded to text."""
    with urllib.request.urlopen(url) as response:
        # Fall back to UTF-8 when the server does not declare a charset.
        charset = response.headers.get_content_charset() or "utf-8"
        return [raw.decode(charset, errors="replace") for raw in response]


def main(url=PLAYER_URL):
    """Fetch the player page at *url* and print the averages report."""
    for text in format_report(parse_player_page(fetch_lines(url))):
        print(text)


if __name__ == "__main__":
    main()

Вам также может понравиться