작성일자 : 2023-08-27
Ver 0.1.1
import pandas as pd
import tqdm as tqdm
import os
current_dir = os.getcwd()
current_dir
'/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics'
new_dir = '/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics'
os.chdir(new_dir)
print(current_dir)
/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics/Excercise
match_id = 2499895
match_events = pd.read_pickle(f'data/refined_events/England/2499895.pkl') #pkl 파일을 Dataframe으로 불러오기
match_events
|
match_id |
event_id |
period |
time |
team_id |
team_name |
player_id |
player_name |
event_type |
sub_event_type |
tags |
start_x |
start_y |
end_x |
end_y |
0 |
2499895 |
215108367 |
1H |
1.784 |
1625 |
Manchester City |
8325 |
S. Agüero |
Pass |
Simple pass |
[Accurate] |
52.00 |
34.68 |
40.56 |
34.68 |
1 |
2499895 |
215108368 |
1H |
3.324 |
1625 |
Manchester City |
105339 |
Fernandinho |
Pass |
Simple pass |
[Accurate] |
40.56 |
34.68 |
29.12 |
8.16 |
2 |
2499895 |
215108369 |
1H |
6.406 |
1625 |
Manchester City |
8277 |
K. Walker |
Pass |
Simple pass |
[Accurate] |
29.12 |
8.16 |
44.72 |
10.20 |
3 |
2499895 |
215108370 |
1H |
7.124 |
1625 |
Manchester City |
38021 |
K. De Bruyne |
Pass |
Simple pass |
[Accurate] |
44.72 |
10.20 |
69.68 |
6.12 |
4 |
2499895 |
215108371 |
1H |
8.676 |
1625 |
Manchester City |
11066 |
R. Sterling |
Pass |
Simple pass |
[Accurate] |
69.68 |
6.12 |
58.24 |
12.24 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
1607 |
2499895 |
215110122 |
2H |
2875.703 |
1624 |
Tottenham Hotspur |
210044 |
E. Dier |
Pass |
Simple pass |
[Accurate] |
36.40 |
26.52 |
48.88 |
29.92 |
1608 |
2499895 |
215110123 |
2H |
2876.142 |
1624 |
Tottenham Hotspur |
240070 |
H. Winks |
Duel |
Ground attacking duel |
[Anticipation, Lost, Not accurate] |
48.88 |
29.92 |
43.68 |
24.48 |
1609 |
2499895 |
215109959 |
2H |
2876.768 |
1625 |
Manchester City |
447205 |
P. Foden |
Duel |
Ground defending duel |
[Anticipated, Won, Accurate] |
55.12 |
38.08 |
60.32 |
43.52 |
1610 |
2499895 |
215109960 |
2H |
2878.046 |
1625 |
Manchester City |
11066 |
R. Sterling |
Duel |
Ground attacking duel |
[Free space right, Lost, Not accurate] |
60.32 |
43.52 |
60.32 |
43.52 |
1611 |
2499895 |
215110124 |
2H |
2878.216 |
1624 |
Tottenham Hotspur |
240070 |
H. Winks |
Duel |
Ground defending duel |
[Free space left, Lost, Not accurate] |
43.68 |
24.48 |
NaN |
NaN |
1612 rows × 15 columns
데이터 집계 기초
(1) Series.value_counts
shot_records = match_events[(match_events['event_type']== 'Shot') |
(match_events['sub_event_type'].isin(['Free kick shot','Penalty']))
]
shot_records['team_name'].value_counts()
team_name
Manchester City 20
Tottenham Hotspur 6
Name: count, dtype: int64
pass_records = match_events[(match_events['event_type'] == 'Pass') |
(match_events['sub_event_type'].isin(['Free kick shot', 'Penalty']))
]
pass_records['player_name'].value_counts()
player_name
F. Delph 54
J. Vertonghen 53
K. Walker 51
E. Dier 50
K. De Bruyne 45
K. Trippier 44
D. Rose 40
N. Otamendi 39
İ. Gündoğan 39
R. Sterling 35
E. Mangala 35
Fernandinho 33
M. Dembélé 32
H. Winks 32
H. Lloris 31
C. Eriksen 30
L. Sané 29
Ederson 28
D. Alli 24
Son Heung-Min 17
H. Kane 15
S. Agüero 13
Gabriel Jesus 9
P. Foden 8
Bernardo Silva 5
E. Lamela 4
M. Sissoko 2
Name: count, dtype: int64
(2) DataFrame.groupby 를 활용한 항목별 연산
match_events.groupby('period')['time'].max()
period
1H 2817.335
2H 2878.216
Name: time, dtype: object
match_events.groupby('period')[['time']].max()
|
time |
period |
|
1H |
2817.335 |
2H |
2878.216 |
match_events.groupby('team_name')[['start_x', 'start_y', 'end_x', 'end_y']].mean()
|
start_x |
start_y |
end_x |
end_y |
team_name |
|
|
|
|
Manchester City |
51.102901 |
33.699796 |
55.694915 |
34.064720 |
Tottenham Hotspur |
44.841227 |
35.546896 |
49.198455 |
35.735172 |
match_events.groupby(['team_name', 'event_type'])[['event_id']].count()
|
|
event_id |
team_name |
event_type |
|
Manchester City |
Duel |
216 |
Foul |
13 |
Free kick |
52 |
Goalkeeper leaving line |
1 |
Interruption |
1 |
Offside |
4 |
Others on the ball |
56 |
Pass |
422 |
Save attempt |
2 |
Shot |
19 |
Substitution |
6 |
Tottenham Hotspur |
Duel |
216 |
Foul |
20 |
Free kick |
54 |
Interruption |
67 |
Offside |
2 |
Others on the ball |
66 |
Pass |
374 |
Save attempt |
11 |
Shot |
6 |
Substitution |
4 |
(3) pandas.pivot_table 함수를 활용한 다차원 집계 (DataFrame 형태)
match_events.pivot_table(values='event_id', index='event_type', columns= 'team_name', aggfunc='count')
team_name |
Manchester City |
Tottenham Hotspur |
event_type |
|
|
Duel |
216.0 |
216.0 |
Foul |
13.0 |
20.0 |
Free kick |
52.0 |
54.0 |
Goalkeeper leaving line |
1.0 |
NaN |
Interruption |
1.0 |
67.0 |
Offside |
4.0 |
2.0 |
Others on the ball |
56.0 |
66.0 |
Pass |
422.0 |
374.0 |
Save attempt |
2.0 |
11.0 |
Shot |
19.0 |
6.0 |
Substitution |
6.0 |
4.0 |
match_events.pivot_table('event_id', 'event_type', 'team_name', 'count', fill_value=0)
team_name |
Manchester City |
Tottenham Hotspur |
event_type |
|
|
Duel |
216 |
216 |
Foul |
13 |
20 |
Free kick |
52 |
54 |
Goalkeeper leaving line |
1 |
0 |
Interruption |
1 |
67 |
Offside |
4 |
2 |
Others on the ball |
56 |
66 |
Pass |
422 |
374 |
Save attempt |
2 |
11 |
Shot |
19 |
6 |
Substitution |
6 |
4 |
이벤트 성공 여부 집계
pass_records.head()
|
match_id |
event_id |
period |
time |
team_id |
team_name |
player_id |
player_name |
event_type |
sub_event_type |
tags |
start_x |
start_y |
end_x |
end_y |
0 |
2499895 |
215108367 |
1H |
1.784 |
1625 |
Manchester City |
8325 |
S. Agüero |
Pass |
Simple pass |
[Accurate] |
52.00 |
34.68 |
40.56 |
34.68 |
1 |
2499895 |
215108368 |
1H |
3.324 |
1625 |
Manchester City |
105339 |
Fernandinho |
Pass |
Simple pass |
[Accurate] |
40.56 |
34.68 |
29.12 |
8.16 |
2 |
2499895 |
215108369 |
1H |
6.406 |
1625 |
Manchester City |
8277 |
K. Walker |
Pass |
Simple pass |
[Accurate] |
29.12 |
8.16 |
44.72 |
10.20 |
3 |
2499895 |
215108370 |
1H |
7.124 |
1625 |
Manchester City |
38021 |
K. De Bruyne |
Pass |
Simple pass |
[Accurate] |
44.72 |
10.20 |
69.68 |
6.12 |
4 |
2499895 |
215108371 |
1H |
8.676 |
1625 |
Manchester City |
11066 |
R. Sterling |
Pass |
Simple pass |
[Accurate] |
69.68 |
6.12 |
58.24 |
12.24 |
(1) 반복문 기반 성공 여부 판단
#반복문은 성능에 좋지는 않음
success_idx = []
for i in pass_records.index:
tags = pass_records.at[i, 'tags']
if 'Accurate' in tags:
success_idx.append(i)
acc_pass_records = pass_records.loc[success_idx]
acc_pass_records
|
match_id |
event_id |
period |
time |
team_id |
team_name |
player_id |
player_name |
event_type |
sub_event_type |
tags |
start_x |
start_y |
end_x |
end_y |
0 |
2499895 |
215108367 |
1H |
1.784 |
1625 |
Manchester City |
8325 |
S. Agüero |
Pass |
Simple pass |
[Accurate] |
52.00 |
34.68 |
40.56 |
34.68 |
1 |
2499895 |
215108368 |
1H |
3.324 |
1625 |
Manchester City |
105339 |
Fernandinho |
Pass |
Simple pass |
[Accurate] |
40.56 |
34.68 |
29.12 |
8.16 |
2 |
2499895 |
215108369 |
1H |
6.406 |
1625 |
Manchester City |
8277 |
K. Walker |
Pass |
Simple pass |
[Accurate] |
29.12 |
8.16 |
44.72 |
10.20 |
3 |
2499895 |
215108370 |
1H |
7.124 |
1625 |
Manchester City |
38021 |
K. De Bruyne |
Pass |
Simple pass |
[Accurate] |
44.72 |
10.20 |
69.68 |
6.12 |
4 |
2499895 |
215108371 |
1H |
8.676 |
1625 |
Manchester City |
11066 |
R. Sterling |
Pass |
Simple pass |
[Accurate] |
69.68 |
6.12 |
58.24 |
12.24 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
1593 |
2499895 |
215109948 |
2H |
2813.211 |
1625 |
Manchester City |
447205 |
P. Foden |
Pass |
Simple pass |
[Accurate] |
24.96 |
55.08 |
10.40 |
63.92 |
1594 |
2499895 |
215109949 |
2H |
2815.542 |
1625 |
Manchester City |
8464 |
F. Delph |
Pass |
Simple pass |
[Accurate] |
10.40 |
63.92 |
5.20 |
40.12 |
1603 |
2499895 |
215110119 |
2H |
2866.949 |
1624 |
Tottenham Hotspur |
48 |
J. Vertonghen |
Pass |
Simple pass |
[Accurate] |
39.52 |
48.96 |
28.08 |
36.04 |
1604 |
2499895 |
215110120 |
2H |
2869.894 |
1624 |
Tottenham Hotspur |
25381 |
H. Lloris |
Pass |
Launch |
[Accurate] |
28.08 |
36.04 |
59.28 |
17.00 |
1607 |
2499895 |
215110122 |
2H |
2875.703 |
1624 |
Tottenham Hotspur |
210044 |
E. Dier |
Pass |
Simple pass |
[Accurate] |
36.40 |
26.52 |
48.88 |
29.92 |
665 rows × 15 columns
(2) DataFrame.apply 함수 기반 성공 여부 판단
def is_accurate(tags) :
return 'Accurate' in tags
pass_records['tags'].apply(is_accurate)
0 True
1 True
2 True
3 True
4 True
...
1603 True
1604 True
1605 False
1606 False
1607 True
Name: tags, Length: 797, dtype: bool
acc_pass_records = pass_records[pass_records['tags'].apply(is_accurate)]
acc_pass_records
|
match_id |
event_id |
period |
time |
team_id |
team_name |
player_id |
player_name |
event_type |
sub_event_type |
tags |
start_x |
start_y |
end_x |
end_y |
0 |
2499895 |
215108367 |
1H |
1.784 |
1625 |
Manchester City |
8325 |
S. Agüero |
Pass |
Simple pass |
[Accurate] |
52.00 |
34.68 |
40.56 |
34.68 |
1 |
2499895 |
215108368 |
1H |
3.324 |
1625 |
Manchester City |
105339 |
Fernandinho |
Pass |
Simple pass |
[Accurate] |
40.56 |
34.68 |
29.12 |
8.16 |
2 |
2499895 |
215108369 |
1H |
6.406 |
1625 |
Manchester City |
8277 |
K. Walker |
Pass |
Simple pass |
[Accurate] |
29.12 |
8.16 |
44.72 |
10.20 |
3 |
2499895 |
215108370 |
1H |
7.124 |
1625 |
Manchester City |
38021 |
K. De Bruyne |
Pass |
Simple pass |
[Accurate] |
44.72 |
10.20 |
69.68 |
6.12 |
4 |
2499895 |
215108371 |
1H |
8.676 |
1625 |
Manchester City |
11066 |
R. Sterling |
Pass |
Simple pass |
[Accurate] |
69.68 |
6.12 |
58.24 |
12.24 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
1593 |
2499895 |
215109948 |
2H |
2813.211 |
1625 |
Manchester City |
447205 |
P. Foden |
Pass |
Simple pass |
[Accurate] |
24.96 |
55.08 |
10.40 |
63.92 |
1594 |
2499895 |
215109949 |
2H |
2815.542 |
1625 |
Manchester City |
8464 |
F. Delph |
Pass |
Simple pass |
[Accurate] |
10.40 |
63.92 |
5.20 |
40.12 |
1603 |
2499895 |
215110119 |
2H |
2866.949 |
1624 |
Tottenham Hotspur |
48 |
J. Vertonghen |
Pass |
Simple pass |
[Accurate] |
39.52 |
48.96 |
28.08 |
36.04 |
1604 |
2499895 |
215110120 |
2H |
2869.894 |
1624 |
Tottenham Hotspur |
25381 |
H. Lloris |
Pass |
Launch |
[Accurate] |
28.08 |
36.04 |
59.28 |
17.00 |
1607 |
2499895 |
215110122 |
2H |
2875.703 |
1624 |
Tottenham Hotspur |
210044 |
E. Dier |
Pass |
Simple pass |
[Accurate] |
36.40 |
26.52 |
48.88 |
29.92 |
665 rows × 15 columns
(3) 람다 표현식(lamda expression) 기반 성공 여부 판단
acc_pass_records = pass_records[pass_records['tags'].apply(lambda x : 'Accurate' in x)]
acc_pass_records
|
match_id |
event_id |
period |
time |
team_id |
team_name |
player_id |
player_name |
event_type |
sub_event_type |
tags |
start_x |
start_y |
end_x |
end_y |
0 |
2499895 |
215108367 |
1H |
1.784 |
1625 |
Manchester City |
8325 |
S. Agüero |
Pass |
Simple pass |
[Accurate] |
52.00 |
34.68 |
40.56 |
34.68 |
1 |
2499895 |
215108368 |
1H |
3.324 |
1625 |
Manchester City |
105339 |
Fernandinho |
Pass |
Simple pass |
[Accurate] |
40.56 |
34.68 |
29.12 |
8.16 |
2 |
2499895 |
215108369 |
1H |
6.406 |
1625 |
Manchester City |
8277 |
K. Walker |
Pass |
Simple pass |
[Accurate] |
29.12 |
8.16 |
44.72 |
10.20 |
3 |
2499895 |
215108370 |
1H |
7.124 |
1625 |
Manchester City |
38021 |
K. De Bruyne |
Pass |
Simple pass |
[Accurate] |
44.72 |
10.20 |
69.68 |
6.12 |
4 |
2499895 |
215108371 |
1H |
8.676 |
1625 |
Manchester City |
11066 |
R. Sterling |
Pass |
Simple pass |
[Accurate] |
69.68 |
6.12 |
58.24 |
12.24 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
1593 |
2499895 |
215109948 |
2H |
2813.211 |
1625 |
Manchester City |
447205 |
P. Foden |
Pass |
Simple pass |
[Accurate] |
24.96 |
55.08 |
10.40 |
63.92 |
1594 |
2499895 |
215109949 |
2H |
2815.542 |
1625 |
Manchester City |
8464 |
F. Delph |
Pass |
Simple pass |
[Accurate] |
10.40 |
63.92 |
5.20 |
40.12 |
1603 |
2499895 |
215110119 |
2H |
2866.949 |
1624 |
Tottenham Hotspur |
48 |
J. Vertonghen |
Pass |
Simple pass |
[Accurate] |
39.52 |
48.96 |
28.08 |
36.04 |
1604 |
2499895 |
215110120 |
2H |
2869.894 |
1624 |
Tottenham Hotspur |
25381 |
H. Lloris |
Pass |
Launch |
[Accurate] |
28.08 |
36.04 |
59.28 |
17.00 |
1607 |
2499895 |
215110122 |
2H |
2875.703 |
1624 |
Tottenham Hotspur |
210044 |
E. Dier |
Pass |
Simple pass |
[Accurate] |
36.40 |
26.52 |
48.88 |
29.92 |
665 rows × 15 columns