작성일자 : 2023-08-27
Ver. 0.1.1
Ver. 0.1.1
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
Change directory
# Capture the directory the session starts in and echo it in one step.
print(current_dir := os.getcwd())
/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics
# Move to the project root so the relative paths used below
# (src/, data/, img/) resolve.
# NOTE(review): this absolute path is machine-specific.
new_dir = '/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics'
os.chdir(new_dir)
# Ask the OS again: current_dir was captured before the chdir, so printing it
# would show the old directory rather than confirm the change.
print(os.getcwd())
/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics/Excercise
Use src.plot_utils
from src.plot_utils import * # plot_utils.py <- code that draws the pitch image
17/18 시즌 18Round Mancity vs Tottenham
# Identifier of the match analysed here; it is also the pickle's file name.
match_id = 2499895
# Build the path from match_id so the id lives in exactly one place (the
# original f-string had no placeholder and silently ignored match_id).
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
match_events = pd.read_pickle(f'data/refined_events/England/{match_id}.pkl')  # load the .pkl file as a DataFrame
match_events
match_id | event_id | period | time | team_id | team_name | player_id | player_name | event_type | sub_event_type | tags | start_x | start_y | end_x | end_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2499895 | 215108367 | 1H | 1.784 | 1625 | Manchester City | 8325 | S. Agüero | Pass | Simple pass | [Accurate] | 52.00 | 34.68 | 40.56 | 34.68 |
1 | 2499895 | 215108368 | 1H | 3.324 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Accurate] | 40.56 | 34.68 | 29.12 | 8.16 |
2 | 2499895 | 215108369 | 1H | 6.406 | 1625 | Manchester City | 8277 | K. Walker | Pass | Simple pass | [Accurate] | 29.12 | 8.16 | 44.72 | 10.20 |
3 | 2499895 | 215108370 | 1H | 7.124 | 1625 | Manchester City | 38021 | K. De Bruyne | Pass | Simple pass | [Accurate] | 44.72 | 10.20 | 69.68 | 6.12 |
4 | 2499895 | 215108371 | 1H | 8.676 | 1625 | Manchester City | 11066 | R. Sterling | Pass | Simple pass | [Accurate] | 69.68 | 6.12 | 58.24 | 12.24 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1607 | 2499895 | 215110122 | 2H | 2875.703 | 1624 | Tottenham Hotspur | 210044 | E. Dier | Pass | Simple pass | [Accurate] | 36.40 | 26.52 | 48.88 | 29.92 |
1608 | 2499895 | 215110123 | 2H | 2876.142 | 1624 | Tottenham Hotspur | 240070 | H. Winks | Duel | Ground attacking duel | [Anticipation, Lost, Not accurate] | 48.88 | 29.92 | 43.68 | 24.48 |
1609 | 2499895 | 215109959 | 2H | 2876.768 | 1625 | Manchester City | 447205 | P. Foden | Duel | Ground defending duel | [Anticipated, Won, Accurate] | 55.12 | 38.08 | 60.32 | 43.52 |
1610 | 2499895 | 215109960 | 2H | 2878.046 | 1625 | Manchester City | 11066 | R. Sterling | Duel | Ground attacking duel | [Free space right, Lost, Not accurate] | 60.32 | 43.52 | 60.32 | 43.52 |
1611 | 2499895 | 215110124 | 2H | 2878.216 | 1624 | Tottenham Hotspur | 240070 | H. Winks | Duel | Ground defending duel | [Free space left, Lost, Not accurate] | 43.68 | 24.48 | NaN | NaN |
1612 rows × 15 columns
# Keep only the rows whose event_type is 'Pass', then preview the first rows.
match_pass = match_events.loc[match_events['event_type'].eq('Pass')]
match_pass.head()
match_id | event_id | period | time | team_id | team_name | player_id | player_name | event_type | sub_event_type | tags | start_x | start_y | end_x | end_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2499895 | 215108367 | 1H | 1.784 | 1625 | Manchester City | 8325 | S. Agüero | Pass | Simple pass | [Accurate] | 52.00 | 34.68 | 40.56 | 34.68 |
1 | 2499895 | 215108368 | 1H | 3.324 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Accurate] | 40.56 | 34.68 | 29.12 | 8.16 |
2 | 2499895 | 215108369 | 1H | 6.406 | 1625 | Manchester City | 8277 | K. Walker | Pass | Simple pass | [Accurate] | 29.12 | 8.16 | 44.72 | 10.20 |
3 | 2499895 | 215108370 | 1H | 7.124 | 1625 | Manchester City | 38021 | K. De Bruyne | Pass | Simple pass | [Accurate] | 44.72 | 10.20 | 69.68 | 6.12 |
4 | 2499895 | 215108371 | 1H | 8.676 | 1625 | Manchester City | 11066 | R. Sterling | Pass | Simple pass | [Accurate] | 69.68 | 6.12 | 58.24 | 12.24 |
# Print the column names, non-null counts and dtypes of the pass events.
match_pass.info()
<class 'pandas.core.frame.DataFrame'>
Index: 796 entries, 0 to 1607
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 match_id 796 non-null object
1 event_id 796 non-null object
2 period 796 non-null object
3 time 796 non-null object
4 team_id 796 non-null object
5 team_name 796 non-null object
6 player_id 796 non-null object
7 player_name 796 non-null object
8 event_type 796 non-null object
9 sub_event_type 796 non-null object
10 tags 796 non-null object
11 start_x 796 non-null float64
12 start_y 796 non-null float64
13 end_x 795 non-null float64
14 end_y 795 non-null float64
dtypes: float64(4), object(11)
memory usage: 99.5+ KB
# Team names in order of first appearance among the pass events.
# The unpacking requires exactly two distinct names.
team1_name, team2_name = pd.unique(match_pass['team_name'])
# One DataFrame of pass events per team.
team1_pass = match_pass.loc[match_pass['team_name'].eq(team1_name)]
team2_pass = match_pass.loc[match_pass['team_name'].eq(team2_name)]
(team1_name, team2_name)
('Manchester City', 'Tottenham Hotspur')
# Display the first team's pass events (notebook cell output).
team1_pass
match_id | event_id | period | time | team_id | team_name | player_id | player_name | event_type | sub_event_type | tags | start_x | start_y | end_x | end_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2499895 | 215108367 | 1H | 1.784 | 1625 | Manchester City | 8325 | S. Agüero | Pass | Simple pass | [Accurate] | 52.00 | 34.68 | 40.56 | 34.68 |
1 | 2499895 | 215108368 | 1H | 3.324 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Accurate] | 40.56 | 34.68 | 29.12 | 8.16 |
2 | 2499895 | 215108369 | 1H | 6.406 | 1625 | Manchester City | 8277 | K. Walker | Pass | Simple pass | [Accurate] | 29.12 | 8.16 | 44.72 | 10.20 |
3 | 2499895 | 215108370 | 1H | 7.124 | 1625 | Manchester City | 38021 | K. De Bruyne | Pass | Simple pass | [Accurate] | 44.72 | 10.20 | 69.68 | 6.12 |
4 | 2499895 | 215108371 | 1H | 8.676 | 1625 | Manchester City | 11066 | R. Sterling | Pass | Simple pass | [Accurate] | 69.68 | 6.12 | 58.24 | 12.24 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1594 | 2499895 | 215109949 | 2H | 2815.542 | 1625 | Manchester City | 8464 | F. Delph | Pass | Simple pass | [Accurate] | 10.40 | 63.92 | 5.20 | 40.12 |
1595 | 2499895 | 215109951 | 2H | 2818.302 | 1625 | Manchester City | 71654 | Ederson | Pass | Launch | [Not accurate] | 5.20 | 40.12 | 42.64 | 53.72 |
1597 | 2499895 | 215109953 | 2H | 2821.927 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Not accurate] | 28.08 | 42.84 | 23.92 | 33.32 |
1602 | 2499895 | 215109957 | 2H | 2864.952 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Not accurate] | 41.60 | 17.00 | 64.48 | 19.04 |
1606 | 2499895 | 215109958 | 2H | 2873.567 | 1625 | Manchester City | 105339 | Fernandinho | Pass | High pass | [Not accurate] | 45.76 | 19.72 | 67.60 | 41.48 |
422 rows × 15 columns
# Quick scatter of every pass origin, one colour per team, on bare axes.
plt.scatter(x=team1_pass['start_x'], y=team1_pass['start_y'], c='blue')
plt.scatter(x=team2_pass['start_x'], y=team2_pass['start_y'], c='black')
plt.show()
# Draw an empty pitch with the project helper (presumably pitch = background
# colour, line = marking colour -- confirm in src/plot_utils.py) and save it.
draw_pitch(pitch='#440F48', line='white')
# bbox_inches='tight' trims the surrounding whitespace in the saved image.
plt.savefig('img/pitch_ver2.png', bbox_inches='tight')
# Preview team2's x coordinates (pass start and end).
team2_pass[['start_x', 'end_x']].head()
start_x | end_x | |
---|---|---|
7 | 47.84 | 55.12 |
13 | 78.00 | 85.28 |
20 | 30.16 | 10.40 |
21 | 10.40 | 13.52 |
22 | 13.52 | 22.88 |
# Preview team2's y coordinates (pass start and end).
team2_pass[['start_y', 'end_y']].head()
start_y | end_y | |
---|---|---|
7 | 61.20 | 47.60 |
13 | 63.92 | 57.80 |
20 | 37.40 | 36.04 |
21 | 36.04 | 63.24 |
22 | 63.24 | 65.96 |
# The data is recorded with both teams playing in the same direction, so
# team2's positions must be mirrored to show both teams on one pitch.
# Coordinate range: x axis 0-104, y axis 0-68.
# team2_pass is a boolean-filtered slice of match_pass; take an explicit copy
# first so the assignments below write to an independent DataFrame instead of
# raising SettingWithCopyWarning.
team2_pass = team2_pass.copy()
# NOTE: the flip is not idempotent -- re-running this cell mirrors the
# coordinates back to their original values.
team2_pass[['start_x', 'end_x']] = 104 - team2_pass[['start_x', 'end_x']]
team2_pass[['start_y', 'end_y']] = 68 - team2_pass[['start_y', 'end_y']]
/var/folders/_b/znjp14gd02d8lg63thqc7bm40000gn/T/ipykernel_2729/1796384401.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
team2_pass[['start_x', 'end_x']] = 104 - team2_pass[['start_x', 'end_x']]
/var/folders/_b/znjp14gd02d8lg63thqc7bm40000gn/T/ipykernel_2729/1796384401.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
team2_pass[['start_y', 'end_y']] = 68 - team2_pass[['start_y', 'end_y']]
# Draw the pitch, then overlay each team's pass origins as translucent markers;
# the legend entries carry the team name and its number of passes.
draw_pitch('darkgreen', 'white')
plt.scatter(
    x=team1_pass['start_x'],
    y=team1_pass['start_y'],
    c='blue',
    edgecolors='k',
    alpha=0.4,
    label=f'{team1_name} : {len(team1_pass)} pass',
)
plt.scatter(
    x=team2_pass['start_x'],
    y=team2_pass['start_y'],
    c='black',
    edgecolors='k',
    alpha=0.4,
    label=f'{team2_name} : {len(team2_pass)} pass',
)
# Anchor the legend at the axes' upper-right corner, point (1, 1).
plt.legend(bbox_to_anchor=(1, 1), fontsize=15)
plt.show()