작성일자 : 2024-05-08
Ver 0.1.1
데이터 준비¶
In [1]:
import os
# Change the working directory before the remaining imports and data loads.
# NOTE(review): hard-coded, machine-specific absolute path; presumably the
# project root, so that the relative `data_metrica/...` paths and the `src`
# package used below resolve — adjust per environment.
new_dir = '/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics'
os.chdir(new_dir)
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from src.plot_utils import draw_pitch
(1) 가공 데이터 불러오기¶
In [3]:
# Load the integrated tracking table of the selected sample match.
# The first CSV column becomes the index and the first row is the header.
match_id = 1
file = 'data_metrica/data/Sample_Game_{0}/Sample_Game_{0}_IntegratedData.csv'.format(match_id)
traces = pd.read_csv(file, index_col=0, header=0)
traces
Out[3]:
period | time | H11_x | H11_y | H01_x | H01_y | H02_x | H02_y | H03_x | H03_y | ... | A24_speed | A26_vx | A26_vy | A26_speed | A27_vx | A27_vy | A27_speed | A28_vx | A28_vy | A28_speed | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
frame | |||||||||||||||||||||
1 | 1 | 0.04 | 0.08528 | 32.80184 | 33.95392 | 44.41896 | 35.04904 | 33.22684 | 32.16408 | 24.15972 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 1 | 0.08 | 0.09984 | 32.80184 | 33.95392 | 44.41896 | 35.04904 | 33.22684 | 32.16408 | 24.15972 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 1 | 0.12 | 0.11856 | 32.80184 | 33.95392 | 44.41896 | 35.04904 | 33.22684 | 32.16408 | 24.15972 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 1 | 0.16 | 0.12584 | 32.80184 | 33.92688 | 44.41556 | 35.03448 | 33.31184 | 32.18176 | 24.17672 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 | 1 | 0.20 | 0.13416 | 32.80184 | 33.90088 | 44.38292 | 35.01056 | 33.33224 | 32.18592 | 24.15904 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
145002 | 2 | 5800.08 | 9.76144 | 37.15860 | NaN | NaN | 68.47672 | 24.07336 | 71.52288 | 22.09116 | ... | 0.499132 | 0.354571 | 0.405385 | 0.538570 | 0.298571 | -0.494308 | 0.577482 | 0.0 | 0.0 | 0.0 |
145003 | 2 | 5800.12 | 9.82800 | 37.15860 | NaN | NaN | 68.50792 | 24.08356 | 71.52080 | 22.08028 | ... | 0.499132 | 0.453857 | 0.299462 | 0.543750 | 0.359857 | -0.606769 | 0.705455 | 0.0 | 0.0 | 0.0 |
145004 | 2 | 5800.16 | 9.88832 | 37.15860 | NaN | NaN | 68.54744 | 24.09308 | 71.53744 | 22.09932 | ... | 0.499132 | 0.553143 | 0.193538 | 0.586024 | 0.421143 | -0.719231 | 0.833459 | 0.0 | 0.0 | 0.0 |
145005 | 2 | 5800.20 | 9.92576 | 37.15792 | NaN | NaN | 68.58176 | 24.10464 | 71.58216 | 22.12176 | ... | 0.499132 | 0.652429 | 0.087615 | 0.658285 | 0.482429 | -0.831692 | 0.961483 | 0.0 | 0.0 | 0.0 |
145006 | 2 | 5800.24 | 9.92576 | 37.15792 | NaN | NaN | 68.64416 | 24.11008 | 71.63312 | 22.07824 | ... | 0.499132 | 0.751714 | -0.018308 | 0.751937 | 0.543714 | -0.944154 | 1.089519 | 0.0 | 0.0 | 0.0 |
145006 rows × 147 columns
- 컬럼별로 선수별 x,y 가 존재하기 때문에 선수별로 데이터를 집계하기에는 어려운 상태
(2) 가공 데이터 형태 변환¶
In [4]:
# Reshape the wide table (one group of columns per player) into a long table
# with one row per (player, frame).

# Player prefixes are taken from the '<prefix>_x' columns, skipping the ball.
players = [col[:-2] for col in traces.columns if col.endswith('_x') and not col.startswith('ball')]
player_cols = ['period', 'time', 'x', 'y', 'vx', 'vy', 'speed']
shared_cols = player_cols[:2]      # columns common to every player
value_suffixes = player_cols[2:]   # per-player columns, stored as '<prefix>_<suffix>'

trace_list = []
for p in players:
    source_cols = shared_cols + [f'{p}_{suffix}' for suffix in value_suffixes]
    player_trace = traces[source_cols].copy()
    # Drop the player prefix so every per-player frame shares the same schema.
    player_trace.columns = player_cols
    # The first character of the prefix encodes the team, the rest the number.
    player_trace['team'] = 'Home' if p[0] == 'H' else 'Away'
    player_trace['player_id'] = int(p[1:])
    trace_list.append(player_trace)

# Stack all players; the old index comes back as the 'frame' column.
long_cols = ['team', 'player_id', 'frame'] + player_cols
traces = pd.concat(trace_list).reset_index()
traces = traces[long_cols]
traces
Out[4]:
team | player_id | frame | period | time | x | y | vx | vy | speed | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Home | 11 | 1 | 1 | 0.04 | 0.08528 | 32.80184 | NaN | NaN | NaN |
1 | Home | 11 | 2 | 1 | 0.08 | 0.09984 | 32.80184 | NaN | NaN | NaN |
2 | Home | 11 | 3 | 1 | 0.12 | 0.11856 | 32.80184 | NaN | NaN | NaN |
3 | Home | 11 | 4 | 1 | 0.16 | 0.12584 | 32.80184 | NaN | NaN | NaN |
4 | Home | 11 | 5 | 1 | 0.20 | 0.13416 | 32.80184 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4060163 | Away | 28 | 145002 | 2 | 5800.08 | 81.33008 | 12.86628 | 0.0 | 0.0 | 0.0 |
4060164 | Away | 28 | 145003 | 2 | 5800.12 | 81.33008 | 12.86628 | 0.0 | 0.0 | 0.0 |
4060165 | Away | 28 | 145004 | 2 | 5800.16 | 81.33008 | 12.86628 | 0.0 | 0.0 | 0.0 |
4060166 | Away | 28 | 145005 | 2 | 5800.20 | 81.33008 | 12.86628 | 0.0 | 0.0 | 0.0 |
4060167 | Away | 28 | 145006 | 2 | 5800.24 | 81.33008 | 12.86628 | 0.0 | 0.0 | 0.0 |
4060168 rows × 10 columns
- 뛴 거리는 프레임별 이동 거리(속력 × 프레임 간격 0.04초)의 합으로 계산
In [5]:
# Distance covered within one frame = speed [m/s] * frame interval (0.04 s).
traces['distance'] = traces['speed'] * 0.04
# Convert speed from m/s to km/h.
# NOTE: this rescales the column in place, so re-running this cell without
# rebuilding `traces` first would apply the factor twice.
traces['speed'] = traces['speed'] * 3.6
# Write the reshaped table next to the input of the match selected by
# `match_id`, instead of a hard-coded "Sample_Game_1" path.
traces.to_csv(f'data_metrica/data/Sample_Game_{match_id}/Sample_Game_{match_id}_IntegratedData_Reshaped.csv')
# Peek at a slice where the player is actually moving.
traces[34700:34720]
Out[5]:
team | player_id | frame | period | time | x | y | vx | vy | speed | distance | |
---|---|---|---|---|---|---|---|---|---|---|---|
34700 | Home | 11 | 34701 | 1 | 1388.04 | 4.92752 | 33.32612 | 4.948 | -1.671231 | 18.801417 | 0.208905 |
34701 | Home | 11 | 34702 | 1 | 1388.08 | 5.14696 | 33.22820 | 5.042 | -1.805923 | 19.280385 | 0.214227 |
34702 | Home | 11 | 34703 | 1 | 1388.12 | 5.36536 | 33.13028 | 5.140 | -1.938000 | 19.775585 | 0.219729 |
34703 | Home | 11 | 34704 | 1 | 1388.16 | 5.58480 | 33.03168 | 5.238 | -2.072692 | 20.279442 | 0.225327 |
34704 | Home | 11 | 34705 | 1 | 1388.20 | 5.80528 | 32.93308 | 5.288 | -2.399615 | 20.905153 | 0.232279 |
34705 | Home | 11 | 34706 | 1 | 1388.24 | 6.02680 | 32.83380 | 5.196 | -2.828538 | 21.297597 | 0.236640 |
34706 | Home | 11 | 34707 | 1 | 1388.28 | 6.24832 | 32.73520 | 4.840 | -3.439231 | 21.375001 | 0.237500 |
34707 | Home | 11 | 34708 | 1 | 1388.32 | 6.46984 | 32.63524 | 4.554 | -3.555615 | 20.799564 | 0.231106 |
34708 | Home | 11 | 34709 | 1 | 1388.36 | 6.69240 | 32.53596 | 4.432 | -3.503308 | 20.337862 | 0.225976 |
34709 | Home | 11 | 34710 | 1 | 1388.40 | 6.91600 | 32.43600 | 4.426 | -3.240462 | 19.747589 | 0.219418 |
34710 | Home | 11 | 34711 | 1 | 1388.44 | 7.11464 | 32.23608 | 4.478 | -2.892615 | 19.191650 | 0.213241 |
34711 | Home | 11 | 34712 | 1 | 1388.48 | 7.23944 | 31.98312 | 4.554 | -2.552615 | 18.794193 | 0.208824 |
34712 | Home | 11 | 34713 | 1 | 1388.52 | 7.22696 | 31.63496 | 4.640 | -2.196923 | 18.481739 | 0.205353 |
34713 | Home | 11 | 34714 | 1 | 1388.56 | 7.29560 | 31.47720 | 4.684 | -1.873923 | 18.161793 | 0.201798 |
34714 | Home | 11 | 34715 | 1 | 1388.60 | 7.45160 | 31.40648 | 4.626 | -1.616308 | 17.640855 | 0.196009 |
34715 | Home | 11 | 34716 | 1 | 1388.64 | 7.66688 | 31.44524 | 4.470 | -1.401846 | 16.864789 | 0.187387 |
34716 | Home | 11 | 34717 | 1 | 1388.68 | 7.91336 | 31.52752 | 4.224 | -1.264538 | 15.873197 | 0.176369 |
34717 | Home | 11 | 34718 | 1 | 1388.72 | 8.17336 | 31.60572 | 4.004 | -0.928462 | 14.796857 | 0.164410 |
34718 | Home | 11 | 34719 | 1 | 1388.76 | 8.43960 | 31.69140 | 3.878 | -0.512615 | 14.082240 | 0.156469 |
34719 | Home | 11 | 34720 | 1 | 1388.80 | 8.68400 | 31.76076 | 3.982 | 0.082385 | 14.338268 | 0.159314 |
활동량 지표 집계¶
(1) 뛴 거리¶
In [6]:
# Total distance per player: sum of the per-frame distances.
distances = traces['distance'].groupby(traces['player_id']).sum()
distances
Out[6]:
player_id 1 3086.510866 2 9774.328865 3 9498.537998 4 10184.797706 5 11006.797770 6 8661.772918 7 10543.077091 8 11119.621735 9 10066.199448 10 9080.472763 11 3696.155770 12 7320.100725 13 2439.279263 14 1925.185387 15 9770.806183 16 9836.752563 17 10849.339113 18 11165.790709 19 10746.203857 20 10218.125707 21 10893.032354 22 9283.927933 23 10222.688175 24 8620.446836 25 4867.230563 26 2788.394785 27 2844.054805 28 2107.814431 Name: distance, dtype: float64
(2) 출전 시간¶
In [7]:
# Playing time per player in seconds: the number of frames that have an x
# position recorded, times 0.04 s per frame.
tracked = traces.loc[traces['x'].notna(), ['player_id', 'x']]
durations = (tracked.groupby('player_id')['x'].count() * 0.04).to_frame('duration')
durations
Out[7]:
duration | |
---|---|
player_id | |
1 | 1868.24 |
2 | 5800.24 |
3 | 5800.24 |
4 | 5800.24 |
5 | 5800.24 |
6 | 4433.96 |
7 | 5800.24 |
8 | 5800.24 |
9 | 5800.24 |
10 | 4855.68 |
11 | 5800.24 |
12 | 3932.04 |
13 | 1366.32 |
14 | 944.60 |
15 | 5800.24 |
16 | 5800.24 |
17 | 5800.24 |
18 | 5800.24 |
19 | 4775.24 |
20 | 5800.24 |
21 | 5800.24 |
22 | 4274.76 |
23 | 5800.24 |
24 | 4263.96 |
25 | 5800.24 |
26 | 1536.32 |
27 | 1525.52 |
28 | 1025.04 |
(3) 1분당 뛴 거리¶
- 선수별로 출전시간이 다르기 때문에 1분당 뛴 거리로 scaling
In [8]:
# Put playing time and total distance side by side, then normalise the
# distance to one minute of playing time (duration is in seconds).
stats = pd.concat([durations, distances], axis=1)
per_second = stats['distance'].div(stats['duration'])
stats['dist_1min'] = per_second.mul(60)
stats
Out[8]:
duration | distance | dist_1min | |
---|---|---|---|
player_id | |||
1 | 1868.24 | 3086.510866 | 99.125729 |
2 | 5800.24 | 9774.328865 | 101.109563 |
3 | 5800.24 | 9498.537998 | 98.256672 |
4 | 5800.24 | 10184.797706 | 105.355617 |
5 | 5800.24 | 11006.797770 | 113.858714 |
6 | 4433.96 | 8661.772918 | 117.210434 |
7 | 5800.24 | 10543.077091 | 109.061802 |
8 | 5800.24 | 11119.621735 | 115.025810 |
9 | 5800.24 | 10066.199448 | 104.128789 |
10 | 4855.68 | 9080.472763 | 112.204339 |
11 | 5800.24 | 3696.155770 | 38.234512 |
12 | 3932.04 | 7320.100725 | 111.699282 |
13 | 1366.32 | 2439.279263 | 107.117480 |
14 | 944.60 | 1925.185387 | 122.285754 |
15 | 5800.24 | 9770.806183 | 101.073123 |
16 | 5800.24 | 9836.752563 | 101.755299 |
17 | 5800.24 | 10849.339113 | 112.229899 |
18 | 5800.24 | 11165.790709 | 115.503400 |
19 | 4775.24 | 10746.203857 | 135.024047 |
20 | 5800.24 | 10218.125707 | 105.700375 |
21 | 5800.24 | 10893.032354 | 112.681879 |
22 | 4274.76 | 9283.927933 | 130.308058 |
23 | 5800.24 | 10222.688175 | 105.747571 |
24 | 4263.96 | 8620.446836 | 121.301985 |
25 | 5800.24 | 4867.230563 | 50.348578 |
26 | 1536.32 | 2788.394785 | 108.898984 |
27 | 1525.52 | 2844.054805 | 111.859096 |
28 | 1025.04 | 2107.814431 | 123.379445 |
(4) 속도 구간별 뛴 거리¶
In [9]:
# Speed thresholds (km/h) separating zones 1-5 (the FIFA zone boundaries,
# per the original note). The last zone is open-ended so that frames faster
# than 50 km/h are not silently left out of every zone while still counting
# towards the total distance.
speed_bins = [0, 7, 15, 20, 25, np.inf]
# Left-closed bins [lo, hi); one integer label per bin, starting at 1.
speed_cats = pd.cut(traces['speed'], speed_bins, right=False, labels=np.arange(1, len(speed_bins)))
# Sum the per-frame distance by player and zone. `observed=False` is passed
# explicitly so a zone nobody reached is handled the same way regardless of
# the pandas default for categorical groupers.
distances_by_speed = traces.pivot_table(
    'distance', index='player_id', columns=speed_cats, aggfunc='sum', observed=False
)
distances_by_speed
Out[9]:
speed | 1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|---|
player_id | |||||
1 | 1217.931550 | 1282.799118 | 455.477781 | 120.126291 | 10.176127 |
2 | 3725.625280 | 4825.400329 | 927.748290 | 212.003771 | 83.551196 |
3 | 4081.893197 | 4252.977082 | 878.452741 | 171.691357 | 113.523620 |
4 | 3687.151282 | 4680.773126 | 1067.728191 | 536.396559 | 212.748548 |
5 | 3847.243775 | 5015.489516 | 1130.576320 | 647.675724 | 365.812436 |
6 | 2766.991173 | 4328.730604 | 1307.106794 | 232.435299 | 26.509048 |
7 | 3885.758813 | 4575.688296 | 1506.997724 | 490.907722 | 83.724537 |
8 | 3898.154833 | 4464.576597 | 1522.565696 | 783.532281 | 450.792328 |
9 | 3951.717889 | 3569.714419 | 1467.116554 | 768.268454 | 309.382132 |
10 | 3910.213073 | 3267.428039 | 1094.716599 | 607.130206 | 200.984845 |
11 | 3282.563229 | 371.176295 | 33.802253 | 6.238904 | 2.375089 |
12 | 2511.083830 | 3625.267429 | 980.233156 | 192.127237 | 11.389073 |
13 | 945.750675 | 889.513095 | 279.182292 | 224.779260 | 100.053942 |
14 | 564.131943 | 775.154661 | 351.832959 | 224.012757 | 10.053068 |
15 | 4132.798862 | 4206.381171 | 935.262943 | 336.529421 | 159.833786 |
16 | 4024.257560 | 4247.959630 | 1016.137160 | 391.781403 | 156.616809 |
17 | 4147.272702 | 4247.127041 | 1774.457437 | 546.592458 | 133.889475 |
18 | 3526.730228 | 5295.385219 | 1564.463895 | 535.277400 | 243.933967 |
19 | 3677.541130 | 4506.565476 | 2028.468939 | 453.380768 | 80.247543 |
20 | 3395.837437 | 4754.236541 | 1340.282890 | 567.465522 | 160.303316 |
21 | 3499.442314 | 4977.847699 | 1769.059185 | 501.451468 | 145.231689 |
22 | 4505.327415 | 3048.550862 | 1047.319116 | 521.782919 | 160.947621 |
23 | 3963.602878 | 4291.647139 | 1110.694186 | 440.797862 | 415.946109 |
24 | 3280.351892 | 3463.719445 | 1309.653142 | 445.626703 | 121.095654 |
25 | 3852.077039 | 890.742902 | 108.892115 | 15.518508 | 0.000000 |
26 | 999.136621 | 1233.878420 | 374.667924 | 167.182760 | 13.529060 |
27 | 930.092231 | 1222.834657 | 472.294892 | 173.719479 | 45.113546 |
28 | 620.928677 | 851.555307 | 361.364594 | 231.876492 | 42.089360 |
(5) 추가 지표 산출 및 연결¶
In [10]:
# Rename the zone columns to 'zone<k>_dist' and attach them to the summary.
distances_by_speed.columns = ['zone' + str(zone) + '_dist' for zone in distances_by_speed.columns]
stats = pd.concat([stats, distances_by_speed], axis=1)

# Remaining per-player aggregates, each taken over all of the player's rows.
grouped = traces.groupby('player_id')
for out_col, (src_col, how) in {
    'max_speed': ('speed', 'max'),
    'mean_x': ('x', 'mean'),
    'mean_y': ('y', 'mean'),
    'team': ('team', 'first'),
}.items():
    stats[out_col] = grouped[src_col].agg(how)

# Index by (team, player_id) and round the numeric columns to two decimals.
stats = stats.reset_index()
stats = stats.set_index(['team', 'player_id'])
stats = stats.round(2)
stats
Out[10]:
duration | distance | dist_1min | zone1_dist | zone2_dist | zone3_dist | zone4_dist | zone5_dist | max_speed | mean_x | mean_y | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
team | player_id | |||||||||||
Home | 1 | 1868.24 | 3086.51 | 99.13 | 1217.93 | 1282.80 | 455.48 | 120.13 | 10.18 | 26.04 | 45.94 | 50.91 |
2 | 5800.24 | 9774.33 | 101.11 | 3725.63 | 4825.40 | 927.75 | 212.00 | 83.55 | 32.98 | 38.05 | 41.72 | |
3 | 5800.24 | 9498.54 | 98.26 | 4081.89 | 4252.98 | 878.45 | 171.69 | 113.52 | 32.59 | 38.08 | 30.83 | |
4 | 5800.24 | 10184.80 | 105.36 | 3687.15 | 4680.77 | 1067.73 | 536.40 | 212.75 | 32.42 | 40.74 | 18.66 | |
5 | 5800.24 | 11006.80 | 113.86 | 3847.24 | 5015.49 | 1130.58 | 647.68 | 365.81 | 32.54 | 53.73 | 46.08 | |
6 | 4433.96 | 8661.77 | 117.21 | 2766.99 | 4328.73 | 1307.11 | 232.44 | 26.51 | 26.15 | 51.25 | 35.93 | |
7 | 5800.24 | 10543.08 | 109.06 | 3885.76 | 4575.69 | 1507.00 | 490.91 | 83.72 | 28.30 | 44.25 | 46.68 | |
8 | 5800.24 | 11119.62 | 115.03 | 3898.15 | 4464.58 | 1522.57 | 783.53 | 450.79 | 31.98 | 57.90 | 16.37 | |
9 | 5800.24 | 10066.20 | 104.13 | 3951.72 | 3569.71 | 1467.12 | 768.27 | 309.38 | 30.26 | 63.93 | 33.78 | |
10 | 4855.68 | 9080.47 | 112.20 | 3910.21 | 3267.43 | 1094.72 | 607.13 | 200.98 | 32.68 | 61.60 | 34.41 | |
11 | 5800.24 | 3696.16 | 38.23 | 3282.56 | 371.18 | 33.80 | 6.24 | 2.38 | 27.73 | 11.37 | 33.99 | |
12 | 3932.04 | 7320.10 | 111.70 | 2511.08 | 3625.27 | 980.23 | 192.13 | 11.39 | 26.32 | 48.02 | 33.81 | |
13 | 1366.32 | 2439.28 | 107.12 | 945.75 | 889.51 | 279.18 | 224.78 | 100.05 | 32.03 | 60.26 | 35.99 | |
14 | 944.60 | 1925.19 | 122.29 | 564.13 | 775.15 | 351.83 | 224.01 | 10.05 | 25.24 | 52.88 | 49.74 | |
Away | 15 | 5800.24 | 9770.81 | 101.07 | 4132.80 | 4206.38 | 935.26 | 336.53 | 159.83 | 30.99 | 66.68 | 24.15 |
16 | 5800.24 | 9836.75 | 101.76 | 4024.26 | 4247.96 | 1016.14 | 391.78 | 156.62 | 30.39 | 67.90 | 35.27 | |
17 | 5800.24 | 10849.34 | 112.23 | 4147.27 | 4247.13 | 1774.46 | 546.59 | 133.89 | 33.12 | 61.57 | 28.83 | |
18 | 5800.24 | 11165.79 | 115.50 | 3526.73 | 5295.39 | 1564.46 | 535.28 | 243.93 | 33.85 | 51.65 | 26.32 | |
19 | 4775.24 | 10746.20 | 135.02 | 3677.54 | 4506.57 | 2028.47 | 453.38 | 80.25 | 28.41 | 53.91 | 30.44 | |
20 | 5800.24 | 10218.13 | 105.70 | 3395.84 | 4754.24 | 1340.28 | 567.47 | 160.30 | 29.35 | 63.26 | 41.33 | |
21 | 5800.24 | 10893.03 | 112.68 | 3499.44 | 4977.85 | 1769.06 | 501.45 | 145.23 | 29.54 | 55.78 | 38.46 | |
22 | 4274.76 | 9283.93 | 130.31 | 4505.33 | 3048.55 | 1047.32 | 521.78 | 160.95 | 29.16 | 59.80 | 53.61 | |
23 | 5800.24 | 10222.69 | 105.75 | 3963.60 | 4291.65 | 1110.69 | 440.80 | 415.95 | 34.19 | 41.80 | 37.31 | |
24 | 4263.96 | 8620.45 | 121.30 | 3280.35 | 3463.72 | 1309.65 | 445.63 | 121.10 | 30.92 | 43.60 | 34.88 | |
25 | 5800.24 | 4867.23 | 50.35 | 3852.08 | 890.74 | 108.89 | 15.52 | 0.00 | 23.16 | 92.38 | 35.22 | |
26 | 1536.32 | 2788.39 | 108.90 | 999.14 | 1233.88 | 374.67 | 167.18 | 13.53 | 26.29 | 38.14 | 33.89 | |
27 | 1525.52 | 2844.05 | 111.86 | 930.09 | 1222.83 | 472.29 | 173.72 | 45.11 | 28.16 | 49.09 | 51.68 | |
28 | 1025.04 | 2107.81 | 123.38 | 620.93 | 851.56 | 361.36 | 231.88 | 42.09 | 26.00 | 50.40 | 36.72 |
활동량 지표 시각화¶
(1) 선수별 지표값 막대그래프 시각화¶
In [11]:
# Column of `stats` to compare across players.
col_name = 'distance'

plt.figure(figsize=(15, 8))
plt.rcParams.update({'font.size': 15})

# One bar series per team so each side gets its own colour and legend entry.
team_colors = {'Home': 'r', 'Away': 'b'}
for team, color in team_colors.items():
    team_stats = stats.loc[team]
    plt.bar(team_stats.index, team_stats[col_name], color=color, label=team)

plt.grid(axis='y', color='k', linestyle='--')
plt.legend()
# Put a tick on every player id.
plt.xticks(stats.index.get_level_values('player_id'))
plt.xlabel('player_id')
plt.ylabel(col_name)
plt.show()
(2) 속도 구간별 뛴 거리 막대그래프 시각화¶
In [12]:
# Look up the 'jet' colormap; as the last expression of the cell it is what
# the notebook displays.
plt.get_cmap('jet')
Out[12]:
jet
under
bad
over
In [13]:
plt.figure(figsize=(15, 8))
plt.title('Distance by Speed Zone')

player_ids = stats.reset_index()['player_id']
n_zones = len(distances_by_speed.columns)
# One colour per zone, sampled from 'jet' between 0.9 and 0.1.
colors = plt.cm.jet(np.linspace(0.9, 0.1, n_zones))

# Stack the zones from the last column (highest zone) upwards; `bottom`
# carries the running height of the bars already drawn.
bottom = 0
for i, zone_dist in enumerate(distances_by_speed.columns[::-1]):
    # Number the label from the actual zone count rather than a literal 5,
    # so it stays correct if the number of speed bins changes.
    plt.bar(player_ids, stats[zone_dist], bottom=bottom, color=colors[i], label=f'Zone {n_zones - i}')
    if i < n_zones - 1:
        bottom = bottom + stats[zone_dist]

plt.grid(axis='y', color='k', linestyle='--')
# Separator between the two teams, placed just after the largest Home id
# (assumes Home ids are all smaller than Away ids, as on this x axis).
plt.axvline(stats.loc['Home'].index.max() + 0.5, color='k', linestyle='--')
plt.xticks(stats.reset_index()['player_id'])
plt.ylim(0, 12000)
plt.xlabel('player_id')
plt.ylabel('distance')

# Reverse the legend entries so they read Zone 1 ... Zone n.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1])
plt.show()
(3) 선수별 평균 위치 및 지표값 산점도 시각화¶
In [14]:
# Column of `stats` that drives the marker size.
col_name = 'dist_1min'
values_sorted = stats[col_name].sort_values()

# Marker-size range (scatter `s`) for the linear scaling below.
min_size = 600
max_size = 1200
# Reference values: the 9th-smallest metric maps to min_size, the largest to
# max_size.
min_values = values_sorted.iloc[8]
max_values = values_sorted.iloc[-1]
relative = (stats[col_name] - min_values) / (max_values - min_values)
sizes = min_size + relative * (max_size - min_size)

draw_pitch('white', 'black', size_x=15, size_y=10)

team_colors = {'Home': 'r', 'Away': 'b'}
for team, color in team_colors.items():
    team_stats = stats.loc[team]
    x = team_stats['mean_x']
    y = team_stats['mean_y']
    # NOTE(review): a value far enough below the reference yields a negative
    # size, which is clipped to 0 here, i.e. that player gets no visible
    # marker behind the white label — confirm this is intended.
    plt.scatter(x, y, c=color, s=sizes[team].clip(0))
    # Write each player's id at the player's mean position.
    for p, px, py in zip(team_stats.index, x, y):
        plt.text(px, py, p, color='w', ha='center', va='center', fontsize=15)

plt.show()