작성일자 : 2023-12-23
Ver 0.1.1
패스 네트워크
In [1]:
# 필요 패키지 불러오기
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import matplotlib.pyplot as plt
# from src.plot_utils import draw_pitch
pd.set_option('display.max_rows', 30)
In [2]:
# 현대 디렉토리 확인
current_dir = os.getcwd()
current_dir
Out[2]:
'/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics/Excercise'
In [3]:
# Data가 있는 디렉토리로 변경
new_dir = '/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics'
os.chdir(new_dir)
print(current_dir)
/Users/limjongjun/Desktop/JayJay/Growth/Python/soccer-analytics/Excercise
In [4]:
from src.plot_utils import draw_pitch
경기 데이터 불러오기¶
- PL 18 Round Manchester City vs Tottenham
In [5]:
# Manshester City VS Tottenham Hotspur 경기 불러오기
match_id = 2499895
match_events = pd.read_pickle(f'data/refined_events/England/2499895.pkl') #pkl 파일을 Dataframe으로 불러오기
In [6]:
match_events.head()
Out[6]:
match_id | event_id | period | time | team_id | team_name | player_id | player_name | event_type | sub_event_type | tags | start_x | start_y | end_x | end_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2499895 | 215108367 | 1H | 1.784 | 1625 | Manchester City | 8325 | S. Agüero | Pass | Simple pass | [Accurate] | 52.00 | 34.68 | 40.56 | 34.68 |
1 | 2499895 | 215108368 | 1H | 3.324 | 1625 | Manchester City | 105339 | Fernandinho | Pass | Simple pass | [Accurate] | 40.56 | 34.68 | 29.12 | 8.16 |
2 | 2499895 | 215108369 | 1H | 6.406 | 1625 | Manchester City | 8277 | K. Walker | Pass | Simple pass | [Accurate] | 29.12 | 8.16 | 44.72 | 10.20 |
3 | 2499895 | 215108370 | 1H | 7.124 | 1625 | Manchester City | 38021 | K. De Bruyne | Pass | Simple pass | [Accurate] | 44.72 | 10.20 | 69.68 | 6.12 |
4 | 2499895 | 215108371 | 1H | 8.676 | 1625 | Manchester City | 11066 | R. Sterling | Pass | Simple pass | [Accurate] | 69.68 | 6.12 | 58.24 | 12.24 |
선수별 노드 시각화¶
(1) 선수별 패스 집계
In [7]:
passes = match_events[match_events['event_type'] == 'Pass']
pass_summary = passes.groupby(['team_name', 'player_name'])[['start_x', 'start_y']].mean()
pass_summary['total_count'] = passes.groupby(['team_name', 'player_name'])['event_id'].count()
pass_summary
Out[7]:
start_x | start_y | total_count | ||
---|---|---|---|---|
team_name | player_name | |||
Manchester City | Bernardo Silva | 66.768000 | 16.320000 | 5 |
E. Mangala | 31.675429 | 47.833143 | 35 | |
Ederson | 10.511429 | 36.817143 | 28 | |
F. Delph | 51.114074 | 58.039259 | 54 | |
Fernandinho | 45.003636 | 35.050909 | 33 | |
Gabriel Jesus | 63.310000 | 24.990000 | 8 | |
K. De Bruyne | 60.019556 | 27.426667 | 45 | |
K. Walker | 45.963922 | 10.800000 | 51 | |
L. Sané | 68.675862 | 57.846897 | 29 | |
N. Otamendi | 30.346667 | 23.852308 | 39 | |
P. Foden | 62.660000 | 52.445000 | 8 | |
R. Sterling | 67.897143 | 17.291429 | 35 | |
S. Agüero | 69.120000 | 37.870769 | 13 | |
İ. Gündoğan | 60.613333 | 41.113846 | 39 | |
Tottenham Hotspur | C. Eriksen | 59.938667 | 39.734667 | 30 |
D. Alli | 59.150000 | 37.173333 | 24 | |
D. Rose | 48.204000 | 60.656000 | 40 | |
E. Dier | 27.476800 | 20.672000 | 50 | |
E. Lamela | 66.560000 | 31.620000 | 4 | |
H. Kane | 71.066667 | 31.597333 | 15 | |
H. Lloris | 9.460645 | 35.425806 | 31 | |
H. Winks | 51.447500 | 38.080000 | 32 | |
J. Vertonghen | 26.961509 | 50.743396 | 53 | |
K. Trippier | 49.589091 | 11.822727 | 44 | |
M. Dembélé | 47.027500 | 35.785000 | 32 | |
M. Sissoko | 48.880000 | 12.240000 | 2 | |
Son Heung-Min | 71.209412 | 23.600000 | 17 |
(2) 경기 전체 선수별 노드 시각화
In [8]:
team_name = 'Manchester City'
nodes = pass_summary.loc[team_name].reset_index()
nodes
Out[8]:
player_name | start_x | start_y | total_count | |
---|---|---|---|---|
0 | Bernardo Silva | 66.768000 | 16.320000 | 5 |
1 | E. Mangala | 31.675429 | 47.833143 | 35 |
2 | Ederson | 10.511429 | 36.817143 | 28 |
3 | F. Delph | 51.114074 | 58.039259 | 54 |
4 | Fernandinho | 45.003636 | 35.050909 | 33 |
5 | Gabriel Jesus | 63.310000 | 24.990000 | 8 |
6 | K. De Bruyne | 60.019556 | 27.426667 | 45 |
7 | K. Walker | 45.963922 | 10.800000 | 51 |
8 | L. Sané | 68.675862 | 57.846897 | 29 |
9 | N. Otamendi | 30.346667 | 23.852308 | 39 |
10 | P. Foden | 62.660000 | 52.445000 | 8 |
11 | R. Sterling | 67.897143 | 17.291429 | 35 |
12 | S. Agüero | 69.120000 | 37.870769 | 13 |
13 | İ. Gündoğan | 60.613333 | 41.113846 | 39 |
In [9]:
draw_pitch('white', 'black', size_x=18, size_y=12)
x = nodes['start_x']
y = nodes['start_y']
s = nodes['total_count'] * 50
plt.scatter(x, y, s=s, c='skyblue', edgecolors='black')
for i, node in nodes.iterrows():
plt.annotate(node['player_name'], xy=(node['start_x'], node['start_y']), c='k', fontsize=15)
- 위 코드는 교체 선수까지 모두 포함한(= 출전 시간을 고려하지 않고) 지표이므로 객관적인 분석 어려움
(3) 동시 출전 선수 추출을 위한 phase 구분
In [10]:
team_events = match_events[match_events['team_name'] == team_name] # Manchester City 선수들로만 필터링
player_change_records = team_events[
(team_events['event_type'] == 'Substitution') |
(team_events['tags'].apply(lambda x: 'Red card' in x))
]
in_players = player_change_records[player_change_records['sub_event_type'] == 'Player in']['player_id'].tolist()
player_ids = [p for p in team_events['player_id'].unique() if not p in in_players]
period_durations = match_events.groupby('period')['time'].max()
phase_record_list = []
phase = 1
for period in period_durations.index:
change_times = player_change_records[player_change_records['period'] == period]['time'].unique().tolist()
change_times.append(period_durations[period])
if 0 not in change_times:
change_times = [0] + change_times
for i in range(len(change_times[:-1])):
moment_records = player_change_records[
(player_change_records['period'] == period) &
(player_change_records['time'] == change_times[i])
]
for _, record in moment_records.iterrows():
if record['sub_event_type'] == 'Player out' or record['event_type'] == 'Foul':
player_ids.remove(record['player_id'])
else:
player_ids.append(record['player_id'])
phase_record = {
'phase': phase,
'period': period,
'start_time': change_times[i],
'end_time': change_times[i+1],
'duration': change_times[i+1] - change_times[i],
'player_ids': player_ids.copy()
}
phase += 1
phase_record_list.append(phase_record)
phase_records = pd.DataFrame(phase_record_list).set_index('phase')
phase_records
Out[10]:
period | start_time | end_time | duration | player_ids | |
---|---|---|---|---|---|
phase | |||||
1 | 1H | 0.0 | 2817.335 | 2817.335 | [8325, 105339, 8277, 38021, 11066, 70086, 8464... |
2 | 2H | 0.0 | 660.000 | 660.000 | [8325, 105339, 8277, 38021, 11066, 70086, 8464... |
3 | 2H | 660.0 | 2220.000 | 1560.000 | [105339, 8277, 38021, 11066, 70086, 8464, 7165... |
4 | 2H | 2220.0 | 2340.000 | 120.000 | [105339, 8277, 38021, 11066, 70086, 8464, 7165... |
5 | 2H | 2340.0 | 2878.216 | 538.216 | [105339, 8277, 38021, 11066, 70086, 8464, 7165... |
(4) 특정 phase 내 선수별 패스 집계
In [11]:
phase = 1
period = phase_records.at[phase, 'period']
start_time = phase_records.at[phase, 'start_time']
end_time = phase_records.at[phase, 'end_time']
team_phase_events = team_events[
(team_events['period'] == period) &
(team_events['time'] >= start_time) &
(team_events['time'] <= end_time)
].reset_index(drop=True)
passes = team_phase_events[team_phase_events['event_type'] == 'Pass']
nodes = passes.groupby('player_name')[['start_x', 'start_y']].mean()
nodes['total_count'] = passes.groupby('player_name')['event_id'].count()
nodes
Out[11]:
start_x | start_y | total_count | |
---|---|---|---|
player_name | |||
E. Mangala | 35.310476 | 50.125714 | 21 |
Ederson | 10.583529 | 38.480000 | 17 |
F. Delph | 57.315556 | 58.177778 | 36 |
Fernandinho | 46.861176 | 36.840000 | 17 |
K. De Bruyne | 56.160000 | 29.604286 | 28 |
K. Walker | 41.779310 | 11.864828 | 29 |
L. Sané | 69.472000 | 60.248000 | 20 |
N. Otamendi | 30.963636 | 26.890909 | 22 |
R. Sterling | 70.026667 | 15.957333 | 15 |
S. Agüero | 65.614545 | 40.429091 | 11 |
İ. Gündoğan | 61.651200 | 42.486400 | 25 |
(5) 특정 phase 선수별 노드 시각화
In [12]:
draw_pitch('white', 'black', size_x=18, size_y=12)
x = nodes['start_x']
y = nodes['start_y']
s = nodes['total_count'] * 50
plt.scatter(x, y, s=s, c='skyblue', edgecolors='black')
for player_name, node in nodes.iterrows():
plt.annotate(player_name, xy=(node['start_x'], node['start_y']), c='k', fontsize=15)
선수간 간선 시각화¶
(1) 주는 선수와 받는 선수 구분이 있는 선수간 패스 횟수 산출
- 선수간 패스 횟수를 defaultdict에 저장
In [13]:
pass_count_dict = defaultdict(int)
pass_count_dict
Out[13]:
defaultdict(int, {})
In [14]:
pass_count_dict['A'] = 1
pass_count_dict['A']
Out[14]:
1
In [15]:
pass_count_dict['B']
Out[15]:
0
In [16]:
pass_count_dict = defaultdict(int)
for i in team_phase_events.index[:-2]:
event = team_phase_events.loc[i]
next_event = team_phase_events.loc[i+1]
if event['event_type'] == 'Pass' and 'Accurate' in event['tags']:
sender = event['player_name']
receiver = next_event['player_name']
pass_count_dict[(sender, receiver)] += 1
pass_count_dict
Out[16]:
defaultdict(int, {('S. Agüero', 'Fernandinho'): 2, ('Fernandinho', 'K. Walker'): 3, ('K. Walker', 'K. De Bruyne'): 6, ('K. De Bruyne', 'R. Sterling'): 3, ('R. Sterling', 'K. De Bruyne'): 1, ('N. Otamendi', 'K. Walker'): 4, ('K. Walker', 'S. Agüero'): 2, ('R. Sterling', 'K. Walker'): 3, ('K. Walker', 'Fernandinho'): 4, ('Fernandinho', 'S. Agüero'): 1, ('L. Sané', 'F. Delph'): 7, ('F. Delph', 'L. Sané'): 9, ('L. Sané', 'E. Mangala'): 2, ('E. Mangala', 'İ. Gündoğan'): 1, ('K. De Bruyne', 'S. Agüero'): 5, ('S. Agüero', 'K. De Bruyne'): 3, ('Fernandinho', 'N. Otamendi'): 1, ('N. Otamendi', 'K. De Bruyne'): 4, ('İ. Gündoğan', 'K. Walker'): 2, ('K. Walker', nan): 1, ('R. Sterling', 'Fernandinho'): 2, ('Fernandinho', 'E. Mangala'): 3, ('E. Mangala', 'S. Agüero'): 2, ('İ. Gündoğan', 'F. Delph'): 9, ('F. Delph', 'E. Mangala'): 5, ('E. Mangala', 'F. Delph'): 4, ('F. Delph', 'S. Agüero'): 1, ('Ederson', 'E. Mangala'): 2, ('E. Mangala', 'Ederson'): 6, ('Ederson', 'K. Walker'): 5, ('K. De Bruyne', nan): 1, ('K. Walker', 'İ. Gündoğan'): 2, ('N. Otamendi', 'İ. Gündoğan'): 1, ('İ. Gündoğan', 'L. Sané'): 3, ('F. Delph', 'İ. Gündoğan'): 8, ('İ. Gündoğan', 'Fernandinho'): 2, ('Fernandinho', 'İ. Gündoğan'): 2, ('K. De Bruyne', 'Fernandinho'): 1, ('K. Walker', 'R. Sterling'): 4, ('İ. Gündoğan', 'N. Otamendi'): 1, ('Ederson', 'F. Delph'): 3, ('F. Delph', 'K. De Bruyne'): 3, ('K. De Bruyne', 'L. Sané'): 4, ('L. Sané', 'S. Agüero'): 3, ('S. Agüero', 'F. Delph'): 1, ('F. Delph', 'Fernandinho'): 3, ('Fernandinho', 'R. Sterling'): 2, ('R. Sterling', 'İ. Gündoğan'): 1, ('İ. Gündoğan', 'R. Sterling'): 1, ('K. De Bruyne', 'F. Delph'): 2, ('L. Sané', 'İ. Gündoğan'): 2, ('N. Otamendi', 'L. Sané'): 1, ('E. Mangala', 'N. Otamendi'): 2, ('R. Sterling', 'L. Sané'): 2, ('Fernandinho', 'F. Delph'): 3, ('İ. Gündoğan', 'E. Mangala'): 2, ('Ederson', 'Fernandinho'): 1, ('N. Otamendi', 'Fernandinho'): 2, ('Fernandinho', 'L. Sané'): 1, ('N. Otamendi', 'Ederson'): 3, ('L. Sané', 'K. De Bruyne'): 2, ('K. De Bruyne', 'E. Mangala'): 1, ('K. De Bruyne', 'K. Walker'): 1, ('Ederson', 'K. De Bruyne'): 2, ('S. Agüero', 'İ. Gündoğan'): 1, ('İ. Gündoğan', 'S. Agüero'): 1, ('S. Agüero', 'L. Sané'): 1, ('K. De Bruyne', 'N. Otamendi'): 1, ('F. Delph', 'Ederson'): 2, ('İ. Gündoğan', 'K. De Bruyne'): 1, ('K. De Bruyne', 'İ. Gündoğan'): 2, ('E. Mangala', nan): 1, ('R. Sterling', 'N. Otamendi'): 2, ('N. Otamendi', 'F. Delph'): 2, ('K. Walker', nan): 1, ('R. Sterling', nan): 1, ('R. Sterling', 'S. Agüero'): 1, ('N. Otamendi', 'E. Mangala'): 1, ('K. Walker', 'Ederson'): 1, ('S. Agüero', 'R. Sterling'): 1, ('Ederson', 'S. Agüero'): 1, ('S. Agüero', nan): 1, ('K. Walker', 'N. Otamendi'): 2, ('N. Otamendi', 'R. Sterling'): 1, ('K. Walker', 'K. Walker'): 1, ('E. Mangala', nan): 1, ('K. Walker', 'E. Mangala'): 1, ('Ederson', 'N. Otamendi'): 1, ('K. De Bruyne', nan): 1})
- defaultdict 내부 정보를 DataFrame에 저장
In [17]:
pass_count_list = []
for pair, count in pass_count_dict.items():
pass_count_list.append([pair[0], pair[1], count])
pass_count_df = pd.DataFrame(pass_count_list, columns=['sender', 'receiver', 'count'])
pass_count_df
Out[17]:
sender | receiver | count | |
---|---|---|---|
0 | S. Agüero | Fernandinho | 2 |
1 | Fernandinho | K. Walker | 3 |
2 | K. Walker | K. De Bruyne | 6 |
3 | K. De Bruyne | R. Sterling | 3 |
4 | R. Sterling | K. De Bruyne | 1 |
... | ... | ... | ... |
84 | K. Walker | K. Walker | 1 |
85 | E. Mangala | NaN | 1 |
86 | K. Walker | E. Mangala | 1 |
87 | Ederson | N. Otamendi | 1 |
88 | K. De Bruyne | NaN | 1 |
89 rows × 3 columns
(2) 주는 선수와 받는 선수 구분이 없는 선수간 패스 횟수 산출
In [18]:
# 강의 코드
# pass_count_dict = defaultdict(int)
# for i in team_phase_events.index[:-2]:
# event = team_phase_events.loc[i]
# next_event = team_phase_events.loc[i+1]
# if event['event_type'] == 'Pass' and 'Accurate' in event['tags']:
# player_pair = [event['player_name'], next_event['player_name']]
# player1 = min(player_pair)
# player2 = max(player_pair)
# pass_count_dict[(player1, player2)] += 1
# pass_count_list = []
# for pair, count in pass_count_dict.items():
# pass_count_list.append([pair[0], pair[1], count])
# edges = pd.DataFrame(pass_count_list, columns=['player1', 'player2', 'count'])
# edges
# 수정 코드
## player_name 열의 모든 값을 문자열로 변환
team_phase_events['player_name'] = team_phase_events['player_name'].astype(str)
for i in team_phase_events.index[:-2]:
event = team_phase_events.loc[i]
next_event = team_phase_events.loc[i+1]
if event['event_type'] == 'Pass' and 'Accurate' in event['tags']:
player_pair = [event['player_name'], next_event['player_name']]
player_pair.sort() # 선수 이름을 정렬하여 순서를 통일
pass_count_dict[tuple(player_pair)] += 1
pass_count_list = []
for pair, count in pass_count_dict.items():
pass_count_list.append([pair[0], pair[1], count])
edges = pd.DataFrame(pass_count_list, columns=['player1', 'player2', 'count'])
(3) 패스 횟수 데이터에 평균 패스 위치 병합하기
- Player 1 평균 패스 위치 병합
In [19]:
edges = pd.merge(edges, nodes[['start_x', 'start_y']], left_on='player1', right_index=True)
edges
Out[19]:
player1 | player2 | count | start_x | start_y | |
---|---|---|---|---|---|
0 | S. Agüero | Fernandinho | 2 | 65.614545 | 40.429091 |
15 | S. Agüero | K. De Bruyne | 3 | 65.614545 | 40.429091 |
44 | S. Agüero | F. Delph | 1 | 65.614545 | 40.429091 |
64 | S. Agüero | İ. Gündoğan | 3 | 65.614545 | 40.429091 |
66 | S. Agüero | L. Sané | 1 | 65.614545 | 40.429091 |
... | ... | ... | ... | ... | ... |
40 | Ederson | F. Delph | 8 | 10.583529 | 38.480000 |
56 | Ederson | Fernandinho | 2 | 10.583529 | 38.480000 |
63 | Ederson | K. De Bruyne | 4 | 10.583529 | 38.480000 |
80 | Ederson | S. Agüero | 2 | 10.583529 | 38.480000 |
87 | Ederson | N. Otamendi | 5 | 10.583529 | 38.480000 |
102 rows × 5 columns
- Player 2 평균 패스 위치 병합
In [20]:
edges = edges.rename(columns={'start_x': 'player1_x', 'start_y': 'player1_y'})
edges = pd.merge(edges, nodes[['start_x', 'start_y']], left_on='player2', right_index=True)
edges
Out[20]:
player1 | player2 | count | player1_x | player1_y | start_x | start_y | |
---|---|---|---|---|---|---|---|
0 | S. Agüero | Fernandinho | 2 | 65.614545 | 40.429091 | 46.861176 | 36.840000 |
8 | K. Walker | Fernandinho | 4 | 41.779310 | 11.864828 | 46.861176 | 36.840000 |
37 | K. De Bruyne | Fernandinho | 1 | 56.160000 | 29.604286 | 46.861176 | 36.840000 |
20 | R. Sterling | Fernandinho | 2 | 70.026667 | 15.957333 | 46.861176 | 36.840000 |
57 | N. Otamendi | Fernandinho | 2 | 30.963636 | 26.890909 | 46.861176 | 36.840000 |
... | ... | ... | ... | ... | ... | ... | ... |
27 | Ederson | E. Mangala | 2 | 10.583529 | 38.480000 | 35.310476 | 50.125714 |
78 | K. Walker | Ederson | 1 | 41.779310 | 11.864828 | 10.583529 | 38.480000 |
59 | N. Otamendi | Ederson | 3 | 30.963636 | 26.890909 | 10.583529 | 38.480000 |
68 | F. Delph | Ederson | 2 | 57.315556 | 58.177778 | 10.583529 | 38.480000 |
28 | E. Mangala | Ederson | 14 | 35.310476 | 50.125714 | 10.583529 | 38.480000 |
89 rows × 7 columns
In [21]:
edges = edges.rename(columns={'start_x': 'player2_x', 'start_y': 'player2_y'}).reset_index(drop=True)
edges
Out[21]:
player1 | player2 | count | player1_x | player1_y | player2_x | player2_y | |
---|---|---|---|---|---|---|---|
0 | S. Agüero | Fernandinho | 2 | 65.614545 | 40.429091 | 46.861176 | 36.840000 |
1 | K. Walker | Fernandinho | 4 | 41.779310 | 11.864828 | 46.861176 | 36.840000 |
2 | K. De Bruyne | Fernandinho | 1 | 56.160000 | 29.604286 | 46.861176 | 36.840000 |
3 | R. Sterling | Fernandinho | 2 | 70.026667 | 15.957333 | 46.861176 | 36.840000 |
4 | N. Otamendi | Fernandinho | 2 | 30.963636 | 26.890909 | 46.861176 | 36.840000 |
... | ... | ... | ... | ... | ... | ... | ... |
84 | Ederson | E. Mangala | 2 | 10.583529 | 38.480000 | 35.310476 | 50.125714 |
85 | K. Walker | Ederson | 1 | 41.779310 | 11.864828 | 10.583529 | 38.480000 |
86 | N. Otamendi | Ederson | 3 | 30.963636 | 26.890909 | 10.583529 | 38.480000 |
87 | F. Delph | Ederson | 2 | 57.315556 | 58.177778 | 10.583529 | 38.480000 |
88 | E. Mangala | Ederson | 14 | 35.310476 | 50.125714 | 10.583529 | 38.480000 |
89 rows × 7 columns
(4) 선수간 간선(edge) 시각화
In [22]:
draw_pitch('white', 'black', size_x=18, size_y=12)
x = nodes['start_x']
y = nodes['start_y']
s = nodes['total_count'] * 100
plt.scatter(x, y, s=s, c='skyblue', edgecolors='black', zorder=2)
for player_name, node in nodes.iterrows():
plt.annotate(player_name, xy=(node['start_x'], node['start_y']), c='k', fontsize=15, zorder=3)
max_count = edges['count'].max()
for i, edge in edges.iterrows():
alpha = edge['count'] / max_count * 0.7 + 0.3
plt.plot(
edge[['player1_x', 'player2_x']], edge[['player1_y', 'player2_y']],
lw=edge['count'] * 2, color='grey', alpha=alpha, zorder=1
)
# plt.savefig('img/pass_network.png', bbox_inches='tight')
plt.show()