시각화 - wifi 데이터
포스트
취소

시각화 - wifi 데이터

본 글은 Google Colab 기준으로 작성되었습니다.


1
2
3
4
5
6
# Korean fonts for Colab
# Reference: https://velog.io/@heiswicked/%EC%84%B8%ED%8C%85%EC%9D%B4-%EB%B0%98%EC%9D%B4%EB%8B%A4-Google-COLAB-%ED%95%9C%EA%B8%80%ED%8F%B0%ED%8A%B8%ED%8E%B8

# Install the Nanum font package non-interactively (-y).
!sudo apt-get install -y fonts-nanum
# Rebuild the system font cache (-f: force, -v: verbose).
!sudo fc-cache -fv
# Remove matplotlib's cache directory; presumably so it re-scans fonts and
# picks up the newly installed ones — confirm whether a runtime restart is
# also needed.
!rm ~/.cache/matplotlib -rf
1
2
3
4
5
6
7
8
9
10
11
12
13
import os
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns


# Render inline figures in the 'retina' format.
%config InlineBackend.figure_format = 'retina' 
# Make NanumBarunGothic the default matplotlib font family so that the Korean
# labels used in the plots below have a font to render with.
plt.rc('font', family='NanumBarunGothic')

Data 소개

사용된 데이터는 서울 열린데이터광장의 서울시 공공와이파이 위치정보 데이터입니다.

※ 원본 데이터에는 좌표가 잘못 입력된 오류가 있습니다. 이후 과정은 이 오류를 미리 수정한 데이터를 이용하여 진행하였습니다.

1
2
# Load the corrected public Wi-Fi location table and display it.
wifi_csv_path = "spatial/서울시 공공와이파이 위치정보(수정).csv"
df = pd.read_csv(wifi_csv_path)
df

image

구별 공공 Wi-fi 현황

Before

1
2
3
4
5
6
7
8
9
# Number of non-null `id` values per district (`gu`), kept as a flat frame
# with columns `gu` and `id`.
tmp = df.groupby(['gu'], as_index=False)['id'].count()

# Palette reused by the later charts.
google_colors = ['#4285F4', '#EA4335', '#FBBC05', '#34A853']

# Baseline chart: a plain horizontal bar per district, no styling.
fig, ax = plt.subplots(figsize=(16, 8))
bar_color = google_colors[0]
ax.barh(y=tmp['gu'], width=tmp['id'], color=bar_color)

plt.show()

image

After

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Polished version of the per-district bar chart: districts are split into a
# "Low" and a "High" group around `threshold`, sorted, colour-coded, annotated
# with their counts, and given a legend and title.
#
# Reads `tmp` (columns `gu`, `id`) and `google_colors` from the previous cell.

# Districts with at most this many records are drawn as the "Low" group.
threshold = 500
# Spacing between x-axis ticks.
xtick_step = 200

# Sort each group ascending so bars grow from bottom to top, with the "Low"
# group placed underneath the "High" group.
tmp1 = tmp[tmp['id'] <= threshold].sort_values('id', ascending=True)
tmp2 = tmp[tmp['id'] > threshold].sort_values('id', ascending=True)

ax_font_dict = {'fontsize': 12, 'fontweight': 'bold'}
title_font_dict = {'fontsize': 16, 'fontweight': 'bold'}

yticklabels = list(tmp1['gu']) + list(tmp2['gu'])
counts = list(tmp1['id']) + list(tmp2['id'])
yticks = list(range(len(yticklabels)))
# Ticks run from 0 up to the first multiple of `xtick_step` that is >= the
# longest bar, so the labels always describe real tick positions.
xticks = np.arange(0, max(counts, default=0) + xtick_step, xtick_step)

fig, ax = plt.subplots(figsize=(16, 8))

# Bars are placed at explicit numeric positions; labels are attached to the
# artists so the legend does not depend on artist creation order.
n_low = len(tmp1)
ax.barh(y=yticks[:n_low], width=tmp1['id'], color=google_colors[1], label='Low')
ax.barh(y=yticks[n_low:], width=tmp2['id'], color=google_colors[0], label='High')
ax.vlines(threshold, ymin=-1, ymax=len(yticks), linestyles='dashed',
          alpha=0.5, label='Threshold')

# Fix the tick positions first, then label them. `fontdict` is passed by
# keyword because it is keyword-only in newer matplotlib releases.
ax.set_yticks(yticks)
ax.set_yticklabels(yticklabels, fontdict=ax_font_dict)
ax.set_xticks(xticks)
ax.set_xticklabels(xticks, fontdict=ax_font_dict)

ax.set_title("서울시 구별 공공 Wifi 현황", fontdict=title_font_dict)
ax.legend(loc='center right', fontsize='large')

# Print each count just to the right of the end of its bar.
for y, count in zip(yticks, counts):
    ax.annotate(str(count), xy=(count, y), xytext=(15, 0),
                textcoords='offset points',
                ha='center', va='center', fontsize=11)

# Drop the frame lines that carry no information.
for side in ("top", "left", "right"):
    ax.spines[side].set_visible(False)

plt.show()

image

구별, 통신사별 공공 Wifi 현황

Before

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Baseline stacked bar chart of record counts per district and carrier.
#
# Reads `df` (columns `comp`, `gu`, `id`) and `google_colors` from earlier
# cells.

# Work on a copy so the raw frame `df` is left untouched.
# NOTE(review): the original cell used `df1` without defining it; a copy of
# `df` is assumed here — confirm.
df1 = df.copy()

# Pull the carrier name out of the free-text `comp` column. The first match of
# SKT, KT or LGU+ is kept; rows without a match (or with a missing `comp`)
# become NaN and are dropped.
df1['telecom'] = df1['comp'].str.extract(r"(SKT|KT|LGU\+)", expand=False)
df1 = df1.dropna(subset=['telecom'], axis=0)

# Long-format counts: one row per (district, carrier) pair.
tmp = df1.pivot_table(index=['gu','telecom'],values='id',aggfunc='count').reset_index()

tmp1 = tmp[tmp['telecom']=='LGU+']
tmp2 = tmp[tmp['telecom']=='KT']
tmp3 = tmp[tmp['telecom']=='SKT']


fig,ax = plt.subplots(figsize=(10,8))

# Stack the carriers left to right. The `left` offsets are combined by
# position, so this assumes every district has a row for all three carriers
# (same length and same order in tmp1/tmp2/tmp3) — verify against the data.
ax.barh(y=tmp1['gu'],width=tmp1['id'],color=google_colors[2])
ax.barh(y=tmp2['gu'],width=tmp2['id'],color=google_colors[1],left=tmp1['id'].values)
ax.barh(y=tmp3['gu'],width=tmp3['id'],color=google_colors[0],left=(tmp1['id'].values+tmp2['id'].values))

plt.show()

image

After

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Polished stacked bar chart of record counts per district and carrier:
# districts are ordered by their total count, each segment is annotated with
# its own count, and a legend and title are added.
#
# Reads `df1`, `tmp1`/`tmp2`/`tmp3` (per-carrier counts with columns `gu`,
# `id`), `ax_font_dict`, `title_font_dict` and `google_colors` from earlier
# cells.

# Spacing between x-axis ticks.
xtick_step = 200

# District order: ascending by total count across all carriers.
district_totals = (
    df1.pivot_table(index='gu', values='id', aggfunc='count')
       .reset_index()
       .sort_values('id', ascending=True)
)
yticklabels = list(district_totals['gu'])
yticks = list(range(len(yticklabels)))

# Re-order every carrier table to the common district order. A left merge
# keeps a row for every district; a district with no record for a carrier gets
# a count of 0 instead of being dropped, so the three tables stay aligned row
# by row.
district_order = pd.DataFrame({'gu': yticklabels})
tmp1, tmp2, tmp3 = (
    district_order.merge(part, on='gu', how='left')
                  .fillna({'id': 0})
                  .astype({'id': int})
    for part in (tmp1, tmp2, tmp3)
)

# Ticks run from 0 up to the first multiple of `xtick_step` that is >= the
# longest stacked bar.
stacked_totals = tmp1['id'] + tmp2['id'] + tmp3['id']
longest_bar = stacked_totals.max() if len(stacked_totals) else 0
xticks = np.arange(0, longest_bar + xtick_step, xtick_step)

fig, ax = plt.subplots(figsize=(16, 8))

# Stack the carriers left to right at explicit numeric positions; labels are
# attached to the artists so the legend cannot get out of step with them.
ax.barh(y=yticks, width=tmp1['id'], color=google_colors[2], label='LGU+')
ax.barh(y=yticks, width=tmp2['id'], color=google_colors[1], label='KT',
        left=tmp1['id'].values)
ax.barh(y=yticks, width=tmp3['id'], color=google_colors[0], label='SKT',
        left=(tmp1['id'].values + tmp2['id'].values))

# Fix the tick positions first, then label them. `fontdict` is passed by
# keyword because it is keyword-only in newer matplotlib releases.
ax.set_yticks(yticks)
ax.set_yticklabels(yticklabels, fontdict=ax_font_dict)
ax.set_xticks(xticks)
ax.set_xticklabels(xticks, fontdict=ax_font_dict)

ax.set_title("서울시 구&통신사별 공공 Wifi 현황", fontdict=title_font_dict)
ax.legend(loc='center right', fontsize='large')

# Write each segment's count at the horizontal centre of that segment.
for y, segment_counts in enumerate(zip(tmp1['id'], tmp2['id'], tmp3['id'])):
    segment_start = 0
    for count in segment_counts:
        # An empty segment has no area to label.
        if count > 0:
            ax.annotate(str(count), xy=(segment_start + count / 2, y),
                        xytext=(0, 0), textcoords='offset points',
                        ha='center', va='center', fontsize=11)
        segment_start += count

# Drop the frame lines that carry no information.
for side in ("top", "left", "right"):
    ax.spines[side].set_visible(False)

plt.show()

image

통신사별 공공 Wifi 현황

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# One pie chart per carrier showing how that carrier's records are split
# across districts: the 15 largest districts get their own slice and the rest
# are lumped into a single 'etc' slice.
#
# Reads `tmp` (columns `gu`, `telecom`, `id`) and `title_font_dict` from
# earlier cells.


def _top_districts_with_etc(carrier_counts, top_n=15):
    """Return the `top_n` largest districts plus one 'etc' row for the rest.

    `carrier_counts` is the slice of `tmp` for a single carrier. The result
    has columns `gu` and `id`, sorted by `id` descending, with the summed
    remainder appended as a final row labelled 'etc' when any districts fall
    outside the top `top_n`.
    """
    ranked = carrier_counts.sort_values('id', ascending=False).drop('telecom', axis=1)
    head, tail = ranked.iloc[:top_n], ranked.iloc[top_n:]
    if tail.empty:
        # Nothing left over: avoid drawing an empty 'etc' slice.
        return head
    etc_row = pd.DataFrame(data=dict(gu=['etc'], id=[tail['id'].sum()]))
    return pd.concat([head, etc_row])


# (carrier, colormap name, colormap index step between neighbouring slices)
pie_specs = [
    ("LGU+", "YlOrBr", 6),
    ("KT", "Reds", 9),
    ("SKT", "Blues", 9),
]

fig, axes = plt.subplots(1, 3, figsize=(20, 7))

shares = []
for axis, (carrier, cmap_name, color_step) in zip(axes, pie_specs):
    share = _top_districts_with_etc(tmp[tmp['telecom'] == carrier])
    shares.append(share)

    # Integer colormap indices, largest first, so the biggest slice gets the
    # darkest shade. The palette is sized from this carrier's own slice count.
    slice_colors = plt.get_cmap(cmap_name)(np.arange(len(share))[::-1] * color_step)

    axis.pie(share['id'], labels=share['gu'], autopct='%1.1f%%',
             shadow=True, startangle=90, counterclock=False, colors=slice_colors)
    axis.axis('equal')
    axis.set_title(carrier, fontdict=title_font_dict)

# Keep the per-carrier tables available under their original names.
tmp1, tmp2, tmp3 = shares

plt.show()

image

지도 시각화

1
2
import folium
import geopandas as gpd

Base

1
2
3
# Empty base map: light CartoDB tiles centred on the coordinates below.
map_center = [37.564346, 126.982062]
map = folium.Map(location=map_center, tiles='cartodbpositron', zoom_start=11)
map

image

CircleMarker

1
2
3
4
5
6
7
8
9
# Keep only the rows that have a longitude value.
has_longitude = df['lon'].notnull()
df = df[has_longitude]

map_center = [37.564346, 126.982062]
map = folium.Map(location=map_center, tiles='cartodbpositron', zoom_start=11)

# Draw one small filled circle per remaining row.
for lat, lon in zip(df['lat'], df['lon']):
    marker = folium.CircleMarker(location=[lat, lon], radius=1.5,
                                 color='tomato', fill=True)
    marker.add_to(map)

map

image

Marker + Cluster

1
2
3
4
5
6
7
8
9
10
11
12
13
from folium.plugins import MarkerCluster

# Map with one marker per row of `df`, grouped into clusters.
map = folium.Map(location = [37.564346,126.982062], zoom_start=11,
                 tiles='cartodbpositron')

mc = MarkerCluster()
for row in df.itertuples():
    # NOTE(review): the original carried a commented-out `tooltip=row.area`
    # argument; an `area` column is assumed to exist if it is re-enabled.
    mc.add_child(folium.Marker(location=[row.lat, row.lon]))

# Attach the cluster layer to the map once, after all markers were added
# (the original re-attached it on every loop iteration).
map.add_child(mc)

map

image

Heatmap

1
2
3
4
5
6
7
8
9
10
from folium.plugins import HeatMap

# List of [lat, lon] pairs, one per row of `df`, in the order HeatMap expects.
# Note that this rebinds `df1`, which held a DataFrame in earlier cells.
df1 = df[['lat', 'lon']].values.tolist()

# NOTE(review): newer folium releases may no longer ship the built-in
# 'Stamenterrain' tile set — confirm against the installed version.
map_center = [37.564346, 126.982062]
map = folium.Map(location=map_center, tiles='Stamenterrain', zoom_start=11)

heat_layer = HeatMap(df1, radius=15, blur=15)
heat_layer.add_to(map)

map

image

Choropleth

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import json

# Choropleth of record counts per district.
#
# Reads `df` (with a `gu` column) from earlier cells and the administrative
# boundary GeoJSON at `geo_path`.

# Count per district: non-null entries of the first column, grouped by `gu`.
dat = df.groupby('gu').count().iloc[:,0]
dat = dat.reset_index()
dat.columns = ['gu','count']

geo_path = './spatial/geojson/geojson/SIG_202005.geojson'
sig_dat = gpd.read_file(geo_path)

# Keep only the regions whose SIG_CD starts with '11'
# (presumably the code prefix for Seoul — confirm).
region_prefix = '11'
sig_dat['SIG_sub'] = sig_dat['SIG_CD'].str.slice(stop=2)
sig_dat = sig_dat[sig_dat['SIG_sub'] == region_prefix]

# Attach the region code to each district by its Korean name; districts
# without a matching region are dropped.
dat = dat.merge(sig_dat,how='left',left_on='gu',right_on='SIG_KOR_NM')
dat.dropna(inplace=True)

# Load the raw GeoJSON with a context manager so the file handle is closed,
# and restrict its features to the same code prefix as `sig_dat` so regions
# without data are not drawn.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)
geo_str['features'] = [
    feature for feature in geo_str['features']
    if str(feature['properties']['SIG_CD']).startswith(region_prefix)
]

# NOTE(review): newer folium releases may no longer ship the built-in
# 'Stamenterrain' tile set — confirm against the installed version.
map = folium.Map(location = [37.564346,126.982062],zoom_start=11,
                 tiles='Stamenterrain')

# `folium.Choropleth` replaces the deprecated `Map.choropleth` method.
folium.Choropleth(geo_data = geo_str,
                  data = dat,
                  columns = ["SIG_CD", "count"],
                  fill_color = 'YlGn',
                  key_on = 'feature.properties.SIG_CD').add_to(map)

map

image

이 기사는 저작권자의 CC BY 4.0 라이선스를 따릅니다.