Python 시각화

현재 교육과정에서 진행 중인 프로젝트의 데이터를 시각화해 보았습니다.
사용 데이터: Kaggle

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import set_matplotlib_formats

# Notebook display setup: emit inline figures in 'retina' format and use the
# 'Malgun Gothic' font family for matplotlib text (presumably chosen so that
# Korean labels render -- confirm on non-Windows machines).
set_matplotlib_formats('retina')
plt.rc('font', family='Malgun Gothic')

# One booking table per hotel type.
df_City = pd.read_csv('../data/City Hotel.csv')
df_Resort = pd.read_csv('../data/Resort Hotel.csv')


def length(x):
    """Return ``x == 1``.

    Kept under its original name in case other cells still call it; the
    splits below no longer need it.
    """
    return x == 1


def length_2(x):
    """Return ``x == 0``.

    Kept under its original name in case other cells still call it; the
    splits below no longer need it.
    """
    return x == 0


# Split each table on the `is_canceled` column: the *_1 frames hold the rows
# where it equals 1, the *_0 frames the rows where it equals 0.  Comparing the
# whole column at once builds the same boolean mask as applying a predicate to
# every element, without a Python-level call per row.
df_City_1 = df_City[df_City['is_canceled'] == 1]
df_City_0 = df_City[df_City['is_canceled'] == 0]
df_Resort_1 = df_Resort[df_Resort['is_canceled'] == 1]
df_Resort_0 = df_Resort[df_Resort['is_canceled'] == 0]
# Overlay the lead_time density of City Hotel bookings with is_canceled == 0
# and is_canceled == 1 on one figure.
plt.figure(figsize=(15, 8))
# kdeplot draws the KDE-only curve that distplot(hist=False) produced;
# distplot itself is deprecated.  NaNs are dropped explicitly because older
# kdeplot versions do not remove them the way distplot did.
ax_1 = sns.kdeplot(df_City_0['lead_time'].dropna(), label='is_canceled_0')
ax_2 = sns.kdeplot(df_City_1['lead_time'].dropna(), label='is_canceled_1')
# Set the axis label and legend explicitly so the figure does not depend on
# which seaborn version adds them automatically.
plt.xlabel('lead_time')
plt.legend()
# Red vertical reference line at lead_time = 71.
# NOTE(review): 71 is hard-coded; presumably where the two curves cross --
# confirm and recompute if the data changes.
plt.axvline(x=71, color='red')
<matplotlib.lines.Line2D at 0x1c1b1504f08>

output_5_1

# Overlay the lead_time density of Resort Hotel bookings with is_canceled == 0
# and is_canceled == 1 on one figure.
plt.figure(figsize=(15, 8))
# kdeplot draws the KDE-only curve that distplot(hist=False) produced;
# distplot itself is deprecated.  NaNs are dropped explicitly because older
# kdeplot versions do not remove them the way distplot did.
sns.kdeplot(df_Resort_0['lead_time'].dropna(), label='is_canceled_0')
sns.kdeplot(df_Resort_1['lead_time'].dropna(), label='is_canceled_1')
# Set the axis label and legend explicitly so the figure does not depend on
# which seaborn version adds them automatically.
plt.xlabel('lead_time')
plt.legend()
# Red vertical reference line at lead_time = 39.
# NOTE(review): 39 is hard-coded; presumably where the two curves cross --
# confirm and recompute if the data changes.
plt.axvline(x=39, color='red')
<matplotlib.lines.Line2D at 0x1c1b10b4248>

output_6_1

# Pairwise scatter/density grid for three Resort Hotel features, coloured by
# the is_canceled column (which is selected only to serve as the hue).
resort_pair_columns = ['agent', 'days_in_waiting_list', 'adr', 'is_canceled']
# NOTE(review): 'bw' is the older seaborn name for the KDE bandwidth setting
# used on the diagonal; newer releases call it 'bw_method' -- confirm the
# installed version before renaming.
sns.pairplot(
    df_Resort[resort_pair_columns],
    hue='is_canceled',
    diag_kws={'bw': 10},
)
<seaborn.axisgrid.PairGrid at 0x2eeb8836208>

output_7_1

# Pairwise scatter/density grid for three City Hotel features, coloured by
# the is_canceled column (which is selected only to serve as the hue).
city_pair_columns = ['agent', 'days_in_waiting_list', 'adr', 'is_canceled']
# NOTE(review): 'bw' is the older seaborn name for the KDE bandwidth setting
# used on the diagonal; newer releases call it 'bw_method' -- confirm the
# installed version before renaming.
sns.pairplot(
    df_City[city_pair_columns],
    hue='is_canceled',
    diag_kws={'bw': 10},
)
<seaborn.axisgrid.PairGrid at 0x2eeba7498c8>

output_8_1

# Distinct values of required_car_parking_spaces found in each hotel's table.
parking_spaces_City = list(df_City['required_car_parking_spaces'].unique())
parking_spaces_Resort = list(df_Resort['required_car_parking_spaces'].unique())

# One City Hotel sub-frame per distinct value.  Each frame is kept in a dict
# for the plot below and is still published as a df_City_Park<value> global,
# so any cell that refers to those names keeps working.
city_frames_by_parking = {}
for spaces in parking_spaces_City:
    city_frames_by_parking[spaces] = df_City[df_City['required_car_parking_spaces'] == spaces]
    globals()['df_City_Park{}'.format(spaces)] = city_frames_by_parking[spaces]
    print(spaces)

# One panel per distinct value.  The panel count follows the data instead of
# a hard-coded 4, and squeeze=False + ravel() keeps `axes` a 1-D array even
# when there is only a single panel.  The figure is created at its final
# 15x5 inch size directly.
figure, axes = plt.subplots(
    nrows=1,
    ncols=len(parking_spaces_City),
    figsize=(15, 5),
    squeeze=False,
)
axes = axes.ravel()
for panel, spaces in zip(axes, parking_spaces_City):
    # Canceled vs. non-canceled booking counts for this parking-space value.
    sns.countplot(
        data=city_frames_by_parking[spaces],
        x='required_car_parking_spaces',
        hue='is_canceled',
        ax=panel,
    )

output_11_0

# Publish one Resort Hotel sub-frame per distinct parking-space value as a
# df_Resort_Park<value> global, printing each value as it is processed.
for spaces in parking_spaces_Resort:
    has_this_value = df_Resort['required_car_parking_spaces'] == spaces
    frame_name = 'df_Resort_Park{}'.format(spaces)
    globals()[frame_name] = df_Resort[has_this_value]
    print(spaces)
0
1
2
8
3
# One panel per distinct Resort Hotel parking-space value.  The panel count
# follows the data instead of a hard-coded 5, and squeeze=False + ravel()
# keeps `axes` a 1-D array even when there is only a single panel.  The
# figure is created at its final 15x5 inch size directly.
figure, axes = plt.subplots(
    nrows=1,
    ncols=len(parking_spaces_Resort),
    figsize=(15, 5),
    squeeze=False,
)
axes = axes.ravel()
for panel, spaces in zip(axes, parking_spaces_Resort):
    # The per-value sub-frames are read from the df_Resort_Park<value>
    # globals populated earlier in the notebook.
    sns.countplot(
        data=globals()['df_Resort_Park{}'.format(spaces)],
        x='required_car_parking_spaces',
        hue='is_canceled',
        ax=panel,
    )

output_13_0

# Resort Hotel: booking counts per total_of_special_requests, split by
# is_canceled, with each bar labelled by its share of all Resort rows.
plt.figure(figsize=(10, 5))
ax = sns.countplot(
    data=df_Resort,
    x='total_of_special_requests',
    hue='is_canceled',
    palette=sns.color_palette("Set1", n_colors=8, desat=.5),
)
total_rows = df_Resort.shape[0]
for p in ax.patches:
    left, _, width, height = p.get_bbox().bounds
    # Some seaborn versions emit a bar with NaN height for a category/hue
    # combination that has no rows; skip those so no "nan" label is placed
    # at a non-finite position.
    if not np.isfinite(height):
        continue
    # Label = bar count / total rows, placed just above the bar and
    # approximately centred on it.
    ax.annotate(
        '{:.2f}'.format(height / total_rows),
        (left + width / 1.9, height * 1.015),
        ha='center',
        size=11.5,
    )

output_14_0

# City Hotel: booking counts per total_of_special_requests, split by
# is_canceled, with each bar labelled by its share of all City rows.
plt.figure(figsize=(10, 5))
ax = sns.countplot(
    data=df_City,
    x='total_of_special_requests',
    hue='is_canceled',
    palette=sns.color_palette("Set1", n_colors=8, desat=.5),
)
total_rows = df_City.shape[0]
for p in ax.patches:
    left, _, width, height = p.get_bbox().bounds
    # Some seaborn versions emit a bar with NaN height for a category/hue
    # combination that has no rows; skip those so no "nan" label is placed
    # at a non-finite position.
    if not np.isfinite(height):
        continue
    # Label = bar count / total rows, placed just above the bar and
    # approximately centred on it.
    ax.annotate(
        '{:.2f}'.format(height / total_rows),
        (left + width / 1.9, height * 1.011),
        ha='center',
        size=11.5,
    )

output_15_0