이산화
pd.cut() VS pd.qcut()
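pd.cut()의 bins == 구간(그룹) 개수: 값의 전체 범위를 같은 너비의 구간으로 나눈다.
pd.qcut()의 q == quantile(분위수) 개수: 각 구간에 비슷한 수의 데이터가 들어가도록 분위수 기준으로 나눈다.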
# Discretize SalePrice into four labelled groups in two different ways.
sale_price = train["SalePrice"]
group_labels = [1, 2, 3, 4]
# pd.cut: an integer `bins` requests that many equal-width intervals.
train["SalePrice_cut"] = pd.cut(sale_price, bins=4, labels=group_labels)
# pd.qcut: an integer `q` requests that many quantile-based intervals.
train["SalePrice_qcut"] = pd.qcut(sale_price, q=4, labels=group_labels)
cut과 qcut 결과의 차이 알아보기
# Show how many rows fall into each group for both discretizations.
# value_counts() orders by frequency by default, so both results are sorted
# by group label here to make the two tables directly comparable.
display(train["SalePrice_cut"].value_counts().sort_index())
display(train["SalePrice_qcut"].value_counts().sort_index())
# Draw count plots of both discretized columns side by side (cut on the
# left axis, qcut on the right axis).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
for ax, column in zip(axes, ("SalePrice_cut", "SalePrice_qcut")):
    sns.countplot(data=train, x=column, ax=ax)