본문 바로가기
파이썬. 머신러닝/sklearn

로지스틱 회귀, K-최근접 이웃 분류기

by 한국수달보호협회장 2024. 10. 13.

 

 

요약

 

 

# Logistic regression: fit on the training split, then score on the test split.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Declare the model (verbose=2 asks the solver to print progress output).
lr_model = LogisticRegression(verbose=2)

# 2. Train on the training features and labels.
lr_model.fit(X=X_train, y=Y_train)

# 3. Predict labels for the held-out features (male = 1, female = 0).
predictions = lr_model.predict(X=X_test)

# 4. Evaluate: compare the true labels (Y_test) with the predicted labels.
lr_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
lr_acc

 

# K-nearest neighbors classifier: fit on the training split, score on the test split.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 1. Declare the model with scikit-learn's default hyperparameters.
knc_model = KNeighborsClassifier()

# 2. Train on the training features and labels.
knc_model.fit(X=X_train, y=Y_train)

# 3. Predict labels for the held-out features.
predictions = knc_model.predict(X=X_test)

# 4. Evaluate: report the accuracy as a whole-number percentage.
knc_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
print(
    'KNeighborsClassifier 모델의 예측 정확도는 {}% 입니다.'.format(
        round(knc_acc*100)
    )
)

 

 

 

 

 

 

 

 

 

처음부터 (전처리부터)

 

# Load libraries.
# pyplot is the supported plotting interface; the matplotlib.pylab namespace
# (pyplot plus numpy names in one module) is discouraged by matplotlib.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load seaborn's 'penguins' example dataset and display it.
penguins = sns.load_dataset('penguins')
penguins

 

 

 

# Drop every row that has a missing value, then renumber the index from 0
# (drop=True discards the old index instead of keeping it as a column).
penguins = (
    penguins
    .dropna()
    .reset_index(drop=True)
)
penguins

 

 

 

from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# --- Normalization -----------------------------------------------------------
# Rescale each numeric measurement column with MinMaxScaler (default range 0-1).
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed later, so test-set minima/maxima influence the
# scaling. For a strict evaluation, fit the scaler on the training split only.
numeric_columns = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']

scaler = MinMaxScaler()
features = penguins[numeric_columns]
features_normed = scaler.fit_transform(features)
penguins = penguins.assign(
    **{name: features_normed[:, position]
       for position, name in enumerate(numeric_columns)}
)

# --- Label encoding ----------------------------------------------------------
# Replace each categorical column with integer codes. One LabelEncoder is kept
# per column so every mapping stays available afterwards, e.g.
# encoders['species'].inverse_transform(...). Re-fitting a single shared
# encoder would retain only the mapping of the last column it saw.
categorical_columns = ['species', 'island', 'sex']
encoders = {name: LabelEncoder() for name in categorical_columns}

features = penguins[categorical_columns]
encoded = features.apply(lambda column: encoders[column.name].fit_transform(column))
penguins = penguins.assign(
    **{name: encoded[name] for name in categorical_columns}
)

# Backward compatibility: `encoder` remains the encoder fitted on 'sex',
# which is the state the single shared encoder previously ended in.
encoder = encoders['sex']

penguins

 

 

 

# Split the data into training and test sets.
from sklearn.model_selection import train_test_split

# Features: every column except the target; target: the encoded 'sex' column.
X = penguins.drop('sex', axis=1)
Y = penguins['sex']

# Hold out 20% of the rows for testing. A fixed random_state makes the shuffled
# split (and therefore the accuracies reported below) reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=True, random_state=42
)

 

 

 

 

로지스틱 회귀

# Logistic regression: fit on the training split, then score on the test split.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Declare the model (verbose=2 asks the solver to print progress output).
lr_model = LogisticRegression(verbose=2)

# 2. Train on the training features and labels.
lr_model.fit(X=X_train, y=Y_train)

# 3. Predict labels for the held-out features (male = 1, female = 0).
predictions = lr_model.predict(X=X_test)

# 4. Evaluate: compare the true labels (Y_test) with the predicted labels.
lr_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
lr_acc

 

 

# K-nearest neighbors classifier: fit on the training split, score on the test split.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 1. Declare the model with scikit-learn's default hyperparameters.
knc_model = KNeighborsClassifier()

# 2. Train on the training features and labels.
knc_model.fit(X=X_train, y=Y_train)

# 3. Predict labels for the held-out features.
predictions = knc_model.predict(X=X_test)

# 4. Evaluate: report the accuracy as a whole-number percentage.
knc_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
print(
    'KNeighborsClassifier 모델의 예측 정확도는 {}% 입니다.'.format(
        round(knc_acc*100)
    )
)