요약
# Logistic regression: train on the training split and score on the test split.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Declare the model (verbose=2 is passed to the estimator unchanged)
lr_model = LogisticRegression(verbose=2)
# 2. Train
lr_model.fit(X_train, Y_train)
# 3. Predict (male 1, female 0)
predictions = lr_model.predict(X_test)
# 4. Evaluate (ground truth: Y_test, predicted: predictions)
lr_acc = accuracy_score(Y_test, predictions)
# Print the result explicitly: a bare `lr_acc` expression is only displayed
# inside a notebook cell and does nothing when run as a script. The message
# format mirrors the one used for the K-nearest-neighbours model.
print('LogisticRegression 모델의 예측 정확도는 {}% 입니다.'.format(round(lr_acc*100)))
# K-nearest-neighbours classifier: train, predict, then report accuracy.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 1-2. Declare the model with its default settings and train it
#      (`fit` returns the estimator itself, so the call can be chained).
knc_model = KNeighborsClassifier().fit(X_train, Y_train)
# 3. Predict labels for the held-out rows
predictions = knc_model.predict(X_test)
# 4. Evaluate against the true labels
knc_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
# Report the accuracy as a whole-number percentage
print('KNeighborsClassifier 모델의 예측 정확도는 {}% 입니다.'.format(round(knc_acc * 100)))
처음부터 (전처리부터)
# Load libraries
# pyplot is the supported plotting interface; matplotlib.pylab is discouraged
# because it additionally re-exports the numpy namespace under the same alias.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Load seaborn's 'penguins' sample dataset
penguins = sns.load_dataset('penguins')
penguins  # notebook-style display of the raw data
# Drop every row that has a missing value, then renumber the index from 0
penguins = penguins.dropna().reset_index(drop=True)
penguins  # notebook-style display of the cleaned data
from sklearn.preprocessing import MinMaxScaler

# Normalisation: min-max scale the four numeric measurement columns.
# NOTE(review): the scaler is fit on every row here, before the train/test
# split performed later in the file, so test rows influence the scaling.
scaler = MinMaxScaler()
features = penguins[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
features_normed = scaler.fit_transform(features)
# Column i of the scaled array belongs to the i-th selected column, so pair
# each column name with the matching row of the transposed array.
penguins = penguins.assign(**dict(zip(features.columns, features_normed.T)))
from sklearn.preprocessing import LabelEncoder

# Label encoding: replace the species, island and sex columns with integer codes.
encoder = LabelEncoder()
features = penguins[['species', 'island', 'sex']]
# `apply` calls fit_transform once per column, re-fitting the same encoder each time.
encoded = features.apply(encoder.fit_transform)
# Write every encoded column back over the column of the same name
penguins = penguins.assign(**{column: encoded[column] for column in features.columns})
penguins  # notebook-style display of the encoded data
# Split into training and test sets
from sklearn.model_selection import train_test_split

# Features: every column except the target; target: the encoded `sex` column
X = penguins.drop('sex', axis=1)
Y = penguins['sex']
# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the reported accuracies are reproducible
# across runs; stratify=Y keeps the class ratio of `sex` the same in both splits.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=True, random_state=42, stratify=Y
)
로지스틱 회귀와 K-최근접 이웃 분류기
# Logistic regression: train on the training split and score on the test split.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Declare the model (verbose=2 is passed to the estimator unchanged)
lr_model = LogisticRegression(verbose=2)
# 2. Train
lr_model.fit(X_train, Y_train)
# 3. Predict (male 1, female 0)
predictions = lr_model.predict(X_test)
# 4. Evaluate (ground truth: Y_test, predicted: predictions)
lr_acc = accuracy_score(Y_test, predictions)
# Print the result explicitly: a bare `lr_acc` expression is only displayed
# inside a notebook cell and does nothing when run as a script. The message
# format mirrors the one used for the K-nearest-neighbours model.
print('LogisticRegression 모델의 예측 정확도는 {}% 입니다.'.format(round(lr_acc*100)))
# K-nearest-neighbours classifier: train, predict, then report accuracy.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 1-2. Declare the model with its default settings and train it
#      (`fit` returns the estimator itself, so the call can be chained).
knc_model = KNeighborsClassifier().fit(X_train, Y_train)
# 3. Predict labels for the held-out rows
predictions = knc_model.predict(X_test)
# 4. Evaluate against the true labels
knc_acc = accuracy_score(y_true=Y_test, y_pred=predictions)
# Report the accuracy as a whole-number percentage
print('KNeighborsClassifier 모델의 예측 정확도는 {}% 입니다.'.format(round(knc_acc * 100)))
'파이썬. 머신러닝 > sklearn' 카테고리의 다른 글
분류 모델 성능 평가 지표 링크 (0) | 2024.11.29 |
---|---|
로지스틱 회귀 설명 영상 (0) | 2024.10.12 |
분류 분석 모델링 (MinMaxScaler, LabelEncoder, train_test_split) (1) | 2024.10.12 |
데이터 인코딩 LabelEncoder(), fit, transform (0) | 2024.09.21 |
train_test_split에서 stratify 역할 (0) | 2024.09.21 |