Wednesday, 4 September 2019

python machine learning 3 plot

#cmd

activate tensor
pip install matplotlib

-------------------------------------
#pycharm

import pickle

import matplotlib.pyplot as pyplot
import numpy as np
import pandas as pd
import sklearn.model_selection
from matplotlib import style
from sklearn import linear_model
from sklearn.utils import shuffle

# Load the student dataset; the file is semicolon-delimited.
data = pd.read_csv("student-mat.csv", sep=";")

print(data.head())

# Keep only the three grade columns and the attributes used as predictors.
data = data[['G1','G2','G3','studytime','failures','absences']]

print(data.head())

# Column the model is trained to predict.
predict = 'G3'

# Features are every kept column except the target. The axis is named via
# `columns=` because pandas 2.0 no longer accepts it as a positional argument.
x = np.array(data.drop(columns=[predict]))
y = np.array(data[predict])
#x_train, x_text, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.1)

# Train the model 30 times on fresh random splits and save the most accurate one.
# Start below any attainable score: R^2 can be negative, and starting at 0
# would leave no pickle on disk if every run scored at or below zero.
best = float("-inf")
for _ in range(30):
    # Hold out 10% of the rows to score this run.
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.1)

    linear = linear_model.LinearRegression()

    linear.fit(x_train, y_train)
    accuracy = linear.score(x_test, y_test)

    print(accuracy)

    # Overwrite the saved model only when this run beats the best so far.
    if accuracy > best:
        best = accuracy
        with open('studentmodel.pickle', 'wb') as f:
            pickle.dump(linear, f)

-----------------------------
#most accurate model saved, load the model and plot

import pandas as pd
import numpy as np
import keras
import sklearn
from sklearn import  linear_model
from sklearn.utils import shuffle
import matplotlib.pyplot as pyplot
import pickle
from matplotlib import style

# Load the student dataset; the file is semicolon-delimited.
data = pd.read_csv("student-mat.csv", sep=";")

print(data.head())

# Keep only the three grade columns and the attributes used as predictors.
data = data[['G1','G2','G3','studytime','failures','absences']]

print(data.head())

# Column the model predicts.
predict = 'G3'

# Features are every kept column except the target. The axis is named via
# `columns=` because pandas 2.0 no longer accepts it as a positional argument.
x = np.array(data.drop(columns=[predict]))
y = np.array(data[predict])

# Hold out 10% of the rows. The split is random on every run, so these rows
# are not necessarily ones the saved model never saw during training.
# (`x_text` holds the test features; the name is kept because later code reads it.)
x_train, x_text, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.1)

# Load the previously saved model. The context manager closes the file handle,
# which the bare open() call leaked. Only unpickle files you created yourself:
# pickle.load can execute arbitrary code from the file.
with open('studentmodel.pickle', 'rb') as pickle_in:
    linear = pickle.load(pickle_in)

print("Co: \n", linear.coef_)
print("Intercept: \n", linear.intercept_)

predictions = linear.predict(x_text)

# Print each prediction beside its input row and the actual target value.
# Iterating with zip avoids rebinding `x`, which holds the feature array.
for predicted, features, actual in zip(predictions, x_text, y_test):
    print(predicted, features, actual)

# Scatter one input column against G3 to see how the two relate.
feature = 'G1'
final_grade = data['G3']
feature_values = data[feature]

style.use('ggplot')
pyplot.scatter(feature_values, final_grade)
pyplot.xlabel(feature)
pyplot.ylabel('final grade')
pyplot.show()

-------------------------------------------------
test 1 good -> final good

test 2 good -> final good

studying longer doesn't necessarily mean a better grade

tests fail -> final bad

students with a good final grade tend to be in class often

reference:
https://www.youtube.com/watch?v=3AQ_74xrch8

No comments:

Post a Comment