"""Predict a planet's radius from properties of its host star (Tasks 1-4).

Running this file as a script:

1. loads ``train-data.csv`` and prints it,
2. extracts the feature matrix and target vector and prints them,
3. fits a linear regression and a regression tree on all rows and prints
   each model's prediction for one sample star,
4. shuffles the rows, holds out the last ``TEST_SIZE`` rows, refits both
   models on the remainder and prints each model's mean absolute error on
   the held-out rows.
"""
import pandas as pd

# Column that the models learn to predict.
TARGET_COLUMN = "planet.radius"

# Input columns, in the order the models receive them.
FEATURE_COLUMNS = ["star.temp", "star.logg", "star.rad", "star.mass", "star.k.mag"]

# One feature row (same order as FEATURE_COLUMNS) used to probe the models.
SAMPLE_STAR = [6188.44, 4.50000, 1.01630, 1.21398, 9.03000]

# Number of rows held out for evaluation in Task 4.
TEST_SIZE = 300


def split_features_target(frame: pd.DataFrame) -> tuple:
    """Return ``(X, y)`` NumPy arrays taken from *frame*.

    ``X`` holds the ``FEATURE_COLUMNS`` values (one row per frame row, columns
    in ``FEATURE_COLUMNS`` order) and ``y`` holds the ``TARGET_COLUMN`` values.

    Raises:
        KeyError: if *frame* lacks any of the required columns.
    """
    return frame[FEATURE_COLUMNS].values, frame[TARGET_COLUMN].values


def holdout_split(frame: pd.DataFrame, test_size: int = TEST_SIZE) -> tuple:
    """Split *frame* by position into ``(train, test)``.

    The last *test_size* rows become the test part and everything before them
    the training part. No shuffling happens here; pass an already shuffled
    frame if a random split is wanted.

    Raises:
        ValueError: if *test_size* is not strictly between 0 and the number
            of rows, because one of the two parts would then be empty.
    """
    if not 0 < test_size < len(frame):
        raise ValueError(
            f"test_size must be between 1 and {len(frame) - 1}, got {test_size}"
        )
    return frame.iloc[:-test_size], frame.iloc[-test_size:]


def fit_models(X, y, random_state=None) -> tuple:
    """Fit and return ``(linear_model, tree_model)`` trained on ``X``, ``y``.

    *random_state* is forwarded to the decision tree only; the default
    ``None`` leaves the tree unseeded.
    """
    # Imported here so the pandas-only helpers above stay importable when
    # scikit-learn is not installed.
    import sklearn.linear_model
    import sklearn.tree

    linear_model = sklearn.linear_model.LinearRegression().fit(X, y)
    tree_model = sklearn.tree.DecisionTreeRegressor(random_state=random_state).fit(X, y)
    return linear_model, tree_model


def main(csv_path: str = "train-data.csv", test_size: int = TEST_SIZE, random_state=None) -> None:
    """Run Tasks 1-4 and print their results.

    Args:
        csv_path: CSV file containing ``FEATURE_COLUMNS`` and ``TARGET_COLUMN``.
        test_size: number of shuffled rows held out for evaluation.
        random_state: seed forwarded to the shuffle and to the decision
            trees; pass an int for a repeatable run. The default ``None``
            leaves both unseeded.
    """
    import sklearn.metrics
    import sklearn.utils

    # Task 1: load the data.
    df = pd.read_csv(csv_path)
    print(df)

    # Task 2: feature matrix and target vector.
    X, y = split_features_target(df)
    print(X, y)

    # Task 3: fit on every row and predict for the sample star.
    lin, tree = fit_models(X, y, random_state)
    print(lin.predict([SAMPLE_STAR]))
    print(tree.predict([SAMPLE_STAR]))

    # Task 4: evaluate on rows the models have not seen during fitting.
    df_shuf = sklearn.utils.shuffle(df, random_state=random_state)
    df_train, df_test = holdout_split(df_shuf, test_size)
    X_train, y_train = split_features_target(df_train)
    X_test, y_test = split_features_target(df_test)

    lin, tree = fit_models(X_train, y_train, random_state)
    y_lin = lin.predict(X_test)
    y_tree = tree.predict(X_test)

    print(f"Povprečna absolutna napaka linearnega modela: {sklearn.metrics.mean_absolute_error(y_test, y_lin)}")
    print(f"Povprečna absolutna napaka regresijskega drevesa: {sklearn.metrics.mean_absolute_error(y_test, y_tree)}")


if __name__ == "__main__":
    main()