Import related libraries and define variables
# Train a random forest regressor that predicts wine 'quality' from the
# physicochemical columns of the red-wine quality CSV.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Seed NumPy's global random generator. train_test_split below is given no
# random_state of its own, so (per scikit-learn's documented default) it
# draws from this global generator; the seed keeps the split repeatable.
np.random.seed(0)

# Load the data.
df = pd.read_csv('/winequality-red.csv')

# The target variable is 'quality'.
Y = df['quality']

# Columns used as predictors.
X = df[[
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]]

# Split the data into train and test data, holding out 20% for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Build the model with the random forest regression algorithm:
# 10 trees, each limited to depth 6, with a fixed seed for the forest itself.
model = RandomForestRegressor(max_depth=6, random_state=0, n_estimators=10)
model.fit(X_train, Y_train)