import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor

np.random.seed(0)

# Load the red wine quality data.
df = pd.read_csv('/winequality-red.csv')

# Target and features.
Y = df['quality']
X = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
        'pH', 'sulphates', 'alcohol']]

# Hold out 20% of the data for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Fit a random forest regressor.
model = RandomForestRegressor(max_depth=6, random_state=0, n_estimators=10)
model.fit(X_train, Y_train)
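Before turning to the SHAP plots, it can help to sanity-check the fit on the held-out split. This quick evaluation is an addition to the original walkthrough, using standard scikit-learn metrics:

from sklearn.metrics import mean_absolute_error, r2_score

# Predict on the 20% test split and report two common regression metrics.
preds = model.predict(X_test)
print("R^2:", r2_score(Y_test, preds))
print("MAE:", mean_absolute_error(Y_test, preds))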
Plot Feature Importance with Blue Bar Charts
import shap

# Compute SHAP values for the training set with a tree explainer.
shap_values = shap.TreeExplainer(model).shap_values(X_train)

# Global feature importance: mean |SHAP value| per feature, shown as a bar chart.
shap.summary_plot(shap_values, X_train, plot_type="bar")
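The bar chart ranks features by their mean absolute SHAP value. If you want that ranking as numbers rather than a figure, a small sketch like the following (not part of the original code) reproduces it directly from shap_values:

# Mean |SHAP value| per feature, i.e. the heights of the bars above.
mean_abs_shap = pd.Series(
    np.abs(shap_values).mean(axis=0),
    index=X_train.columns,
).sort_values(ascending=False)
print(mean_abs_shap)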
Plot the Red-and-Blue Feature Importance Plot
# Beeswarm summary plot: each dot is one observation, colored by its feature value.
shap.summary_plot(shap_values, X_train)
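If you need this figure as a file rather than an on-screen plot, one option (an addition here, not from the original article) is to suppress SHAP's automatic display and save it with matplotlib:

# Draw the beeswarm plot without showing it, then save it to disk.
shap.summary_plot(shap_values, X_train, show=False)
plt.savefig("shap_summary.png", bbox_inches="tight")
plt.close()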
Partial Dependence Plot
The partial dependence plot shows the marginal effect that one or two features have on the predicted outcome of a machine learning model.
# SHAP dependence plot for alcohol: SHAP value versus feature value.
shap.dependence_plot("alcohol", shap_values, X_train)
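By default, dependence_plot colors the dots by the feature that appears to interact most strongly with alcohol. As a variation (not in the original article), you can pin the coloring to a specific feature; "sulphates" below is just an illustrative choice:

# Color the alcohol dependence plot by a chosen feature instead of the
# automatically selected interaction feature.
shap.dependence_plot("alcohol", shap_values, X_train, interaction_index="sulphates")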
View the original article on Medium:
Explain Your Model with the SHAP Values