Random Forest in Python - ML From Scratch 10
Implement a Random Forest algorithm using only built-in Python modules and numpy, and learn about the math behind this popular ML algorithm.
In this Machine Learning from Scratch Tutorial, we are going to implement a Random Forest algorithm using only built-in Python modules and numpy. We will also learn about the concept and the math behind this popular ML algorithm.
All algorithms from this course can be found on GitHub together with example tests.
Implementation
import numpy as np
from collections import Counter
from decision_tree import DecisionTree
def bootstrap_sample(X, y):
    """Draw a bootstrap resample of a dataset.

    Picks ``X.shape[0]`` positions at random *with* replacement and uses
    the same positions to index both ``X`` and ``y``, so every selected
    row stays paired with its label.

    Args:
        X: Array whose first axis enumerates the samples.
        y: Array indexable by the same positions as ``X``.

    Returns:
        A tuple ``(X_resampled, y_resampled)`` holding as many entries as
        ``X`` has rows; individual rows may appear several times or not
        at all.
    """
    row_count = X.shape[0]
    # Sampling with replacement is what makes this a bootstrap draw.
    picked = np.random.choice(row_count, size=row_count, replace=True)
    X_resampled = X[picked]
    y_resampled = y[picked]
    return X_resampled, y_resampled
def most_common_label(y):
    """Return the value that occurs most often in ``y``.

    Args:
        y: Iterable of hashable labels.

    Returns:
        The label with the highest count.

    Raises:
        IndexError: If ``y`` is empty (there is no entry to pick).
    """
    tally = Counter(y)
    # most_common(1) yields a one-element list of (label, count) pairs.
    label, _count = tally.most_common(1)[0]
    return label
class RandomForest:
    """Ensemble of ``DecisionTree`` models combined by majority vote.

    ``fit`` trains ``n_trees`` trees, each on its own bootstrap resample
    of the training data. ``predict`` asks every tree for its predictions
    and returns, for each sample, the label chosen by the most trees.

    Args:
        n_trees: Number of trees built by ``fit``.
        min_samples_split: Forwarded unchanged to every ``DecisionTree``.
        max_depth: Forwarded unchanged to every ``DecisionTree``.
        n_feats: Forwarded unchanged to every ``DecisionTree``
            (presumably the number of features considered per split --
            confirm against ``decision_tree.DecisionTree``).

    Attributes:
        trees: List of the fitted trees; empty until ``fit`` is called.
    """

    def __init__(self, n_trees=10, min_samples_split=2,
                 max_depth=100, n_feats=None):
        self.n_trees = n_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats
        self.trees = []

    def fit(self, X, y):
        """Train the forest on ``X`` / ``y``.

        Any trees from a previous call are discarded first, so calling
        ``fit`` again retrains from scratch.

        Args:
            X: Training samples; indexed along the first axis by
                ``bootstrap_sample``.
            y: Training labels aligned with the rows of ``X``.
        """
        self.trees = []
        for _ in range(self.n_trees):
            tree = DecisionTree(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                n_feats=self.n_feats,
            )
            # Each tree sees a different resample of the data.
            X_samp, y_samp = bootstrap_sample(X, y)
            tree.fit(X_samp, y_samp)
            self.trees.append(tree)

    def predict(self, X):
        """Predict a label for every sample in ``X`` by majority vote.

        Args:
            X: Samples to classify; passed as-is to each tree's
                ``predict``.

        Returns:
            ``numpy.ndarray`` with one voted label per sample.

        Raises:
            ValueError: If the forest holds no fitted trees, i.e. ``fit``
                has not been called or it was run with ``n_trees`` of 0.
        """
        if not self.trees:
            # Without this guard the empty prediction stack is 1-D and
            # np.swapaxes fails with an unhelpful axis-out-of-bounds error.
            raise ValueError(
                "RandomForest has no fitted trees; call fit() with "
                "n_trees >= 1 before predict()."
            )
        # Stacked per tree: first axis is the tree, second the sample
        # (assumes each tree.predict(X) returns one label per sample).
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        # Swap so each row collects every tree's vote for one sample.
        tree_preds = np.swapaxes(tree_preds, 0, 1)
        y_pred = [most_common_label(votes) for votes in tree_preds]
        return np.array(y_pred)
FREE VS Code / PyCharm Extensions I Use
✅ Write cleaner code with Sourcery, instant refactoring suggestions: Link*
Python Problem-Solving Bootcamp
🚀 Solve 42 programming puzzles over the course of 21 days: Link*