PyTorch
Copy the code snippet below and follow the commented steps to connect Robust Intelligence to a PyTorch model.
Once that is done, you can specify that model file when configuring your model source.
"""Template for connecting a PyTorch model to Robust Intelligence.
We expect this file to contain a `predict_df` function that takes in a Pandas
DataFrame corresponding to one or more rows in the dataset. This method should
return a NumPy array containing scores between 0 and 1 for each row in the dataset.
The DataFrame will be loaded from the data sources you configure.
This template assumes that 1) the input data is scaled with a standardization
scaler, 2) the model class is defined in this file, and 3) the model was saved
via its state_dict (see section 4 of:
https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_models_for_inference.html).
"""
from pathlib import Path
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
# Step 1: Define the model class.
class BinaryClassification(nn.Module):
    """Feed-forward binary classifier that emits one raw logit per input row.

    The network is two ``Linear -> ReLU -> BatchNorm1d`` stages, followed by
    dropout and a single-unit linear output layer. No sigmoid is applied in
    ``forward``; the caller is expected to turn the logit into a probability.

    The submodule attribute names (``layer_1``, ``batchnorm1``, ...) are the
    keys of the ``state_dict``, so they must stay in sync with the names used
    when the checkpoint was saved.
    """

    def __init__(
        self,
        input_dims: int = 30,
        hidden_dims: int = 64,
        dropout_p: float = 0.1,
    ) -> None:
        """Define network architecture.

        The default values reproduce the architecture this template was
        originally written with; override them to match your own model.

        Args:
            input_dims: Number of features in each input row.
            hidden_dims: Width of both hidden layers.
            dropout_p: Dropout probability applied before the output layer.
        """
        super().__init__()
        self.layer_1 = nn.Linear(input_dims, hidden_dims)
        self.layer_2 = nn.Linear(hidden_dims, hidden_dims)
        self.layer_out = nn.Linear(hidden_dims, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_p)
        self.batchnorm1 = nn.BatchNorm1d(hidden_dims)
        self.batchnorm2 = nn.BatchNorm1d(hidden_dims)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Implement the forward pass.

        Args:
            inputs: Batch of feature rows whose last dimension is the
                ``input_dims`` the model was constructed with.

        Returns:
            Raw (pre-sigmoid) output of ``layer_out``, one value per row.
        """
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        return self.layer_out(x)
# Step 2: Load the model and scaler.
# Artifacts are expected in a "model" directory next to this file.
model_dir = Path(__file__).parent / "model"
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler artifacts that come from a trusted source.
scaler: StandardScaler = joblib.load(model_dir / "scaler.bin")
model = BinaryClassification()
# Map the checkpoint onto the CPU so that weights saved from a GPU session
# still load on CPU-only hosts; inference in this file runs on CPU tensors.
model.load_state_dict(torch.load(model_dir / "model.pt", map_location="cpu"))
# Put dropout and batch-norm layers into inference mode.
model.eval()
# Step 3: If the model requires additional preprocessing to the input data,
# include the logic in the below 'preprocess_df' function or 'predict_df'
# function. By default, Robust Intelligence passes rows from the dataset defined in the config
# to 'predict_df' with the label and prediction columns omitted.
def preprocess_df(df: pd.DataFrame) -> np.ndarray:
    """Apply preprocessing to rows of the dataframe.

    Args:
        df: Rows of the dataset to be scaled.

    Returns:
        The result of applying the module-level ``scaler`` to ``df``, as a
        NumPy array.
    """
    # np.asarray is a no-op when the scaler already returns an ndarray, and
    # converts the result if the scaler is configured to emit DataFrames.
    return np.asarray(scaler.transform(df))
# Step 4: Implement the below function that returns a prediction per row in
# the dataset, with any additional preprocessing if necessary.
def predict_df(df: pd.DataFrame) -> np.ndarray:
    """Return array of probabilities assigned to the positive class.

    Args:
        df: One or more rows of the dataset.

    Returns:
        A 1-D float32 array with one sigmoid score per row of ``df``.

    Raises:
        ValueError: If the model produces NaN for any row.
    """
    # Nothing to score: return an empty result rather than passing a
    # zero-row batch to the scaler and the model.
    if len(df) == 0:
        return np.empty(0, dtype=np.float32)

    # Apply custom preprocessing to the rows.
    features = np.asarray(preprocess_df(df), dtype=np.float32)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(torch.as_tensor(features, dtype=torch.float32))
        probs = torch.sigmoid(logits)

    preds = probs.numpy().reshape(-1)
    if np.isnan(preds).any():
        raise ValueError("Unable to return predictions, model outputs NaN.")
    return preds