Deploy and maintain models with vetiver
Posit’s pro products, like Connect
AWS SageMaker (R only, for now)
A public or private cloud, using Docker
Containerized environments for your code
Start with a trained and versioned model
requirements.txt
or renv.lock
app.py
or plumber.R
Start with a trained and versioned model
# Generated by the vetiver package; edit with care
# start with python base image
FROM python:3.9
# create directory in container for vetiver files
WORKDIR /vetiver
# copy and install requirements
COPY vetiver_requirements.txt /vetiver/requirements.txt
# install pinned dependencies; --no-cache-dir keeps the image small
RUN pip install --no-cache-dir --upgrade -r /vetiver/requirements.txt
# copy app file
COPY app.py /vetiver/app/app.py
# expose port
EXPOSE 8080
# run vetiver API
CMD ["uvicorn", "app.app:api", "--host", "0.0.0.0", "--port", "8080"]
# Generated by the vetiver package; edit with care
# start with a pinned R base image matching the development R version
FROM rocker/r-ver:4.2.2
# point renv at Posit Package Manager for fast binary installs
# (ENV key=value is the documented, non-deprecated form)
ENV RENV_CONFIG_REPOS_OVERRIDE=https://packagemanager.rstudio.com/cran/latest
# system libraries required to build/run the R packages in the lockfile
RUN apt-get update -qq && apt-get install -y --no-install-recommends \
libcurl4-openssl-dev \
libicu-dev \
libsodium-dev \
libssl-dev \
make \
zlib1g-dev \
&& apt-get clean
# copy the lockfile and restore the exact package versions it records
COPY vetiver_renv.lock renv.lock
RUN Rscript -e "install.packages('renv')"
RUN Rscript -e "renv::restore()"
# copy the Plumber app file
COPY plumber.R /opt/ml/plumber.R
# expose port
EXPOSE 8080
# run vetiver API
ENTRYPOINT ["R", "-e", "pr <- plumber::plumb('/opt/ml/plumber.R'); pr$run(host = '0.0.0.0', port = 8080)"]
import pandas as pd
import numpy as np
from sklearn import preprocessing, ensemble, pipeline, compose, model_selection
# Load the Chicago food-inspections data
inspections = pd.read_parquet('../data/inspections.parquet')

# Derive month/year features from the inspection date
inspections['inspection_date'] = pd.to_datetime(inspections['inspection_date'])
inspections['month'] = inspections['inspection_date'].dt.month
inspections['year'] = inspections['inspection_date'].dt.year

categorical_features = ['facility_type', 'risk', 'month']

# Target is the inspection result; drop the identifier and raw-date columns
# ('year' remains in X and passes through the transformer untouched)
X, y = inspections.drop(columns=['aka_name', 'results','inspection_date']), inspections['results']

# Stratified 80/20 split; random_state pinned for reproducibility
# (matches the R version's set.seed(123))
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y,
    stratify = y,
    test_size = 0.2,
    random_state = 123
)

# Ordinal-encode the categorical columns; levels unseen at training
# time map to -1 instead of raising at prediction time
oe = compose.make_column_transformer(
    (preprocessing.OrdinalEncoder(
        handle_unknown="use_encoded_value", unknown_value=-1),
        categorical_features,),
    remainder="passthrough",
).fit(X_train)

# Fit the forest on encoded features, then bundle the already-fitted
# encoder and model into one pipeline so vetiver can serve it as a unit
rf = ensemble.RandomForestClassifier().fit(oe.transform(X_train), y_train)
inspection_fit = pipeline.Pipeline([("ordinal_encoder", oe), ("random_forest", rf)])
library(tidyverse)
library(tidymodels)
library(arrow)
# Load the Chicago food-inspections data
path <- here::here("data", "inspections.parquet")
inspections <- read_parquet(path)

# Pin the seed so the split (and fitted model) are reproducible
set.seed(123)

# Stratify on the outcome so train/test keep the same PASS/FAIL mix
# (matches the Python version's `stratify = y`)
inspect_split <- initial_split(inspections, prop = 0.8, strata = results)
inspect_train <- training(inspect_split)
inspect_test <- testing(inspect_split)

# Recipe: predictors plus month/year features derived from the
# inspection date; the raw date column is dropped after step_date()
inspection_rec <-
recipe(results ~ facility_type + risk + total_violations + inspection_date,
data = inspect_train) |>
step_date(inspection_date, features = c("month", "year"), keep_original_cols = FALSE)

# Random forest (1000 trees) fit inside a workflow
inspection_fit <-
workflow(inspection_rec, rand_forest(mode = "classification", trees = 1e3)) |>
fit(data = inspect_train)
from sklearn import metrics
# Metrics to compute on the held-out test set
metric_set = [metrics.accuracy_score, metrics.f1_score, metrics.log_loss]

# Encode the string labels as integers so f1_score/log_loss accept them
y_predictions = pd.Series(inspection_fit.predict(X_test)).map({"PASS": 0, "FAIL": 1})
y_truth = y_test.map({"PASS": 0, "FAIL": 1})

# NOTE(review): log_loss is defined on predicted probabilities; feeding it
# hard 0/1 class labels (as here) inflates the loss — consider passing
# inspection_fit.predict_proba(X_test) for that metric.

# Build one single-row frame per metric and concatenate ONCE, instead of
# growing the DataFrame inside a loop (quadratic copying).
rows = [
    pd.DataFrame({"name": [metric.__name__],
                  "score": [metric(y_truth, y_predictions)]})
    for metric in metric_set
]
inspect_metrics = pd.concat(rows, axis=0).reset_index(drop=True)
inspect_metrics
#> name score
#> 0 accuracy_score 0.671449
#> 1 f1_score 0.626427
#> 2 log_loss 11.842176
# Metrics analogous to the Python version: accuracy, log loss, F-measure
inspect_metric_set <- metric_set(accuracy, mn_log_loss, f_meas)
# Predict on the test set, then compute all three metrics at once;
# mn_log_loss uses the FAIL class probability column (.pred_FAIL)
inspect_metrics <-
augment(inspection_fit, new_data = inspect_test) |>
inspect_metric_set(truth = results, estimate = .pred_class, .pred_FAIL)
inspect_metrics
#> # A tibble: 3 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.694
#> 2 f_meas binary 0.643
#> 3 mn_log_loss binary 0.601
Activity
Compute metrics for your model using the testing data.
Store these metrics as metadata in a vetiver model object.
Write this new vetiver model object as a new version of your pin.
07:00
How do we extract our metrics out to use them?
Activity
Obtain the metrics metadata for your versioned model.
Optional: Redeploy your model to your Connect server, then obtain the metrics metadata for your neighbor’s model by calling the /metadata
endpoint for their API.
What else might you want to store as model metadata?
How or when might you use model metadata?
07:00
# Generated by the vetiver package; edit with care
library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
# Packages needed to generate model predictions
if (FALSE) {
library(kernlab)
library(parsnip)
library(recipes)
library(workflows)
}
# Read the pinned, versioned model from Posit Connect; auth = "envvar"
# takes credentials from CONNECT_SERVER / CONNECT_API_KEY
b <- board_connect(auth = "envvar")
v <- vetiver_pin_read(b, "julia.silge/chicago-inspections-rstats", version = "78859")
# Plumber hook: mount the vetiver prediction endpoints onto the router
#* @plumber
function(pr) {
pr %>% vetiver_api(v)
}
Activity
Create a Plumber or FastAPI app file to serve your model’s predictions.
Run this app locally and check out the visual documentation again.
05:00
# Generated by the vetiver package; edit with care
library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
# Packages needed to generate model predictions
if (FALSE) {
library(kernlab)
library(parsnip)
library(recipes)
library(workflows)
}
# Connect board (credentials come from environment variables) plus the
# pinned, versioned vetiver model to serve
b <- board_connect(auth = "envvar")
v <- vetiver_pin_read(b, "julia.silge/chicago-inspections-rstats", version = "78859")
# Attach the vetiver prediction routes to the Plumber router
#* @plumber
function(pr) {
vetiver_api(pr, v)
}
# Generated by the vetiver package; edit with care
library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
library(lubridate)
# Packages needed to generate model predictions
if (FALSE) {
library(kernlab)
library(parsnip)
library(recipes)
library(workflows)
}
# Read the pinned, versioned model from Posit Connect (env-var auth)
b <- board_connect(auth = "envvar")
v <- vetiver_pin_read(b, "julia.silge/chicago-inspections-rstats", version = "78859")
# Custom endpoint handler: return the labelled weekday for the
# inspection_date field posted in the request body
handler_weekday <- function(req) {
wday(req$body$inspection_date, label = TRUE)
}
# Mount the standard vetiver routes, then add POST /weekday on top
#* @plumber
function(pr) {
pr |>
vetiver_api(v) |>
pr_post(path = "/weekday", handler = handler_weekday)
}
from vetiver import VetiverModel
import vetiver
import pins
import calendar
# Connect board; allow_pickle_read is required to deserialize the pinned
# model (pickle is only safe for pins from sources you trust)
b = pins.board_connect(allow_pickle_read=True)
v = VetiverModel.from_pin(b, 'isabel.zimmerman/inspection-result-python', version = '78841')
def get_month_names(x):
    """Map the integer ``month`` column of ``x`` to English month names.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain a ``month`` column with integers 1-12.

    Returns
    -------
    pandas.Series
        Month names (e.g. ``"January"``), aligned with ``x``'s index.
    """
    # Distinct lambda variable so it no longer shadows the DataFrame `x`
    mnth_name = x["month"].apply(lambda m: calendar.month_name[m])
    return mnth_name
# Build the FastAPI app: the standard vetiver routes plus a custom
# POST /get_month_names endpoint backed by get_month_names
vetiver_api = vetiver.VetiverAPI(v)
vetiver_api.vetiver_post(get_month_names, "get_month_names")
api = vetiver_api.app
Activity
Add a new endpoint to the API app file you already made.
Run the app locally and check out your new endpoint.
How might you want to use an additional endpoint?
07:00