Skip to contents

Existing pipeline

Let’s start where we left off in the Get started with pipeflow vignette, that is, we have the following pipeline

pip
# <pipeflow_pip> my-pip (4 steps)
# -------------------------------
#          step             depends                out state
# 1:       data                     <data.frame[10x6]>  done
# 2:  data_prep                data <data.frame[10x7]>  done
# 3:  model_fit           data_prep           <lm[13]>  done
# 4: model_plot model_fit,data_prep  <ggplot2::ggplot>  done

with the following set data

pip_get_params(pip)[["data"]] |> head(3)
#   Ozone Solar.R Wind Temp Month Day
# 1    41     190  7.4   67     5   1
# 2    36     118  8.0   72     5   2
# 3    12     149 12.6   74     5   3

Insert new step

Let’s say we want to insert a new step after the data_prep step that standardizes the y-variable.

pip |> pip_add(
    "standardize",
    function(
        data = ~data_prep,
        yVar = "Ozone"
    ) {
        data[, yVar] <- scale(data[, yVar])
        data
    },
    after = "data_prep"
)
pip
# <pipeflow_pip> my-pip (5 steps)
# -------------------------------
#           step             depends                out state
# 1:        data                     <data.frame[10x6]>  done
# 2:   data_prep                data <data.frame[10x7]>  done
# 3: standardize           data_prep             [NULL]   new
# 4:   model_fit           data_prep           <lm[13]>  done
# 5:  model_plot model_fit,data_prep  <ggplot2::ggplot>  done
library(visNetwork)
do.call(visNetwork, args = pip_get_graph(pip)) |>
    visHierarchicalLayout(direction = "LR", sortMethod = "directed")

The standardize step is now part of the pipeline, but so far it is not used by any other step.

Replace existing steps

Let’s revisit the function definition of the model_fit step

pip[["model_fit", "fun"]]
# function (data = ~data_prep, xVar = "Solar.R") 
# {
#     lm(paste("Ozone ~", xVar), data = data)
# }

To use the standardized data, we need to change the data dependency such that it refers to the standardize step. Also instead of a fixed y-variable in the model, let’s pass it as a parameter.

pip |> pip_replace(
    "model_fit",
    function(
        data = ~standardize, # <- changed data reference
        xVar = "Temp.Celsius",
        yVar = "Ozone" # <- new y-variable
    ) {
        lm(paste(yVar, "~", xVar), data = data)
    }
)

The model_plot step needs to be updated in a similar way.

pip |> pip_replace(
    "model_plot",
    function(
        model = ~model_fit,
        data = ~standardize, # <- changed data reference
        xVar = "Temp.Celsius",
        yVar = "Ozone", # <- new y-variable
        title = "Linear model fit"
    ) {
        coeffs <- coefficients(model)
        ggplot(data) +
            geom_point(aes(.data[[xVar]], .data[[yVar]])) +
            geom_abline(intercept = coeffs[1], slope = coeffs[2]) +
            labs(title = title)
    }
)

The updated pipeline now looks as follows.

pip
# <pipeflow_pip> my-pip (5 steps)
# -------------------------------
#           step               depends                out state
# 1:        data                       <data.frame[10x6]>  done
# 2:   data_prep                  data <data.frame[10x7]>  done
# 3: standardize             data_prep             [NULL]   new
# 4:   model_fit           standardize             [NULL]   new
# 5:  model_plot model_fit,standardize             [NULL]   new

We see that the model_fit and model_plot steps now use (i.e., depend on) the standardized data. Let’s re-run the pipeline and inspect the output.

pip_set_params(pip, params = list(xVar = "Solar.R", yVar = "Wind"))
pip_run(pip)
# info [2026-06-07 15:34:31.523 UTC]: Start run of pipeflow_pip 'my-pip'
# info [2026-06-07 15:34:31.524 UTC]: Step 1/5 data - skipping done step
# info [2026-06-07 15:34:31.524 UTC]: Step 2/5 data_prep - skipping done step
# info [2026-06-07 15:34:31.524 UTC]: Step 3/5 standardize
# info [2026-06-07 15:34:31.526 UTC]: Step 4/5 model_fit
# info [2026-06-07 15:34:31.529 UTC]: Step 5/5 model_plot
# info [2026-06-07 15:34:31.540 UTC]: Finished run of pipeflow_pip 'my-pip'
pip[["model_fit", "out"]] |> coefficients()
#  (Intercept)      Solar.R 
#  0.979672739 -0.006625601
pip[["model_plot", "out"]]

model-plot

Removing steps

Let’s see the pipeline again.

pip
# <pipeflow_pip> my-pip (5 steps)
# -------------------------------
#           step               depends                out state
# 1:        data                       <data.frame[10x6]>  done
# 2:   data_prep                  data <data.frame[10x7]>  done
# 3: standardize             data_prep <data.frame[10x7]>  done
# 4:   model_fit           standardize           <lm[13]>  done
# 5:  model_plot model_fit,standardize  <ggplot2::ggplot>  done

When you are trying to remove a step, {pipeflow} by default checks if the step is used by any other step, and raises an error if removing the step would violate the integrity of the pipeline.

try(pip_remove(pip, "standardize"))
# Error in pip_remove(pip, "standardize") : 
#   cannot remove step 'standardize' because the following steps depend on it: 'model_fit', 'model_plot'

To enforce removing a step together with all its downstream dependencies, you can use the recursive argument.

pip_remove(pip, "standardize", recursive = TRUE)
# Removing step 'standardize' and its downstream dependencies: 'model_fit', 'model_plot'
pip
# <pipeflow_pip> my-pip (2 steps)
# -------------------------------
#         step depends                out state
# 1:      data         <data.frame[10x6]>  done
# 2: data_prep    data <data.frame[10x7]>  done

Naturally, the last step never has any downstream dependencies, so it can be removed without any issues.

last_step <- tail(pip[["step"]], 1)
pip_remove(pip, last_step)
pip
# <pipeflow_pip> my-pip (1 step)
# ------------------------------
#    step depends                out state
# 1: data         <data.frame[10x6]>  done

Replacing steps in a pipeline as shown in this vignette will allow to re-use existing pipelines and adapt them programmatically to new requirements. Another way of re-using pipelines is to combine them, which is shown in the Combining pipelines vignette.