Utilize assign() and get() in loops when
dealing with a dynamic number of variables.
# Build the name "var.1" dynamically and bind the value 2 to it in the
# current environment (equivalent to writing var.1 = 2 by hand)
assign(paste("var", 1, sep="."),2)
# Retrieve the value of the var.1 variable by its name given as a string
get("var.1")#> [1] 2
# Seed taken from a date for reproducibility (1777-04-30 — presumably a nod
# to Gauss's birthday; any fixed integer works the same)
set.seed(as.numeric(as.Date("1777-04-30")))
# Create a data frame directly: one call builds the named ids and random
# values, replacing assign()/get() gymnastics for many related variables
my_data = data.frame(
id = paste("rval", 1:5, sep = "_"),
value = rnorm(5)
)
print(my_data)#> id value
#> 1 rval_1 0.0475086
#> 2 rval_2 1.0277163
#> 3 rval_3 -1.4956767
#> 4 rval_4 -1.2280738
#> 5 rval_5 -0.1876937
Create custom environments to control where R looks for variables, leveraging parent-child links.
# Parent environment holding shared defaults
defaults = new.env()
defaults$tax = 0.15
# Child environment: lookups that miss here continue into `defaults`
order = new.env(parent = defaults)
order$price = 200
# 'tax' is not in 'order'; get() walks up the parent chain and finds it
order$total = order$price * (1 + get("tax", envir = order))
order$total#> [1] 230
# Create a parent environment
# NOTE(review): the name `parent.env` shadows base::parent.env(); the code
# still works (R searches for a *function* in call positions), but a name
# like parent_env would be safer.
parent.env = new.env()
# Add some variables to the parent environment
parent.env$a = 10
parent.env$b = 5
# A simple calculation in the parent environment
parent.env$sum_parent = parent.env$a + parent.env$b # 15
# Check what's in the global environment (you should NOT see a, b, or sum_parent)
ls() # default is ls(globalenv())#> [1] "defaults" "my_data" "order" "parent.env" "var.1"
# NOTE(review): the two output lines below look like the recorded result of
# an ls(parent.env) call elided by extraction — confirm against the source.
#> [1] "a" "b" "sum_parent"
# [1] "a" "b" "sum_parent"
# Try to access 'a' from the global environment (this should give an error)
try(a, silent = FALSE)#> Error in eval(expr, envir) : object 'a' not found
# Error: object 'a' not found
# Correct way: explicitly get 'a' from parent.env
get("a", envir = parent.env) # 10#> [1] 10
Dynamically construct and evaluate expressions from strings, or extract data from model calls.
# Define an expression (an unevaluated code object)
expr = expression(sqrt(81))
# Evaluate the expression
result = eval(expr)
# Print the result
print(result)#> [1] 9
# Dynamically construct an expression from a string.
# NOTE: eval(parse(text = ...)) is a last resort — prefer [[ ]], do.call(),
# or plain functions where possible. Also, `**` is an undocumented alias
# for the `^` power operator.
x = 12
operation = "x ** 2"
expr = parse(text = operation)
# Evaluate the dynamically constructed expression
result = eval(expr)
# Print the result (returns 144)
print(result)#> [1] 144
set.seed(42)
# Linear data with Gaussian noise; true slope is 2.5
lm.data = data.frame(x = 1:10, y = 2.5 * (1:10) + rnorm(10))
lm.model = lm(y ~ x, data = lm.data)
# Extract the original data frame via the stored call: lm() records the call
# that created it, so eval()-ing its `data` element re-fetches the data
head(eval(lm.model$call$data), 3)#> [1] 1
# NOTE(review): the recorded output below (a 1:10 vector) does not match
# head(<data.frame>, 3); it appears garbled by extraction — confirm.
#> [1] 2
#> [1] 3
#> [1] 4
#> [1] 5
#> [1] 6
#> [1] 7
#> [1] 8
#> [1] 9
#> [1] 10
library(tictoc)
set.seed(1905)
n_sims = 100000
sim_means_grow = numeric(0) # Empty vector
tic("Without pre-allocation")
# Growing with c() reallocates and copies the whole vector on every
# iteration: O(n^2) total work
for (i in seq_len(n_sims)) {
sim_means_grow = c(sim_means_grow, mean(rnorm(50)))
}
toc()#> Without pre-allocation: 10.25 sec elapsed
sim_means_prealloc = numeric(n_sims) # Pre-allocated vector
tic("With pre-allocation")
# Writing into a pre-sized vector is O(n): no reallocation, no copying
for (i in seq_len(n_sims)) {
sim_means_prealloc[i] = mean(rnorm(50))
}
toc()#> With pre-allocation: 1.22 sec elapsed
Always preallocate storage before running for-loops to drastically improve efficiency.
set.seed(1925)
library(microbenchmark)
n = 1e4
# Benchmark the two filling strategies head-to-head, 10 runs each
bench = microbenchmark(
prealloc = {
v = numeric(n)
for (i in seq_len(n)) v[i] = i # EFFICIENT
},
grow = {
v = numeric(0)
for (i in seq_len(n)) v = c(v, i) # NOT EFFICIENT
},
times = 10
)
# 1. Save the filtered summary to a data frame
bench_summary = summary(bench)[, c("expr", "median", "min", "max")]
# 2. Round the numeric columns (columns 2, 3, and 4) to 0 decimal places
bench_summary[, 2:4] = round(bench_summary[, 2:4], digits = 0)
# 3. Print the clean results (times in the unit chosen by microbenchmark)
print(bench_summary)#> expr median min max
#> 1 prealloc 2 1 4
#> 2 grow 243 203 339
#> [,1] [,2] [,3]
#> [1,] 1 4 7
#> [2,] 2 5 8
#> [3,] 3 6 9
# Save the matrix as an .rds file in the current directory
# NOTE(review): `sample_matrix` is created in a chunk elided by extraction
# (its 3x3 print-out appears just above) — confirm against the source.
saveRDS(sample_matrix, file = "./sample_matrix.rds")
# Read the file back into a new variable to verify
restored_matrix = readRDS("./sample_matrix.rds")
# Check if the restored object is identical to the original; identical()
# performs an exact comparison, ideal for round-trip checks
identical(sample_matrix, restored_matrix)#> [1] TRUE
Use which() with arr.ind = TRUE to get row
and column indices in data frames.
#> [1] 2 5
#> [1] 2 4 5 6
#> [1] 7 6 7 9
#> [1] 6
#> [1] 7
#> [1] 9
#> [1] 1
#> a b
#> 1 1 5
#> 2 3 3
#> 3 5 1
#> row col
#> [1,] 3 1
#> [2,] 1 2
Group multiple outputs (data, models, plots) into a single nested list.
# Define a sample function that returns several named results at once
my_function = function(x, y){
  # A named list is R's idiomatic way to return multiple values together
  out = list(
    x_squared = x^2,
    y_cubed = y^3,
    sum_of_squares = x^2 + y^2
  )
  out
}
# Example usage of the function
output = my_function(3, 5)
# Access the third result (sum of 3^2 + 5^2)
output$sum_of_squares#> [1] 34
Pass varied arguments to internal functions using the ellipsis (`...`).
# Main plotting function: draws a styled scatter plot with a background
# grid and a spline trendline. Extra arguments in `...` flow through two
# levels of functions into plot() and points().
plot_modern = function(x, y, ...) {
  # Inner worker, nested to demonstrate `...` passing through call layers
  draw_styled = function(x, y, ...) {
    # Soft background color and roomy margins for the plot area
    par(bg = "#f4f7f6", mar = c(5, 5, 4, 2))
    # Empty canvas first (type = "n") so the grid sits under the data
    plot(x, y, type = "n", bty = "n", ...)
    # Clean white grid lines
    grid(nx = NULL, ny = NULL, col = "white", lty = 1, lwd = 1.5)
    # Smooth spline trendline drawn beneath the points
    lines(spline(x, y), col = "#b0bec5", lwd = 2)
    # Finally the points; `...` carries the caller's custom styling
    points(x, y, ...)
  }
  draw_styled(x, y, ...)
}
# 1. Generate sample data: quadratic trend plus Gaussian noise
set.seed(42)
x = 1:15
y = x^2 + rnorm(15, mean = 0, sd = 15)
# 2. Draw the standard base R plot (Left side)
plot(x, y,
main = "Standard Base R Plot",
xlab = "X", ylab = "Y")# 3. Draw our custom modern plot (Right side)
# All arguments after ylab travel through plot_modern's `...` into
# plot()/points() inside the internal helper
plot_modern(x, y,
main = "Quadratic Growth Trajectory",
xlab = "Time (X-axis)",
ylab = "Value (Y-axis)",
col = "#ffffff", # Point border color
bg = "#ff6b6b", # Point fill color (works with pch = 21)
pch = 21, # Filled circle
cex = 2, # Point size
lwd = 1.5, # Border thickness
family = "sans") # Modern sans-serif font
# Summarise a numeric vector and draw a histogram of it in a single call.
# Arguments captured by `...` are forwarded to summary(); `plot_args` is a
# named list of overrides for the histogram (main, xlab, col, border,
# breaks). Returns the summary object; the histogram is a side effect.
process_and_plot = function(data, plot_args = list(), ...) {
  # Internal: summary statistics, with the ellipsis arguments forwarded
  summarise_input = function(data, ...) summary(data, ...)
  # Internal: histogram with modern defaults, each overridable via plot_args
  draw_histogram = function(data, main = "Histogram", xlab = "Values",
                            col = "#90CAF9", border = "white", breaks = 15) {
    hist(data, main = main, xlab = xlab, col = col, border = border, breaks = breaks)
  }
  # Compute the statistics first, splicing `data` and `...` via do.call()
  stats_out = do.call(summarise_input, list(data = data, ...))
  # Then the plot: inject the data into the caller-supplied argument list
  plot_args$data = data
  do.call(draw_histogram, plot_args)
  # The statistics object is the value of the last expression
  stats_out
}
# Example usage
set.seed(1812)
sample_data = rnorm(100)
# Define custom arguments to override the histogram defaults
custom_plot_args = list(
main = "Customized Distribution",
col = "#5C6BC0",
border = "white",
breaks = 25
)
# Call the function:
# 'digits' and 'quantile.type' flow into the ellipsis (...) for the summary
# 'custom_plot_args' flows into the histogram
example_stats = process_and_plot(
sample_data,
plot_args = custom_plot_args,
digits = 2,
quantile.type = 5
)#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -3.3000 -0.5700 -0.0094 0.0400 0.8100 2.6000
Recreate the structure of a data frame and populate it cleanly.
# Create a sample data frame with some initial data
sample_df = data.frame(
ID = c(1, 2, 3),
Name = c("Alice", "Bruce", "Candice"),
Age = c(25, 30, 35),
Salary = c(50000, 55000, 60000)
)
# Print the sample data frame
sample_df# Fill the empty data frame by direct assignment
# NOTE(review): the chunk that creates `empty_df` (presumably something like
# empty_df = sample_df[0, ], which clones the structure with zero rows) was
# elided by extraction — confirm against the source notebook.
empty_df[1, ] = list(ID = 4, Name = "David", Age = 28, Salary = 55000)
empty_df[2, ] = list(ID = 5, Name = "Eva", Age = 32, Salary = 62000)
empty_df# set seed
set.seed(1918)
# Define matrix A (2x3); matrix() fills column-by-column by default
A = matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3)
print(A)#> [,1] [,2] [,3]
#> [1,] 1 3 5
#> [2,] 2 4 6
# NOTE(review): the outputs below (a 3x2 matrix and two products) belong to
# code chunks elided by extraction — confirm against the source notebook.
#> [,1] [,2]
#> [1,] 7 10
#> [2,] 8 11
#> [3,] 9 12
#> [,1] [,2]
#> [1,] 76 103
#> [2,] 100 136
# Define matrix D (2x2) and fill it by rows.
# Use TRUE rather than the reassignable shortcut T.
D = matrix(c(2, 4, 6, 8), nrow = 2, ncol = 2, byrow=TRUE)
print(D)#> [,1] [,2]
#> [1,] 2 4
#> [2,] 6 8
#> [,1] [,2]
#> [1,] 152 412
#> [2,] 600 1088
simple_logistic_NN=function(X,y, W0,b0, num_epochs,learning_rate, verbose=FALSE){
  # Train a single-neuron logistic "network" with full-batch gradient descent.
  #
  # X             -> n x num_features design matrix
  # y             -> length-n (or n x 1) vector of 0/1 labels
  # W0            -> num_features x 1 initial weight matrix
  # b0            -> scalar initial bias
  # num_epochs    -> number of full-batch gradient steps
  # learning_rate -> step size for the gradient-descent update
  # verbose       -> FALSE for silent training, or an integer k to print
  #                  progress every k epochs
  #
  # Returns a list: "W_est" (estimated weights), "b_est" (estimated bias),
  # "Loss_DF" (per-epoch loss data frame), "Gradients" (one column of
  # gradients per epoch).

  # Sigmoid activation maps the linear score to a probability in (0, 1)
  sigmoid = function(z) {
    1 / (1 + exp(-z))
  }
  # Number of data points and features
  n=dim(X)[1]
  num_features=dim(X)[2]
  # Fold the bias into the weight vector and append a column of 1s to X so
  # a single matrix product computes X %*% W + b for every row at once
  W_matrix=matrix(rbind(W0,b0), nrow=num_features+1,ncol=1)
  X_matrix=matrix(cbind(X, rep(1,n)),nrow=n,ncol=num_features+1)
  # Preallocate the loss data frame and the gradient matrix (never grow
  # objects inside a loop)
  col_names = c("Epoch", "Loss")
  df_loss = data.frame(matrix(NA, nrow = num_epochs, ncol = length(col_names)))
  colnames(df_loss) = col_names
  Gradients = matrix(NA,nrow= num_features+1, ncol=num_epochs)
  # Clipping constant so log() never sees an exact 0 or 1 (avoids -Inf loss
  # when the model saturates)
  eps = 1e-15
  # Training loop; seq_len() handles num_epochs == 0 safely, unlike 1:num_epochs
  for (epoch in seq_len(num_epochs))
  {
    # Forward pass
    z = X_matrix%*% W_matrix
    y_hat = sigmoid(z)
    # Binary cross-entropy loss on clipped probabilities
    y_hat_clipped = pmin(pmax(y_hat, eps), 1 - eps)
    loss = -mean(y * log(y_hat_clipped) + (1 - y) * log(1 - y_hat_clipped))
    # Save epoch and loss
    df_loss[epoch,]=c(epoch, loss)
    # Gradient of the loss w.r.t. the augmented weight vector
    dW = t(X_matrix) %*% (y_hat - y) / n
    # Save gradient
    Gradients[,epoch]=dW
    # Gradient-descent update of weights and bias together
    W_matrix = W_matrix - learning_rate * dW
    if(verbose!=FALSE){
      # Print loss every `verbose` epochs
      if (epoch %% verbose == 0) {
        cat("Epoch:", epoch, "Loss:", loss,"Weights:", W_matrix[1:num_features,],
            "Bias:", W_matrix[num_features+1,], "\n")
      }
    }
  }
  l=list("W_est" = W_matrix[1:num_features,],
         "b_est" = W_matrix[num_features+1,],
         "Loss_DF" = df_loss,
         "Gradients" = Gradients)
  return(l)
}
set.seed(1986)
# Generate a synthetic dataset: two standard-normal features; the label is 1
# exactly when x1 + x2 > 0, so the classes are linearly separable
n_examp_1 = 1000
X_examp_1 = matrix(rnorm(n_examp_1 * 2), ncol = 2)
y_examp_1 = as.numeric(X_examp_1[,1] + X_examp_1[,2] > 0)
# Initialize weights and bias with small uniform random values
W0_examp_1 = matrix(runif(2), nrow = 2)
b0_examp_1 = runif(1)
# Train for 1000 epochs, printing progress every 100 epochs (verbose = 100)
logistic_NN_training_examp_1=simple_logistic_NN(X=X_examp_1, y=y_examp_1,
W0=W0_examp_1,b0=b0_examp_1,
num_epochs=1000,
learning_rate=0.01,
verbose=100)#> Epoch: 100 Loss: 0.4321067 Weights: 0.3626608 0.9717162 Bias: 0.2820893
#> Epoch: 200 Loss: 0.3857622 Weights: 0.5567836 1.056564 Bias: 0.2469912
#> Epoch: 300 Loss: 0.351951 Weights: 0.7207754 1.134028 Bias: 0.2190585
#> Epoch: 400 Loss: 0.3262268 Weights: 0.8619835 1.206138 Bias: 0.1967264
#> Epoch: 500 Loss: 0.3059498 Weights: 0.9856042 1.274081 Bias: 0.1787698
#> Epoch: 600 Loss: 0.289491 Weights: 1.095368 1.338592 Bias: 0.1642525
#> Epoch: 700 Loss: 0.2758048 Weights: 1.194004 1.40016 Bias: 0.1524602
#> Epoch: 800 Loss: 0.2641951 Weights: 1.283555 1.459134 Bias: 0.1428455
#> Epoch: 900 Loss: 0.2541826 Weights: 1.365575 1.515776 Bias: 0.1349851
#> Epoch: 1000 Loss: 0.2454275 Weights: 1.441269 1.570297 Bias: 0.1285489
#> Estimated Weights: 1.441269 1.570297
#> Estimated Bias: 0.1285489
svd_custom = function(A) {
  # Compute a singular value decomposition A = U %*% Sigma %*% t(V) via the
  # eigen-decomposition of A^T A — the classic textbook construction
  # (numerically inferior to LAPACK's svd(), but instructive).
  #
  # A -> numeric matrix
  # Returns list(U, Sigma, V) with U nrow(A) x nrow(A), Sigma and V
  # ncol(A) x ncol(A); for square A this matches the original contract.
  if (!is.matrix(A)) stop("Input must be a matrix")
  # Step 1: A^T A is symmetric positive semi-definite
  AtA = t(A) %*% A
  # Step 2: its eigenvectors are the right singular vectors V
  eigen_AtA = eigen(AtA)
  eigenvalues = eigen_AtA$values
  V = eigen_AtA$vectors
  # Step 3: singular values; clamp tiny negative eigenvalues caused by
  # floating-point round-off so sqrt() cannot return NaN
  singular_values = sqrt(pmax(eigenvalues, 0))
  # Step 4: left singular vectors u_i = A v_i / sigma_i.
  # Loop only over min(nrow, ncol) columns: V has ncol(A) columns, so the
  # previous 1:nrow(A) bound over-indexed V for tall matrices.
  U = matrix(0, nrow = nrow(A), ncol = nrow(A))
  for (i in seq_len(min(nrow(A), ncol(A)))) {
    if (singular_values[i] != 0) {
      U[, i] = (A %*% V[, i]) / singular_values[i]
    }
  }
  # Step 5: diagonal Sigma. Pass nrow explicitly — diag(x) on a length-1
  # numeric would otherwise build an identity matrix of size floor(x).
  Sigma = diag(singular_values, nrow = length(singular_values))
  # Return the SVD components
  list(U = U, Sigma = Sigma, V = V)
}
set.seed(1903)
# Random 3x3 test matrix (seed 1903 was set at the end of the previous chunk)
A = matrix(rnorm(9), nrow = 3, ncol = 3)
svd_result =svd_custom(A)
# U %*% Sigma %*% t(V) reconstructs A (evaluates to TRUE)
all.equal(svd_result$U%*%svd_result$Sigma%*%t(svd_result$V), A)#> [1] TRUE
# Preallocate the empty list with 3 elements
# NOTE(review): naming this variable `list` shadows base::list(); calls like
# list(a = 5, ...) still work because R searches for a function in call
# positions, but a name such as `results` would be safer.
n = 3
list = vector("list", length = n)
# Fill in the first element with another list
list[[1]] = list(a = 5, b = 8, c = 13)
# Fill in the second element with a data frame
list[[2]] = data.frame(Name = c("Zu", "Georges"), Age = c(3, 14))
# Fill in the third element with a vector
list[[3]] = c(21, 34, 55, 89, 144)
list[[1]]$a#> [1] 5
#> [1] 3 14
#> [1] 21 34 55 89 144
Use environments for reference semantics and O(1) lookup.
# First way using an environment: named bindings act like a hash map with
# O(1) lookup and reference semantics
dict_env = new.env()
# Assign key-value pairs to the environment
dict_env$a = 2
dict_env$b = 7
dict_env$c = 1
# Add a new key-value pair
dict_env$d = 8
# Re-assign an existing key (here with the same value — `a` was already 2)
dict_env$a = 2
# List all objects in the environment
ls(dict_env)#> [1] "a" "b" "c" "d"
# NOTE(review): the output line below looks like the result of a dict_env$c
# lookup elided by extraction — confirm against the source notebook.
#> [1] 1
# Generate a palette of 100 colors with the rainbow function.
cols = rainbow(100)
# The first 100 odd numbers: 1, 3, ..., 199.
numb = seq(1,200,2)
# Preallocate three parallel lists for the colors, numbers, and indices
col_vals = vector("list",100)
num_vals = vector("list",100)
ind_vals = vector("list",100)
# Fill the parallel lists. seq_along() is safe for empty input (unlike
# 1:length(numb)), and the loop variable is named `i` rather than `c`,
# which would shadow the base c() function.
for(i in seq_along(numb)){
col_vals[[i]] = cols[i]
num_vals[[i]] = numb[i]
ind_vals[[i]] = i
}
# retrieve the color associated with the number 157 ("#AD00FF")
col_vals[[which(num_vals==157)]]#> [1] "#AD00FF"
# find the number that corresponds to the color "#AD00FF" (157)
num_vals[[which(col_vals=="#AD00FF")]]#> [1] 157
#check if the color and the number are associated with the same index (TRUE)
ind_vals[[which(col_vals=="#AD00FF")]]==ind_vals[[which(num_vals==157)]]#> [1] TRUE
R’s apply family avoids explicit loops with concise,
vectorized C-level execution.
#> [1] 5.5 6.5 7.5
#> [1] 1 1 1 1
set.seed(1942)
# 1. Create a list of score vectors -----------------------------
# \(i) is the base-R 4.1+ lambda shorthand; each class gets 50 scores with
# a mean that rises by 5 per class
scores = lapply(1:5, \(i) rnorm(50, mean = 70 + i * 5, sd = 8))
names(scores) = paste0("Class_", 1:5)
# 2. Compute the mean score for every class ---------------------
# lapply() keeps the result as a list of length-1 numerics;
# vapply(scores, mean, numeric(1)) would give a plain named vector instead
class_means = lapply(scores, mean)
class_means#> $Class_1
#> [1] 74.04006
#>
#> $Class_2
#> [1] 78.34366
#>
#> $Class_3
#> [1] 84.09832
#>
#> $Class_4
#> [1] 90.77986
#>
#> $Class_5
#> [1] 95.59702
# Create 3 normally distributed vectors with different means and SDs.
# mapply() iterates over `mean` and `sd` in parallel (row-wise over params)
set.seed(1914)
params = data.frame(mu = c(0, 5, 10), sd = c(1, 2, 3))
samples = mapply(
FUN = rnorm,
n = 5, # recycled for each call
mean = params$mu,
sd = params$sd,
SIMPLIFY = FALSE # keep as list of vectors
)
str(samples)#> List of 3
#> $ : num [1:5] -0.379 0.13 0.334 -1.887 2.051
#> $ : num [1:5] 10.09 4.61 7.11 1.09 5.21
#> $ : num [1:5] 13.17 11.02 8.87 9.4 9.94
nums = list(a = 1:4, b = 5:7, c = 8:9)
# Sums each element returning a numeric vector.
# NOTE: sapply() simplifies opportunistically; vapply(nums, sum, numeric(1))
# is the type-stable choice for non-interactive code.
sapply(nums, sum)#> a b c
#> 10 18 17
# NOTE(review): the two output lines below appear to belong to code chunks
# elided by extraction — confirm against the source notebook.
#> [1] 1 4 9 16 25 36
#> [1] 31
Use parLapply for cross-platform parallel execution, or
mclapply for Unix-based systems.
# Expensive worker: Monte Carlo estimate of pi. Draws n points uniformly in
# the unit square; 4 times the fraction landing inside the quarter circle
# x^2 + y^2 <= 1 estimates pi. (Draw order — xs first, then ys — is kept so
# results are identical under a fixed seed.)
estimate_pi = function(n = 1e6) {
  xs = runif(n)
  ys = runif(n)
  inside = xs^2 + ys^2 <= 1
  4 * mean(inside)
}
n_rep = 30
# ---------------------------------------------------------
# 1. SEQUENTIAL RUN
# ---------------------------------------------------------
set.seed(1986)
t_seq = system.time({
seq_results = lapply(1:n_rep, function(i) {
t0 = proc.time()["elapsed"]
pi_est = estimate_pi()
# Return both the estimate and the time it took
data.frame(run = i, pi_est = pi_est, elapsed = proc.time()["elapsed"] - t0)
})
})
# Bind into a single data frame
seq_df = do.call(rbind, seq_results)
# ---------------------------------------------------------
# 2. PARALLEL RUN
# ---------------------------------------------------------
# detectCores()/makeCluster()/parLapply() come from the parallel package
# (attached in this session; see sessionInfo below). Leave one core free.
n_cores = max(1, detectCores() - 1)
cl = makeCluster(n_cores)
# Ship the worker function to each cluster node
clusterExport(cl, varlist = "estimate_pi")
set.seed(1986)
t_par = system.time({
par_results = parLapply(cl, 1:n_rep, function(i) {
t0 = proc.time()["elapsed"]
pi_est = estimate_pi()
# Return both the estimate and the time it took
data.frame(run = i, pi_est = pi_est, elapsed = proc.time()["elapsed"] - t0)
})
})
# Always release the workers when done
stopCluster(cl)
# Bind into a single data frame
par_df = do.call(rbind, par_results)
# ---------------------------------------------------------
# 3. COMPARE RESULTS
# ---------------------------------------------------------
# Check if the estimates are mathematically equivalent.
# NOTE: set.seed() on the master does not control worker RNG streams — use
# clusterSetRNGStream(cl, seed) for reproducible parallel draws. That is why
# all.equal() reports a small difference below instead of TRUE.
all.equal(seq_df$pi_est, par_df$pi_est)#> [1] "Mean relative difference: 0.0005693522"
#> Total Sequential Time: 1.54 seconds
#> Total Parallel Time: 0.11 seconds
#> Avg Sequential per run: 0.051 seconds
#> Avg Parallel per run: 0.079 seconds
# Install if needed: install.packages(c("future", "future.apply"))
library(future)
library(future.apply)
# 1. Define the workhorse function.
# NOTE: this redefinition evaluates both runif(n) calls inside one
# expression, so its draw order differs from the earlier x-then-y version.
estimate_pi = function(n = 1e6) {
mean(runif(n)^2 + runif(n)^2 <= 1) * 4
}
n_rep = 20
# 2. Tell R your "plan" (run asynchronously across multiple background sessions)
plan(multisession, workers = availableCores() - 1)
set.seed(1986)
# 3. Execute! future_lapply acts just like lapply, but routes traffic asynchronously
t_future = system.time({
# future.seed = TRUE ensures random numbers are safely generated across cores
future_results = future_lapply(1:n_rep, function(i) {
t0 = proc.time()["elapsed"]
pi_est = estimate_pi()
data.frame(run = i, pi_est = pi_est, elapsed = proc.time()["elapsed"] - t0)
}, future.seed = TRUE)
})
# 4. Bind the results
future_df = do.call(rbind, future_results)
# 5. Always explicitly close background workers when done
plan(sequential)
cat("Total Future Async Time: ", round(t_future["elapsed"], 3), "seconds\n")#> Total Future Async Time: 0.23 seconds
#> Average Future Async per run: 0.056 seconds
Execute Python scripts and pass data frames back and forth using
{reticulate}.
#> python: C:/Users/vadim/AppData/Local/R/cache/R/reticulate/uv/cache/archive-v0/hTrjH-QW50JlBG25MDlZa/Scripts/python.exe
#> libpython: C:/Users/vadim/AppData/Local/R/cache/R/reticulate/uv/python/cpython-3.12.13-windows-x86_64-none/python312.dll
#> pythonhome: C:/Users/vadim/AppData/Local/R/cache/R/reticulate/uv/cache/archive-v0/hTrjH-QW50JlBG25MDlZa
#> virtualenv: C:/Users/vadim/AppData/Local/R/cache/R/reticulate/uv/cache/archive-v0/hTrjH-QW50JlBG25MDlZa/Scripts/activate_this.py
#> version: 3.12.13 (main, Apr 7 2026, 20:53:22) [MSC v.1944 64 bit (AMD64)]
#> Architecture: 64bit
#> numpy: C:/Users/vadim/AppData/Local/R/cache/R/reticulate/uv/cache/archive-v0/hTrjH-QW50JlBG25MDlZa/Lib/site-packages/numpy
#> numpy_version: 2.4.4
#>
#> NOTE: Python version was forced by VIRTUAL_ENV
# Run Python inline
py_run_string("
x = [1, 2, 3]
y = [i**2 for i in x]
")
# Access Python variables natively in R
py$y
py$x
# NOTE(review): the two statements above and below were fused by chunk
# extraction into `py$xnp = import("numpy")`; they are split here so that
# `np` is bound to the numpy module (it is used as np$array below).
np = import("numpy")
# Create NumPy arrays and compute statistics
a = np$array(c(1, 2, 3, 4, 5))
mean_a = np$mean(a)
mean_a#> [1] 3
# Python chunk executed through reticulate's python engine
import pandas as pd
# Pull the dataframe from R into Python (R objects are exposed via `r.`)
# NOTE(review): `r_df` is created in an R chunk elided by extraction — confirm
py_df = r.r_df
# Perform our Python operations: add a derived column (printed back in R)
py_df['sum'] = py_df['a'] + py_df['b']#> a b sum
#> 1 1 4 5
#> 2 2 5 7
#> 3 3 6 9
Note: The following code blocks are structured but intentionally unevaluated (`eval=FALSE`) in this notebook, to prevent rendering failures on environments lacking CUDA/GPU configurations.
Note on Windows Compatibility: Enabling GPU acceleration for TensorFlow in a Windows environment is not a “plug-and-play” process. This requires a specific infrastructure stack: a WSL2 (Windows Subsystem for Linux) installation, an Ubuntu R Server and the dedicated GPU version of TensorFlow. For a step-by-step walkthrough on configuring this environment, please refer to Chapter 6 of my Medium article, where I cover the full backend setup required to get R and your GPU talking to each other.
set.seed(1925)
n_per_class <- 2000
# Inner circle (class 0): points at radius ~1 with small radial noise
theta1 <- runif(n_per_class, 0, 2 * pi)
r1 <- rnorm(n_per_class, mean = 1, sd = 0.08)
x1 <- cbind(r1 * cos(theta1), r1 * sin(theta1))
y1 <- rep(0, n_per_class)
# Outer ring (class 1): radius ~2, so the two classes can only be separated
# by a non-linear (circular) decision boundary
theta2 <- runif(n_per_class, 0, 2 * pi)
r2 <- rnorm(n_per_class, mean = 2, sd = 0.08)
x2 <- cbind(r2 * cos(theta2), r2 * sin(theta2))
y2 <- rep(1, n_per_class)
# Full dataset
x <- rbind(x1, x2)
y <- c(y1, y2)
# Shuffle rows so classes are interleaved before splitting
idx <- sample(seq_len(nrow(x)))
x <- x[idx, ]
y <- y[idx]
# Train / test split (80/20)
n <- nrow(x)
n_tr <- floor(0.8 * n)
x_train <- x[1:n_tr, ]
y_train <- y[1:n_tr]
x_test <- x[(n_tr + 1):n, ]
y_test <- y[(n_tr + 1):n]
# Small fully connected network; a single sigmoid output unit for binary
# classification (chunk is eval=FALSE — requires keras/TensorFlow)
model <- keras_model_sequential() %>%
layer_dense(units = 32, activation = "relu", input_shape = 2) %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 32, activation = "relu") %>%
layer_dense(units = 1, activation = "sigmoid") # binary classification
model %>%
compile(
optimizer = "adam",
loss = "binary_crossentropy",
metrics = "accuracy"
)
# summary(model)
batch_size <- 128
epochs <- 20
# Time the fit; validation_split holds out 20% of the training data
t <- system.time({
history <- model %>%
fit(
x = x_train, y = y_train,
batch_size = batch_size,
epochs = epochs,
validation_split = 0.2,
verbose = 2
)
})
cat("Training elapsed time (GPU-backed TF):", t["elapsed"], "seconds\n")
# Evaluate held-out performance (loss and accuracy)
model %>%
  evaluate(x_test, y_test, verbose = 0)

# NOTE(review): `library(ggplot2)` was fused onto the evaluate() line above
# by chunk extraction; restored onto its own line here.
library(ggplot2)
library(viridis)
# Define grid for visualization
x_min <- min(x[,1]) - 0.2
x_max <- max(x[,1]) + 0.2
y_min <- min(x[,2]) - 0.2
y_max <- max(x[,2]) + 0.2
grid <- expand.grid(
  x1 = seq(x_min, x_max, length.out = 400), # Higher resolution for crispness
  x2 = seq(y_min, y_max, length.out = 400)
)
# Predict probabilities over the whole grid
grid$prob <- as.numeric(model %>% predict(as.matrix(grid), verbose = 0))
ggplot() +
  # 1. Background Grid/Heatmap (Smooth Ukrainian Flag gradient)
  # Low prob (Inner) -> Blue. High prob (Outer) -> Yellow.
  geom_raster(data = grid, aes(x = x1, y = x2, fill = prob), interpolate = TRUE) +
  # 2. Add a strong decision boundary line at p = 0.5
  geom_contour(data = grid, aes(x = x1, y = x2, z = prob),
               breaks = 0.5, color = "white", linetype = "solid", linewidth = 1.2) +
  # 3. Add the points (Small and translucent so they don't hide the boundary)
  geom_point(aes(x = x[,1], y = x[,2], color = factor(y)), size = 0.6, alpha = 0.3) +
  # --- Color Palettes (Ukrainian Flag Theme) ---
  # Background Fill: Cividis provides a great smooth blue-to-yellow gradient
  scale_fill_viridis_c(option = "cividis", direction = -1,
                       name = "Predicted\nProbability", limits = c(0, 1),
                       breaks = c(0, 0.5, 1), labels = c("Inner", "Boundary", "Outer")) +
  # Points Color: Force them to pop against the blue/yellow background
  scale_color_manual(values = c("black", "red"),
                     name = "True Class", labels = c("Inner (0)", "Outer (1)")) +
  # --- Theme and Labs ---
  labs(
    title = "Neural Network classification Boundary",
    subtitle = "Modern TensorFlow/Keras on GPU via WSL2",
    x = "Feature 1",
    y = "Feature 2"
  ) +
  coord_fixed() + # Critical: keeps the circle circular
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    # Center the title and make it bold
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    plot.caption = element_text(size = 9, color = "gray50"),
    panel.grid.major = element_line(color = "gray90"),
    plot.background = element_rect(fill = "white", color = NA)
  )
)#> R version 4.5.2 (2025-10-31 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 26200)
#>
#> Matrix products: default
#> LAPACK version 3.12.1
#>
#> locale:
#> [1] LC_COLLATE=English_Canada.utf8 LC_CTYPE=English_Canada.utf8
#> [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C
#> [5] LC_TIME=English_Canada.utf8
#>
#> time zone: America/Toronto
#> tzcode source: internal
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] reticulate_1.45.0.9000 future.apply_1.20.2 future_1.69.0
#> [4] microbenchmark_1.5.0 tictoc_1.2.1
#>
#> loaded via a namespace (and not attached):
#> [1] vctrs_0.7.1 cli_3.6.5 knitr_1.51 rlang_1.1.7
#> [5] xfun_0.56 otel_0.2.0 png_0.1-9 jsonlite_2.0.0
#> [9] listenv_0.10.0 htmltools_0.5.9 sass_0.4.10 rmarkdown_2.30
#> [13] grid_4.5.2 evaluate_1.0.5 jquerylib_0.1.4 fastmap_1.2.0
#> [17] yaml_2.3.12 lifecycle_1.0.5 compiler_4.5.2 codetools_0.2-20
#> [21] Rcpp_1.1.1 rstudioapi_0.18.0 lattice_0.22-9 digest_0.6.39
#> [25] R6_2.6.1 parallelly_1.46.1 Matrix_1.7-4 bslib_0.10.0
#> [29] withr_3.0.2 tools_4.5.2 globals_0.19.0 cachem_1.1.0