R Kernel Example

This notebook demonstrates using the R kernel (IRkernel) for statistical computing and data analysis.

Kernel Information

This notebook uses the R kernel. To use it, install the IRkernel package and register the kernel with Jupyter by running the following in an R session:

# Install the IRkernel package
install.packages("IRkernel")
# Register the R kernel with Jupyter so it can be selected for notebooks
IRkernel::installspec()

In [ ]:

Copied!

# Display the R version string, followed by the full session details
R.version.string
sessionInfo()

R Features

R is excellent for:

- Statistical Analysis
- Data Visualization
- Machine Learning
- Bioinformatics
- Academic Research

In [ ]:

Copied!





# Example: Basic data structures in R
# Build a small data frame with one row per programming language
languages <- data.frame(
  name = c("Python", "R", "Julia", "Ruby"),
  type = c("General", "Statistical", "Scientific", "General"),
  year_created = c(1991, 1993, 2012, 1995),
  data_science_score = c(9, 10, 8, 5),
  # Explicit so text columns stay character on R < 4.0, where the default
  # was to convert strings to factors
  stringsAsFactors = FALSE
)

# Show a caption, then the table itself. writeLines() emits the caption as
# plain text; print() would wrap it in quotes behind a [1] index prefix.
writeLines("Programming Languages for Data Science:")
languages

In [ ]:

Copied!





# Statistical summary of the scores
summary(languages$data_science_score)

# Age of each language in calendar years, relative to today's date
current_year <- as.numeric(format(Sys.Date(), "%Y"))
languages$age <- current_year - languages$year_created

# writeLines() renders the leading "\n" as a real blank line; print() would
# display the escape sequence literally inside a quoted string.
writeLines("\nLanguages with age:")
# Oldest language first
languages[order(languages$age, decreasing = TRUE), ]

Data Visualization with Base R

In [ ]:

Copied!





# Create visualizations
# Bar plot of the data science score for each language.
# barplot() returns the x coordinate of every bar's midpoint; keeping it lets
# the value labels follow the bars for any number of rows, bar width or
# spacing instead of relying on hand-computed positions.
bar_midpoints <- barplot(
  languages$data_science_score,
  names.arg = languages$name,
  main = "Data Science Scores by Language",
  ylab = "Score",
  col = c("#3776ab", "#276DC3", "#9558B2", "#CC342D"),
  # Upper limit above the highest score leaves room for the labels
  ylim = c(0, 12)
)

# Add value labels just above each bar
text(
  x = bar_midpoints,
  y = languages$data_science_score + 0.5,
  labels = languages$data_science_score
)

In [ ]:

Copied!





# Statistical analysis example: simple linear regression on simulated data
# Generate sample data (fixed seed so the sample is reproducible)
set.seed(42)
n <- 100
x <- rnorm(n, mean = 50, sd = 10)
# y follows a line with slope 2 through the origin, plus Gaussian noise
y <- 2 * x + rnorm(n, mean = 0, sd = 5)

# Scatter plot of the sample; semi-transparent points keep overlaps visible
plot(
  x, y,
  main = "Linear Regression Example",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19,
  col = rgb(0, 0, 1, 0.5)
)

# Fit the linear model y ~ x and overlay the fitted line
model <- lm(y ~ x)
abline(model, col = "red", lwd = 2)

# Annotate the plot with R-squared, rounded to three decimals
r_squared <- round(summary(model)$r.squared, 3)
text(40, 120, paste("R² =", r_squared), pos = 4)

Advanced Statistical Analysis

In [ ]:

Copied!





# ANOVA example: compare the means of three simulated groups
# Create sample data for three groups (fixed seed for reproducibility);
# the groups share a standard deviation and differ only in their mean
set.seed(123)
group_a <- rnorm(30, mean = 100, sd = 15)
group_b <- rnorm(30, mean = 110, sd = 15)
group_c <- rnorm(30, mean = 105, sd = 15)

# Combine into a long-format data frame: one row per observation, with the
# group label stored as a factor
experiment_data <- data.frame(
  value = c(group_a, group_b, group_c),
  group = factor(rep(c("A", "B", "C"), each = 30))
)

# Perform one-way ANOVA of value by group and show the ANOVA table
anova_result <- aov(value ~ group, data = experiment_data)
summary(anova_result)

# Box plot of the per-group distributions
boxplot(
  value ~ group,
  data = experiment_data,
  main = "Group Comparison",
  xlab = "Group",
  ylab = "Value",
  col = c("lightblue", "lightgreen", "lightcoral")
)

In [ ]:

Copied!





# Time series analysis example: one year of simulated daily observations
# Seed the generator so the noise, and therefore the plot, is reproducible
# when this cell is run on its own
set.seed(2023)
n_days <- 365

# Generate time series data: linear trend + one sine cycle + Gaussian noise
dates <- seq(as.Date("2023-01-01"), by = "day", length.out = n_days)
trend <- seq(100, 150, length.out = n_days)
seasonal <- 10 * sin(2 * pi * seq_len(n_days) / n_days)
noise <- rnorm(n_days, mean = 0, sd = 5)
values <- trend + seasonal + noise

# Create time series plot of the observed values
plot(
  dates, values,
  type = "l",
  main = "Time Series Analysis",
  xlab = "Date",
  ylab = "Value",
  col = "blue"
)

# Add trend line (the noise-free linear component)
lines(dates, trend, col = "red", lwd = 2)

# Add legend; line widths match the two series drawn above
legend(
  "topleft",
  legend = c("Observed", "Trend"),
  col = c("blue", "red"),
  lty = 1,
  lwd = c(1, 2)
)

R Packages and Functions

In [ ]:

Copied!





# Custom functions

# Summarise a vector, ignoring missing values.
#
# Args:
#   x: vector to summarise; NA entries are excluded from every statistic.
#
# Returns:
#   A named list with elements mean, median, sd, min, max and n, where n is
#   the number of non-missing values in x.
calculate_stats <- function(x) {
  list(
    mean = mean(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    max = max(x, na.rm = TRUE),
    # Count the non-missing values directly instead of building a subset
    n = sum(!is.na(x))
  )
}

# Test the function on a sample that contains one missing value
test_data <- c(23, 45, 67, 89, 12, 34, 56, 78, 90, NA, 43)
results <- calculate_stats(test_data)

writeLines("Statistical Summary:")
# sprintf() is vectorised over the names and values, so a single call formats
# every row; sep = "" because each formatted row already ends in a newline
cat(sprintf("%-10s: %.2f\n", names(results), unlist(results)), sep = "")

Installing R Packages

You can install R packages directly from a notebook cell:

In [ ]:

Copied!





# Example of installing packages (commented out to avoid actual installation)
# install.packages("ggplot2")
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("caret")

# Emit the notes as plain text, one per line. A single writeLines() call
# avoids the [1] prefix and surrounding quotes that print() adds to each
# string.
writeLines(c(
  "Use install.packages('package_name') to install R packages",
  "Popular R packages for data science:",
  "- ggplot2: Advanced data visualization",
  "- dplyr: Data manipulation",
  "- tidyr: Data tidying",
  "- caret: Machine learning"
))

Summary

The R kernel (IRkernel) provides:

- Comprehensive statistical computing capabilities
- Excellent built-in visualization functions
- Extensive package ecosystem (CRAN)
- Specialized tools for various domains
- Strong academic and research community

R in Jupyter is ideal for:

- Statistical analysis and modeling
- Data visualization and exploration
- Academic research and publication
- Bioinformatics and genomics
- Time series analysis