2 min read

Boxplot with trend line

Background

Last week my colleague showed me a drawn graph and asked can we plot this kind of graph. It is a graph with grouped boxplot. Additionally, it contains a trend line that connects mean of each data box. So this post explains how to draw a graph like that using two ways;

The data

This data is sample data set from my colleague. The data contains VI (Vegetation Index) time series values for three different crops extracted from Landsat 8 data.

Load data

Load libraries

library(ggplot2)
library(readr)
library(plyr)
library(dplyr)
library(RColorBrewer)

Read and prepare data

read.df <- read.csv("D:/Blog_data/sample_ts.csv", header = TRUE)
sample.df <- read.df # Make a copy of the read table

levels(sample.df$date) # Checking the levels in the date column
## [1] "April"     "August"    "July"      "June"      "May"       "September"
levels(sample.df$crop) # Checking the levels in the crop column
## [1] "Crop 01" "Crop 02" "Crop 03"
# Reorder levels
sample.df$date <- factor(sample.df$date, levels = c("April", "May", "June", "July","August", "September"))

sample.df$crop <- factor(sample.df$crop, levels = c("Crop 01", "Crop 02", "Crop 03"))

Plot with ggplot library

# Create a color palette for three crops
my_colors = c("greenyellow", "gold3", "darksalmon")

# Create a plot
ggplot(data=sample.df, aes(x=date, y=NDVI, fill=crop)) +
  geom_boxplot() + ylim(c(-0.2,1.2)) +
  stat_summary(fun.y=mean, colour="red",
               geom="point",position=position_dodge(width=0.75)) +
  stat_summary(fun.y=mean, colour="red", aes(group=1),
               geom="line", lwd=1, lty=1) +
  theme_bw(base_size = 12) +
  scale_fill_manual(values = my_colors, name = "Crop Type", labels = c("Crop 01", "Crop 02", "Crop 03")) +
  labs(x="Date", y="VI", 
       title="Phenology of the selected crops", 
       subtitle="VI extracted from Landsat 8 time series data")

Plot with base library

# Make the boxplot
boxplot(sample.df$NDVI ~ sample.df$crop+sample.df$date , ylim=c(-0.2,1.2), xaxt="n" , xlab="Date" , col=my_colors , pch=20 , cex=0.3 , 
        ylab="VI", xlab="Date", 
        main="Phenology of the selected crops \n VI extracted from Landsat 8 time series data" )
abline(v= seq(0, 3*6, 3) + 0.5 , col="grey")
axis(1, labels=levels(sample.df$date) , at=seq(2,3*6,3) )


# Add general trend
a=aggregate(sample.df$NDVI , by=list(sample.df$crop, sample.df$date) , mean)
lines(a[,3], type="l" , col="red" , lwd=2)


#Add legend for crops
legend("top", fill = my_colors, legend = c("Crop 01", "Crop 02", "Crop 03"), horiz = T)