『データ可視化学入門』をRで書く 3.2 線で特徴をとらえる

3.2 線で特徴をとらえる

この記事はR言語 Advent Calendar 2023 シリーズ2の10日目の記事です。
qiita.com

3.2.1 時間推移を可視化する

##### 3.2.1 #####
library(tidyverse)
library(patchwork)

df <- read_csv(
  "https://www.esri.cao.go.jp/jp/sna/data/data_list/sokuhou/files/2023/qe233_2/tables/gaku-jfy2332.csv",
  col_names = FALSE,
  skip = 7,
  col_types = "cnn__nn_nn_______________________"
  ) |>
  drop_na() |>
  mutate( # 各金額を兆円単位に変換
  年度 = str_remove(X1, "/4-3.") |> as.integer(),
  `国内総生産(支出側)` = X2 / 1000,
  民間最終消費支出 = X3 / 1000,
  民間住宅 = X6 / 1000,
  民間企業設備 = X7 / 1000,
  政府最終消費支出 = X9 / 1000,
  公的固定資本形成 = X10 / 1000,
  .keep = "unused"
  )

p1 <- df |>
  ggplot(aes(x = 年度, y = `国内総生産(支出側)`)) +
  geom_line(color="skyblue") +
  labs(title = "折れ線グラフ", y = "国内総生産(支出側) [兆円]") +
  theme(axis.title.x=element_blank())

p2 <- df |>
  ggplot(aes(x = 年度, y = `国内総生産(支出側)`)) +
  geom_area(fill="skyblue") +
  labs(title = "エリアチャート", x = "年度") +
  theme(axis.title.y=element_blank())

p3 <- df |>
  select(!`国内総生産(支出側)`) |>
  pivot_longer(!年度, names_to = "sector", values_to = "val") |>
  mutate(
    sector = factor(sector, levels = rev(c("民間最終消費支出", "民間住宅", "民間企業設備", "政府最終消費支出", "公的固定資本形成")))
    ) |>
  ggplot(aes(x = 年度, y = val, fill = sector)) +
  geom_area() +
  labs(title = "積み上げエリアチャート", x = "年度", y = "国内総生産(支出側) [兆円]") +
  theme(
    axis.title.x=element_blank(),
    axis.title.y=element_blank()  
    )

p1 + p2 + p3
3.2.1

3.2.2 複数の時系列データの描画

##### 3.2.2 #####
library(tidyverse)

# データ生成
set.seed(0)

n_days <- 30  # 30日間
n_individuals <- 30  # 個体数
half_n <- as.integer(n_individuals / 2)  # 個体数の半分

# 各個体の活動量期待値を生成
expected_values <- c(
  rnorm(half_n, mean = 50, sd = 5),# 期待値50、標準偏差5の正規分布に従う乱数を生成
  rnorm(half_n, mean = 70, sd = 5)# 期待値70、標準偏差5の正規分布に従う乱数を生成
  )

# 各個体の30日間の活動量を生成
# 期待値expected_values、標準偏差10の正規分布に従う乱数を生成
activity_data <- map(expected_values, \(x) rnorm(n_days, mean = x, sd = 10)) |>
  list_c() |>
  matrix(ncol = 30) |>
  as.data.frame() |>
  set_names(paste0("v", formatC(1:30, width=2, flag="0"))) |>
  mutate(days = 1:30) |>
  pivot_longer(!days) |>
  group_by(days) |>
  mutate(
    ev = expected_values,
    group = c(rep("a", 15), rep("b", 15))
    ) |>
  ungroup()

# 全個体をそれぞれ違う色で描画
p1 <- activity_data |>
  ggplot(aes(x = days, y = value, color = name)) +
  geom_line() +
  labs(title = "すべての個体を異なる色で描画", x = "経過日数", y = "活動量スコア") +
  theme(
    legend.position="none"
    , axis.title.x=element_blank()
    , axis.title.y=element_blank()
  )

# 期待値最大の個体を強調
p2 <- activity_data |>
  mutate(color = if_else(ev == max(expected_values), "a", "b")) |>
  ggplot(aes(x = days, y = value, color = color, group=name)) +
  geom_line() +
  scale_color_manual(values = c("red", "grey")) +
  labs(title = "着目する個体だけ異なる色で描画", x = "経過日数", y = "活動量スコア") +
  theme(
    legend.position="none"
    , axis.title.x=element_blank()
    , axis.title.y=element_blank()
  )

# 15個体のグループごとに描画
p3 <- activity_data |>
  ggplot(aes(x = days, y = value, color = group, group=name)) +
  geom_line() +
  labs(title = "グループごとに異なる色で描画", x = "経過日数", y = "活動量スコア") +
  theme(
    legend.position="none"
    , axis.title.x=element_blank()
    , axis.title.y=element_blank()
  )

# 15個体のグループごとに平均と標準偏差で描画
p4 <- activity_data |>
  summarise(
    mean = mean(value),
    sd = sd(value),
    .by = c(group, days)
  ) |>
  ggplot(aes(x = days, y = mean, color = group, group=group)) +
  geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = group), alpha=0.2) +
  geom_line() +
  labs(title = "グループごとに平均と標準偏差で描画", x = "経過日数", y = "活動量スコア") +
  theme(
    legend.position="none"
    , axis.title.x=element_blank()
    , axis.title.y=element_blank()
  ) +
  coord_cartesian(ylim = c(20,100))

(p1 + p2 )/(p3 + p4)
3.2.2

3.2.3 スロープグラフで個々の変化をとらえる

##### 3.2.3 #####
library(tidyverse)
set.seed(0)

# ランダムな体重データを生成
weights_1 <- rnorm(50, 60, 2) # 平均60、分散4の正規分布
weights_2 <- weights_1 + rnorm(50, -1, 0.5) # 平均-1、分散0.25の正規分布
weights_3 <- weights_1 + rnorm(50, 0, 0.5) # 平均0、分散0.25の正規分布(追加)

# データフレームを作成
df <- data.frame(
  Weight = c(weights_1, weights_2),
  Group = factor(
    c(rep("実験開始時", 50), rep("一か月後",50)),
    levels=c("実験開始時", "一か月後")
    ),
  Person = c(seq(50), seq(50))
  )
  
# 点のみプロット
p1 <- df |>
  ggplot(aes(x = Group, y = Weight)) +
  geom_jitter(width = 0.05, height = 0, alpha = 0.5) +
  labs(x = "期間", y = "体重 [kg]") +
  coord_cartesian(ylim = c(55, 65)) +
  labs(title = "ストリッププロット") +
  theme(axis.title.x=element_blank()) 

df2 <- df |>
  pivot_wider(names_from = Group, values_from = Weight) |>
  mutate(diff = 一か月後 - 実験開始時) |>
  pivot_longer(!c(Person, diff), names_to = "Group", values_to = "Weight") |>
  mutate(Group = factor(Group, levels=c("実験開始時", "一か月後"))) 

# スロープグラフ
p2 <- df2 |>
  ggplot(aes(x = Group, y = Weight, group = Person, color = diff)) +
  geom_line(alpha = 0.5) +
  geom_point(alpha = 0.5) +
  scale_color_gradient2(
    midpoint = 0,
    limit = c(-2,2),
    low = "blue",
    mid = "white",
    high = "red",
    guide = "colorbar"
  ) +
  coord_cartesian(ylim = c(55, 65)) +
  labs(title = "スロープグラフ") +
  theme(
    axis.title.x=element_blank()
    , axis.title.y=element_blank()
  ) 

# スロープグラフ2
p3 <- data.frame(
  Weight = c(weights_1, weights_3),
  Group = factor(
    c(rep("実験開始時", 50), rep("一か月後",50)),
    levels=c("実験開始時", "一か月後")
  ),
  Person = c(seq(50), seq(50))
) |>
  pivot_wider(names_from = Group, values_from = Weight) |>
  mutate(diff = 一か月後 - 実験開始時) |>
  pivot_longer(!c(Person, diff), names_to = "Group", values_to = "Weight") |>
  mutate(Group = factor(Group, levels=c("実験開始時", "一か月後"))) |>
  ggplot(aes(x = Group, y = Weight, group = Person, color = diff)) +
  geom_line(alpha = 0.5) +
  geom_point(alpha = 0.5) +
  scale_color_gradient2(
    midpoint = 0,
    limit = c(-2,2),
    low = "blue",
    mid = "white",
    high = "red",
    guide = "colorbar"
    ) +
  coord_cartesian(ylim = c(55, 65)) +
  labs(title = "スロープグラフ") +
  theme(
    legend.position="none"
    , axis.title.x=element_blank()
    , axis.title.y=element_blank()
  ) 

p1 + p3 + p2
3.2.3

3.2.4 医薬品の月間販売額を要素分解する

##### 3.2.4 #####
library(tidyverse)
library(forecast)
library(patchwork)

# データの読み込み(インターネットからDL)
df <- read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv')

# 時系列データに変換
ts_df <- ts(
  df$value,
  start=c(year(df$date[1]), month(df$date[1])),
  frequency=12
  )

# 時系列データを分解
decomposed_add <- decompose(ts_df, type="additive") #加算
decomposed_mlt <- decompose(ts_df, type="multiplicative") #乗算

# プロット作成
p1 <- decomposed_add |>
  autoplot() +
  labs(title = "足し算で分解")

p2 <- decomposed_mlt |>
  autoplot() +
  labs(title = "掛け算で分解")

p1 + p2
3.2.4

第3章2節は以上です。