Analysis

Load the packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
Warning: package 'plotly' was built under R version 4.4.3

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(countrycode)
Warning: package 'countrycode' was built under R version 4.4.3
library(rnaturalearth)
Warning: package 'rnaturalearth' was built under R version 4.4.3
library(htmlwidgets)
Warning: package 'htmlwidgets' was built under R version 4.4.3
library(htmltools)

Load the cleaned data back

# Read the cleaned dataset; no col_types are given, so readr guesses them.
df_clean <- read_csv("df_clean.csv")
Rows: 5579 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): report, country, species, disease, status, notes
dbl (1): year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Analysis

Inspect the cleaned data

How many kinds of species are included in this dataset?

# Distinct species labels, in order of first appearance
df_clean |>
  distinct(species) |>
  pull(species)
[1] "Unknown" "Poultry" "Cattle"  "Goat"    "Horse"   "Swine"   "Canine" 

What are the countries included in this dataset? Are they complete or not?

# Distinct country / territory names, in order of first appearance
df_clean |>
  distinct(country) |>
  pull(country)
  [1] "Mongolia"                                    
  [2] "Niger"                                       
  [3] "Israel"                                      
  [4] "Kazakhstan"                                  
  [5] "China (People's Rep. of)"                    
  [6] "Russia"                                      
  [7] "Finland"                                     
  [8] "Zambia"                                      
  [9] "Vietnam"                                     
 [10] "Moldova"                                     
 [11] "Lithuania"                                   
 [12] "Cameroon"                                    
 [13] "Namibia"                                     
 [14] "Hong Kong"                                   
 [15] "Senegal"                                     
 [16] "Slovenia"                                    
 [17] "Romania"                                     
 [18] "Switzerland"                                 
 [19] "Ireland"                                     
 [20] "Italy"                                       
 [21] "Guatemala"                                   
 [22] "Mozambique"                                  
 [23] "Canada"                                      
 [24] "Korea (Rep. of)"                             
 [25] "Belgium"                                     
 [26] "Denmark"                                     
 [27] "Iceland"                                     
 [28] "Czech Republic"                              
 [29] "Slovakia"                                    
 [30] "United States of America"                    
 [31] "Bulgaria"                                    
 [32] "Sweden"                                      
 [33] "Ukraine"                                     
 [34] "United Kingdom"                              
 [35] "France"                                      
 [36] "Germany"                                     
 [37] "Japan"                                       
 [38] "Togo"                                        
 [39] "Norway"                                      
 [40] "Chinese Taipei"                              
 [41] "Mexico"                                      
 [42] "Pakistan"                                    
 [43] "Djibouti"                                    
 [44] "Portugal"                                    
 [45] "Spain"                                       
 [46] "Latvia"                                      
 [47] "Philippines"                                 
 [48] "Croatia"                                     
 [49] "Peru"                                        
 [50] "Ecuador"                                     
 [51] "Ceuta"                                       
 [52] "Australia"                                   
 [53] "Iran"                                        
 [54] "Uruguay"                                     
 [55] "Singapore"                                   
 [56] "Hungary"                                     
 [57] "Azerbaijan"                                  
 [58] "Iraq"                                        
 [59] "South Africa"                                
 [60] "Albania"                                     
 [61] "Austria"                                     
 [62] "Jordan"                                      
 [63] "Malawi"                                      
 [64] "Faeroe Islands"                              
 [65] "Greece"                                      
 [66] "Belize"                                      
 [67] "Saudi Arabia"                                
 [68] "Colombia"                                    
 [69] "Benin"                                       
 [70] "Thailand"                                    
 [71] "Estonia"                                     
 [72] "Laos"                                        
 [73] "Bosnia and Herzegovina"                      
 [74] "Lesotho"                                     
 [75] "Poland"                                      
 [76] "Brazil"                                      
 [77] "Netherlands"                                 
 [78] "Luxembourg"                                  
 [79] "Cote D'Ivoire"                               
 [80] "Botswana"                                    
 [81] "Morocco"                                     
 [82] "Bolivia"                                     
 [83] "Türkiye (Rep. of)"                           
 [84] "Indonesia"                                   
 [85] "Costa Rica"                                  
 [86] "Cambodia"                                    
 [87] "Myanmar"                                     
 [88] "India"                                       
 [89] "Egypt"                                       
 [90] "Palestine"                                   
 [91] "Congo (Dem. Rep. of the)"                    
 [92] "New Zealand"                                 
 [93] "Malaysia"                                    
 [94] "Panama"                                      
 [95] "Burkina Faso"                                
 [96] "Argentina"                                   
 [97] "Chile"                                       
 [98] "Georgia"                                     
 [99] "Afghanistan"                                 
[100] "Serbia and Montenegro"                       
[101] "Zimbabwe"                                    
[102] "Chad"                                        
[103] "North Macedonia"                             
[104] "Angola"                                      
[105] "Kyrgyzstan"                                  
[106] "St. Helena"                                  
[107] "Libya"                                       
[108] "Tunisia"                                     
[109] "Kuwait"                                      
[110] "Algeria"                                     
[111] "Eswatini"                                    
[112] "Malta"                                       
[113] "Bhutan"                                      
[114] "Kenya"                                       
[115] "Armenia"                                     
[116] "Honduras"                                    
[117] "Montenegro"                                  
[118] "Mauritania"                                  
[119] "Gabon"                                       
[120] "Greenland"                                   
[121] "Nepal"                                       
[122] "Sudan"                                       
[123] "St. Lucia"                                   
[124] "Sri Lanka"                                   
[125] "Martinique"                                  
[126] "Dominican (Rep.)"                            
[127] "El Salvador"                                 
[128] "Cuba"                                        
[129] "Mauritius"                                   
[130] "Tajikistan"                                  
[131] "Papua New Guinea"                            
[132] "Belarus"                                     
[133] "Ghana"                                       
[134] "Uganda"                                      
[135] "Serbia"                                      
[136] "Tanzania"                                    
[137] "Lebanon"                                     
[138] "Nicaragua"                                   
[139] "Korea (Dem People's Rep. of)"                
[140] "Guinea"                                      
[141] "French Polynesia"                            
[142] "Cyprus"                                      
[143] "Oman"                                        
[144] "Congo (Rep. of the)"                         
[145] "Reunion"                                     
[146] "Nigeria"                                     
[147] "Turkmenistan"                                
[148] "Mali"                                        
[149] "Guinea-Bissau"                               
[150] "Bangladesh"                                  
[151] "Melilla"                                     
[152] "Rwanda"                                      
[153] "Cayman Islands"                              
[154] "Falkland Islands (Malvinas)"                 
[155] "Trinidad and Tobago"                         
[156] "Gambia"                                      
[157] "Burundi"                                     
[158] "Comoros"                                     
[159] "Central African (Rep.)"                      
[160] "Brunei"                                      
[161] "Madagascar"                                  
[162] "Bahrain"                                     
[163] "Liechtenstein"                               
[164] "Guadeloupe"                                  
[165] "Puerto Rico"                                 
[166] "Timor-Leste"                                 
[167] "Cabo verde"                                  
[168] "New Caledonia"                               
[169] "Ethiopia"                                    
[170] "United Arab Emirates"                        
[171] "Paraguay"                                    
[172] "Haiti"                                       
[173] "Mayotte"                                     
[174] "French Guiana"                               
[175] "Venezuela"                                   
[176] "Qatar"                                       
[177] "Liberia"                                     
[178] "Maldives"                                    
[179] "Samoa"                                       
[180] "Suriname"                                    
[181] "Sierra Leone"                                
[182] "Jamaica"                                     
[183] "South Sudan (Rep. of)"                       
[184] "Fiji"                                        
[185] "Syria"                                       
[186] "Antarctica"                                  
[187] "South Georgia and the South Sandwich Islands"

Questions

Q2: Disease type: What are the top 10 animal diseases with the highest number of reports globally over the past 20 years?

Create a list of the top 10 diseases

# Names of the ten diseases with the most reports overall.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties,
# so more than ten names can come back if the 10th place is shared.
top10_diseases <- df_clean |>
  count(disease, name = "report_count") |>
  slice_max(report_count, n = 10) |>
  pull(disease)

# Yearly report counts, restricted to the top-10 diseases.
# `.groups = "drop_last"` spells out the grouping summarize() would otherwise
# choose implicitly (the result stays grouped by year) and silences the
# informational message about it.
df_top10_diseases <- df_clean |>
  filter(disease %in% top10_diseases) |>
  group_by(year, disease) |>
  summarize(report_count = n(), .groups = "drop_last")
glimpse(df_top10_diseases)
Rows: 187
Columns: 3
Groups: year [21]
$ year         <dbl> 2005, 2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 200…
$ disease      <chr> "African swine fever virus (Inf. with)", "Anthrax", "Foot…
$ report_count <int> 1, 3, 10, 6, 20, 2, 1, 2, 4, 7, 55, 2, 12, 1, 3, 6, 20, 2…

Show the top 10 diseases and their total reports

# Collapse the yearly counts into one total per disease, largest first.
# The year grouping is dropped first so the weighted count is per disease only.
top10_summary <- df_top10_diseases |>
  ungroup() |>
  count(disease, wt = report_count, name = "Total Reports", sort = TRUE)
glimpse(top10_summary)
Rows: 10
Columns: 2
$ disease         <chr> "High pathogenicity avian influenza viruses (poultry) …
$ `Total Reports` <int> 966, 704, 653, 417, 228, 217, 214, 182, 157, 133
# Print the per-disease totals as a table
print(top10_summary)
# A tibble: 10 × 2
   disease                                                       `Total Reports`
   <chr>                                                                   <int>
 1 High pathogenicity avian influenza viruses (poultry) (Inf. w…             966
 2 African swine fever virus (Inf. with)                                     704
 3 Influenza A viruses of high pathogenicity (Inf. with) (non-p…             653
 4 Foot and mouth disease virus (Inf. with)                                  417
 5 Bluetongue virus (Inf. with)                                              228
 6 Anthrax                                                                   217
 7 Newcastle disease virus (Inf. with)                                       214
 8 Low pathogenic avian influenza (poultry) (2006-2021)                      182
 9 Rabies virus (Inf. with)                                                  157
10 West Nile Fever                                                           133

Create a data frame that maps the full disease names to abbreviations

# Lookup table: full disease name -> short label used in the plots.
# Written as name = abbreviation pairs so each mapping sits on one line.
top10_disease_abbr <- local({
  lookup <- c(
    "High pathogenicity avian influenza viruses (poultry) (Inf. with)" = "HPAI (poultry)",
    "African swine fever virus (Inf. with)" = "ASFV",
    "Influenza A viruses of high pathogenicity (Inf. with) (non-poultry including wild birds) (2017-)" = "IAOHA (non-poultry)",
    "Foot and mouth disease virus (Inf. with)" = "Foot and mouth DV",
    "Bluetongue virus (Inf. with)" = "Bluetongue virus",
    "Anthrax" = "Anthrax",
    "Newcastle disease virus (Inf. with)" = "Newcastle disease virus",
    "Low pathogenic avian influenza (poultry) (2006-2021)" = "LPAI (poultry)",
    "Rabies virus (Inf. with)" = "Rabies virus",
    "West Nile Fever" = "West Nile Fever"
  )
  data.frame(
    disease_full = names(lookup),
    disease_abbr = unname(lookup)
  )
})

Merge data

# Attach the short plot label to every year/disease row
df_plot1 <- left_join(
  df_top10_diseases,
  top10_disease_abbr,
  by = join_by(disease == disease_full)
)

Visualization

# Small-multiples line chart: one panel per disease, reports per year.
p2 <- ggplot(df_plot1, aes(x = year, y = report_count, color = disease_abbr)) +
  geom_line(linewidth = 0.5) +
  geom_point(size = 0.8) +
  labs(
    title = "Top 10 Animal Diseases Trend Globally (2005-2025)",
    x = "Year",
    y = "Report Count",
    color = "Disease"
  ) +
  theme_bw() +
  # Free y scales: on a shared scale the low-count diseases were squashed
  # against the x-axis and their trends were unreadable.
  facet_wrap(~ disease_abbr, ncol = 3, scales = "free_y") +
  scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) +
  theme(strip.text = element_text(size = 8, face = "bold"))

# Convert to a plotly widget once and reuse it for display and export.
p2_widget <- ggplotly(p2)
p2_widget

# Make sure the output directory exists before writing the HTML file.
dir.create("out", showWarnings = FALSE, recursive = TRUE)
saveWidget(p2_widget, "out/interactive_plot2.html", selfcontained = TRUE)

Q3: Animal species: Is there a particular disease that is highly prevalent in each species, and if so, which ones are they?

# Reports per species/disease pair, plus each disease's share of its species'
# reports. `prop` is a proportion in [0, 1], not a percentage.
# The result stays grouped by species.
df_disease_species <- df_clean |>
  filter(species != "Unknown") |>
  group_by(species, disease) |>
  summarise(cases = n(), .groups = "drop_last") |>
  mutate(prop = cases / sum(cases))
print(df_disease_species)
# A tibble: 28 × 4
# Groups:   species [6]
   species disease                                                 cases    prop
   <chr>   <chr>                                                   <int>   <dbl>
 1 Canine  Rabies virus (Inf. with)                                  157 1      
 2 Cattle  Foot and mouth disease virus (Inf. with)                  417 1      
 3 Goat    Sheep pox and goat pox                                     66 1      
 4 Horse   Equid herpesvirus-1 (Inf. with) (Equine rhinopneumonit…     5 0.0294 
 5 Horse   Equine arteritis virus (Inf. with)                         11 0.0647 
 6 Horse   Equine encephalomyelitis (Eastern and Western)(-2005)       1 0.00588
 7 Horse   Equine encephalomyelitis (Eastern)(2006-)                  10 0.0588 
 8 Horse   Equine encephalomyelitis (Western)(2006-)                   4 0.0235 
 9 Horse   Equine encephalosis virus (Inf. with)                       1 0.00588
10 Horse   Equine infectious anaemia                                  69 0.406  
# ℹ 18 more rows

Get the top 10

# Ten species/disease pairs with the most reports, across all species
head(arrange(df_disease_species, desc(cases)), n = 10)
# A tibble: 10 × 4
# Groups:   species [6]
   species disease                                                  cases   prop
   <chr>   <chr>                                                    <int>  <dbl>
 1 Poultry High pathogenicity avian influenza viruses (poultry) (I…   966 0.460 
 2 Swine   African swine fever virus (Inf. with)                      704 0.900 
 3 Poultry Influenza A viruses of high pathogenicity (Inf. with) (…   653 0.311 
 4 Cattle  Foot and mouth disease virus (Inf. with)                   417 1     
 5 Poultry Newcastle disease virus (Inf. with)                        214 0.102 
 6 Poultry Low pathogenic avian influenza (poultry) (2006-2021)       182 0.0867
 7 Canine  Rabies virus (Inf. with)                                   157 1     
 8 Swine   Classical swine fever virus (Inf. with)                     77 0.0985
 9 Horse   Equine infectious anaemia                                   69 0.406 
10 Goat    Sheep pox and goat pox                                      66 1     

Q3(1): What is the relationship between species and types of disease?

Again, create a data frame that maps the full disease names to abbreviations

# Every disease name that needs an abbreviation, in table order
df_disease_species |>
  pull(disease) |>
  unique()
 [1] "Rabies virus (Inf. with)"                                                                        
 [2] "Foot and mouth disease virus (Inf. with)"                                                        
 [3] "Sheep pox and goat pox"                                                                          
 [4] "Equid herpesvirus-1 (Inf. with) (Equine rhinopneumonitis) (2014-)"                               
 [5] "Equine arteritis virus (Inf. with)"                                                              
 [6] "Equine encephalomyelitis (Eastern and Western)(-2005)"                                           
 [7] "Equine encephalomyelitis (Eastern)(2006-)"                                                       
 [8] "Equine encephalomyelitis (Western)(2006-)"                                                       
 [9] "Equine encephalosis virus (Inf. with)"                                                           
[10] "Equine infectious anaemia"                                                                       
[11] "Equine rhinopneumonitis (-2013)"                                                                 
[12] "Taylorella equigenitalis (Contagious equine metritis) (Inf. with)"                               
[13] "Theileria equi and Babesia caballi (Inf. with) (Equine piroplasmosis)"                           
[14] "Venezuelan equine encephalomyelitis"                                                             
[15] "Avian chlamydiosis"                                                                              
[16] "Avian infectious bronchitis"                                                                     
[17] "Avian infectious laryngotracheitis"                                                              
[18] "Equine influenza virus (Inf. with)"                                                              
[19] "High pathogenicity avian influenza viruses (poultry) (Inf. with)"                                
[20] "Influenza A virus (Inf. with)"                                                                   
[21] "Influenza A viruses of high pathogenicity (Inf. with) (non-poultry including wild birds) (2017-)"
[22] "Low pathogenic avian influenza (poultry) (2006-2021)"                                            
[23] "Mycoplasma gallisepticum (Avian mycoplasmosis) (Inf. with)"                                      
[24] "Mycoplasma synoviae (Avian mycoplasmosis) (Inf. with) (2006-)"                                   
[25] "Newcastle disease virus (Inf. with)"                                                             
[26] "African swine fever virus (Inf. with)"                                                           
[27] "Classical swine fever virus (Inf. with)"                                                         
[28] "Swine vesicular disease (-2014)"                                                                 
# Lookup table: full disease name -> short label for the species plot.
# Despite the name, it covers all 28 diseases listed above, not just ten.
top10_disease_abbr2 <- local({
  lookup <- c(
    "Rabies virus (Inf. with)" = "RV",
    "Foot and mouth disease virus (Inf. with)" = "FMDV",
    "Sheep pox and goat pox" = "SPGP",
    "Equid herpesvirus-1 (Inf. with) (Equine rhinopneumonitis) (2014-)" = "EH-1",
    "Equine arteritis virus (Inf. with)" = "EAV",
    "Equine encephalomyelitis (Eastern and Western)(-2005)" = "EE(E&W)",
    "Equine encephalomyelitis (Eastern)(2006-)" = "EE(E)",
    "Equine encephalomyelitis (Western)(2006-)" = "EE(W)",
    "Equine encephalosis virus (Inf. with)" = "EEV",
    "Equine infectious anaemia" = "EIA",
    "Equine rhinopneumonitis (-2013)" = "ER",
    "Taylorella equigenitalis (Contagious equine metritis) (Inf. with)" = "TE",
    "Theileria equi and Babesia caballi (Inf. with) (Equine piroplasmosis)" = "TEABC",
    "Venezuelan equine encephalomyelitis" = "VEE",
    "Avian chlamydiosis" = "AC",
    "Avian infectious bronchitis" = "AIB",
    "Avian infectious laryngotracheitis" = "AIL",
    "Equine influenza virus (Inf. with)" = "EIV",
    "High pathogenicity avian influenza viruses (poultry) (Inf. with)" = "HPAIV",
    "Influenza A virus (Inf. with)" = "IAV",
    "Influenza A viruses of high pathogenicity (Inf. with) (non-poultry including wild birds) (2017-)" = "IAOP",
    "Low pathogenic avian influenza (poultry) (2006-2021)" = "LPAI",
    "Mycoplasma gallisepticum (Avian mycoplasmosis) (Inf. with)" = "MG",
    "Mycoplasma synoviae (Avian mycoplasmosis) (Inf. with) (2006-)" = "MS",
    "Newcastle disease virus (Inf. with)" = "NDV",
    "African swine fever virus (Inf. with)" = "ASFV",
    "Classical swine fever virus (Inf. with)" = "CSFV",
    "Swine vesicular disease (-2014)" = "SVD"
  )
  data.frame(
    disease_full_2 = names(lookup),
    disease_abbr_2 = unname(lookup)
  )
})

Merge data

# Attach the short plot label to every species/disease row
df_plot2 <- left_join(
  df_disease_species,
  top10_disease_abbr2,
  by = join_by(disease == disease_full_2)
)

glimpse(df_plot2)
Rows: 28
Columns: 5
Groups: species [6]
$ species        <chr> "Canine", "Cattle", "Goat", "Horse", "Horse", "Horse", …
$ disease        <chr> "Rabies virus (Inf. with)", "Foot and mouth disease vir…
$ cases          <int> 157, 417, 66, 5, 11, 1, 10, 4, 1, 69, 9, 28, 20, 12, 3,…
$ prop           <dbl> 1.0000000000, 1.0000000000, 1.0000000000, 0.0294117647,…
$ disease_abbr_2 <chr> "RV", "FMDV", "SPGP", "EH-1", "EAV", "EE(E&W)", "EE(E)"…

Visualization

# Dot plot of report counts per disease, one facet row per species.
# Free y scale and free panel height let each species show only its own
# diseases, with panel height proportional to the number of diseases.
p3 <- ggplot(
  df_plot2,
  aes(x = cases, y = reorder(disease_abbr_2, cases), color = species)
) +
  geom_point(size = 3, alpha = 0.7) +
  facet_grid(species ~ ., scales = "free_y", space = "free_y") +
  labs(
    title = "Disease distribution among species",
    x = "Cases number",
    y = "Diseases type",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 10),
    strip.text.y = element_text(angle = 0, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    panel.spacing = unit(0.5, "lines")
  ) +
  scale_color_brewer(palette = "Dark2") +
  # Same 2% multiplicative padding as before, written with expansion()
  # to match the y-scale call used for the trend plot.
  scale_x_continuous(expand = expansion(mult = 0.02))

print(p3)

# Save plot as PNG; make sure the output directory exists first
dir.create("out", showWarnings = FALSE, recursive = TRUE)
ggsave("out/distribution.png", plot = p3, width = 8, height = 6, dpi = 300)

Q4: Geographical view: What is the global distribution of the total number of animal disease reports by country?

# Number of reports per country, keyed by ISO3 code.
df_country <- df_clean |>
  mutate(
    # Rewrite the source's country labels into names countrycode() resolves;
    # Ceuta and Melilla are counted under Spain. Other names pass through.
    country = case_match(
      country,
      "China (People's Rep. of)" ~ "China",
      "Korea (Rep. of)" ~ "South Korea",
      "Türkiye (Rep. of)" ~ "Turkey",
      "Central African (Rep.)" ~ "Central African Republic",
      "Dominican (Rep.)" ~ "Dominican Republic",
      "Serbia and Montenegro" ~ "Serbia",
      c("Ceuta", "Melilla") ~ "Spain",
      .default = country
    ),
    # Country name -> ISO 3166-1 alpha-3 code
    country_iso3 = countrycode(country, "country.name", "iso3c")
  ) |>
  # One row per ISO3 code with its report count
  count(country_iso3, name = "total_cases")

# World country polygons keyed by ISO3 code.
# Natural Earth stores "-99" in iso_a3 for a few countries (notably France and
# Norway). Those rows would never match in the join below and would be drawn
# as having zero reports, so fall back to adm0_a3 for them.
world_map <- ne_countries(scale = "medium", returnclass = "sf") |>
  mutate(iso_a3 = coalesce(na_if(iso_a3, "-99"), adm0_a3)) |>
  select(iso_a3, geometry)

# Combine the reports data with the world map.
# Left join from the map side so every polygon is kept, even without reports.
df_country_merged <- world_map |>
  left_join(df_country, by = c("iso_a3" = "country_iso3")) |>
  mutate(total_cases = replace_na(total_cases, 0))  # Replace NA with 0

# Visualization: choropleth of total reports per country
p4 <- ggplot(df_country_merged) +
  # Border width is set with `linewidth`; since ggplot2 3.4 `size` no longer
  # controls polygon outlines, so the original `size = 0.2` had no effect.
  geom_sf(aes(fill = total_cases), color = "white", linewidth = 0.2) +
  scale_fill_gradientn(
    colours = c("#FFF7BC", "#FEC44F", "#D95F0E"),
    name = "Reports number",
    # Legend ticks every 20 reports, from 0 up to the largest country total
    breaks = seq(0, max(df_country_merged$total_cases, na.rm = TRUE), 20),
    na.value = "grey90"
  ) +
  labs(
    title = "Distribution of global animal disease reports(2005-2025)",
    caption = "Source:WAHIS | Author:Ruby Peng"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.key.width = unit(2, "cm")
  )
print(p4)

# Save plot as PNG; make sure the output directory exists first
dir.create("out", showWarnings = FALSE, recursive = TRUE)
ggsave("out/map.png", plot = p4, width = 8, height = 6, dpi = 300)