# FILE PATHS (edit these)
# NOTE(review): the code below calls read_excel(), clean_names(), str_extract(),
# dplyr verbs, ggplot() and write_csv(), but no library() calls are visible in
# this section -- confirm the packages are attached before this point.

# ChAT workbooks
# intensity workbook: has intensity_integrated_intensity_snca
chat_intensity_file <- "~/Desktop/CHAT_N_3_2026.xlsx"
# area workbook: has areashape_area (or AreaShape_Area)
chat_area_file <- "~/Desktop/CHAT_N_3_area_2026.xlsx"

# MSN workbooks (presumably the same layout as the ChAT pair -- verify)
msn_intensity_file <- "~/Desktop/MSN_N_3_2026.xlsx"
msn_area_file <- "~/Desktop/MSN_N_3_area_2026.xlsx"
# Extract the dataset ID from a file path.
#
# The ID is the run of digits that immediately follows "/For_analysis/" in `x`.
#
# Arguments:
#   x - character vector of paths.
#
# Returns a character vector the same length as `x`; elements with no
# "/For_analysis/<digits>" segment are NA (str_extract() semantics).
extract_dataset_id <- function(x) {
  str_extract(x, "(?<=/For_analysis/)\\d+")
}
# Robustly find the area column name after clean_names()
# (it will usually become "area_shape_area").
#
# Arguments:
#   df - data frame whose column names have already been cleaned.
#
# Returns the name (a single string) of the first matching area column,
# preferring "area_shape_area" over "areashape_area".
# Stops with an error when neither column is present.
get_area_col <- function(df) {
  # Candidates are listed in order of preference.
  candidates <- c("area_shape_area", "areashape_area")
  found <- candidates[candidates %in% names(df)]

  if (length(found) == 0L) {
    stop("Couldn't find an area column. Expected 'area_shape_area' or 'areashape_area' after clean_names().")
  }

  found[[1L]]
}
# FUNCTION: build per-dataset table with total_intensity, total_area, intensity_per_area
#
# Arguments:
#   intensity_path  - path to the Excel workbook with the intensity
#                     measurements. After clean_names() it must contain
#                     `path_name_snca` and `intensity_integrated_intensity_snca`.
#   area_path       - path to the Excel workbook with the area measurements.
#                     After clean_names() it must contain `path_name_snca` and
#                     an area column recognised by get_area_col().
#   cell_type_label - value written to the `cell_type` column of the result.
#
# Returns a tibble with one row per dataset_id found in the intensity workbook
# and the columns dataset_id, total_intensity, total_area, cell_type and
# intensity_per_area. Datasets that have no rows in the area workbook keep
# NA for total_area and intensity_per_area (left join).
summarise_intensity_and_area <- function(intensity_path, area_path, cell_type_label) {
  # Rows whose path carries no "/For_analysis/<digits>" segment are dropped;
  # the remaining IDs are converted to integer so both tables join cleanly.
  add_dataset_id <- function(df) {
    df %>%
      mutate(dataset_id = extract_dataset_id(path_name_snca)) %>%
      filter(!is.na(dataset_id)) %>%
      mutate(dataset_id = as.integer(dataset_id))
  }

  # ---- intensity ----
  # clean_names() leaves already-clean names unchanged, so it is safe even if
  # the sheet was exported with snake_case headers.
  df_int <- read_excel(intensity_path) %>%
    janitor::clean_names()

  int_required <- c("path_name_snca", "intensity_integrated_intensity_snca")
  int_missing <- setdiff(int_required, names(df_int))
  if (length(int_missing) > 0L) {
    stop(
      "Intensity file is missing column(s) after clean_names(): ",
      paste(int_missing, collapse = ", ")
    )
  }

  intensity_by_dataset <- df_int %>%
    add_dataset_id() %>%
    group_by(dataset_id) %>%
    summarise(
      total_intensity = sum(intensity_integrated_intensity_snca, na.rm = TRUE),
      .groups = "drop"
    )

  # ---- area ----
  df_area <- read_excel(area_path) %>%
    janitor::clean_names()
  area_col <- get_area_col(df_area)

  # The dataset ID is parsed from path_name_snca, so the column is required
  # (no fallback to other path columns is implemented).
  if (!("path_name_snca" %in% names(df_area))) {
    stop("Area file is missing 'path_name_snca' after clean_names(). Check the column name in the area sheet.")
  }

  area_by_dataset <- df_area %>%
    add_dataset_id() %>%
    group_by(dataset_id) %>%
    summarise(
      # area_col is a string, hence the .data pronoun.
      total_area = sum(.data[[area_col]], na.rm = TRUE),
      .groups = "drop"
    )

  # ---- join + compute ----
  out <- intensity_by_dataset %>%
    left_join(area_by_dataset, by = "dataset_id") %>%
    mutate(
      cell_type = cell_type_label,
      intensity_per_area = total_intensity / total_area
    )

  out
}
# Build the per-dataset tables for each cell type ...
chat_by_dataset <- summarise_intensity_and_area(
  intensity_path = chat_intensity_file,
  area_path = chat_area_file,
  cell_type_label = "ChAT"
)
msn_by_dataset <- summarise_intensity_and_area(
  intensity_path = msn_intensity_file,
  area_path = msn_area_file,
  cell_type_label = "MSN"
)

# ... then write one CSV per cell type to the Desktop.
write_csv(
  chat_by_dataset,
  "~/Desktop/CHAT_pSNCA_per_area_per_dataset.csv"
)
write_csv(
  msn_by_dataset,
  "~/Desktop/MSN_pSNCA_per_area_per_dataset.csv"
)
# COMBINE + PLOT (individual datasets + mean)
# Stack the two per-dataset tables; the cell_type column tells them apart.
combined <- bind_rows(chat_by_dataset, msn_by_dataset)

ggplot(combined, aes(x = cell_type, y = intensity_per_area)) +
  # height = 0: jitter horizontally only, so each dot stays at its true
  # intensity_per_area value (the default also jitters vertically).
  geom_jitter(width = 0.12, height = 0, size = 3, alpha = 0.85) +
  # Per-cell-type mean drawn as a horizontal bar; fun.min/fun.max are set to
  # the mean as well so the crossbar collapses to a single line.
  stat_summary(
    fun = mean,
    fun.min = mean,
    fun.max = mean,
    geom = "crossbar",
    width = 0.4
  ) +
  labs(
    y = "Total pSNCA intensity / Total mask area (per dataset)",
    title = "pSNCA density per dataset (dots) + mean"
  ) +
  theme_classic(base_size = 13)
# Optional summary table (mean/SD/SEM across datasets)
# One row per cell_type. n_datasets counts only datasets with a non-missing
# intensity_per_area, so the SEM denominator matches the values that mean()
# and sd() actually use (both drop NAs).
summary_by_type <- combined %>%
  group_by(cell_type) %>%
  summarise(
    n_datasets = sum(!is.na(intensity_per_area)),
    mean_intensity_per_area = mean(intensity_per_area, na.rm = TRUE),
    sd_intensity_per_area = sd(intensity_per_area, na.rm = TRUE),
    sem_intensity_per_area = sd_intensity_per_area / sqrt(n_datasets),
    .groups = "drop"
  )

write_csv(summary_by_type, "~/Desktop/Summary_pSNCA_per_area_CHAT_vs_MSN.csv")