Function

コメント・参照等
  • 人口推計(総務省)ファイルを整形する関数

FUN = f_read_japanpopulation.r
function (download_folder) 
{
    lapply(c("readxl", "Nippon", "lubridate"), require, character.only = T)
    path_to_folder <- paste0("C:/Users/", Sys.info()["user"], download_folder)
    setwd(path_to_folder)
    getwd()
    xls_file <- dir()
    japanpopulation <- list()
    for (iii in seq(xls_file)) {
        buf0 <- readxl::read_excel(xls_file[iii], sheet = 1, col_names = F)
        buf0 <- data.frame(buf0, stringsAsFactors = F)
        if (grepl("表1", gsub("NA|\\s", "", zen2han(paste0(buf0[1, ], collapse = ""))))) {
            buf0[4, ] <- sapply(as.character(buf0[4, ]), zen2han)
            buf1 <- data.frame(t(buf0[-c(1:3), ]), stringsAsFactors = F)
            colnames(buf1) <- buf1[1, ]
            buf2 <- buf1[-1, ]
            row_num <- !is.na(as.numeric(substring(text = buf2[, 1], 5, 5)))
            row_cha <- !row_num
            row.names(buf2) <- NULL
            base_date <- as.Date(gsub("([0-9]{4})年([0-9]+)月", "\\1-\\2-1", buf2[which(row_cha)[1], 1]))
            first_date <- base_date %m-% months(which(row_cha)[1] - 1)
            Date <- seq(from = first_date, by = "+1 month", length.out = nrow(buf2))
            colnames(buf2) <- sapply(gsub("\\s", "", colnames(buf2)), zen2han)
            obj_col <- grep("総数", colnames(buf2))
            datatype <- gsub("\\s", "", buf2[1, obj_col - 1])
            colnames(buf2)[obj_col[1]:(obj_col[2] - 1)] <- paste0(datatype[1], ":", colnames(buf2)[obj_col[1]:(obj_col[2] - 1)])
            colnames(buf2)[obj_col[2]:ncol(buf2)] <- paste0(datatype[2], ":", colnames(buf2)[obj_col[2]:ncol(buf2)])
            buf3 <- buf2[, grep("NA", colnames(buf2), invert = T)]
            buf4 <- f_remove_blank_row_and_column(df = buf3)
            buf4$title <- zen2han(gsub("\\s", "", paste0(buf0[1, 7], buf0[1, 8])))
            japanpopulation[[iii]] <- data.frame(Date, buf4[, -1], stringsAsFactors = F, check.names = F)
        }
        else {
            for (ttt in 1:2) {
                if (ttt == 1) {
                  tmp1 <- buf0[, c(3, 5, 6, 7)]
                }
                else {
                  tmp1 <- buf0[, c(10, 12, 13, 14)]
                }
                colnames(tmp1) <- tmp1[4, ]
                tmp1 <- tmp1[-c(1:5), ]
                colnames(tmp1)[1] <- "Date"
                if (ttt == 1) {
                  buf1 <- tmp1
                }
                else {
                  buf1 <- rbind(buf1, tmp1)
                }
            }
            buf2 <- na.omit(buf1)
            buf2[, 1] <- as.Date(paste0(buf2[, 1, drop = T], "-1-1"))
            buf2[, -1] <- apply(buf2[, -1], 2, as.numeric)
            buf2$title <- zen2han(buf0[1, 1])
            japanpopulation[[iii]] <- buf2
        }
        row.names(japanpopulation[[iii]]) <- NULL
        print(tail(japanpopulation[[iii]][, 1:5]))
    }
    return(japanpopulation)
}