csv 将R Dataframe 导出到SPSS

lyr7nygr  于 2022-12-15  发布在  其他
关注(0)|答案(4)|浏览(232)

有一个软件包foreign,其中包含一个函数write.foreign(),可以写入SPS和CSV文件。SPS文件可以将CSV文件读取到SPSS中,包括标签。目前为止,该函数运行良好,但存在一些问题:
1.较新的SPSS版本可能会显示DATA LIST定义太少的错误
1.如果有通过attr()存储的数值变量的“标签”,这些标签将丢失。
1.即使SPSS版本支持最大32767的字符串,如果任何变量中的字符串超过255,函数write.foreign()也会停止。
1.如果使用任何字符变量,则会有一个星星(*),但较新的SPSS版本无法处理此问题。

  1. CSV文件以逗号分隔,不能使用引号,因此字符串(字符)中不允许使用逗号
    1.非ASCII字符(例如变音符号)将导致导入崩溃
    1.如果您有一个包含任何NA值的字符,您将看到...
    ...出现如下错误消息:
Error in if (any(lengths > 255L)) stop("Cannot handle character variables longer than 255") : 
    missing value where TRUE/FALSE needed

我花了很多时间在这上面,然后找到了一个很好的帖子(http://r.789695.n4.nabble.com/SPSS-export-in-R-package-foreign-td921491.html)来开始并使它变得更好。这是我的结果,我想和你分享。

ct2axkht

ct2axkht1#

要将R数据.frame导出到SPSS,请使用haven包中的write_sav

library(haven)
write_sav(mtcars, "mtcars.sav")
2hh7jdfx

2hh7jdfx2#

此函数替代foreign:write.foreign来处理上述问题。

**注意:**为避免SPSS查找CSV文件时出现问题,请至少为datafile指定完整路径(!)(如果使用原始foreign:write.foreign(),也是如此)。
**注意:**此脚本将在没有警告的情况下用空格替换字符串中的制表符(TAB)和其他空格(包括CR+LF)。您可以考虑使用GET DATA而不是麻烦的DATA LIST来解决此限制。
**注:**可能会有一个警告In FUN(X[[i]], ...) : probable complete loss of accuracy in modulus-这是指计算小数,可以忽略。
注意:POSIXltPOSIXct变量尚未被脚本正确处理。

writeForeignMySPSS = function (df, datafile, codefile, varnames = NULL, len = 32767) {
    adQuote <-  function (x) paste("\"", x, "\"", sep = "")

    # Last variable must not be empty for DATA LIST
    if (any(is.na(df[[length(df)]]))) {
        df$END_CASE = 0
    }

    # http://stackoverflow.com/questions/5173692/how-to-return-number-of-decimal-places-in-r
    decimalplaces <- function(x) {
        y = x[!is.na(x)]
        if (length(y) == 0) {
            return(0)
        }
        if (any((y %% 1) != 0)) {
            info = strsplit(sub('0+$', '', as.character(y)), ".", fixed=TRUE)
            info = info[sapply(info, FUN=length) == 2]
            if (length(info) >= 2) {
              dec = nchar(unlist(info))[seq(2, length(info), 2)]
            } else {
              return(0)
            }
            return(max(dec, na.rm=T))
        } else {
            return(0)
        }
    }

    dfn <- lapply(df, function(x) if (is.factor(x))
        as.numeric(x)
        else x)

    # Boolean variables (dummy coding)
    bv = sapply(dfn, is.logical)
    for (v in which(bv)) {
        dfn[[v]] = ifelse(dfn[[v]], 1, 0)
    }

    varlabels <- names(df)
    # Use comments where applicable
    for (i in 1:length(df)) {
      cm = comment(df[[i]])
      if (is.character(cm) && (length(cm) > 0)) {
        varlabels[i] = comment(df[[i]])
      }
    }

    if (is.null(varnames)) {
        varnames <- abbreviate(names(df), 8L)
        if (any(sapply(varnames, nchar) > 8L))
            stop("I cannot abbreviate the variable names to eight or fewer letters")
        if (any(varnames != varlabels))
            warning("some variable names were abbreviated")
    }
    varnames <- gsub("[^[:alnum:]_\\$@#]", "\\.", varnames)
    dl.varnames <- varnames
    chv = sapply(df, is.character)
    if (any(chv)) {
        for (v in which(chv)) {
            dfn[[v]] = gsub("\\s", " ", dfn[[v]])
        }
        lengths <- sapply(df[chv], function(v) max(nchar(v), na.rm=T))
        if (any(lengths > len)) {
            warning(paste("Clipped strings in", names(df[chv]), "to", len, "characters"))
            for (v in which(chv)) {
                df[[v]] = substr(df[[v]], start=1, stop=len)
            }
        }
        lengths[is.infinite(lengths)] = 0
        lengths[lengths < 1] = 1
        lengths <- paste("(A", lengths, ")", sep = "")
        # star <- ifelse(c(FALSE, diff(which(chv) > 1)), " *",
        dl.varnames[chv] <- paste(dl.varnames[chv], lengths)
    }

    # decimals and bools
    nmv = sapply(df, is.numeric)
    dbv = sapply(df, is.numeric)
    nv = (nmv | dbv)
    decimals = sapply(df[nv], FUN=decimalplaces)
    dl.varnames[nv] = paste(dl.varnames[nv], " (F", decimals+8, ".", decimals, ")", sep="")
    if (length(bv) > 0) {
        dl.varnames[bv] = paste(dl.varnames[bv], "(F1.0)")
    }
    rmv = !(chv | nv | bv)
    if (length(rmv) > 0) {
        dl.varnames[rmv] = paste(dl.varnames[rmv], "(F8.0)")
    }
    # Breaks in output
    brv = seq(1, length(dl.varnames), 10)
    dl.varnames[brv] = paste(dl.varnames[brv], "\n", sep=" ")

    cat("SET LOCALE = ENGLISH.\n", file = codefile)
    cat("DATA LIST FILE=", adQuote(datafile), " free (TAB)\n", file = codefile, append = TRUE)
    cat("/", dl.varnames, " .\n\n", file = codefile, append = TRUE)
    cat("VARIABLE LABELS\n", file = codefile, append = TRUE)
    cat(paste(varnames, adQuote(varlabels), "\n"), ".\n", file = codefile,
        append = TRUE)
    factors <- sapply(df, is.factor)
    if (any(factors)) {
        cat("\nVALUE LABELS\n", file = codefile, append = TRUE)
        for (v in which(factors)) {
            cat("/\n", file = codefile, append = TRUE)
            cat(varnames[v], " \n", file = codefile, append = TRUE)
            levs <- levels(df[[v]])
            cat(paste(1:length(levs), adQuote(levs), "\n", sep = " "),
                file = codefile, append = TRUE)
        }
        cat(".\n", file = codefile, append = TRUE)
    }

    # Labels stored in attr()
    attribs <- !unlist(lapply(sapply(df, FUN=attr, which="1"), FUN=is.null))
    if (any(attribs)) {
        cat("\nVALUE LABELS\n", file = codefile, append = TRUE)
        for (v in which(attribs)) {
            cat("/\n", file = codefile, append = TRUE)
            cat(varnames[v], " \n", file = codefile, append = TRUE)
            # Check labeled values
            tc = list()
            for (tcv in dimnames(table(df[[v]]))[[1]]) {
                if (!is.null(tcl <- attr(df[[v]], tcv))) {
                    tc[tcv] = tcl
                }
            }
            cat(paste(names(tc), tc, "\n", sep = " "),
                file = codefile, append = TRUE)
        }
        cat(".\n", file = codefile, append = TRUE)
    }

    ordinal <- sapply(df, is.ordered)
    if (any(ordinal)) {
        tmp = varnames[ordinal]
        brv = seq(1, length(tmp), 10)
        tmp[brv] = paste(tmp[brv], "\n")
        cat(paste("\nVARIABLE LEVEL", paste(tmp, collapse=" "), "(ORDINAL).\n"),
            file = codefile, append = TRUE)
    }
    num <- sapply(df, is.numeric)
    if (any(num)) {
        tmp = varnames[num]
        brv = seq(1, length(tmp), 10)
        tmp[brv] = paste(tmp[brv], "\n")
        cat(paste("\nVARIABLE LEVEL", paste(tmp, collapse=" "), "(SCALE).\n"),
            file = codefile, append = TRUE)
    }
    cat("\nEXECUTE.\n", file = codefile, append = TRUE)

    write.table(dfn, file = datafile, row = FALSE, col = FALSE,
                sep = "\t", quote = F, na = "", eol = "\n", fileEncoding="UTF-8")
}

从长远来看,这些修改可能会被考虑合并到foreign包中,不幸的是,r-project的bug报告系统目前仅限于以前注册的开发人员。

3bygqnnd

3bygqnnd3#

SPSS扩展命令STATS GET R可以从保存的R工作空间直接读取数据框到SPSS数据集。如果此扩展命令尚未安装(它将显示在文件菜单上),可以从实用程序菜单(Statistics 22-23)或扩展菜单(Statistics 24+)安装。

ljo96ir5

ljo96ir54#

我的发现是:
1.“foreign”无法处理其中包含逗号的值(基本上,它无法像Excel那样处理.csv文件)。
1.'haven'要求r数据集中的名称与SPSS兼容。
我对后者所做的是将它们重命名为Haven found them。它似乎在块中工作:“初始1”至“初始4”“最终1”至“最终4”“复发”和“复发”。

相关问题