-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathglimpse_labels.R
More file actions
136 lines (117 loc) · 5.55 KB
/
glimpse_labels.R
File metadata and controls
136 lines (117 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#' Get a glimpse of your data, including variable labels.
#'
#' This is like a transposed version of print: columns run down the page,
#' and data runs across. This makes it possible to see every column in
#' a data frame. It's a little like \code{str()} applied to a data frame
#' but it tries to show you as much data as possible. (And it always shows
#' the underlying data, even when applied to a remote data source.)
#'
#' This is a modified version of the tibble package's 'glimpse.tbl' S3 method
#' that includes the contents of the 'label' variable attribute, if present
#' (e.g., if you imported a Stata dataset with variable labels using
#' \code{haven::read_stata()}).
#'
#' @param x An object to glimpse at.
#' @param width Width of output: defaults to the setting of the option
#' `tibble.width` (if finite) or the width of the console.
#' @param labels Include variable labels in the printout. If `TRUE` and no
#' variables have the `label` attribute, labels will be printed as 10
#' spaces to demonstrate their absence.
#' @return x original x is (invisibly) returned, allowing \code{glimpse_labels()}
#' to be used within a data pipe line.
#' @importFrom tibble type_sum
#' @importFrom stringr str_wrap
#' @importFrom utils combn head
#' @export
#' @examples
#' dat1 <- tibble::data_frame(x = 1:20, y = 21:40)
#' attributes(dat1[["x"]])$label <- "this is my label"
#' glimpse_labels(dat1)
#'
#' dat2 <- dat1
#' attributes(dat2[["x"]])$label <- paste0(rep("my label", 12), collapse = " ")
#' glimpse_labels(dat2)
#'
#' dat3 <- dat1
#' attributes(dat3[["x"]])$label <- paste0(rep("my label", 15), collapse = " ")
#' glimpse_labels(dat3)
glimpse_labels <- function (x, width = NULL, labels = TRUE) {
# Same as glimpse
width <- tibble:::tibble_glimpse_width(width)
stopifnot(is.finite(width))
cat("Observations: ", tibble:::big_mark(nrow(x)), "\n", sep = "")
if (ncol(x) == 0)
return(invisible())
cat("Variables: ", tibble:::big_mark(ncol(x)), "\n", sep = "")
rows <- as.integer(width/3)
df <- as.data.frame(head(x, rows))
var_types <- vapply(df, tibble::type_sum, character(1))
# Edits to add labels
if (!labels) {
var_names <- paste0("$ ", format(names(df)), " <", var_types,
"> ")
} else {
if (!("label" %in% names(attributes(x[[1]])))) {
tbl_labels <- rep("", length(names(x)))
} else {
tbl_labels <- vapply(x, function(x) {
ifelse(is.null(attributes(x)[["label"]]), paste0(rep(" ", 10), collapse = ""),
attributes(x)[["label"]])
}, character(1))
}
tbl_labels_fmt <- format(tbl_labels)
# max label width is longest the label can be while leaving room for data ellipses on one line
names_width <- max(nchar(names(df)))
types_width <- max(nchar(var_types))
max_label_width <- width - names_width - types_width - 7 - 6 # 7 chars for $ and <>
# 6 chars for shortest data allowed
# wrap long lines to the leftmost label position
max_line_width <- width - 3 # glimpse() leaves 2 chars off by default, +1 to force wrap
left_label_pos <- names_width + 3 # 3 chars for $ and 2 spaces
label_wrap_width <- max_line_width - left_label_pos
tbl_labels <- ifelse(nchar(tbl_labels) > max_label_width,
stringr::str_wrap(tbl_labels, width = label_wrap_width,
exdent = left_label_pos),
tbl_labels)
# TO DO: deal with labels that don't wrap across lines because of single words that are too long
#wrap_prep <- (nchar(tbl_labels) > max_label_width) & !grepl(" ", tbl_labels)
# length of longest label that isn't longer than the max width
tbl_labels_width_nowrap <- max(nchar(tbl_labels)[nchar(tbl_labels) < max_label_width])
# if last wrapped line is short, pad end so var types line up
pad_to <- names_width + tbl_labels_width_nowrap + 4
last_line_width <- function(tbl_labels) {
newline_chars <- vapply(gregexpr("\n", tbl_labels), function(x) {
out <- x[length(x)]
out
}, numeric(1))
newline_chars <- ifelse(newline_chars == -1, NA, newline_chars)
last_line_width <- ifelse(!is.na(newline_chars),
nchar(substr(tbl_labels, newline_chars, nchar(tbl_labels))),
NA)
last_line_width
}
last_line_widths <- last_line_width(tbl_labels)
to_pad <- which(!is.na(last_line_widths) & last_line_widths < pad_to)
if (length(to_pad) > 0) {
for (i in seq_along(to_pad)) {
tbl_labels[to_pad[i]] <- paste0(tbl_labels[to_pad[i]],
paste0(rep(" ", pad_to - last_line_widths[to_pad[i]]),
collapse = ""))
}
last_line_widths <- last_line_width(tbl_labels)
}
tbl_labels <- ifelse(nchar(tbl_labels) > max_label_width,
tbl_labels,
substr(tbl_labels_fmt, 1, tbl_labels_width_nowrap))
var_names <- paste0("$ ", format(names(df)), " ",
tbl_labels, " <", var_types, "> ")
}
data_width <- width - nchar(var_names) - 2
data_width <- ifelse(data_width < 0, width - last_line_widths - 8, data_width)
formatted <- vapply(df, function(x) paste0(tibble:::format_v(x), collapse = ", "),
character(1), USE.NAMES = FALSE)
truncated <- tibble:::str_trunc(formatted, data_width)
cat(paste0(var_names, truncated, collapse = "\n"), "\n",
sep = "")
invisible(x)
}