-
Notifications
You must be signed in to change notification settings - Fork 190
/
StatsBase.jl
272 lines (235 loc) · 8.88 KB
/
StatsBase.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
module StatsBase

# Entry point of the StatsBase package. This file only wires things together:
#   1. brings dependencies into scope (`using` / `import`),
#   2. declares the public API (`export`),
#   3. includes the implementation files.
# No functions or types are defined here directly.

# Names listed with `import` can be extended with new methods by the included
# files without module qualification; names brought in with `using` are
# available for calling only.
import Base: length, size, isempty, values, sum, show, maximum, minimum, extrema
import Base.Cartesian: @nloops, @nref, @nextract
using Base: @irrational, @propagate_inbounds
using DataAPI
import DataAPI: describe
import DataStructures: heapify!, heappop!, percolate_down!
using SortingAlgorithms
using Missings
using LogExpFunctions: xlogx, xlogy

using Statistics
using LinearAlgebra
using Random
using Printf
using SparseArrays
import Random: rand, rand!
import LinearAlgebra: BlasReal, BlasFloat
import Statistics: mean, mean!, var, varm, varm!, std, stdm, cov, covm,
    cor, corm, cov2cor!, unscaled_covzm, quantile, sqrt!,
    median, middle

using StatsAPI: StatisticalModel, RegressionModel
import StatsAPI: pairwise, pairwise!, params, params!,
    fitted, response, responsename, meanresponse, modelmatrix,
    crossmodelmatrix, leverage, cooksdistance, residuals, predict,
    predict!, dof_residual, coef, coefnames, coeftable, confint,
    deviance, islinear, nulldeviance, loglikelihood, nullloglikelihood,
    score, nobs, dof, mss, rss,
    informationmatrix, stderror, vcov, weights, isfitted, fit, fit!,
    aic, aicc, bic, r2, r², adjr2, adjr²

## tackle compatibility issues

export

    ## functions defined in Statistics
    cor,
    cov,
    mean,
    mean!,
    median,
    median!,
    quantile,
    quantile!,
    std,
    var,

    ## weights
    AbstractWeights,    # abstract type to represent any weight vector
    Weights,            # to represent a generic weight vector
    AnalyticWeights,    # to represent an analytic/precision/reliability weight vector
    FrequencyWeights,   # to represent a frequency/case/repeat weight vector
    ProbabilityWeights, # to represent a probability/sampling weight vector
    UnitWeights,        # to represent a uniform weight vector
    weights,            # construct a generic Weights vector
    aweights,           # construct an AnalyticWeights vector
    fweights,           # construct a FrequencyWeights vector
    pweights,           # construct a ProbabilityWeights vector
    eweights,           # construct an exponential Weights vector
    uweights,           # construct a UnitWeights vector
    wsum,               # weighted sum with vector as second argument
    wsum!,              # weighted sum across dimensions with provided storage

    ## moments
    skewness,       # (standardized) skewness
    kurtosis,       # (excess) kurtosis
    moment,         # central moment of given order
    mean_and_var,   # (mean, var)
    mean_and_std,   # (mean, std)
    mean_and_cov,   # (mean, cov)

    ## scalarstats
    geomean,        # geometric mean
    harmmean,       # harmonic mean
    genmean,        # generalized/power mean
    middle,         # the mean of two real numbers
    mode,           # find a mode from data (the first one)
    modes,          # find all modes from data
    zscore,         # compute Z-scores
    zscore!,        # compute Z-scores inplace or to a pre-allocated array
    percentile,     # quantile using percentage (instead of fraction) as argument
    nquantile,      # quantiles at [0:n]/n
    quantilerank,   # quantile-position (0-1) of a value relative to a collection
    percentilerank, # percentile-position (0-100) of a value relative to a collection
    span,           # The range minimum(x):maximum(x)
    variation,      # ratio of standard deviation to mean
    sem,            # standard error of the mean, i.e. sqrt(var / n)
    mad,            # median absolute deviation
    iqr,            # interquartile range
    genvar,         # generalized variance
    totalvar,       # total variation
    entropy,        # the entropy of a probability vector
    renyientropy,   # the Rényi (generalised) entropy of a probability vector
    crossentropy,   # cross entropy between two probability vectors
    kldivergence,   # K-L divergence between two probability vectors
    summarystats,   # summary statistics
    describe,       # print the summary statistics

    ## deviation
    counteq,        # count the number of equal pairs
    countne,        # count the number of non-equal pairs
    sqL2dist,       # squared L2 distance between two arrays
    L2dist,         # L2 distance between two arrays
    L1dist,         # L1 distance between two arrays
    Linfdist,       # L-inf distance between two arrays
    gkldiv,         # (Generalized) Kullback-Leibler divergence between two vectors
    meanad,         # mean absolute deviation
    maxad,          # maximum absolute deviation
    msd,            # mean squared deviation
    rmsd,           # root mean squared deviation
    psnr,           # peak signal-to-noise ratio (in dB)

    ## cov
    scattermat,             # scatter matrix (i.e. unnormalized covariance)
    cov2cor,                # converts a covariance matrix to a correlation matrix
    cor2cov,                # converts a correlation matrix to a covariance matrix
    CovarianceEstimator,    # abstract type for covariance estimators
    SimpleCovariance,       # simple covariance estimator

    ## counts
    addcounts!,     # add counts to an accumulating array or map
    counts,         # count integer values in given arrays
    proportions,    # proportions of integer values in given arrays
                    # (normalized version of counts)
    countmap,       # count distinct values and return a map
    proportionmap,  # proportions of distinct values returned as a map

    ## ranking
    ordinalrank,    # ordinal ranking ("1 2 3 4" ranking)
    competerank,    # competition ranking ("1 2 2 4" ranking)
    denserank,      # dense ranking ("1 2 2 3" ranking)
    tiedrank,       # tied ranking ("1 2.5 2.5 4" ranking)

    ## rankcorr
    corspearman,    # spearman's rank correlation
    corkendall,     # kendall's rank correlation

    ## partialcor
    partialcor,     # partial correlation

    ## signalcorr
    autocov!, autocov,      # auto covariance
    autocor!, autocor,      # auto correlation
    crosscov!, crosscov,    # cross covariance
    crosscor!, crosscor,    # cross correlation
    pacf!, pacf,            # partial auto-correlation

    ## sampling
    samplepair,     # draw a pair of distinct elements
    sample,         # sampling from a population
    sample!,        # sampling from a population, with pre-allocated output
    wsample,        # sampling from a population with weights
    wsample!,       # weighted sampling, with pre-allocated output

    ## empirical
    ecdf,           # empirical cumulative distribution function
    ECDF,           # type for empirical cumulative distribution function

    AbstractHistogram,
    Histogram,
    midpoints,
    # histrange,

    ## robust
    trim,           # trimmed set
    trim!,          # trimmed set
    winsor,         # Winsorized set
    winsor!,        # Winsorized set
    trimvar,        # variance of the mean of a trimmed set

    ## misc
    rle,            # run-length encoding
    inverse_rle,    # inverse run-length encoding
    indexmap,       # construct a map from element to index
    levelsmap,      # construct a map from n unique elements to [1, ..., n]
    indicatormat,   # construct indicator matrix
    pairwise,       # pairwise application of functions
    pairwise!,      # pairwise! application of functions

    ## statistical models
    CoefTable,
    StatisticalModel,
    RegressionModel,

    adjr2,
    adjr²,
    aic,
    aicc,
    bic,
    coef,
    coefnames,
    coeftable,
    confint,
    cooksdistance,
    crossmodelmatrix,
    deviance,
    dof,
    dof_residual,
    fit,
    fit!,
    fitted,
    informationmatrix,
    isfitted,
    islinear,
    leverage,
    loglikelihood,
    meanresponse,
    modelmatrix,
    mss,
    response,
    responsename,
    nobs,
    nulldeviance,
    nullloglikelihood,
    rss,
    score,
    stderror,
    vcov,
    predict,
    predict!,
    residuals,
    r2,
    r²,

    ConvergenceException,

    ## data standardization
    standardize,
    AbstractDataTransform,  # the type to represent an abstract data transformation
    ZScoreTransform,        # the type to represent a z-score data transformation
    UnitRangeTransform,     # the type to represent a 0-1 data transformation

    ## reliability
    CronbachAlpha,          # the type to represent Cronbach's alpha scores
    cronbachalpha           # function to compute Cronbach's alpha scores

## source files
# The include order is kept exactly as it was; later files may rely on
# definitions made by earlier ones, so do not reorder without checking.
include("common.jl")
include("weights.jl")
include("moments.jl")
include("scalarstats.jl")
include("robust.jl")
include("deviation.jl")
include("cov.jl")
include("counts.jl")
include("ranking.jl")
include("toeplitzsolvers.jl")
include("rankcorr.jl")
include("signalcorr.jl")
include("partialcor.jl")
include("empirical.jl")
include("hist.jl")
include("pairwise.jl")
include("reliability.jl")
include("misc.jl")
include("sampling.jl")
include("statmodels.jl")
include("transformations.jl")
include("deprecates.jl")

end # module