% notes-epland.tex
%-----------------------------------------------------------------------------%
% Set metadata for use with pdfx
%\begingroup\newif\ifmy
%\IfFileExists{\jobname.xmpdata}{}{\mytrue}
%\ifmy
%\begin{filecontents*}{\jobname.xmpdata}
%\Title{Data Science Notes}
%\Author{Matthew Epland, Ph.D.}
%\Keywords{Data Science\sep Machine Learning\sep Statistics}
%\Copyright{CC-BY-4.0}
%\end{filecontents*}
%\fi\endgroup
% https://tex.stackexchange.com/questions/52317/pdftex-warning-version-allowed
% https://tex.stackexchange.com/questions/301386/why-is-1-5-the-default-pdf-version
% https://tex.stackexchange.com/a/375018
\directlua{pdf.setminorversion(6)}
% \Subject{TODO}
% https://tex.stackexchange.com/a/572817
\RequirePackage{silence}
\WarningFilter{latexfont}{Font shape}
\WarningFilter{latexfont}{Some font shapes were}
%-----------------------------------------------------------------------------%
% Set documentclass
\documentclass[nogradschool,singlespace,nobind]{dukedissertation_modified}
%-----------------------------------------------------------------------------%
% usepackages
\usepackage{amsmath,amssymb,bbm,bm} % https://ctan.org/pkg/bm
\usepackage{mathtools}
\usepackage{cancel}
\usepackage{graphicx}
\usepackage{xcolor}
\usepackage[TU,T1]{fontenc}
\usepackage{indentfirst}
\usepackage{textcomp} % needed for \micro (\textmu) from siunitx to work with microtype: https://tex.stackexchange.com/questions/74670/microtype-siunitx-and-micro-mysterious-warnings
\usepackage[protrusion=true,expansion=true]{microtype} % make text flow nicely... might screw up duke dissertation template
\usepackage{verbatim} % verbatim text and comment environment
\usepackage{lmodern} % allowing font sizes at arbitrary sizes
\DeclareFontShape{TU}{lmr}{bx}{sc} { <-> ssub * lmr/bx/n }{} % https://tex.stackexchange.com/questions/22240/choosing-font-for-bold-small-caps-or-any-other-particular-familyseriesshape-c]
\usepackage{notoccite} % fixes citation numbering in captions with respect to lof & lot, see https://ctan.org/pkg/notoccite
\usepackage[nocompress]{cite} % orders references numerically within one \cite{}, see https://tex.stackexchange.com/questions/69230/numbered-ordering-of-multiple-citations Also changes spacing after comma
\usepackage{fnpct} % make multiple footnotes at one point look nice, https://tex.stackexchange.com/questions/28465/multiple-footnotes-at-one-point
\usepackage[separate-uncertainty,multi-part-units=single,free-standing-units,product-units=repeat,use-xspace]{siunitx} % units package, see https://www.ctan.org/pkg/siunitx
\sisetup{range-phrase={\text{--}},range-units=single}
\usepackage{physics}
% silence warning about qty conflict
% https://tex.stackexchange.com/a/681701
\ExplSyntaxOn
\msg_redirect_name:nnn { siunitx } { physics-pkg } { none }
\ExplSyntaxOff
\usepackage{booktabs,array,multirow,diagbox}
\renewcommand{\arraystretch}{1.5} % gives extra height to tabular rows for super/subscripts
%\usepackage{longtable}
\usepackage{enumitem}
\usepackage{lscape} % landscape https://ctan.org/pkg/lscape
\usepackage{moresize}
\usepackage{fontawesome}
\usepackage{listings}
% SQL dialect for listings: starts from the stock SQL language and adds the
% identifiers below to its keyword list, so they are typeset with keywordstyle.
% Recipe from https://tex.stackexchange.com/a/224098
\lstdefinelanguage{SQL_extended}{language=SQL,
  morekeywords={
    ARRAY_CONSTRUCT, ARRAY_CONSTRUCT_COMPACT, ARRAYS_OVERLAP,
    CONCAT, DATEDIFF, FLATTEN, FOR, GENERATE_ARRAY,
    IFF, IS, LAG, LATERAL, LEAD,
    OVER, PARTITION, PIVOT, QUALIFY,
    REPLACE, ROW_NUMBER, STRING,
    UNNEST, WINDOW, WITH
  }}
% Environment wrapper: code inside SQLcode is typeset with the SQL_extended language.
\lstnewenvironment{SQLcode}{\lstset{language=SQL_extended}}{}
% for algorithms
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage[noend]{algpseudocode}
% Do ... doWhile block: \Do opens the block (printing \algorithmicdo) and \doWhile{<cond>} closes it, printing "\algorithmicwhile <cond>"
\algdef{SE}[DOWHILE]{Do}{doWhile}{\algorithmicdo}[1]{\algorithmicwhile\ #1}%
% Blank out the "do" keyword text; this applies wherever \algorithmicdo is used, including the \Do defined above
\renewcommand\algorithmicdo{}
% \CommentInline{<text>}: a gray "# <text>" comment on its own algorithm line.
% Uses \State, the algpseudocode spelling; the upper-case \STATE belongs to the algorithmic/algcompatible layer, which this preamble does not load.
\newcommand{\CommentInline}[1]{\State \textcolor{gray}{\# #1}}
\usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}
\usepackage{fancyhdr}
\pagestyle{plain}
% use subcaption to get split figures, but the caption dependency doesn't know about the dukedissertation document class - turn off the warning with silence
% load caption explicitly first to set its options; subcaption says it passes them through but it doesn't seem to work
% https://tex.stackexchange.com/questions/34579/is-there-really-something-wrong-with-using-the-caption-package-for-continuedflo
% https://en.wikibooks.org/wiki/LaTeX/Floats,_Figures_and_Captions#Subfloats
% https://ctan.org/pkg/caption
% https://ctan.org/pkg/subcaption
\usepackage{silence}
\WarningFilter{caption}{Unknown document class}
\WarningFilter{caption}{Unsupported document class}
\WarningFilter{hyperref}{The PDF version number could not be set}
\usepackage{setspace} % needed to specify, https://ctan.org/pkg/setspace
\usepackage[style=base,skip=2pt,width=\textwidth]{caption} % ,font={stretch=1.3}
\usepackage[skip=1pt]{subcaption}
% \mysubref{<label>}: wraps \subref{<label>} in parentheses, to reference subfigures from within the main caption without messing with the normal \cref formatting. Adapted from https://tex.stackexchange.com/a/131366
\newcommand\mysubref[1]{(\subref{#1})}
\newsavebox{\largestimage} % see https://tex.stackexchange.com/questions/239128/subcaption-vertical-alignment-of-two-images-of-different-vertical-size
%\renewcommand{\GenericWarning}[2]{\GenericError{#1}{#2}{}{This warning has been turned into a fatal error.}} % crash on warnings for debugging https://stackoverflow.com/a/3277465
% Don't let floats get before the subsection where they're included
% https://tex.stackexchange.com/questions/32598/force-latex-image-to-appear-in-the-section-in-which-its-declared
% https://tex.stackexchange.com/questions/279/how-do-i-ensure-that-figures-appear-in-the-section-theyre-associated-with/235312#235312
% Also doesn't let a float go into a following subsection, results in a ton of blank space - probably better left off
% \usepackage{placeins}
%\let\Oldsubsection\subsection
%\renewcommand{\subsection}{\FloatBarrier\Oldsubsection}
\usepackage{float} % to allow for H option. Works better than \FloatBarrier from placeins, though it is more manual
\floatstyle{plaintop}
\restylefloat{table}
% keep footnotes from splitting, can still happen sometimes (10000 forces)
% https://tex.stackexchange.com/questions/32208/footnote-runs-onto-second-page
\interfootnotelinepenalty=9999
% keep inline equations from splitting, can still happen sometimes (10000 forces)
% https://tex.stackexchange.com/a/14243
\relpenalty=9999
\binoppenalty=9999
% center subfigure captions with multiple lines
\captionsetup[subfigure]{justification=centering}
%-----------------------------------------------------------------------------%
% Other possibly useful packages
% \usepackage{fancyvrb}
% \usepackage{ulem}
% \usepackage{overpic}
% \usepackage{amsfonts, amsthm}
% \usepackage{mathabx}
%-----------------------------------------------------------------------------%
% tweak listings
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codemauve}{rgb}{0.58,0,0.82}
\lstset{
  % --- typeface and colours ---
  basicstyle=\ttfamily,                       % typewriter font for all code
  % basicstyle=\ssmall,                       % alternative: shrink the code font instead
  backgroundcolor=\color{white},              % background colour; \color needs the color or xcolor package
  keywordstyle=\color{blue},                  % keywords
  commentstyle=\color{codegreen},             % comments
  stringstyle=\color{codemauve},              % string literals
  rulecolor=\color{black},                    % keeps the frame colour from changing on line breaks inside non-black text (e.g. green comments)
  % --- line numbers ---
  numbers=left,                               % position of the line numbers: none, left, or right
  firstnumber=1,                              % numbering starts at 1
  stepnumber=1,                               % step between printed numbers; 1 numbers every line
  numbersep=5pt,                              % gap between the numbers and the code
  numberstyle=\ttfamily\tiny\color{codegray}, % look of the line numbers
  % --- line breaking and whitespace ---
  breaklines=true,                            % break long lines automatically
  breakatwhitespace=false,                    % automatic breaks are not restricted to whitespace
  keepspaces=true,                            % keep spaces as typed, preserving code indentation (possibly needs columns=flexible)
  tabsize=2,                                  % a tab counts as 2 spaces
  showspaces=false,                           % do not mark spaces; would override showstringspaces
  showstringspaces=false,                     % do not mark spaces inside strings
  showtabs=false,                             % do not mark tabs inside strings
  % --- layout ---
  frame=none,                                 % no frame around the code
  captionpos=b,                               % caption goes below the listing
  % --- input handling ---
  upquote=true,                               % straight, upright single quotes
  extendedchars=true,                         % accept non-ASCII characters; for 8-bit encodings only, does not work with UTF-8
  % --- unused options, kept for reference ---
  % deletekeywords={...},                     % remove keywords from the given language
  % escapeinside={\%*}{*)},                   % add LaTeX within the code
  % language=Octave,                          % the language of the code
  % morekeywords={*,...},                     % add more keywords to the set
}
%-----------------------------------------------------------------------------%
% Include abbreviations
\input{abbreviations.tex}
%-----------------------------------------------------------------------------%
% Include tikz figures which can not be made standalone, only if they have internal references / citations. Also must be manually added to Makefile if they use feynmp
\usepackage{tikz}
%-----------------------------------------------------------------------------%
% Theorem, Lemma, etc. environments
%\newtheorem{theorem}{Theorem}%[section]
%\newtheorem{lemma}[theorem]{Lemma}
%\newtheorem{proposition}[theorem]{Proposition}
%\newtheorem{corollary}[theorem]{Corollary}
%\newtheorem{result}[theorem]{Result}
%-----------------------------------------------------------------------------%
% PREAMBLE
%-----------------------------------------------------------------------------%
\author{Matthew Epland, Ph.D.}
\title{Data Science Notes}
\date{\today}
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
% HYPERREF
%-----------------------------------------------------------------------------%
\usepackage[hyperpageref]{backref} % pages
% need to load in this order to get proper pdfx a-2b format!!!
\PassOptionsToPackage{hyperfootnotes,pagebackref}{hyperref}
% comment out \usepackage{hyperref} and \hypersetup if using pdfx
\usepackage{hyperref}
% PDF viewer behaviour, bookmarks, and document metadata
\hypersetup{
  breaklinks,
  baseurl=http://,
  pdfborder=0 0 0,
  pdfpagemode=UseNone,
  pdfstartpage=1,
  bookmarksopen=false,
  bookmarksdepth=2, % to show sections and subsections
  pdfauthor={Matthew Epland, Ph.D.},
  pdftitle={Data Science Notes},
  pdfsubject={Data Science, Machine Learning, Statistics},
  pdfkeywords={Data Science, Machine Learning, Statistics}
}
% \usepackage[a-2b]{pdfx} % Note pdfx does not work with travis CI due to latest ubuntu image being from 2016, thus containing this bug https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=877167 fixed in oct 2017. Can use locally if desired
% Page anchors, bookmark numbering, and link colours
\hypersetup{
  plainpages=false,
  bookmarksnumbered,
  % draft, % for printing
  % the four colour keys below are the web (on-screen) setup
  colorlinks,
  linkcolor=blue,
  citecolor=blue,
  urlcolor=blue,
  % breaklinks=true,
}
% Back-reference note for each bibliography entry; #1 is used as the citation count and #2 as the page list.
% Adapted from https://tex.stackexchange.com/questions/183702/formatting-back-references-in-bibliography-bibtex
\renewcommand*{\backrefalt}[4]{%
  \ifcase #1\relax
    % count 0: print nothing (the upstream recipe prints "Not cited." here)
  \or
    Cited on page~#2.%
  \else
    Cited on pages #2.%
  \fi
}
%-----------------------------------------------------------------------------%
% Use \cref/\Cref rather than \ref; cleveref has to be loaded last
\usepackage[capitalise]{cleveref} % https://ctan.org/pkg/cleveref see section 7.1, if redefining need to make them caps
% Names are registered per cross-reference type, which cleveref takes from the counter name
\crefname{figure}{Figure}{Figures}
\Crefname{figure}{Figure}{Figures}
% Tables use the `table' counter; `tabular' is not a cross-reference type, so names registered under it are never picked up
\crefname{table}{Table}{Tables}
\Crefname{table}{Table}{Tables}
\crefname{section}{Section}{Sections}
\Crefname{section}{Section}{Sections}
\crefname{chapter}{Chapter}{Chapters}
\Crefname{chapter}{Chapter}{Chapters}
% NOTE(review): appchap is not a built-in cleveref type (the built-in one is `appendix'); presumably the document class provides it -- confirm
\crefname{appchap}{Appendix}{Appendices}
\Crefname{appchap}{Appendix}{Appendices}
% Equations are referenced as a bare "(n)"; #1 is the number, #2/#3 mark the start and end of the hyperlink
\crefformat{equation}{(#2#1#3)}
% \preface: opens the preface by calling \nmchapter{Preface}
% (\nmchapter is not defined in this file; presumably it is the class's unnumbered-chapter command -- confirm).
% The % after \nmchapter{Preface} keeps the line end from adding a space token to the macro body.
\newcommand\preface{%
  \nmchapter{Preface}%
}
\begin{document}
%-----------------------------------------------------------------------------%
% TITLE PAGE
%-----------------------------------------------------------------------------%
\maketitle
%-----------------------------------------------------------------------------%
% ABSTRACT -- included file should start with '\abstract'.
%-----------------------------------------------------------------------------%
% \include{{sections/abstract}}
%-----------------------------------------------------------------------------%
% FRONTMATTER
%-----------------------------------------------------------------------------%
\tableofcontents % Automatically generated
% Switch back-references off while the abbreviation list is included, then back on
% (presumably so citations inside that list do not show up in the "Cited on page" notes -- confirm)
\backrefsetup{disable}
\include{{sections/listofabbr}} % List of Abbreviations. Start file with '\abbreviations'
\backrefsetup{enable}
%-----------------------------------------------------------------------------%
% PREFACE
%-----------------------------------------------------------------------------%
\include{{sections/preface}}
%==============================================================================
%-----------------------------------------------------------------------------%
%
% MAIN BODY
%
%
%-----------------------------------------------------------------------------%
\include{{sections/stats}}
\include{{sections/hypo}}
\include{{sections/regression}}
\include{{sections/ml_general}}
\include{{sections/dim_reduct}}
\include{{sections/opt}}
\include{{sections/class}}
\include{{sections/cluster}}
\include{{sections/time_series}}
\include{{sections/survival}}
\include{{sections/causality}}
\include{{sections/misc}}
%==============================================================================
%-----------------------------------------------------------------------------%
% APPENDICES -- OPTIONAL. These are just chapters enumerated by Appendix A, Appendix B, Appendix C...
%-----------------------------------------------------------------------------%
% Start each appendix tex file with '\chapter{Title}'
\appendix
\include{{sections/appendixes/pandas}}
\include{{sections/appendixes/sql}}
\include{{sections/appendixes/pyspark}}
\include{{sections/appendixes/coding}}
\include{{sections/appendixes/distributions}}
%\include{{sections/appendixes/finance}}
%-----------------------------------------------------------------------------%
% BIBLIOGRAPHY -- Change the style to match your discipline's standards.
%-----------------------------------------------------------------------------%
\bibliographystyle{./bib/atlasBibStyleWithTitle}
\cleardoublepage
\normalbaselines %Fixes spacing of bibliography
% \addcontentsline{toc}{chapter}{Bibliography} % not needed on my system
\bibliography{./bib/bib}
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------
\end{document}