diff --git a/CHANGELOG.md b/CHANGELOG.md index 3871c05..45301dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.0] - 2023-12-15 17:00:00 + +### Added +- Updates the Generalized Method of Moments chapter. +- Adds a structural estimation paper chapter. +- Adds other updates in the Structural Estimation section. + ## [0.0.7] - 2023-12-06 15:00:00 ### Added @@ -53,6 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +[0.1.0]: https://github.com/OpenSourceEcon/CompMethods/compare/v0.0.7...v0.1.0 [0.0.7]: https://github.com/OpenSourceEcon/CompMethods/compare/v0.0.6...v0.0.7 [0.0.6]: https://github.com/OpenSourceEcon/CompMethods/compare/v0.0.5...v0.0.6 [0.0.5]: https://github.com/OpenSourceEcon/CompMethods/compare/v0.0.4...v0.0.5 diff --git a/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.bib b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.bib new file mode 100644 index 0000000..943e5b7 --- /dev/null +++ b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.bib @@ -0,0 +1,85 @@ +@ARTICLE{Aiyagari:1994, + AUTHOR = {S. Rao Aiyagari}, + TITLE = {Uninsured Idiosyncratic Risk and Aggregate Saving}, + JOURNAL = {Quarterly Journal of Economics}, + YEAR = {1994}, + volume = {109}, + number = {3}, + pages = {659-684}, + month = {August}, +} + +@ARTICLE{Auerbach:1996, + AUTHOR = {Alan J. Auerbach}, + TITLE = {Dynamic Revenue Estimation}, + JOURNAL = {Journal of Economic Perspectives}, + YEAR = {1996}, + volume = {10}, + number = {1}, + month = {Winter}, +} + +@ARTICLE{Auerbach:2005, + AUTHOR = {Alan J. Auerbach}, + TITLE = {Dynamic Scoring: An Introduction to the Issues}, + JOURNAL = {American Economic Review}, + YEAR = {2005}, + volume = {95}, + number = {2}, + month = {May}, + pages = {421-425}, +} + +@BOOK{AuerbachKotlikoff:1987, + Author = {Alan J. Auerbach and Lawrence J. Kotlikoff}, + Publisher = {Cambridge University Press}, + Title = {Dynamic Fiscal Policy}, + Year = {1987}, +} + +@TECHREPORT{BrillViard:2011, + AUTHOR = {Alex M. Brill and Alan D. Viard}, + TITLE = {The Benefits and Limitations of Income Tax Reform}, + INSTITUTION = {American Enterprise Institute for Public Policy Research}, + YEAR = {2011}, + type = {Tax Policy Outlook}, + number = {2}, + month = {September}, +} + +@ARTICLE{CagettiDeNardi:2008, + AUTHOR = {Marco Cagetti and Mariacristina {De Nardi}}, + TITLE = {Wealth Inequality: Data and Models}, + JOURNAL = {Macroeconomic Dynamics}, + YEAR = {2008}, + volume = {12}, + number = {Supplement S2}, + pages = {285-313}, + month = {September}, +} + +@techreport{DEPR2015, + Author = {Jason DeBacker and Richard W. Evans and Kerk L. Phillips and Shanthi Ramnath}, + Institution = {Mimeo}, + Title = {Estimating the Hourly Earnings Processes of Top Earners}, + Year = {2015}} + +@TECHREPORT{Census:2015, + AUTHOR = {{Census Bureau}}, + TITLE = {Annual Estimates of the Resident Population by Single Year of Age and Sex: April 1, 2010 to July 1, 2013 (both sexes)}, + INSTITUTION = {U.S. 
Census Bureau}, + YEAR = {2015}, + type = {National Characteristics}, + number = {Vintage 2013}, + address = {http://www.census.gov/popest/data/national/asrh/2013/index.html}, +} + +@TECHREPORT{DeNardi:2015, + AUTHOR = {Mariacristina {De Nardi}}, + INSTITUTION = {National Bureau of Economic Research}, + MONTH = {April}, + NUMBER = {No. 21106}, + TITLE = {Quantitative Models of Wealth Inequality: A Survey}, + type = {NBER Working Paper}, + YEAR = {2015}, +} diff --git a/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.pdf b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.pdf new file mode 100644 index 0000000..8278be9 Binary files /dev/null and b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.pdf differ diff --git a/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.tex b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.tex new file mode 100644 index 0000000..77bf9dc --- /dev/null +++ b/code/StrEstPaper/LaTeXtemplates/LaTeX_projtemplate.tex @@ -0,0 +1,176 @@ +\documentclass[letterpaper,12pt]{article} + +\usepackage{threeparttable} +\usepackage{geometry} +\geometry{letterpaper,tmargin=1in,bmargin=1in,lmargin=1.25in,rmargin=1.25in} +\usepackage[format=hang,font=normalsize,labelfont=bf]{caption} +\usepackage{amsmath} +\usepackage{multirow} +\usepackage{array} +\usepackage{delarray} +\usepackage{amssymb} +\usepackage{amsthm} +\usepackage{lscape} +\usepackage{natbib} +\usepackage{setspace} +\usepackage{float,color} +\usepackage[pdftex]{graphicx} +\usepackage{pdfsync} +\usepackage{verbatim} +\usepackage{placeins} +\usepackage{geometry} +\usepackage{pdflscape} +\synctex=1 +\usepackage{hyperref} +\hypersetup{colorlinks,linkcolor=red,urlcolor=blue,citecolor=red} +\usepackage{bm} + + +\theoremstyle{definition} +\newtheorem{theorem}{Theorem} +\newtheorem{acknowledgement}[theorem]{Acknowledgement} +\newtheorem{algorithm}[theorem]{Algorithm} +\newtheorem{axiom}[theorem]{Axiom} +\newtheorem{case}[theorem]{Case} +\newtheorem{claim}[theorem]{Claim} +\newtheorem{conclusion}[theorem]{Conclusion} +\newtheorem{condition}[theorem]{Condition} +\newtheorem{conjecture}[theorem]{Conjecture} +\newtheorem{corollary}[theorem]{Corollary} +\newtheorem{criterion}[theorem]{Criterion} +\newtheorem{definition}{Definition} % Number definitions on their own +\newtheorem{derivation}{Derivation} % Number derivations on their own +\newtheorem{example}[theorem]{Example} +\newtheorem{exercise}[theorem]{Exercise} +\newtheorem{lemma}[theorem]{Lemma} +\newtheorem{notation}[theorem]{Notation} +\newtheorem{problem}[theorem]{Problem} +\newtheorem{proposition}{Proposition} % Number propositions on their own +\newtheorem{remark}[theorem]{Remark} +\newtheorem{solution}[theorem]{Solution} +\newtheorem{summary}[theorem]{Summary} +\bibliographystyle{aer} +\newcommand\ve{\varepsilon} +\renewcommand\theenumi{\roman{enumi}} +\newcommand\norm[1]{\left\lVert#1\right\rVert} + +\begin{document} + +\begin{titlepage} +\title{Title of My Paper \thanks{You can put thanks here, affiliation here, anything you want here.} + } + \author{ + Your Name\footnote{Your University, Your Department, Academic address, (???) ???-????, \href{mailto:your.emailr@uchicago.edu}{your.emailr@uchicago.edu}.} \\[-2pt] + \and + Other Name\footnote{Your University, Your Department, Academic address, (???) ???-????, \href{mailto:your.emailr@uchicago.edu}{your.emailr@uchicago.edu}.}} +\date{March 2018 \\ + \scriptsize{(version 18.03.a)}} +\maketitle +\vspace{-9mm} +\begin{abstract} +\small{Put abstract here. 
+\vspace{3mm} + +\noindent\textit{keywords:}\: Static scoring, revenue estimates, dynamic scoring. + +\vspace{3mm} + +\noindent\textit{JEL classification:} D91, E21, H30 +} + +\end{abstract} +\thispagestyle{empty} +\end{titlepage} + + +\begin{spacing}{1.5}{} + +\section{Introduction}\label{SecIntro} + + Put introduction here. You'll probably want some references here like \citet{Auerbach:1996} or \citet{DEPR2015}. These citations use the \texttt{natbib} package above and require a call to the \texttt{bibliography} command just before the appendix section. + + +\section{Model}\label{SecModel} + + Put model description here. + + +\section{Data}\label{SecData} + + Put data description here. + + +\section{Estimation}\label{SecEstimation} + + Put estimation details here. You might want to use a table. Table \ref{TabTable1} is an example of a table generated by the code below. + + \begin{table}[htbp] \centering \captionsetup{width=5.8in} + \caption{\label{TabTable1}\textbf{Percent change in macroeconomic variables over the budget window and in steady-state from policy change}} + \begin{threeparttable} + \begin{tabular}{>{\scriptsize}l |>{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r >{\scriptsize}r |>{\scriptsize}r >{\scriptsize}r} + \hline\hline + \multicolumn{1}{c}{\scriptsize{Macroeconomic}} & & & & & & & & & & & \multicolumn{1}{c}{\scriptsize{2016-}} & \\[-2mm] + \multicolumn{1}{c}{\scriptsize{variables}} & 2016 & 2017 & 2018 & 2019 & 2020 & 2021 & 2022 & 2023 & 2024 & 2025 & \multicolumn{1}{c}{\scriptsize{2025}} & \multicolumn{1}{c}{\scriptsize{SS}} \\ + \hline + GDP & 0.54 & 0.50 & 0.55 & 0.91 & 0.90 & 1.02 & 1.03 & 1.02 & 1.03 & 1.22 & 0.87 & 1.40 \\ + Consumption & 0.21 & 0.28 & 0.35 & 0.44 & 0.52 & 0.58 & 0.65 & 0.70 & 0.75 & 0.86 & 0.53 & 1.30 \\ + Investment & 1.28 & 0.99 & 0.98 & 1.93 & 1.74 & 1.96 & 1.88 & 1.72 & 1.67 & 2.02 & 1.62 & 1.65 \\ + Hours Worked & 0.83 & 0.71 & 0.73 & 1.25 & 1.16 & 1.27 & 1.23 & 1.15 & 1.13 & 1.37 & 1.08 & 1.27 \\ + \hline + Avg. Wage & -0.29 & -0.21 & -0.19 & -0.35 & -0.26 & -0.26 & -0.20 & -0.13 & -0.09 & -0.15 & -0.21 & 0.13 \\ + Interest Rate & 1.00 & 0.72 & 0.66 & 1.20 & 0.90 & 0.91 & 0.70 & 0.47 & 0.33 & 0.56 & 0.75 & -0.51 \\ + \hline + Total Taxes & -3.59 & -2.42 & -3.10 & -8.23 & -8.21 & -8.36 & -8.32 & -8.53 & -8.89 & -8.27 & -6.71 & -7.43 \\ + \hline\hline + \end{tabular} + % \begin{tablenotes} + % \scriptsize{\item[]Note: Maybe put sources here.} + % \end{tablenotes} + \end{threeparttable} + \end{table} + + +\section{Experiment}\label{SecExperiment} + + Put experiment results here. You might want to include a figure. Here is a some figure code that generated Figure \ref{FigLogAbility}. Note that I put my images in an \texttt{images} folder in the directory of this \LaTeX file. + + \begin{figure}[htb]\centering \captionsetup{width=4.0in} + \caption{\label{FigLogAbility}\textbf{Exogenous life cycle income ability paths $\log(e_{j,s})$ with $S=80$ and $J=7$}} + \fbox{\resizebox{4.0in}{2.7in}{\includegraphics{./images/ability_log_2D.png}}} + \end{figure} + + +\section{Conclusion}\label{SecConclusion} + + Put conclusion here. 
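  % Citation examples (a sketch for users of this template, using entries from
  % LaTeX_projtemplate.bib above; exact rendering depends on the aer bibliography
  % style declared in the preamble):
  %   \citet{Auerbach:1996}            ->  Auerbach (1996)                  [textual]
  %   \citep{AuerbachKotlikoff:1987}   ->  (Auerbach and Kotlikoff, 1987)   [parenthetical]
  % Both natbib commands are resolved by the \bibliography call below.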
+ + \clearpage + + +\end{spacing} + + +\newpage +\bibliography{LaTeX_projtemplate.bib} + + +\newpage +\renewcommand{\theequation}{A.\arabic{section}.\arabic{equation}} + % redefine the command that creates the section number +\renewcommand{\thesection}{A-\arabic{section}} % redefine the command that creates the equation number +\setcounter{equation}{0} % reset counter +\setcounter{section}{0} % reset section number +\section*{APPENDIX} % use *-form to suppress numbering + +\begin{spacing}{1.0} + +\section{Some Appendix}\label{AppSomeAppendix} + + You can put appendices here at the end of the paper using section commands. + + + + +\end{spacing} + +\end{document} diff --git a/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.pdf b/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.pdf new file mode 100644 index 0000000..b2bce0d Binary files /dev/null and b/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.pdf differ diff --git a/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.tex b/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.tex new file mode 100644 index 0000000..e1ffb05 --- /dev/null +++ b/code/StrEstPaper/LaTeXtemplates/LaTeX_slides.tex @@ -0,0 +1,98 @@ +\documentclass[11pt]{beamer} +\usetheme{Darmstadt} +\useoutertheme{split} +\useoutertheme{miniframes} +\usepackage{hyperref} +\hypersetup{colorlinks, urlcolor=blue} +\usepackage{bm} +\usepackage{times} +\usefonttheme{structurebold} +\usepackage{array} +\usepackage[english]{babel} +\usepackage{amsmath,amssymb} +\usepackage[latin1]{inputenc} + +%Code snippets and syntax highlighting +\usepackage{listings} +%Settings for Listings +\lstset{ + language=Python, + basicstyle=\ttfamily, + keywordstyle=\color{blue}\ttfamily, + stringstyle=\color{red}\ttfamily, + commentstyle=\color{green}\ttfamily, + morecomment=[l][\color{magenta}]{\#} +} + +\usepackage{array} +\usepackage{threeparttable} +\usepackage{colortbl} +\usepackage{multirow} +\usepackage{amsthm} +\usepackage{float,graphicx,color} +\newtheorem*{thm}{Theorem} +\theoremstyle{definition} +%\numberwithin{equation}{section} +\newtheorem*{defn}{Definition} +\newcommand\boldline{\arrayrulewidth{1pt}\hline} +\newcommand\ve{\varepsilon} + + +\setbeamercovered{dynamic} + +\title[Short version of title]{Full version of title} +\author[FirstAuthor and SecondAuthor]{\textbf{First Author} \and \textbf{Second Author}} +\date{February 2019} + + +\begin{document} + +\frame{\titlepage} + + +\section{Intro} % This puts a section heading at the top of the slides + + \frame{ + \frametitle{Introduction} + \begin{enumerate} + \item<1-> This is a numbered list + \item<2-> This second item in numbered list comes up on next click + \vspace{5mm} + \item<3-> The vspace command puts spaces between your items + \end{enumerate} + \begin{itemize} + \item<4-> Bulleted lists are also good + \item<4-> And you can set all the bullets to appear at once + \end{itemize} + \begin{block}<5->{Block (blue) and alertblock (red)} + The block and alertblock environment makes these nice boxes that add emphasis + \end{block} + } + + +\section{Images} + + \begin{frame} + \frametitle{Here is an image} + \begin{figure}[htb]\centering + \fbox{\resizebox{3.5in}{2.7in}{\includegraphics{images/ability_log_2D.png}}} + \end{figure} + \end{frame} + + +\section{Tables} + + \frame{ + \frametitle{Here is a table} + \begin{tabular}{>{\small}c >{\small}l >{\small}r} + \hline\hline + Variable & Description & Value \\ + \hline + $\beta$ & discount factor & 0.96 \\ + $\alpha$ & capital share of income & 0.35 \\ + \hline\hline + \end{tabular} + } + + +\end{document} diff 
--git a/code/StrEstPaper/LaTeXtemplates/images/ability_log_2D.png b/code/StrEstPaper/LaTeXtemplates/images/ability_log_2D.png new file mode 100644 index 0000000..2aa967b Binary files /dev/null and b/code/StrEstPaper/LaTeXtemplates/images/ability_log_2D.png differ diff --git a/code/gmm/distributions.py b/code/gmm/distributions.py new file mode 100644 index 0000000..0d2aace --- /dev/null +++ b/code/gmm/distributions.py @@ -0,0 +1,177 @@ +""" +------------------------------------------------------------------------ +This module contains the functions for probability density functions of +continuous PDF's. + +This Python module defines the following function(s): + GA_pdf() + GG_pdf() + GB2_pdf() +------------------------------------------------------------------------ +""" +# Import packages +import numpy as np +import scipy.special as spc + + +""" +------------------------------------------------------------------------ + Functions +------------------------------------------------------------------------ +""" + + +def LN_pdf(xvals, mu, sigma): + """ + -------------------------------------------------------------------- + This function gives the PDF of the lognormal distribution for xvals + given mu and sigma + + (LN): f(x; mu, sigma) = (1 / (x * sigma * sqrt(2 * pi))) * + exp((-1 / 2) * (((log(x) - mu) / sigma) ** 2)) + x in [0, infty), mu in (-infty, infty), sigma > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, data + mu = scalar, mean of the ln(x) + sigma = scalar > 0, standard deviation of ln(x) + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, probability of each observation given + the parameter values + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + ( + (1 / (np.sqrt(2 * np.pi) * sigma * xvals)) + * np.exp((-1.0 / 2.0) * (((np.log(xvals) - mu) / sigma) ** 2)) + ) + ) + + return pdf_vals + + +def GA_pdf(xvals, alpha, beta): + """ + -------------------------------------------------------------------- + Returns the PDF values from the two-parameter gamma (GA) + distribution. See McDonald and Xu (1995). + + (GA): f(x; alpha, beta) = (1 / ((beta ** alpha) * + spc.gamma(alpha))) * (x ** (alpha - 1)) * (e ** (-x / beta)) + x in [0, infty), alpha, beta > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of gamma distribution + alpha = scalar > 0, gamma distribution parameter + beta = scalar > 0, gamma distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.gamma() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from gamma distribution + corresponding to xvals given parameters alpha and beta + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (1 / ((beta**alpha) * spc.gamma(alpha))) + * (xvals ** (alpha - 1)) + * np.exp(-xvals / beta) + ) + + return pdf_vals + + +def GG_pdf(xvals, alpha, beta, mm): + """ + -------------------------------------------------------------------- + Returns the PDF values from the three-parameter generalized gamma + (GG) distribution. See McDonald and Xu (1995). 
+ + (GG): f(x; alpha, beta, m) = + (m / ((beta ** alpha) * spc.gamma(alpha/m))) * + (x ** (alpha - 1)) * (e ** -((x / beta) ** m)) + x in [0, infty), alpha, beta, m > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of generalized gamma (GG) + distribution + alpha = scalar > 0, generalized gamma (GG) distribution parameter + beta = scalar > 0, generalized gamma (GG) distribution parameter + mm = scalar > 0, generalized gamma (GG) distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.gamma() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from generalized gamma + distribution corresponding to xvals given parameters + alpha, beta, and mm + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (mm / ((beta**alpha) * spc.gamma(alpha / mm))) + * (xvals ** (alpha - 1)) + * np.exp(-((xvals / beta) ** mm)) + ) + + return pdf_vals + + +def GB2_pdf(xvals, aa, bb, pp, qq): + """ + -------------------------------------------------------------------- + Returns the PDF values from the four-parameter generalized beta 2 + (GB2) distribution. See McDonald and Xu (1995). + + (GB2): f(x; a, b, p, q) = (a * (x ** ((a*p) - 1))) / + ((b ** (a * p)) * spc.beta(p, q) * + ((1 + ((x / b) ** a)) ** (p + q))) + x in [0, infty), alpha, beta, m > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of generalized beta 2 + (GB2) distribution + aa = scalar > 0, generalized beta 2 (GB2) distribution parameter + bb = scalar > 0, generalized beta 2 (GB2) distribution parameter + pp = scalar > 0, generalized beta 2 (GB2) distribution parameter + qq = scalar > 0, generalized beta 2 (GB2) distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.beta() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from generalized beta 2 (GB2) + distribution corresponding to xvals given parameters aa, + bb, pp, and qq + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (aa * (xvals ** (aa * pp - 1))) + / ( + (bb ** (aa * pp)) + * spc.beta(pp, qq) + * ((1 + ((xvals / bb) ** aa)) ** (pp + qq)) + ) + ) + + return pdf_vals diff --git a/code/mle/distributions.py b/code/mle/distributions.py new file mode 100644 index 0000000..0d2aace --- /dev/null +++ b/code/mle/distributions.py @@ -0,0 +1,177 @@ +""" +------------------------------------------------------------------------ +This module contains the functions for probability density functions of +continuous PDF's. 
+ +This Python module defines the following function(s): + GA_pdf() + GG_pdf() + GB2_pdf() +------------------------------------------------------------------------ +""" +# Import packages +import numpy as np +import scipy.special as spc + + +""" +------------------------------------------------------------------------ + Functions +------------------------------------------------------------------------ +""" + + +def LN_pdf(xvals, mu, sigma): + """ + -------------------------------------------------------------------- + This function gives the PDF of the lognormal distribution for xvals + given mu and sigma + + (LN): f(x; mu, sigma) = (1 / (x * sigma * sqrt(2 * pi))) * + exp((-1 / 2) * (((log(x) - mu) / sigma) ** 2)) + x in [0, infty), mu in (-infty, infty), sigma > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, data + mu = scalar, mean of the ln(x) + sigma = scalar > 0, standard deviation of ln(x) + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, probability of each observation given + the parameter values + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + ( + (1 / (np.sqrt(2 * np.pi) * sigma * xvals)) + * np.exp((-1.0 / 2.0) * (((np.log(xvals) - mu) / sigma) ** 2)) + ) + ) + + return pdf_vals + + +def GA_pdf(xvals, alpha, beta): + """ + -------------------------------------------------------------------- + Returns the PDF values from the two-parameter gamma (GA) + distribution. See McDonald and Xu (1995). + + (GA): f(x; alpha, beta) = (1 / ((beta ** alpha) * + spc.gamma(alpha))) * (x ** (alpha - 1)) * (e ** (-x / beta)) + x in [0, infty), alpha, beta > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of gamma distribution + alpha = scalar > 0, gamma distribution parameter + beta = scalar > 0, gamma distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.gamma() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from gamma distribution + corresponding to xvals given parameters alpha and beta + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (1 / ((beta**alpha) * spc.gamma(alpha))) + * (xvals ** (alpha - 1)) + * np.exp(-xvals / beta) + ) + + return pdf_vals + + +def GG_pdf(xvals, alpha, beta, mm): + """ + -------------------------------------------------------------------- + Returns the PDF values from the three-parameter generalized gamma + (GG) distribution. See McDonald and Xu (1995). 
+ + (GG): f(x; alpha, beta, m) = + (m / ((beta ** alpha) * spc.gamma(alpha/m))) * + (x ** (alpha - 1)) * (e ** -((x / beta) ** m)) + x in [0, infty), alpha, beta, m > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of generalized gamma (GG) + distribution + alpha = scalar > 0, generalized gamma (GG) distribution parameter + beta = scalar > 0, generalized gamma (GG) distribution parameter + mm = scalar > 0, generalized gamma (GG) distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.gamma() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from generalized gamma + distribution corresponding to xvals given parameters + alpha, beta, and mm + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (mm / ((beta**alpha) * spc.gamma(alpha / mm))) + * (xvals ** (alpha - 1)) + * np.exp(-((xvals / beta) ** mm)) + ) + + return pdf_vals + + +def GB2_pdf(xvals, aa, bb, pp, qq): + """ + -------------------------------------------------------------------- + Returns the PDF values from the four-parameter generalized beta 2 + (GB2) distribution. See McDonald and Xu (1995). + + (GB2): f(x; a, b, p, q) = (a * (x ** ((a*p) - 1))) / + ((b ** (a * p)) * spc.beta(p, q) * + ((1 + ((x / b) ** a)) ** (p + q))) + x in [0, infty), alpha, beta, m > 0 + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values in the support of generalized beta 2 + (GB2) distribution + aa = scalar > 0, generalized beta 2 (GB2) distribution parameter + bb = scalar > 0, generalized beta 2 (GB2) distribution parameter + pp = scalar > 0, generalized beta 2 (GB2) distribution parameter + qq = scalar > 0, generalized beta 2 (GB2) distribution parameter + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + spc.beta() + + OBJECTS CREATED WITHIN FUNCTION: + pdf_vals = (N,) vector, pdf values from generalized beta 2 (GB2) + distribution corresponding to xvals given parameters aa, + bb, pp, and qq + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + """ + pdf_vals = np.float64( + (aa * (xvals ** (aa * pp - 1))) + / ( + (bb ** (aa * pp)) + * spc.beta(pp, qq) + * ((1 + ((xvals / bb) ** aa)) ** (pp + qq)) + ) + ) + + return pdf_vals diff --git a/docs/book/CompMethods_references.bib b/docs/book/CompMethods_references.bib index 49a5973..68c093e 100755 --- a/docs/book/CompMethods_references.bib +++ b/docs/book/CompMethods_references.bib @@ -219,6 +219,13 @@ @INCOLLECTION{BYUACME_Unix1 url = {https://acme.byu.edu/2023-2024-materials}, } +@TECHREPORT{CPS:2012, + AUTHOR = {{Current Population Survey}}, + INSTITUTION = {Bureau of the Census and Bureau of Labor Statistics}, + TITLE = {2012 Annual Social and Economic (ASEC) Supplement}, + YEAR = {2012}, +} + @BOOK{DavidsonMacKinnon:2004, AUTHOR = {Russell Davidson and James G. 
MacKinnon}, TITLE = {Econometric Theory and Methods}, diff --git a/docs/book/_toc.yml b/docs/book/_toc.yml index ddbf37e..0cf4a43 100644 --- a/docs/book/_toc.yml +++ b/docs/book/_toc.yml @@ -41,6 +41,7 @@ parts: - file: struct_est/MLE - file: struct_est/GMM - file: struct_est/SMM + - file: struct_est/paper - caption: Appendix chapters: - file: appendix/glossary diff --git a/docs/book/struct_est/GMM.md b/docs/book/struct_est/GMM.md index 9c317c1..0440363 100644 --- a/docs/book/struct_est/GMM.md +++ b/docs/book/struct_est/GMM.md @@ -156,18 +156,46 @@ The first step is to estimate the GMM parameter vector $\hat{\theta}_{1,GMM}$ us \hat{\theta}_{1, GMM}=\theta:\quad \min_{\theta}\:e(x|\theta)^T \, I \, e(x|\theta) ``` -We use the $R\times 1$ moment error vector and the Step 1 GMM estimate $e(x|\hat{\theta}_{1,GMM})$ to get a new estimate of the variance-covariance matrix. +As we will show in {eq}`EqGMM_estW_2step`, the optimal two-step weighting matrix is the inverse of the variance-covariance matrix of the moment error vector $e(x|\theta)$. To get an estimate of the variance-covariance matrix of the error moment vector, we need a matrix of errors that represents how the calculation of each moment varies across the $N$ observations in the data. + +Define $E(x|\theta)$ as the $R\times N$ moment error matrix such that the average across each row gives the moment error vector. When the errors are simple differences, the $E(x|\theta)$ matrix is the following, + +```{math} + :label: EqGMM_GMMest_2stp_ErrMatSimp + E(x|\theta) = + \begin{bmatrix} + m_1(x|\theta) - m_1(x_1) & m_1(x|\theta) - m_1(x_2) & ... & m_1(x|\theta) - m_1(x_N) \\ + m_2(x|\theta) - m_2(x_1) & m_2(x|\theta) - m_2(x_2) & ... & m_2(x|\theta) - m_2(x_N) \\ + \vdots & \vdots & \ddots & \vdots \\ + m_R(x|\theta) - m_R(x_1) & m_R(x|\theta) - m_R(x_2) & ... & m_R(x|\theta) - m_R(x_N) \\ + \end{bmatrix} +``` + +where $m_r(x_i)$ is a function associated with the $r$th moment and the $i$th data observation. When the errors are percent deviations, the $E(x|\theta)$ matrix is the following, + +```{math} + :label: EqGMM_GMMest_2stp_ErrMatPct + E(x|\theta) = + \begin{bmatrix} + \frac{m_1(x|\theta) - m_1(x_1)}{m_1(x_1)} & \frac{m_1(x|\theta) - m_1(x_2)}{m_1(x_2)} & ... & \frac{m_1(x|\theta) - m_1(x_N)}{m_1(x_N)} \\ + \frac{m_2(x|\theta) - m_2(x_1)}{m_2(x_1)} & \frac{m_2(x|\theta) - m_2(x_2)}{m_2(x_2)} & ... & \frac{m_2(x|\theta) - m_2(x_N)}{m_2(x_N)} \\ + \vdots & \vdots & \ddots & \vdots \\ + \frac{m_R(x|\theta) - m_R(x_1)}{m_R(x_1)} & \frac{m_R(x|\theta) - m_R(x_2)}{m_R(x_2)} & ... & \frac{m_R(x|\theta) - m_R(x_N)}{m_R(x_N)} \\ + \end{bmatrix} +``` + +where the denominator of the percentage deviation or baseline is the model moment that does not change. We use the $E(x|\theta)$ data matrix and the Step 1 GMM estimate $e(x|\hat{\theta}_{1,GMM})$ to get a new estimate of the variance covariance matrix. ```{math} :label: EqGMM_GMMest_2stp_2VarCov - \hat{\Omega}_2 = e(x|\hat{\theta}_{1,GMM})\,e(x|\hat{\theta}_{1,GMM})^T + \hat{\Omega}_2 = \frac{1}{N}E(x|\hat{\theta}_{1,GMM})\,E(x|\hat{\theta}_{1,GMM})^T ``` This is simply saying that the $(r,s)$-element of the $R\times R$ estimator of the variance-covariance matrix of the moment vector is the following. 
```{math} :label: EqGMM_2stepVarCov_rs - \hat{\Omega}_{2,r,s} = \Bigl[m_r(x|\hat{\theta}_{1,GMM}) - m_{r}(x)\Bigr]\Bigl[m_s(x|\theta) - m_s(x)\Bigr] + \hat{\Omega}_{2,r,s} = \frac{1}{N}\sum_{i=1}^N\Bigl[m_r(x|\theta) - m_{r}(x_i)\Bigr]\Bigl[m_s(x|\theta) - m_s(x_i)\Bigr] ``` The optimal weighting matrix is the inverse of the two-step variance covariance matrix. @@ -201,7 +229,7 @@ and the $(i+1)$th estimate of the optimal weighting matrix is defined as the fol ```{math} :label: EqGMM_estW_istep - \hat{W}_{i+1} \equiv \hat{\Omega}_{i+1}^{-1}\quad\text{where}\quad \hat{\Omega}_{i+1} = e(x|\hat{\theta}_{i,GMM})\,e(x|\hat{\theta}_{i,GMM})^T + \hat{W}_{i+1} \equiv \hat{\Omega}_{i+1}^{-1}\quad\text{where}\quad \hat{\Omega}_{i+1} = \frac{1}{N}E(x|\hat{\theta}_{i,GMM})\,E(x|\hat{\theta}_{i,GMM})^T ``` The iterated GMM estimator $\hat{\theta}_{it,GMM}$ is the $\hat{\theta}_{i,GMM}$ such that $\hat{W}_{i+1}$ is very close to $\hat{W}_{i}$ for some distance metric (norm). @@ -215,7 +243,7 @@ The iterated GMM estimator $\hat{\theta}_{it,GMM}$ is the $\hat{\theta}_{i,GMM}$ (SecGMM_W_NW)= ### Newey-West consistent estimator of $\Omega$ and W -[TODO: Need to get this right for the GMM case.] The Newey-West estimator of the optimal weighting matrix and variance covariance matrix is consistent in the presence of heteroskedasticity and autocorrelation in the data (See {cite}`NeweyWest:1987`). {cite}`AddaCooper:2003` (p. 82) have a nice exposition of how to compute the Newey-West weighting matrix $\hat{W}_{nw}$. The asymptotic representation of the optimal weighting matrix $\hat{W}^{opt}$ is the following: +The Newey-West estimator of the optimal weighting matrix and variance covariance matrix is consistent in the presence of heteroskedasticity and autocorrelation in the data (See {cite}`NeweyWest:1987`). {cite}`AddaCooper:2003` (p. 82) have a nice exposition of how to compute the Newey-West weighting matrix $\hat{W}_{nw}$. The asymptotic representation of the optimal weighting matrix $\hat{W}^{opt}$ is the following: ```{math} :label: EqGMM_estW_WhatOpt @@ -291,7 +319,7 @@ The following is a centered second-order finite difference numerical approximati (SecGMM_Ex)= -## Examples +## Code Examples In this section, we will use GMM to estimate parameters of the models from the {ref}`Chap_MLE` chapter. We will also go through the standard moment conditions in most econometrics textbooks in which the conditional and unconditional expectations provide moments for estimation. @@ -299,47 +327,1248 @@ In this section, we will use GMM to estimate parameters of the models from the { (SecGMM_Ex_Trunc)= ### Fitting a truncated normal to intermediate macroeconomics test scores -Let's revisit the problem from the {ref}`Chap_MLE` chapter of fitting a truncated normal distribution to intermediate macroeconomics test scores. The data are in the text file [`Econ381totpts.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/Econ381totpts.txt) in the GitHub repository [`../data/gmm/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/data/smm) folder for this executable book. Recall that these test scores are between 0 and 450. The figure below shows a histogram of the data, as well as three truncated normal PDF's. The black line is the ML estimate of $\mu$ and $\sigma$ of the truncated normal pdf. The red and the green lines are just the PDF's of two "arbitrarily" chosen combinations of the truncated normal parameters $\mu$ and $\sigma$. 
+Let's revisit the problem from the {ref}`Chap_MLE` chapter of fitting a truncated normal distribution to intermediate macroeconomics test scores. The data are in the text file [`Econ381totpts.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/Econ381totpts.txt) in the GitHub repository [`../data/gmm/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/data/gmm) folder for this executable book. Recall that these test scores are between 0 and 450. {numref}`Figure %s ` below shows a histogram of the data, as well as the unconstrained and constrained maximum likelihood estimates of the truncated normal distribution from {numref}`Figure %s ` as well as an arbitrary distribution. + +The black line is the unconstrained MLE estimate of $\mu$ and $\sigma$ of the truncated normal pdf from Section {ref}`SecMLE_DistData_min`. The red line is the constrained MLE estimate of $\mu$ and $\sigma$ from Section {ref}`SecMLE_DistData_conmin`. And the green line is an arbitrary parameterization of the truncated normal PDF. + +```{code-cell} ipython3 +:tags: [] + +import scipy.stats as sts + + +def trunc_norm_pdf(xvals, mu, sigma, cut_lb=None, cut_ub=None): + ''' + -------------------------------------------------------------------- + Generate pdf values from the truncated normal pdf with mean mu and + standard deviation sigma. If the cutoff is given, then the PDF + values are inflated upward to reflect the zero probability on values + above the cutoff. If there is no cutoff given, this function does + the same thing as sp.stats.norm.pdf(x, loc=mu, scale=sigma). + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, values of the normally distributed random + variable + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. 
Values above + this value have zero probability + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None + + OBJECTS CREATED WITHIN FUNCTION: + prob_notcut = scalar + pdf_vals = (N,) vector, normal PDF values for mu and sigma + corresponding to xvals data + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: pdf_vals + -------------------------------------------------------------------- + ''' + if cut_ub == 'None' and cut_lb == 'None': + prob_notcut = 1.0 + elif cut_ub == 'None' and cut_lb != 'None': + prob_notcut = 1.0 - sts.norm.cdf(cut_lb, loc=mu, scale=sigma) + elif cut_ub != 'None' and cut_lb == 'None': + prob_notcut = sts.norm.cdf(cut_ub, loc=mu, scale=sigma) + elif cut_ub != 'None' and cut_lb != 'None': + prob_notcut = (sts.norm.cdf(cut_ub, loc=mu, scale=sigma) - + sts.norm.cdf(cut_lb, loc=mu, scale=sigma)) + + pdf_vals = ((1/(sigma * np.sqrt(2 * np.pi)) * + np.exp( - (xvals - mu)**2 / (2 * sigma**2))) / + prob_notcut) + + return pdf_vals +``` +```{code-cell} ipython3 +:tags: ["hide-input", "remove-output"] + +# Import the necessary libraries +import numpy as np +import matplotlib.pyplot as plt +import requests + +# Download and save the data file Econ381totpts.txt as NumPy array +url = ('https://raw.githubusercontent.com/OpenSourceEcon/CompMethods/' + + 'main/data/gmm/Econ381totpts.txt') +data_file = requests.get(url, allow_redirects=True) +open('../../../data/gmm/Econ381totpts.txt', 'wb').write(data_file.content) +if data_file.status_code == 200: + # Load the downloaded data into a NumPy array + data = np.loadtxt('../../../data/gmm/Econ381totpts.txt') +else: + print('Error downloading the file') + +# Plot the histogram of the data +num_bins = 30 +count, bins, ignored = plt.hist(data, num_bins, density=True, + edgecolor='k', label='Data') +plt.title('Intermediate macro scores: 2011-2012', fontsize=15) +plt.xlabel(r'Total points') +plt.ylabel(r'Percent of scores') +plt.xlim([0, 550]) # This gives the xmin and xmax to be plotted" + +# Plot the unconstrained MLE estimated distribution +dist_pts = np.linspace(0, 450, 500) +mu_MLE = 622.16 +sig_MLE = 198.76 +plt.plot( + dist_pts, + trunc_norm_pdf(dist_pts, mu_MLE, sig_MLE, 0, 450), + linewidth=2, color='k', + label='Unconstr: $\hat{\mu}_{MLE}$=622,$\hat{\sigma}_{MLE}$=199' +) + +# Plot the constrained MLE estimated distribution +mu_MLE_constr = 420.0 +sig_MLE_constr = 129.04 +plt.plot( + dist_pts, + trunc_norm_pdf(dist_pts, mu_MLE_constr, sig_MLE_constr, 0, 450), + linewidth=2, color='r', + label='Constr: $\hat{\mu}_{MLE}$=420,$\hat{\sigma}_{MLE}$=129' +) + +# Plot smooth line with distribution 1 +mu_1 = 380 +sig_1 = 150 +plt.plot(dist_pts, trunc_norm_pdf(dist_pts, mu_1, sig_1, 0, 450), + linewidth=2, color='g', label='Arbitrary: $\mu$=380,$\sigma$=150') + +plt.legend(loc='upper left') + +plt.show() +``` -(SecGMM_Ident)= -## Identification +```{figure} ../../../images/gmm/Econ381scores_2MLEs.png +--- +height: 500px +name: FigGMM_EconScores2MLEs +--- +Constrained maximum likelihood estimate of truncated normal distribution to fit intermediate macroeconomics midterm scores over two semesters along with unconstrained MLE estimate and arbitrary parameterization. +``` -An issue that we saw in the examples from Section {ref}`SecGMM_Ex` is that there is some science as well as some art in choosing moments to identify the parameters in a GMM estimation. -* The $\mu$ and $\sigma$ parameters were identified more precisely when using the two-step estimator of the optimal weighting matrix instead of the identity matrix. 
-* The overidentified four-moment model of total scores produced much smaller standard errors for both $\mu$ and $\sigma$ than did the two-moment model. +(SecGMM_Ex_Trunc_2momI)= +#### Two moments, identity weighting matrix -Suppose the parameter vector $\theta$ has $K$ elements, or rather, $K$ parameters to be estimated. In order to estimate $\theta$ by GMM, you must have at least as many moments as parameters to estimate $R\geq K$. If you have exactly as many moments as parameters to be estimated $R=K$, the model is said to be *exactly identified*. If you have more moments than parameters to be estimated $R>K$, the model is said to be *overidentified*. If you have fewer moments than parameters to be estimated $RK$ the model in GMM estimation as we saw in the previous example. The main reason is that not all moments are orthogonal. That is, some moments convey roughly the same information about the data and, therefore, do not separately identify any extra parameters. So a good GMM model often is overidentified $R>K$. +Let's try estimating the parameters $\mu$ and $\sigma$ by GMM. What moments should we use? Let's try the mean and variance of the data. These two statistics of the data are defined by: -One last point about GMM regards moment selection and verification of results. The real world has an infinite supply of potential moments that describe some part of the data. Choosing moments to estimate parameters by GMM requires understanding of the model, intuition about its connections to the real world, and artistry. A good GMM estimation will include moments that have some relation to or story about their connection to particular parameters of the model to be estimated. In addition, a good verification of a GMM estimation is to take some moment from the data that was not used in the estimation and see how well the corresponding moment from the estimated model matches that *outside moment*. +```{math} + :label: EqGMM_Ex_Trunc_2momI_mean + mean(scores_i) = \frac{1}{N}\sum_{i=1}^N scores_i +``` + +```{math} + :label: EqGMM_Ex_Trunc_2momI_var + var(scores_i) = \frac{1}{N}\sum_{i=1}^{N} \left(scores_i - mean(scores_i)\right)^2 +``` + +So the data moment vector $m(x)$ for GMM is the following. + +```{math} + :label: EqGMM_Ex_Trunc_2momI_datamoms + m(scores_i) \equiv \begin{bmatrix} mean(scores_i) \\ var(scores_i) \end{bmatrix} +``` + +And the model moment vector $m(x|\theta)$ for GMM is the following. + +```{math} + :label: EqGMM_Ex_Trunc_2momI_modmoms + m(scores_i|\mu,\sigma) \equiv \begin{bmatrix} mean(scores_i|\mu,\sigma) \\ var(scores_i|\mu,\sigma) \end{bmatrix} +``` + +Define the error vector as the vector of percent deviations of the model moments from the data moments. + +```{math} + :label: EqGMM_Ex_Trunc_2momI_errvec + e(scores_i|\mu,\sigma) \equiv \frac{m(scores_i|\mu,\sigma) - m(scores_i)}{m(scores_i)} +``` + +The mimization problem for the GMM estimator for this moment vector is the following. + +```{math} + :label: EqGMM_Ex_Trunc_2momI_minprob + (\hat{\mu}_{GMM},\hat{\sigma}_{GMM}) = (\mu,\sigma):\quad \min_{\mu,\sigma} e(scores_i|\mu,\sigma)^T \, W \, e(scores_i|\mu,\sigma) +``` + +Keep in mind that the $\mu$ and $\sigma$ we are estimating are the two truncated normal parameters in contrast to the empirical mean of the data $mean(scores_i)$ and the empirical variance of the data $var(scores_i)$. + +Something interesting to note here is the $1/N$ weighting on our variance estimator. 
There is less bias in the estimator of the variance by using the weighting $1/(N-1)$ because one degree of freedom is used in calculating the mean used in the variance calculation. However, in GMM when many moments are used that might have differing degrees of freedom restrictions, it is important to have the same weighting for each moment. So we use $1/N$ in all cases. + +Now let's define a criterion function that takes as inputs the parameters and the estimator for the weighting matrix $\hat{W}$. + +```{code-cell} ipython3 +:tags: [] + +import scipy.integrate as intgr + +def data_moments(xvals): + ''' + -------------------------------------------------------------------- + This function computes the two data moments for GMM + (mean(data), variance(data)). + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None + + OBJECTS CREATED WITHIN FUNCTION: + mean_data = scalar, mean value of test scores data + var_data = scalar > 0, variance of test scores data + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: mean_data, var_data + -------------------------------------------------------------------- + ''' + mean_data = xvals.mean() + var_data = xvals.var() + + return mean_data, var_data + + +def model_moments(mu, sigma, cut_lb, cut_ub): + ''' + -------------------------------------------------------------------- + This function computes the two model moments for GMM + (mean(model data), variance(model data)). + -------------------------------------------------------------------- + INPUTS: + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. Values above + this value have zero probability + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + trunc_norm_pdf() + xfx() + x2fx() + + OBJECTS CREATED WITHIN FUNCTION: + mean_model = scalar, mean value of test scores from model + m_m_err = scalar > 0, estimated error in the computation of the + integral for the mean of the distribution + var_model = scalar > 0, variance of test scores from model + v_m_err = scalar > 0, estimated error in the computation of the + integral for the variance of the distribution + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: mean_model, var_model + -------------------------------------------------------------------- + ''' + xfx = lambda x: x * trunc_norm_pdf(x, mu, sigma, cut_lb, cut_ub) + (mean_model, m_m_err) = intgr.quad(xfx, cut_lb, cut_ub) + x2fx = lambda x: ((x - mean_model) ** 2) * trunc_norm_pdf(x, mu, sigma, cut_lb, cut_ub) + (var_model, v_m_err) = intgr.quad(x2fx, cut_lb, cut_ub) + + return mean_model, var_model + + +def err_vec(xvals, mu, sigma, cut_lb, cut_ub, simple): + ''' + -------------------------------------------------------------------- + This function computes the vector of moment errors (in percent + deviation from the data moment vector) for GMM. 
+ -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. Values above + this value have zero probability + simple = boolean, =True if errors are simple difference, =False if + errors are percent deviation from data moments + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + data_moments() + model_moments() + + OBJECTS CREATED WITHIN FUNCTION: + mean_data = scalar, mean value of data + var_data = scalar > 0, variance of data + moms_data = (2, 1) matrix, column vector of two data moments + mean_model = scalar, mean value from model + var_model = scalar > 0, variance from model + moms_model = (2, 1) matrix, column vector of two model moments + err_vec = (2, 1) matrix, column vector of two moment error + functions + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: err_vec + -------------------------------------------------------------------- + ''' + mean_data, var_data = data_moments(xvals) + moms_data = np.array([[mean_data], [var_data]]) + mean_model, var_model = model_moments(mu, sigma, cut_lb, cut_ub) + moms_model = np.array([[mean_model], [var_model]]) + if simple: + err_vec = moms_model - moms_data + else: + err_vec = (moms_model - moms_data) / moms_data + + return err_vec + + +def criterion(params, *args): + ''' + -------------------------------------------------------------------- + This function computes the GMM weighted sum of squared moment errors + criterion function value given parameter values and an estimate of + the weighting matrix. + -------------------------------------------------------------------- + INPUTS: + params = (2,) vector, ([mu, sigma]) + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + args = length 3 tuple, (xvals, cutoff, W_hat) + xvals = (N,) vector, values of the truncated normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. Values above + this value have zero probability + W_hat = (R, R) matrix, estimate of optimal weighting matrix + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + norm_pdf() + + OBJECTS CREATED WITHIN FUNCTION: + err = (2, 1) matrix, column vector of two moment error + functions + crit_val = scalar > 0, GMM criterion function value + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: crit_val + -------------------------------------------------------------------- + ''' + mu, sigma = params + xvals, cut_lb, cut_ub, W = args + err = err_vec(xvals, mu, sigma, cut_lb, cut_ub, simple=False) + crit_val = err.T @ W @ err + + return crit_val +``` + +Now we can perform the GMM estimation. Let's start with the identity matrix as our estimate for the optimal weighting matrix $W = I$. 
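Before handing the criterion function to a minimizer, it can help to evaluate the data moments and the criterion at an arbitrary guess to confirm that the pieces defined above fit together. The following quick check is only a sketch: the guess $(\mu=300, \sigma=100)$ and the names `mean_chk`, `var_chk`, and `crit_chk` are illustrative and are not used in the estimation below.

```{code-cell} ipython3
:tags: []

# Sanity check of the moment functions and the criterion before estimating.
# The guess (mu=300, sigma=100) is arbitrary and W is the identity matrix.
mean_chk, var_chk = data_moments(data)
print('Data mean =', mean_chk, ', Data variance =', var_chk)
crit_chk = criterion(np.array([300.0, 100.0]), data, 0.0, 450.0, np.eye(2))
print('Criterion at (mu=300, sigma=100) =', crit_chk[0][0])
```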
+ +```{code-cell} ipython3 +:tags: [] + +import scipy.optimize as opt + +# Note that this takes a little time because the intgr.quad() commands +# are a little slow +mu_init = 400 +sig_init = 60 +params_init = np.array([mu_init, sig_init]) +W_hat = np.eye(2) +gmm_args = (data, 0.0, 450.0, W_hat) +results = opt.minimize(criterion, params_init, args=(gmm_args)) +results = opt.minimize(criterion, params_init, args=(gmm_args), + tol=1e-14, method='L-BFGS-B', + bounds=((1e-10, None), (1e-10, None))) +mu_GMM1, sig_GMM1 = results.x +print('mu_GMM1=', mu_GMM1, ' sig_GMM1=', sig_GMM1) +print("") +print("SciPy.optimize.minimize results are the following:") +print(results) +``` + +The data moments, model moments at the optimal parameters, and error vector values are the following. + +```{code-cell} ipython3 +:tags: [] + +mean_data, var_data = data_moments(data) +mean_model, var_model = model_moments(mu_GMM1, sig_GMM1, 0.0, 450.0) +err1 = err_vec(data, mu_GMM1, sig_GMM1, 0.0, 450.0, False).reshape(2,) +print('Mean of points =', mean_data, ', Variance of points =', var_data) +print('Mean of model =', mean_model, ', Variance of model =', var_model) +print('Error vector=', err1) +``` + +As we can see from the criterion function value at the optimum (2.69e-18) and from the difference between the model moments and data moments, this GMM estimation matches the moments very well. This GMM estimation is also very close to the unconstrained MLE estimates from Section {ref}`SecMLE_DistData_min`. + +{numref}`Figure %s ` shows the criterion function surface for different values of $\mu$ and $\sigma$ in the neighborhood of our GMM estimate. + +```{code-cell} ipython3 +:tags: ["remove-output"] + +from mpl_toolkits.mplot3d import Axes3D +import matplotlib +cmap1 = matplotlib.colormaps.get_cmap('summer') + +critfunc_GMM1 = criterion(np.array([mu_GMM1, sig_GMM1]), + data, 0.0, 450.0, W_hat) + +mu_vals = np.linspace(590, 650, 90) +sig_vals = np.linspace(180, 220, 100) +critfunc_vals = np.zeros((90, 100)) +for mu_ind in range(90): + for sig_ind in range(100): + critfunc_vals[mu_ind, sig_ind] = \ + criterion(np.array([mu_vals[mu_ind], sig_vals[sig_ind]]), + data, 0.0, 450.0, W_hat)[0][0] + +mu_mesh, sig_mesh = np.meshgrid(mu_vals, sig_vals) + +fig, ax = plt.subplots(subplot_kw={"projection": "3d"}) +ax.plot_surface(mu_mesh.T, sig_mesh.T, critfunc_vals, rstride=8, + cstride=1, cmap=cmap1, alpha=0.9) +ax.scatter(mu_GMM1, sig_GMM1, critfunc_GMM1, color='red', marker='o', + s=18, label='GMM estimate') +ax.view_init(elev=15, azim=-7, roll=0) +ax.set_title('Criterion function surface for values of mu and sigma') +ax.set_xlabel(r'$\mu$') +ax.set_ylabel(r'$\sigma$') +ax.set_zlabel(r'Criterion func.') + +plt.show() +``` + +```{figure} ../../../images/gmm/Econ381scores_SurfaceCrit1.png +--- +height: 500px +name: FigGMM_SurfCrit1 +--- +Surface of the 2 moment, identity weighting matrix GMM criterion function for values of $\mu$ and $\sigma$ in the neighborhood of the GMM estimate. The scatter point represents the criterion function value for the GMM estimate. +``` + +Let's compute the GMM estimator for the variance-covariance matrix $\hat{\Sigma}_{GMM}$ of our GMM estimates $\hat{\theta}_{GMM}$ using the equation in Section 4 based on the Jacobian $d(x|\hat{\theta}_{GMM})$ of the moment error vector $e(x|\hat{\theta}_{GMM})$ from the criterion function at the estimated (optimal) parameter values $\hat{\theta}_{GMM}$. We first write a function that computes the Jacobian $d(x|\hat{\theta}_{GMM})$. 
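For reference, the estimator that the next code cell computes can be restated in this chapter's notation as

```{math}
  \hat{\Sigma}_{GMM} = \frac{1}{N}\Bigl[d(x|\hat{\theta}_{GMM})^T \, \hat{W} \, d(x|\hat{\theta}_{GMM})\Bigr]^{-1}
```

where $d(x|\hat{\theta}_{GMM})$ is the $R\times K$ Jacobian of the moment error vector and $\hat{W}$ is the weighting matrix used in the estimation. The standard errors reported below are the square roots of the diagonal elements of $\hat{\Sigma}_{GMM}$.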
+ +```{code-cell} ipython3 +:tags: [] + +import numpy.linalg as lin + +def Jac_err2(xvals, mu, sigma, cut_lb, cut_ub, simple=False): + ''' + This function computes the Jacobian matrix of partial derivatives of the + R x 1 moment error vector e(x|theta) with respect to the K parameters + theta_i in the K x 1 parameter vector theta. The resulting matrix is the + R x K Jacobian. + ''' + Jac_err = np.zeros((2, 2)) + h_mu = 1e-8 * mu + h_sig = 1e-8 * sigma + Jac_err[:, 0] = ( + (err_vec(xvals, mu + h_mu, sigma, cut_lb, cut_ub, simple) - + err_vec(xvals, mu - h_mu, sigma, cut_lb, cut_ub, simple)) / + (2 * h_mu) + ).flatten() + Jac_err[:, 1] = ( + (err_vec(xvals, mu, sigma + h_sig, cut_lb, cut_ub, simple) - + err_vec(xvals, mu, sigma - h_sig, cut_lb, cut_ub, simple)) / + (2 * h_sig) + ).flatten() + + return Jac_err + +N = data.shape[0] +d_err2 = Jac_err2(data, mu_GMM1, sig_GMM1, 0.0, 450.0, False) +print("Jacobian matrix of derivatives") +print(d_err2) +print("") +print("Weighting matrix") +print(W_hat) +SigHat2 = (1 / N) * lin.inv(d_err2.T @ W_hat @ d_err2) +print("") +print("Sigma hat squared") +print(SigHat2) +print("") +print("Standard errors") +print('Std. err. mu_hat=', np.sqrt(SigHat2[0, 0])) +print('Std. err. sig_hat=', np.sqrt(SigHat2[1, 1])) +``` + +Note how big the standard errors are on our GMM estimated parameters using the identity matrix as our optimal weighting matrix. + + +(SecGMM_Ex_Trunc_2mom2st)= +#### Two moments, two-step weighting matrix + +Similar to the MLE problem, the GMM criterion function surface in {numref}`Figure %s ` looks like it is roughly equal for a specific portion increase of $\mu$ and $\sigma$ together. That is, with these two moments probably have a correspondence of values of $\mu$ and $\sigma$ that give roughly the same criterion function value. This issue has two possible solutions. + +1. Maybe we need the two-step variance covariance estimator to calculate a "more" optimal weighting matrix $W$. +2. Maybe our two moments aren't very good moments for fitting the data. + +Let's first try the two-step weighting matrix using the steps from Section {ref}`SecGMM_Wgt_2step` in equations {eq}`EqGMM_GMMest_2stp_2VarCov` and {eq}`EqGMM_estW_2step`. + +The following function creates the moment error matrix for this problem defined in {eq}`EqGMM_GMMest_2stp_ErrMatPct`. + +```{code-cell} ipython3 +:tags: [] + +def get_Err_mat2(xvals, mu, sigma, cut_lb, cut_ub, simple=False): + ''' + -------------------------------------------------------------------- + This function computes the R x N matrix of errors from each + observation for each moment. In this function, we have hard coded + R = 2. + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. 
Values above + this value have zero probability + simple = boolean, =True if errors are simple difference, =False if + errors are percent deviation from data moments + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + model_moments() + + OBJECTS CREATED WITHIN FUNCTION: + R = integer = 2, hard coded number of moments + N = integer >= R, number of data observations + Err_mat = (R, N) matrix, error by moment and observation data + mean_model = scalar, mean value from model + var_model = scalar > 0, variance from model + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: Err_mat + -------------------------------------------------------------------- + ''' + R = 2 + N = len(xvals) + Err_mat = np.zeros((R, N)) + mean_data = xvals.mean() + mean_model, var_model = model_moments(mu, sigma, cut_lb, cut_ub) + if simple: + Err_mat[0, :] = xvals - mean_model + Err_mat[1, :] = ((mean_data - xvals) ** 2) - var_model + else: + Err_mat[0, :] = (xvals - mean_model) / mean_model + Err_mat[1, :] = (((mean_data - xvals) ** 2) - var_model) / var_model + + return Err_mat +``` + +```{code-cell} ipython3 +:tags: [] + +Err_mat = get_Err_mat2(data, mu_GMM1, sig_GMM1, 0.0, 450.0, False) +VCV2 = (1 / len(data)) * (Err_mat @ Err_mat.T) +print("VCV2=") +print(VCV2) +W_hat2 = lin.inv(VCV2) +print("") +print("W_hat2=") +print(W_hat2) +``` + +Now we can perform the GMM estimation with the optimal two-step weighting matrix. + +```{code-cell} ipython3 +:tags: [] + +# Note that this takes a little time because the intgr.quad() commands +# are a little slow +mu_init = 400 # alternative initial guess is mu_GMM1 +sig_init = 60 # alternative initial guess is sig_GMM1 +params_init = np.array([mu_init, sig_init]) +gmm_args = (data, 0.0, 450.0, W_hat2) +results = opt.minimize(criterion, params_init, args=(gmm_args), + method='L-BFGS-B', bounds=((1e-10, None), (1e-10, None))) +mu_GMM2, sig_GMM2 = results.x +print('mu_GMM2=', mu_GMM2, ' sig_GMM2=', sig_GMM2) +print("") +print("Scipy.optimize.minimize results:") +print(results) +``` + +The GMM estimates here with the two-step weighting matrix are pretty similar to the estimates from the previous section that simply used the identity matrix as the weighting matrix. However, the estimated results here are sensitive to the initial guess. + +But the real benefit of the two-step weighting matrix shows up in the much smaller (more efficient) estimated standard errors for the GMM parameter estimates. + +```{code-cell} ipython3 +:tags: [] + +N = data.shape[0] +d_err2_2 = Jac_err2(data, mu_GMM2, sig_GMM2, 0.0, 450.0, False) +print("Jacobian matrix of derivatives") +print(d_err2_2) +print("") +print("Weighting matrix") +print(W_hat2) +SigHat2_2 = (1 / N) * lin.inv(d_err2_2.T @ W_hat2 @ d_err2_2) +print("") +print("Sigma hat squared") +print(SigHat2_2) +print("") +print("Standard errors") +print('Std. err. mu_hat=', np.sqrt(SigHat2_2[0, 0])) +print('Std. err. sig_hat=', np.sqrt(SigHat2_2[1, 1])) +``` + + +(SecGMM_Ex_Trunc_4momI)= +#### Four moments, identity weighting matrix + +Using a better weighting matrix didn't improve our estimates or fit very much it did improve the standard errors of our estimates. To get the right fit, we might need to choose different moments. Let's try an overidentified model $R>K$, where we estimate $\mu$ and $\sigma$ of the truncated normal distribution $K=2$ using the following four moments $R=4$. + +1. The percent of observations greater than 430 (between 430 and 450) +2. The percent of observations between 320 and 430 +3. 
The percent of observations between 220 and 320 +4. The percent of observations less than 220 (between 0 and 220) + +```{code-cell} ipython3 +:tags: [] + +def data_moments4(xvals): + ''' + -------------------------------------------------------------------- + This function computes the four data moments for GMM + (binpct_1, binpct_2, binpct_3, binpct_4). + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None + + OBJECTS CREATED WITHIN FUNCTION: + bpct_1_dat = scalar in [0, 1], percent of observations + 0 <= x < 220 + bpct_2_dat = scalar in [0, 1], percent of observations + 220 <= x < 320 + bpct_3_dat = scalar in [0, 1], percent of observations + 320 <= x < 430 + bpct_4_dat = scalar in [0, 1], percent of observations + 430 <= x <= 450 + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: bpct_1, bpct_2, bpct_3, bpct_4 + -------------------------------------------------------------------- + ''' + bpct_1_dat = xvals[xvals < 220].shape[0] / xvals.shape[0] + bpct_2_dat = (xvals[(xvals >=220) & (xvals < 320)].shape[0] / + xvals.shape[0]) + bpct_3_dat = (xvals[(xvals >=320) & (xvals < 430)].shape[0] / + xvals.shape[0]) + bpct_4_dat = xvals[xvals >= 430].shape[0] / xvals.shape[0] + + return bpct_1_dat, bpct_2_dat, bpct_3_dat, bpct_4_dat + + +def model_moments4(mu, sigma, cut_lb, cut_ub): + ''' + -------------------------------------------------------------------- + This function computes the four model moments for GMM + (binpct_1, binpct_2, binpct_3, binpct_4). + -------------------------------------------------------------------- + INPUTS: + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. 
Values above + this value have zero probability + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + trunc_norm_pdf() + xfx() + + OBJECTS CREATED WITHIN FUNCTION: + bpct_1_mod = scalar in [0, 1], percent of model observations in + bin 1 + bp_1_err = scalar > 0, estimated error in the computation of the + integral for bpct_1_mod + bpct_2_mod = scalar in [0, 1], percent of model observations in + bin 2 + bp_2_err = scalar > 0, estimated error in the computation of the + integral for bpct_2_mod + bpct_3_mod = scalar in [0, 1], percent of model observations in + bin 3 + bp_3_err = scalar > 0, estimated error in the computation of the + integral for bpct_3_mod + bpct_4_mod = scalar in [0, 1], percent of model observations in + bin 4 + bp_4_err = scalar > 0, estimated error in the computation of the + integral for bpct_4_mod + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: bpct_1_mod, bpct_2_mod, bpct_3_mod, bpct_4_mod + -------------------------------------------------------------------- + ''' + xfx = lambda x: trunc_norm_pdf(x, mu, sigma, cut_lb, cut_ub) + (bpct_1_mod, bp_1_err) = intgr.quad(xfx, 0.0, 220) + (bpct_2_mod, bp_2_err) = intgr.quad(xfx, 220, 320) + (bpct_3_mod, bp_3_err) = intgr.quad(xfx, 320, 430) + (bpct_4_mod, bp_4_err) = intgr.quad(xfx, 430, 450) + + return bpct_1_mod, bpct_2_mod, bpct_3_mod, bpct_4_mod + + +def err_vec4(xvals, mu, sigma, cut_lb, cut_ub, simple): + ''' + -------------------------------------------------------------------- + This function computes the vector of moment errors (in percent + deviation from the data moment vector) for GMM. + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. 
Values above + this value have zero probability + simple = boolean, =True if errors are simple difference, =False if + errors are percent deviation from data moments + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + data_moments4() + model_moments4() + + OBJECTS CREATED WITHIN FUNCTION: + mean_data = scalar, mean value of data + var_data = scalar > 0, variance of data + moms_data = (2, 1) matrix, column vector of two data moments + mean_model = scalar, mean value from model + var_model = scalar > 0, variance from model + moms_model = (2, 1) matrix, column vector of two model moments + err_vec = (2, 1) matrix, column vector of two moment error + functions + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: err_vec + -------------------------------------------------------------------- + ''' + bpct_1_dat, bpct_2_dat, bpct_3_dat, bpct_4_dat = \ + data_moments4(xvals) + moms_data = np.array([[bpct_1_dat], [bpct_2_dat], [bpct_3_dat], + [bpct_4_dat]]) + bpct_1_mod, bpct_2_mod, bpct_3_mod, bpct_4_mod = \ + model_moments4(mu, sigma, cut_lb, cut_ub) + moms_model = np.array([[bpct_1_mod], [bpct_2_mod], [bpct_3_mod], + [bpct_4_mod]]) + if simple: + err_vec = moms_model - moms_data + else: + err_vec = (moms_model - moms_data) / moms_data + + return err_vec + + +def criterion4(params, *args): + ''' + -------------------------------------------------------------------- + This function computes the GMM weighted sum of squared moment errors + criterion function value given parameter values and an estimate of + the weighting matrix. + -------------------------------------------------------------------- + INPUTS: + params = (2,) vector, ([mu, sigma]) + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + args = length 3 tuple, (xvals, cutoff, W_hat) + xvals = (N,) vector, values of the truncated normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. Values above + this value have zero probability + W_hat = (R, R) matrix, estimate of optimal weighting matrix + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + err_vec4() + + OBJECTS CREATED WITHIN FUNCTION: + err = (4, 1) matrix, column vector of four moment error + functions + crit_val = scalar > 0, GMM criterion function value + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: crit_val + -------------------------------------------------------------------- + ''' + mu, sigma = params + xvals, cut_lb, cut_ub, W = args + err = err_vec4(xvals, mu, sigma, cut_lb, cut_ub, simple=False) + crit_val = err.T @ W @ err + + return crit_val +``` + +Before performing the estimation, let's see what these four model moments would be relative to the data moments with the first GMM estimates from the two-moment GMM estimation with the identity weighting matrix from Section {ref}`SecGMM_Ex_Trunc_2momI` of $\mu\approx 622$ and $\sigma\approx 199$. Let's also look at the resulting criterion function at those values. 
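+As a quick cross-check (a sketch, not part of the original estimation code), note that the four data moments returned by `data_moments4()` are just histogram bin shares, so they can be reproduced with `numpy.histogram` using the same bin edges. The sketch below assumes the `data` array and the `data_moments4()` function defined in the cells above; the code cell that follows then evaluates the model moments and criterion function at the two-moment GMM estimates.
+
+```python
+import numpy as np
+
+# Illustrative cross-check: recompute the four bin-share data moments with
+# numpy.histogram using the same bin edges as data_moments4() and compare.
+# Assumes `data` and `data_moments4` are already defined in the cells above.
+bin_edges = [0.0, 220.0, 320.0, 430.0, 450.0]
+counts, _ = np.histogram(data, bins=bin_edges)
+print("Bin shares from np.histogram:   ", counts / data.shape[0])
+print("Bin shares from data_moments4():", data_moments4(data))
+```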
+
+```{code-cell} ipython3
+:tags: []
+
+params = np.array([mu_GMM1, sig_GMM1])
+print("2-moment mu_GMM1 is:", mu_GMM1, ", and 2-moment sig_GMM1 is:", sig_GMM1)
+print("")
+print("Data moments are the following:")
+print(data_moments4(data))
+print("")
+print("Model moments at the GMM1 estimates are the following:")
+print(model_moments4(mu_GMM1, sig_GMM1, 0.0, 450))
+print("")
+print("GMM criterion function value at GMM1 estimates with identity wgt mat:")
+print(criterion4(params, data, 0.0, 450.0, np.eye(4))[0][0])
+```
+
+Now let's perform the GMM estimation of the two parameters $\mu$ and $\sigma$ using the four moments described above and the identity weighting matrix.
+
+```{code-cell} ipython3
+:tags: []
+
+# Note that this takes a little time because the intgr.quad() commands
+# are a little slow
+mu_init = 400
+sig_init = 70
+params_init = np.array([mu_init, sig_init])
+W_hat1_4 = np.eye(4)
+gmm_args = (data, 0.0, 450.0, W_hat1_4)
+results_4 = opt.minimize(
+    criterion4, params_init, args=(gmm_args), method='L-BFGS-B',
+    bounds=((1e-10, None), (1e-10, None))
+)
+mu_GMM1_4, sig_GMM1_4 = results_4.x
+
+print('mu_GMM1_4=', mu_GMM1_4, ' sig_GMM1_4=', sig_GMM1_4)
+print("")
+print("Scipy.optimize.minimize results:")
+print(results_4)
+```
+
+Let's compare the model moments at these new GMM estimates to the data moments and the associated criterion function value.
+
+```{code-cell} ipython3
+:tags: []
+
+params = np.array([mu_GMM1_4, sig_GMM1_4])
+print("4-moment mu_GMM1 is:", mu_GMM1_4, ", and 4-moment sig_GMM1 is:", sig_GMM1_4)
+print("")
+print("Data moments are the following:")
+print(data_moments4(data))
+print("")
+print("Model moments at the GMM1_4 estimates are the following:")
+print(model_moments4(mu_GMM1_4, sig_GMM1_4, 0.0, 450))
+print("")
+print("GMM criterion function value at GMM1_4 estimates with identity wgt mat:")
+print(criterion4(params, data, 0.0, 450.0, W_hat1_4)[0][0])
+```
+
+The 4-moment GMM estimates with the identity weighting matrix of $\hat{\mu}\approx 362$ and $\hat{\sigma}\approx 92$ have model moments that match the data moments much more closely than those associated with the 2-moment GMM estimates shown above. And the criterion function value of this new estimate is much lower ($\sim 0.96$) than that of the two-moment GMM estimates ($\sim 3.28$).
+
+{numref}`Figure %s <FigGMM_EconScores4mom2mom>` shows the histogram of the intermediate macroeconomics scores with the 4-moment estimated truncated normal distribution and the 2-moment estimated distribution from Section {ref}`SecGMM_Ex_Trunc_2momI`.
+ +```{code-cell} ipython3 +:tags: ["remove-output"] + +# Plot the histogram of the data +count, bins, ignored = plt.hist(data, num_bins, density=True, + edgecolor='k', label='Data') +plt.title('Intermediate macro scores: 2011-2012', fontsize=15) +plt.xlabel(r'Total points') +plt.ylabel(r'Percent of scores') +plt.xlim([0, 550]) # This gives the xmin and xmax to be plotted" + +# Plot the 4-moment GMM estimated distribution +plt.plot( + dist_pts, + trunc_norm_pdf(dist_pts, mu_GMM1_4, sig_GMM1_4, 0, 450), + linewidth=2, color='r', + label='4-moment: $\hat{\mu}_{GMM}$=362,$\hat{\sigma}_{GMM}$=92' +) + +# Plot the 2-moment GMM estimated distribution +plt.plot( + dist_pts, + trunc_norm_pdf(dist_pts, mu_GMM1, sig_GMM1, 0, 450), + linewidth=2, color='k', + label='2-moment: $\hat{\mu}_{GMM}$=622,$\hat{\sigma}_{GMM}$=199' +) +plt.legend(loc='upper left') + +plt.show() +``` + +```{figure} ../../../images/gmm/Econ381scores_4mom2mom.png +--- +height: 500px +name: FigGMM_EconScores4mom2mom +--- +GMM estimated truncated normal distributions to fit intermediate macroeconomics test score data: 4-moment estimation versus 2-moment estimation. +``` + +We can compute the estimator of the variance-covariance matrix $\hat{\Sigma}$ of the GMM parameter estimator by computing the Jacobian of the error vector. In this case, the Jacobian $d(x|\theta)$ is $R\times K = 4\times 2$. + +```{code-cell} ipython3 +:tags: [] + +def Jac_err4(xvals, mu, sigma, cut_lb, cut_ub, simple=False): + ''' + This function computes the Jacobian matrix of partial derivatives of the + R x 1 moment error vector e(x|theta) with respect to the K parameters + theta_i in the K x 1 parameter vector theta. The resulting matrix is + R x K Jacobian. + ''' + Jac_err = np.zeros((4, 2)) + h_mu = 1e-8 * mu + h_sig = 1e-8 * sigma + Jac_err[:, 0] = ( + (err_vec4(xvals, mu + h_mu, sigma, cut_lb, cut_ub, simple) - + err_vec4(xvals, mu - h_mu, sigma, cut_lb, cut_ub, simple)) / + (2 * h_mu) + ).flatten() + Jac_err[:, 1] = ( + (err_vec4(xvals, mu, sigma + h_sig, cut_lb, cut_ub, simple) - + err_vec4(xvals, mu, sigma - h_sig, cut_lb, cut_ub, simple)) / + (2 * h_sig) + ).flatten() + + return Jac_err + +d_err4 = Jac_err4(data, mu_GMM1_4, sig_GMM1_4, 0.0, 450.0, False) +print("Jacobian matrix of derivatives") +print(d_err4) +print("") +print("Weighting matrix") +print(W_hat1_4) +SigHat4 = (1 / N) * lin.inv(d_err4.T @ W_hat1_4 @ d_err4) +print("") +print("Sigma hat squared") +print(SigHat4) +print("") +print("Standard errors") +print('Std. err. mu_hat=', np.sqrt(SigHat4[0, 0])) +print('Std. err. sig_hat=', np.sqrt(SigHat4[1, 1])) +``` + +Note how much tighter the standard errors are here with these four moments than they were in the econometric models of Sections {ref}`SecGMM_Ex_Trunc_2momI` and {ref}`SecGMM_Ex_Trunc_2mom2st`. + +{numref}`Figure %s ` shows the surface of the criterion function of this 4-moment problem in the neighborhood of the GMM estimate of $\hat{\mu}_{GMM}\approx 362$ and $\hat{\sigma}_{GMM}\approx 92$. This provides more evidence that the GMM estimates are a global minimum of the criterion function. There less of a flat ridge in $(\mu,\sigma)$-space as was the case in the 2-moment GMM problem and the MLE problems. 
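+A rough numerical complement to the surface plot (a sketch, not part of the original analysis) is to look at the curvature matrix $d^T W d$ at the estimate: if both of its eigenvalues are well above zero, the criterion function rises in every direction away from the estimate, so there is no flat ridge locally. The sketch below assumes `Jac_err4`, `W_hat1_4`, `mu_GMM1_4`, `sig_GMM1_4`, and `data` from the cells above; the next code cell then plots the full criterion surface.
+
+```python
+import numpy.linalg as lin
+
+# Illustrative local-identification check: eigenvalues of d'Wd at the 4-moment,
+# identity-weighting-matrix GMM estimate. Two comfortably positive eigenvalues
+# mean the criterion is curved in every direction at the estimate.
+d4 = Jac_err4(data, mu_GMM1_4, sig_GMM1_4, 0.0, 450.0, False)
+curv = d4.T @ W_hat1_4 @ d4
+eigvals = lin.eigvalsh(curv)
+print("Eigenvalues of d'Wd:", eigvals)
+print("Condition number of d'Wd:", eigvals.max() / eigvals.min())
+```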
+ +```{code-cell} ipython3 +:tags: ["remove-output"] + +critfunc_GMM1_4 = criterion4(np.array([mu_GMM1_4, sig_GMM1_4]), + data, 0.0, 450.0, W_hat1_4) + +mu_vals = np.linspace(330, 390, 90) +sig_vals = np.linspace(60, 120, 100) +critfunc_vals = np.zeros((90, 100)) +for mu_ind in range(90): + for sig_ind in range(100): + critfunc_vals[mu_ind, sig_ind] = \ + criterion4(np.array([mu_vals[mu_ind], sig_vals[sig_ind]]), + data, 0.0, 450.0, W_hat1_4)[0][0] + +mu_mesh, sig_mesh = np.meshgrid(mu_vals, sig_vals) + +fig, ax = plt.subplots(subplot_kw={"projection": "3d"}) +ax.plot_surface(mu_mesh.T, sig_mesh.T, critfunc_vals, rstride=8, + cstride=1, cmap=cmap1, alpha=0.9) +ax.scatter(mu_GMM1_4, sig_GMM1_4, critfunc_GMM1_4, color='red', marker='o', + s=18, label='GMM estimate') +ax.view_init(elev=15, azim=27, roll=0) +ax.set_title('Criterion function surface for values of mu and sigma') +ax.set_xlabel(r'$\mu$') +ax.set_ylabel(r'$\sigma$') +ax.set_zlabel(r'Criterion func.') + +plt.show() +``` + +```{figure} ../../../images/gmm/Econ381scores_SurfaceCrit4.png +--- +height: 500px +name: FigGMM_SurfCrit4 +--- +Surface of the 4 moment, identity weighting matrix GMM criterion function for values of $\mu$ and $\sigma$ in the neighborhood of the GMM estimate. The scatter point represents the criterion function value for the GMM estimate. +``` + + +(SecGMM_Ex_Trunc_4mom2st)= +#### Four moments, two-step weighting matrix + +Let's see how much things change in this 4-moment case if we use the two-step estimator for the optimal weighting matrix $W$ instead of the identity matrix. + +```{code-cell} ipython3 +:tags: [] + +def get_Err_mat4(xvals, mu, sigma, cut_lb, cut_ub, simple=False): + ''' + -------------------------------------------------------------------- + This function computes the R x N matrix of errors from each + observation for each moment. In this function, we have hard coded + R = 4. + -------------------------------------------------------------------- + INPUTS: + xvals = (N,) vector, test scores data + mu = scalar, mean of the normally distributed random variable + sigma = scalar > 0, standard deviation of the normally distributed + random variable + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise + is scalar lower bound value of distribution. Values below + this value have zero probability + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise + is scalar upper bound value of distribution. 
Values above + this value have zero probability + simple = boolean, =True if errors are simple difference, =False if + errors are percent deviation from data moments + + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: + model_moments() + + OBJECTS CREATED WITHIN FUNCTION: + R = 2, hard coded number of moments + N = integer >= R, number of data observations + Err_mat = (R, N) matrix, error by moment and observation data + mean_model = scalar, mean value from model + var_model = scalar > 0, variance from model + + FILES CREATED BY THIS FUNCTION: None + + RETURNS: Err_mat + -------------------------------------------------------------------- + ''' + R = 4 + N = len(xvals) + Err_mat = np.zeros((R, N)) + pct_1_mod, pct_2_mod, pct_3_mod, pct_4_mod = \ + model_moments4(mu, sigma, cut_lb, cut_ub) + if simple: + pts_in_grp1 = xvals < 220 + Err_mat[0, :] = pts_in_grp1 - pct_1_mod + pts_in_grp2 = (xvals >= 220) & (xvals < 320) + Err_mat[1, :] = pts_in_grp2 - pct_2_mod + pts_in_grp3 = (xvals >= 320) & (xvals < 430) + Err_mat[2, :] = pts_in_grp3 - pct_3_mod + pts_in_grp4 = xvals >= 430 + Err_mat[3, :] = pts_in_grp4 - pct_4_mod + else: + pts_in_grp1 = xvals < 220 + Err_mat[0, :] = (pts_in_grp1 - pct_1_mod) / pct_1_mod + pts_in_grp2 = (xvals >= 220) & (xvals < 320) + Err_mat[1, :] = (pts_in_grp2 - pct_2_mod) / pct_2_mod + pts_in_grp3 = (xvals >= 320) & (xvals < 430) + Err_mat[2, :] = (pts_in_grp3 - pct_3_mod) / pct_3_mod + pts_in_grp4 = xvals >= 430 + Err_mat[3, :] = (pts_in_grp4 - pct_4_mod) / pct_4_mod + + return Err_mat +``` + +```{code-cell} ipython3 +:tags: [] + +Err_mat4 = get_Err_mat4(data, mu_GMM1_4, sig_GMM1_4, 0.0, 450.0, False) +VCV2_4 = (1 / len(data)) * (Err_mat4 @ Err_mat4.T) +print("VCV2_4=") +print(VCV2_4) +# We use the pseudo-inverse command here because the VCV matrix is +# poorly conditioned +W_hat2_4 = lin.pinv(VCV2_4) +print("") +print("W_hat2_4=") +print(W_hat2_4) +``` +With the two-step optimal weighting matrix, we can estimate this 4-moment problem by GMM. + +```{code-cell} ipython3 +:tags: [] + +# Note that this takes a little time because the intgr.quad() commands +# are a little slow +mu_init = mu_GMM1_4 +sig_init = sig_GMM1_4 +params_init = np.array([mu_init, sig_init]) +gmm_args = (data, 0.0, 450.0, W_hat2_4) +results2_4 = opt.minimize(criterion4, params_init, args=(gmm_args), + method='L-BFGS-B', bounds=((1e-10, None), (1e-10, None))) +mu_GMM2_4, sig_GMM2_4 = results2_4.x +print('mu_GMM2_4=', mu_GMM2_4, ' sig_GMM2_4=', sig_GMM2_4) +print("") +print("Scipy.optimize.minimize results:") +print(results2_4) +``` + +In this case, the two-step estimator creates a fairly significant change in the estimates from that of the previous section with the identity weighting matrix. The estimate of $\mu$ stays roughly the same, but the estimate of $\sigma$ is one-half the size--from $(\mu=362,\sigma=92)$ with the idenity weighting matrix to this estimate of $(\mu=365,\sigma=49)$ with the two-step weighting matrix. + +The criterion function surface in {numref}`Figure %s ` shows the criterion function for different values of $\mu$ and $\sigma$. It has a clear minimum in a certain area. But it also has some really interesting nonlinearities. 
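+As a complement to that surface (a sketch, not part of the original analysis), it is also worth checking how well the model moments match the data moments at the two-step estimates, in the same way we did for the identity-weighting-matrix estimates above. The sketch assumes `data_moments4`, `model_moments4`, `criterion4`, `data`, `mu_GMM2_4`, `sig_GMM2_4`, and `W_hat2_4` from the cells above; the next code cell then produces the surface plot itself.
+
+```python
+import numpy as np
+
+# Illustrative fit check at the two-step 4-moment GMM estimates: compare data
+# moments to model moments and report the criterion value. Assumes the
+# functions and estimates defined in the cells above.
+print("Data moments:")
+print(data_moments4(data))
+print("Model moments at the two-step GMM estimates:")
+print(model_moments4(mu_GMM2_4, sig_GMM2_4, 0.0, 450))
+print("Criterion value at the two-step GMM estimates:")
+print(criterion4(np.array([mu_GMM2_4, sig_GMM2_4]),
+                 data, 0.0, 450.0, W_hat2_4)[0][0])
+```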
+ +```{code-cell} ipython3 +:tags: ["remove-output"] + +critfunc_GMM2_4 = criterion4(np.array([mu_GMM2_4, sig_GMM2_4]), + data, 0.0, 450.0, W_hat2_4) + +mu_vals = np.linspace(330, 390, 90) +sig_vals = np.linspace(20, 80, 100) +critfunc_vals = np.zeros((90, 100)) +for mu_ind in range(90): + for sig_ind in range(100): + critfunc_vals[mu_ind, sig_ind] = \ + criterion4(np.array([mu_vals[mu_ind], sig_vals[sig_ind]]), + data, 0.0, 450.0, W_hat2_4)[0][0] + +mu_mesh, sig_mesh = np.meshgrid(mu_vals, sig_vals) + +fig, ax = plt.subplots(subplot_kw={"projection": "3d"}) +ax.plot_surface(mu_mesh.T, sig_mesh.T, critfunc_vals, rstride=8, + cstride=1, cmap=cmap1, alpha=0.9) +ax.scatter(mu_GMM2_4, sig_GMM2_4, critfunc_GMM2_4, color='red', marker='o', + s=18, label='GMM estimate') +ax.view_init(elev=15, azim=27, roll=0) +ax.set_title('Criterion function surface for values of mu and sigma') +ax.set_xlabel(r'$\mu$') +ax.set_ylabel(r'$\sigma$') +ax.set_zlabel(r'Criterion func.') -(SecGMM_LinReg)= -## Linear regression by GMM and relation to OLS +plt.show() +``` + +```{figure} ../../../images/gmm/Econ381scores_SurfaceCrit4_2.png +--- +height: 500px +name: FigGMM_SurfCrit4_2 +--- +Surface of the 4 moment, two-step weighting matrix GMM criterion function for values of $\mu$ and $\sigma$ in the neighborhood of the GMM estimate. The scatter point represents the criterion function value for the GMM estimate. +``` + +We can compute the estimator of the variance-covariance matrix $\hat{\Sigma}$ of the GMM parameter estimate by computing the Jacobian of the error vector. + +```{code-cell} ipython3 +:tags: [] + +d_err4_2 = Jac_err4(data, mu_GMM2_4, sig_GMM2_4, 0.0, 450.0, False) +print("Jacobian matrix of derivatives") +print(d_err4_2) +print("") +print("Weighting matrix") +print(W_hat2_4) +SigHat4_2 = (1 / N) * lin.inv(d_err4_2.T @ W_hat2_4 @ d_err4_2) +print("") +print("Sigma hat squared") +print(SigHat4_2) +print("") +print("Standard errors") +print('Std. err. mu_hat=', np.sqrt(SigHat4_2[0, 0])) +print('Std. err. sig_hat=', np.sqrt(SigHat4_2[1, 1])) +``` +In this case, the standard errors on the two GMM parameter estimates are a little larger than those from the previous section with the identity weighting matrix. But the standard errors here are still small. -(SecGMM_LinReg_OLS)= -### Ordinary least squares: overidentification + +(SecGMM_Ex_CondExp)= +### Unconditional and conditional expectations, instruments, and moments + +Most standard treatments of the generalized method of moments estimator in econometrics textbooks start with this principle and this selection of moments. However, this notebook follows the progression of starting with the most general treatment of GMM and then covering these special cases. + +In stochastic models, the assumed data generating process might have one or more characterizing equations that involve an unconditional expectation. The unconditional expectation is a strong assumption with many implications on conditional expectations that can create moments for identifying parameters using GMM. In econometric models, these unconditional expectations often show up as an assumption on the error term of one or more of the equations. Note that this is a minimal assumption and does not require knowledge of the distribution of the error term. 
+ +```{math} + :label: EqGMM_Ex_CondExp_LinReg + y_i = \beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i} + \varepsilon_i \quad\text{where}\quad E\left[\varepsilon_i\right] = 0 +``` + +In a macroeconomic model like the {cite}`BrockMirman:1972` model (characterized by the following five equations), unconditional expectations show up in two places. The first is in the Euler equation for consumption {eq}`EqGMM_Ex_CondExp_EulC`, and the second is on the error term in the law of motion for the productivity shock {eq}`EqGMM_Ex_CondExp_z`. + +```{math} + :label: EqGMM_Ex_CondExp_EulC + \left(c_t\right)^{-1} = \beta E\left[r_{t+1}\left(c_{t+1}\right)^{-1}\right] +``` +```{math} + :label: EqGMM_Ex_CondExp_bc + c_t + k_{t+1} = r_{t+1}k_t + w_t +``` +```{math} + :label: EqGMM_Ex_CondExp_focl + w_t = (1 - \alpha)e^{z_t}k_{t}^\alpha +``` +```{math} + :label: EqGMM_Ex_CondExp_fock + r_t = \alpha e^{z_t}k_{t}^{\alpha-1} +``` +```{math} + :label: EqGMM_Ex_CondExp_z + z_{t} = \rho z_{t-1} + (1 - \rho)\mu + \varepsilon_t \quad\text{where}\quad E[\varepsilon_t]=0 +``` + +It is valuable to note first that these unconditional expectations imply minimal restrictions on the stochastic distributions in the model. They only imply a restriction on the first moments of those particular parts of the distributions. Furthermore, because they are unconditional distributions (which is a strong assumption), they also imply restrictions on conditional distributions. Each of these restrictions---both from the unconditional expectations and conditional expectations implications---can be used as moments to identify parameters. + +Let $\mathcal{I}$ be the set of variables that are in the information set of the model at the time the expectations operator in the model is formed. Let $w\in\mathcal{I}$ be the typical element (variable) in the information set. In a cross sectional econometric model, the variables in the information set are $w\in\mathcal{I}$ that could possibly be related to the dependent variable $y$ and were determined at the time the expectation was formed. In dynamic models or time series models, variables in the information set include any variables that were determined on or before the period in which the expectation was formed. + +The following sequence shows how an unconditional expectation can lead to moments that can identify parameters. + +```{math} + :label: EqGMM_Ex_CondExp_ExExw + E[x] = 0 \Rightarrow E[x|\mathcal{I}] = 0 \Rightarrow Cov[x,w] = 0 \Rightarrow E[xw] = 0 +``` + +The first equation states that the unconditional expectation of $x$ is zero. This implies that the conditional expectation of $x$ given anything else in the information set is also zero. This, in turn, implies that the covariance of $x$ and any element $w$ of the information set is zero so that the expectation of $x$ times $w$ is zero. It is this last equation that generates many of the moments used to identify parameters in GMM. Any variable in the instrument set $w\in\mathcal{I}$ can generate a moment condition. + + +(SecGMM_Ex_CondExp_OLS)= +#### Ordinary least squares (OLS): overidentification The most common method of estimating the parameters of a linear regression is using the ordinary least squares (OLS) estimator. This estimator is just special type of generalized method of moments (GMM) estimator. 
A simple regression specification in which the dependent variable $y_i$ is a linear function of two independent variables $x_{1,i}$ and $x_{2,i}$ is the following: ```{math} - :label: EqGMM_LinReg_LinReg - y_i = \beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i} + \varepsilon_i \quad\text{where}\quad \varepsilon_i\sim N\left(0,\sigma^2\right) + :label: EqGMM_Ex_CondExp_LinReg2 + y_i = \beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i} + \varepsilon_i \quad\text{where}\quad E\left[\varepsilon_i\right]=0 ``` -Note that we can solve for the parameters $(\beta_0,\beta_1,\beta_2)$ in a number of ways. And we can do it without making any assumptions about the distribution of the error terms $\varepsilon_i$. +Note that we can solve for the parameters $(\beta_0,\beta_1,\beta_2)$ in a number of ways. And we can do it with only minimal assumptions about the distribution of the error terms $\varepsilon_i$. -One way we might choose the parameters is to choose $(\beta_0,\beta_1,\beta_2)$ to minimize the distance between the $N$ observations of $y_i$ and the $N$ predicted values for $y_i$ given by $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$. You can think of the $N$ observations of $y_i$ as $N$ data moments. And you can think of the $N$ observations of $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$ as $N$ model moments. The least squares estimator minimizes the sum of squared errors, which is the sum of squared deviations between the $N$ values of $y_i$ and $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$. +One way we might choose the parameters is to choose $(\beta_0,\beta_1,\beta_2)$ to minimize the distance between the $N$ observations of $y_i$ and the $N$ predicted values for $y_i$ given by $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$. You can think of the $N$ observations of $y_i$ as $N$ data moments. And you can think of the $N$ observations of $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$ (the predicted values of $y_i$) as $N$ model moments. The least squares estimator minimizes the sum of squared errors, which is the sum of squared deviations between the $N$ values of $y_i$ and $\beta_0 + \beta_1 x_{1,i} + \beta_2 x_{2,i}$. ```{math} - :label: EqGMM_LinReg_Errs + :label: EqGMM_Ex_CondExp_OLS_Errs \varepsilon_i = y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i} ``` ```{math} - :label: EqGMM_LinReg_gmmprob + :label: EqGMM_Ex_CondExp_OLS_gmmprob \hat{\theta}_{OLS} = \theta:\quad \min_{\theta} \varepsilon^T\, I \, \varepsilon ``` @@ -348,23 +1577,23 @@ The OLS GMM estimator of the linear regression model is an overidentified GMM es Let the $N\times 1$ vector of $y_i$'s be $Y$. Let the $N\times 3$ vector of data $(1, x_{1,i}, x_{2,i})$ be $X$. And let the vector of three parameters $(\beta_0, \beta_1, \beta_2)$ be $\beta$. It can be shown that the OLS estimator for the vector of parameters $\beta$ is the following. ```{math} - :label: EqGMM_LinReg_xxxy + :label: EqGMM_Ex_CondExp_OLS_xxxy \hat{\beta}_{OLS} = (X^T X)^{-1}(X^T Y) ``` But you could also just estimate the coefficients using the criterion function in the GMM statement of the problem above. This method is called nonlinear least squares or generalized least squares. Many applications of regression use a weighting matrix in the criterion function that adjusts for issues like heteroskedasticity and autocorrelation. -Lastly, many applications use a different distance metric than the weighted sum of squared errors for the difference in moments. Sum of squared errors puts a large penalty on big differences. 
Sometimes you might want to maximize the sum of absolute errors, which is sometimes called median regression. You could also minimize the maximum absolute difference in the errors, which is even more extreme than the sum of squared errors on penalizing large differences.
+Many applications use a distance metric other than the weighted sum of squared errors for the difference in moments. Sum of squared errors puts a large penalty on big differences. Sometimes you might want to minimize the sum of absolute errors, which is sometimes called median regression. You could also minimize the maximum absolute difference in the errors, which is even more extreme than the sum of squared errors in penalizing large differences.
 
 
-(SecGMM_LinReg_mom)=
-### Linear regression by moment condition: exact identification
+(SecGMM_Ex_CondExp_mom)=
+#### Linear regression by moment condition: exact identification
 
-The exactly identified GMM approach to estimating the linear regression model comes from the underlying statistical assumptions of the model. We usually assume that the dependent variable $y_i$ and the independent variables $(x_{1,i}, x_{2,i})$ are not correlated with the error term $\varepsilon_i$. This implies the following three conditions.
+In the linear regression example in the two previous sections, there are three parameters to be estimated $(\beta_0, \beta_1, \beta_2)$. The OLS approach identifies these three parameters with more than three moments $R>K$. The exactly identified GMM approach to estimating the linear regression model comes from the underlying statistical assumptions of the model. We usually assume that the expectation of the error terms is zero. And we assume that the independent variables $(x_{1,i}, x_{2,i})$ are not correlated with the error term $\varepsilon_i$. This implies the following three conditions.
 
 ```{math}
-    :label: EqGMM_LinReg_momcond_y
-    E\left[y^T \varepsilon\right] = 0
+    :label: EqGMM_LinReg_momcond_eps
+    E\left[\varepsilon\right] = 0
 ```
 
 ```{math}
@@ -377,11 +1606,11 @@ The exactly identified GMM approach to estimating the linear regression model co
 	E\left[x_2^T \varepsilon\right] = 0
 ```
 
-The data analogues for these moment conditions are the following.
+The data or empirical analogues for these moment conditions are the following.
 
 ```{math}
-    :label: EqGMM_LinReg_datacond_y
-    \frac{1}{N}\sum_{i=1}^N\left[y_i \varepsilon_i\right] = 0 \quad\Rightarrow\quad \sum_{i=1}^N\Bigl[y_i\left(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\right)\Bigr] = 0
+    :label: EqGMM_LinReg_datacond_eps
+    \frac{1}{N}\sum_{i=1}^N\left[\varepsilon_i\right] = 0 \quad\Rightarrow\quad \sum_{i=1}^N\bigl(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\bigr) = 0
 ```
 
 ```{math}
@@ -394,13 +1623,13 @@ The data analogues for these moment conditions are the following.
 	\frac{1}{N}\sum_{i=1}^N\left[x_{2,i} \varepsilon_i\right] = 0 \quad\Rightarrow\quad \sum_{i=1}^N\Bigl[x_{2,i}\left(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\right)\Bigr] = 0
 ```
 
-Think of the assumed zero correlations in equations {eq}`EqGMM_LinReg_momcond_y`, {eq}`EqGMM_LinReg_momcond_x1`, and {eq}`EqGMM_LinReg_momcond_x2` as data moments that are all equal to zero. And think of the empirical analogues of those moments as the left-hand-sides of equations {eq}`EqGMM_LinReg_datacond_y`, {eq}`EqGMM_LinReg_datacond_x1`, and {eq}`EqGMM_LinReg_datacond_x2` as the corresponding model moments.
The exactly identified GMM approach to estimating the linear regression model in {eq}`EqGMM_LinReg_LinReg` is to choose the parameter vector $\theta=[\beta_0,\beta_1,\beta_2]$ to minimize the three moment error conditions,
+Think of the assumed zero expectation and zero correlations in equations {eq}`EqGMM_LinReg_momcond_eps`, {eq}`EqGMM_LinReg_momcond_x1`, and {eq}`EqGMM_LinReg_momcond_x2` as data moments that are all equal to zero. And think of the left-hand-sides of equations {eq}`EqGMM_LinReg_datacond_eps`, {eq}`EqGMM_LinReg_datacond_x1`, and {eq}`EqGMM_LinReg_datacond_x2`, the empirical analogues of those moments, as the corresponding model moments. The exactly identified GMM approach to estimating the linear regression model in {eq}`EqGMM_Ex_CondExp_LinReg2` is to choose the parameter vector $\theta=[\beta_0,\beta_1,\beta_2]$ to minimize the three moment error conditions,
 
 ```{math}
     :label: EqGMM_LinReg_exactprob
     \hat{\theta}_{lin,exact} = \theta:\quad \min_{\theta} e(x|\theta)^T\, W \, e(x|\theta) \\
     \text{where}\quad e(x|\theta)\equiv \begin{bmatrix}
-      \sum_{i=1}^N\Bigl[y_i\left(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\right)\Bigr] \\
+      \sum_{i=1}^N\bigl(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\bigr) \\
       \sum_{i=1}^N\Bigl[x_{1,i}\left(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\right)\Bigr] \\
       \sum_{i=1}^N\Bigl[x_{2,i}\left(y_i - \beta_0 - \beta_1 x_{1,i} - \beta_2 x_{2,i}\right)\Bigr]
     \end{bmatrix}
@@ -409,10 +1638,296 @@ Think of the assumed zero correlations in equations {eq}`EqGMM_LinReg_momcond_y`
 
 where $W$ is some $3\times 3$ weighting matrix.
 
 
+(SecGMM_Ex_BM72)=
+### Brock and Mirman (1972) dynamic macroeconomic model
+
+The {cite}`BrockMirman:1972` dynamic macroeconomic model was initially used to answer questions about optimal economic growth in a dynamic stochastic environment. However, the model has turned out to be one of the simplest versions of an internally consistent dynamic stochastic general equilibrium model. This model is described and characterized by the following five equations. You will use this model as an example for GMM estimation in {numref}`ExercStructEst_GMM_BM72`.
+
+```{math}
+    :label: EqGMM_Ex_BM72_EulC
+    \left(c_t\right)^{-1} = \beta E\left[r_{t+1}\left(c_{t+1}\right)^{-1}\right]
+```
+```{math}
+    :label: EqGMM_Ex_BM72_bc
+    c_t + k_{t+1} = r_{t+1}k_t + w_t
+```
+```{math}
+    :label: EqGMM_Ex_BM72_focl
+    w_t = (1 - \alpha)e^{z_t}k_{t}^\alpha
+```
+```{math}
+    :label: EqGMM_Ex_BM72_fock
+    r_t = \alpha e^{z_t}k_{t}^{\alpha-1}
+```
+```{math}
+    :label: EqGMM_Ex_BM72_z
+    z_{t} = \rho z_{t-1} + (1 - \rho)\mu + \varepsilon_t \quad\text{where}\quad E[\varepsilon_t]=0
+```
+
+
+(SecGMM_Ex_HS82)=
+### Hansen and Singleton (1982)
+
+{cite}`Hansen:1982` was the first paper to formalize the generalized method of moments (GMM) estimation method. And {cite}`HansenSingleton:1982` was the first financial macroeconomic application of the method. In the previous section, we used the {cite}`BrockMirman:1972` model because it is such a simple dynamic stochastic macroeconomic model. {cite}`HansenSingleton:1982` use a slightly more complex dynamic stochastic macroeconomic model with a structure that applied more closely to data on asset prices.
+
+{cite}`HansenSingleton:1982` provide a comparison of their GMM estimates to the corresponding estimates implied by maximum likelihood estimation.
They highlight the advantage of GMM that it requires fewer distributional assumptions, needing only the orthogonality conditions (unconditional and conditional expectations) of the model rather than the full solution of the model in rational expectations models.
+
+
+(SecGMM_Ident)=
+## Identification
+
+An issue that we saw in the examples from Section {ref}`SecGMM_Ex` is that there is some science as well as some art in choosing moments to identify the parameters in a GMM estimation.
+
+* The $\mu$ and $\sigma$ parameters were identified more precisely when using the two-step estimator of the optimal weighting matrix instead of the identity matrix.
+* The overidentified four-moment model of total scores produced much smaller standard errors for both $\mu$ and $\sigma$ than did the two-moment model.
+
+Suppose the parameter vector $\theta$ has $K$ elements, or rather, $K$ parameters to be estimated. In order to estimate $\theta$ by GMM, you must have at least as many moments as parameters to estimate $R\geq K$. If you have exactly as many moments as parameters to be estimated $R=K$, the model is said to be *exactly identified*. If you have more moments than parameters to be estimated $R>K$, the model is said to be *overidentified*. If you have fewer moments than parameters to be estimated $R<K$, the model is said to be *underidentified* and the parameters cannot be estimated by GMM. It is often advantageous to overidentify $R>K$ the model in GMM estimation as we saw in the previous example. The main reason is that not all moments are orthogonal. That is, some moments convey roughly the same information about the data and, therefore, do not separately identify any extra parameters. So a good GMM model often is overidentified $R>K$.
+
+One last point about GMM regards moment selection and verification of results. The real world has an infinite supply of potential moments that describe some part of the data. Choosing moments to estimate parameters by GMM requires understanding of the model, intuition about its connections to the real world, and artistry. A good GMM estimation will include moments that have some relation to or story about their connection to particular parameters of the model to be estimated. In addition, a good verification of a GMM estimation is to take some moment from the data that was not used in the estimation and see how well the corresponding moment from the estimated model matches that *outside moment*.
+
+
(SecGMM_Exerc)=
## Exercises

+```{exercise-start} Matching the US income distribution by GMM
+:label: ExercStructEst_GMM_incdist
+:class: green
+```
+In this exercise, you will use the comma-delimited data file [`hh_inc_synth.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/hh_inc_synth.txt) in the [`./data/gmm/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/data/gmm) folder of the GitHub repository for this book, which contains 121,085 synthetic observations on US household income. {numref}`TabGMMIncMoms` displays histogram counts and population percentages (moments) for each income range. The first column in the data file gives the percent of the population in each income bin (the third column of {numref}`TabGMMIncMoms`). The second column in the data file has the midpoint of each income bin. So the midpoint of the first income bin of all household incomes less than \$5,000 is \$2,500.
You may want to use the [`distributions.py`](https://github.com/OpenSourceEcon/CompMethods/blob/main/code/gmm/distributions.py) module in the [`./code/gmm/`](https://github.com/OpenSourceEcon/CompMethods/blob/main/code/gmm/) folder of the GitHub repository for this online book.
+
+1. Use the [`numpy.histogram()`](https://numpy.org/doc/stable/reference/generated/numpy.histogram.html) function to create the population count and population percentage moments in {numref}`TabGMMIncMoms` from the synthetic household income data in comma-delimited text file [`hh_inc_synth.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/hh_inc_synth.txt) by inputting the appropriate list of bin edges for the `bins` argument of the `numpy.histogram()` function.
+
+2. Plot the histogram of the data [`hh_inc_synth.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/hh_inc_synth.txt) using the bins described in the first column of {numref}`TabGMMIncMoms`, which you used as an input to parts (1) and (2), and the height being the density (not the count) such that the area of the histogram bars sums to one (use the `weights` option rather than the `density` option in [`matplotlib.pyplot.hist`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html) because your bin widths are not equal). List the dollar amounts on the $x$-axis as thousands of dollars. That is, divide them by 1,000 to put them in units of thousands of dollars (\$000s). Even though the top bin is listed as \$250,000 and above in {numref}`TabGMMIncMoms`, the synthetic data are top-coded at \$350,000, so set the last bin edge to \$350,000. (It doesn't look very good graphing it between 0 and $\infty$.) The equation for the weight of each observation $i$ that normalizes a variable bin-width histogram to be a density is {eq}`EqGMM_Exc_IncMoms_wgt`, where $N$ is the number of observations in the data and $bin\_width_j$ is the width of the histogram bin that observation $i$ is part of. In summary, your histogram should have 42 bars. The first 40 bars for the lowest income bins should be the same width. However, the last two bars should be different widths from each other and from the rest of the bars. It should look like {numref}`Figure %s <FigGMM_hist_inc>`. [Hint: look at the [`matplotlib.pyplot.hist`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html) command option of `bins` and submit a list of bin edges for the `bins` option.]
+```{math}
+    :label: EqGMM_Exc_IncMoms_wgt
+    weight_i = \frac{1}{N \times bin\_width_j} \:\:\text{for all}\:\: i \:\:\text{in histogram bin}\:\: j
+```
+
+3. Using GMM, fit the two-parameter lognormal $LN(x|\mu,\sigma)$ distribution defined in section {ref}`SecMLE_GBfam_LN` of the {ref}`Chap_MLE` chapter to the distribution of household income data using the moments from the data file. Make sure to try various initial guesses. (HINT: $\mu_0=\ln(avg.\:inc.)$ might be good.) For your weighting matrix $W$, use a $42\times 42$ diagonal matrix in which the diagonal non-zero elements are the population percentage moments from the data file. This will put the most weight on the moments with the largest percent of the population. Report your estimated values for $\hat{\mu}$ and $\hat{\sigma}$, as well as the value of the minimized criterion function $e(x|\hat{\theta})^T \, W \, e(x|\hat{\theta})$. Plot the histogram from part (2) overlayed with a line representing the implied histogram from your estimated lognormal (LN) distribution.
Each point on the line is the midpoint of the bin and the implied height of the bin. Do not forget to divide the values for your last two moments by 10 and 20, respectively, so that they match up with the histogram. + +4. Using GMM, fit the gamma $GA(x|\alpha,\beta)$ distribution defined in section {ref}`SecMLE_GBfam_GA` of the {ref}`Chap_MLE` chapter to the distribution of household income data using the moments from the data file. Use $\alpha_0=3$ and $\beta_0=20,000$ as your initial guess. These initial guesses come from the property of the gamma (GA) distribution that $E(x)=\alpha\beta$ and $Var(x)=\alpha\beta^2$. Report your estimated values for $\hat{\alpha}$ and $\hat{\beta}$, as well as the value of the minimized criterion function $e(x|\hat{\theta})^T \, W \, e(x|\hat{\theta})$. Use the same weighting matrix as in part (3). Plot the histogram from part (2) overlayed with a line representing the implied histogram from your estimated gamma (GA) distribution. Do not forget to divide the values for your last two moments by 10 and 20, respectively, so that they match up with the histogram. + +5. Plot the histogram from part (2) overlayed with the line representing the implied histogram from your estimated lognormal (LN) distribution from part (3) and the line representing the implied histogram from your estimated gamma (GA) distribution from part (4). What is the most precise way to tell which distribution fits the data the best? Which estimated distribution---$LN$ or $GA$---fits the data best? + +6. Repeat your estimation of the $GA$ distribution from part (4), but use the two-step estimator for the optimal weighting matrix $\hat{W}_{twostep}$. Do your estimates for $\alpha$ and $\beta$ change much? How can you compare the goodness of fit of this estimated distribution versus the goodness of fit of the estimated distribution in part (4)? + +```{list-table} Distribution of Household Money Income by Selected Income Range, 2011. Source: 2011 CPS household income count data Current Population Survey (2012, Table HINC-01). 
+:header-rows: 2 +:name: TabGMMIncMoms + +* - Income + - \# housholds + - \% of +* - range + - (000s) + - population +* - All households + - 121,084 + - 100.0 +* - Less than \$5,000 + - 4,261 + - 3.5 +* - \$5,000 to \$9,999 + - 4,972 + - 4.1 +* - \$10,000 to \$14,999 + - 7,127 + - 5.9 +* - \$15,000 to \$19,999 + - 6,882 + - 5.7 +* - \$20,000 to \$24,999 + - 7,095 + - 5.9 +* - \$25,000 to \$29,999 + - 6,591 + - 5.4 +* - \$30,000 to \$34,999 + - 6,667 + - 5.5 +* - \$35,000 to \$39,999 + - 6,136 + - 5.1 +* - \$40,000 to \$44,999 + - 5,795 + - 4.8 +* - \$45,000 to \$49,999 + - 4,945 + - 4.1 +* - \$50,000 to \$54,999 + - 5,170 + - 4.3 +* - \$55,000 to \$59,999 + - 4,250 + - 3.5 +* - \$60,000 to \$64,999 + - 4,432 + - 3.7 +* - \$65,000 to \$69,999 + - 3,836 + - 3.2 +* - \$70,000 to \$74,999 + - 3,606 + - 3.0 +* - \$75,000 to \$79,999 + - 3,452 + - 2.9 +* - \$80,000 to \$84,999 + - 3,036 + - 2.5 +* - \$85,000 to \$89,999 + - 2,566 + - 2.1 +* - \$90,000 to \$94,999 + - 2,594 + - 2.1 +* - \$95,000 to \$99,999 + - 2,251 + - 1.9 +* - \$100,000 to \$104,999 + - 2,527 + - 2.1 +* - \$105,000 to \$109,999 + - 1,771 + - 1.5 +* - \$110,000 to \$114,999 + - 1,723 + - 1.4 +* - \$115,000 to \$119,999 + - 1,569 + - 1.3 +* - \$120,000 to \$124,999 + - 1,540 + - 1.3 +* - \$125,000 to \$129,999 + - 1,258 + - 1.0 +* - \$130,000 to \$134,999 + - 1,211 + - 1.0 +* - \$135,000 to \$139,999 + - 918 + - 0.8 +* - \$140,000 to \$144,999 + - 1,031 + - 0.9 +* - \$145,000 to \$149,999 + - 893 + - 0.7 +* - \$150,000 to \$154,999 + - 1,166 + - 1.0 +* - \$155,000 to \$159,999 + - 740 + - 0.6 +* - \$160,000 to \$164,999 + - 697 + - 0.6 +* - \$165,000 to \$169,999 + - 610 + - 0.5 +* - \$170,000 to \$174,999 + - 617 + - 0.5 +* - \$175,000 to \$179,999 + - 530 + - 0.4 +* - \$180,000 to \$184,999 + - 460 + - 0.4 +* - \$185,000 to \$189,999 + - 363 + - 0.3 +* - \$190,000 to \$194,999 + - 380 + - 0.3 +* - \$195,000 to \$199,999 + - 312 + - 0.3 +* - \$200,000 to \$249,999 + - 2,297 + - 1.9 +* - \$250,000 and over + - 2,808 + - 2.3 +* - Mean income + - \$69,677 + - +* - Median income + - \$50,054 + - +``` + +```{figure} ../../../images/gmm/hist_inc.png +--- +height: 500px +name: FigGMM_hist_inc +--- +Histogram of US household income: $N=121,085$. Source: 2011 CPS household income count data {cite}`CPS:2012`. +``` +```{exercise-end} +``` + +```{exercise-start} Estimating the Brock and Mirman, 1972 model by GMM +:label: ExercStructEst_GMM_BM72 +:class: green +``` +You can observe time series data in an economy for the following variables: $(c_t, k_t, w_t, r_t)$. Data on $(c_t, k_t, w_t, r_t)$ can be loaded from the file [`MacroSeries.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/gmm/MacroSeries.txt) in the [`./data/gmm/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/data/gmm) folder of the GitHub repository for this book. This file is a comma separated text file with no labels. The variables are ordered as $(c_t, k_t, w_t, r_t)$. These data have 100 periods, which are quarterly (25 years). Suppose you think that the data are generated by a process similar to the {cite}`BrockMirman:1972` model. A simplified set of characterizing equations of the Brock and Mirman (1972) model are the following. 
+
+```{math}
+    :label: EqGMM_Exc_BM72_EulC
+    \left(c_t\right)^{-1} = \beta E\left[r_{t+1}\left(c_{t+1}\right)^{-1}\right]
+```
+```{math}
+    :label: EqGMM_Exc_BM72_bc
+    c_t + k_{t+1} = r_{t+1}k_t + w_t
+```
+```{math}
+    :label: EqGMM_Exc_BM72_focl
+    w_t = (1 - \alpha)e^{z_t}k_{t}^\alpha
+```
+```{math}
+    :label: EqGMM_Exc_BM72_fock
+    r_t = \alpha e^{z_t}k_{t}^{\alpha-1}
+```
+```{math}
+    :label: EqGMM_Exc_BM72_z
+    z_{t} = \rho z_{t-1} + (1 - \rho)\mu + \varepsilon_t \quad\text{where}\quad E[\varepsilon_t]=0
+```
+The variable $c_t$ is aggregate consumption in period $t$, and $k_{t+1}$ is total household savings and investment in period $t$ for which they receive a return in the next period (this model assumes full depreciation of capital). The wage per unit of labor in period $t$ is $w_t$ and the interest rate or rate of return on investment is $r_t$. Total factor productivity is $z_t$, which follows an AR(1) process given in {eq}`EqGMM_Exc_BM72_z`. The rest of the symbols in the equations are parameters that must be estimated or must be otherwise given $(\alpha,\beta,\rho,\mu,\sigma)$. The constraints on these parameters are the following.
+
+```{math}
+    :label: EqGMM_Exc_BM72_cstr
+    \alpha,\beta\in(0,1),\quad \mu,\sigma > 0,\quad \rho\in(-1,1)
+```
+
+Assume that the first observation in the data file variables is $t=1$. Let $k_1$ be the first observation in the data file for the variable $k_t$.
+
+1. Estimate $\alpha$, $\rho$, and $\mu$ by GMM using the unconditional moment conditions that $E[\varepsilon_t]=0$ and $E[\beta r_{t+1}c_t/c_{t+1} - 1]=0$. Assume $\beta=0.99$. Use the $4\times 4$ identity matrix $I(4)$ as your estimator of the optimal weighting matrix. Use the following four moment conditions {eq}`EqGMM_Exc_BM72_Zmom1`, {eq}`EqGMM_Exc_BM72_Zmom2`, {eq}`EqGMM_Exc_BM72_MainMom1`, and {eq}`EqGMM_Exc_BM72_MainMom2` to estimate the three parameters. Report your estimated parameter values $(\hat{\alpha},\hat{\rho},\hat{\mu})$ and the value of your minimized criterion function. The estimation inside each iteration of the minimizer of the GMM objective function is the following.
+    * Given a guess for $(\alpha,\rho,\mu)$ and data $(c_t, k_t, w_t, r_t)$, use {eq}`EqGMM_Exc_BM72_fock` to back out an implied series for $z_t$.
+    * Given $z_t$, parameters $(\alpha,\rho,\mu)$ and data $(c_t, k_t, w_t, r_t)$, calculate four empirical analogues of the moment conditions {eq}`EqGMM_Exc_BM72_Zmom1`, {eq}`EqGMM_Exc_BM72_Zmom2`, {eq}`EqGMM_Exc_BM72_MainMom1`, and {eq}`EqGMM_Exc_BM72_MainMom2`.
+    * Update guesses for parameters $(\alpha,\rho,\mu)$ until minimum criterion value is found.
+
+```{math}
+    :label: EqGMM_Exc_BM72_Zmom1
+    E\Bigl[z_{t+1} - \rho z_t - (1-\rho)\mu\Bigr] = 0
+```
+```{math}
+    :label: EqGMM_Exc_BM72_Zmom2
+    E\biggl[\Bigl(z_{t+1} - \rho z_t - (1-\rho)\mu\Bigr)z_t\biggr] = 0
+```
+```{math}
+    :label: EqGMM_Exc_BM72_MainMom1
+    E\left[\beta\alpha e^{z_{t+1}}k_{t+1}^{\alpha-1}\frac{c_t}{c_{t+1}} - 1\right] = 0
+```
+```{math}
+    :label: EqGMM_Exc_BM72_MainMom2
+    E\left[\left(\beta\alpha e^{z_{t+1}}k_{t+1}^{\alpha-1}\frac{c_t}{c_{t+1}} - 1\right)w_t\right] = 0
+```
+2. Compute the two-step GMM estimator of $(\alpha,\rho,\mu)$ and use the finite difference Jacobian method for the estimator of the variance-covariance matrix of the two-step GMM point estimates $(\hat{\alpha}, \hat{\rho}, \hat{\mu})$. Report the GMM two-step estimates for the parameters and their standard errors.
+```{exercise-end} +``` (SecGMMfootnotes)= diff --git a/docs/book/struct_est/MLE.md b/docs/book/struct_est/MLE.md index 6093399..dbdba9e 100644 --- a/docs/book/struct_est/MLE.md +++ b/docs/book/struct_est/MLE.md @@ -142,7 +142,7 @@ plt.show() height: 500px name: FigMLE_EconScoreHist --- -Intermediate macroeconomics midterm scores over two semesters +Histogram of intermediate macroeconomics midterm scores over two semesters: $N=161$ ``` Now lets code up a parametric distribution that is flexible enough to fit lots of different distributions of test scores, has the properties we would expect from a distribution of test scores, and is characterized by a minimal number of parameters. In this case, we will use a truncated normal distribution.[^TruncNorm] @@ -212,7 +212,7 @@ Let's plot the histogram of the intermediate macroeconomics test scores overlaye # Plot histogram num_bins = 30 count, bins, ignored = plt.hist(data, num_bins, density=True, - edgecolor='k') + edgecolor='k', label='Data') plt.title('Intermediate macro scores: 2011-2012', fontsize=15) plt.xlabel(r'Total points') plt.ylabel(r'Percent of scores') @@ -402,7 +402,7 @@ Note that we used initial guesses for $\mu$ and $\sigma$ of 385 and 120 and the # Plot the histogram of the data count, bins, ignored = plt.hist(data, num_bins, density=True, - edgecolor='k') + edgecolor='k', label='Data') plt.title('Intermediate macro scores: 2011-2012', fontsize=15) plt.xlabel(r'Total points') plt.ylabel(r'Percent of scores') @@ -480,7 +480,6 @@ ax.set_title('Log-likelihood function for values of mu and sigma') ax.set_xlabel(r'$\mu$') ax.set_ylabel(r'$\sigma$') ax.set_zlabel(r'Log-likelihood func.') -plt.tight_layout() plt.show() ``` @@ -527,7 +526,6 @@ ax.set_title('Log-likelihood function for values of mu and sigma') ax.set_xlabel(r'$\mu$') ax.set_ylabel(r'$\sigma$') ax.set_zlabel(r'Log-likelihood func.') -plt.tight_layout() plt.show() ``` @@ -602,13 +600,13 @@ The constrained minimizer is trying to get up to the unconstrained solution but # Plot the histogram of the data count, bins, ignored = plt.hist(data, num_bins, density=True, - edgecolor='k') + edgecolor='k', label='Data') plt.title('Intermediate macro scores: 2011-2012', fontsize=15) plt.xlabel(r'Total points') plt.ylabel(r'Percent of scores') plt.xlim([0, 550]) # This gives the xmin and xmax to be plotted" -# Plot the constrained MLD estimated distribution +# Plot the constrained MLE estimated distribution plt.plot( dist_pts, trunc_norm_pdf(dist_pts, mu_MLE_constr, sig_MLE_constr, 0, 450), @@ -843,14 +841,14 @@ The statistical family tree figure above shows the all the relationships between :label: ExercStructEst_MLE_claims :class: green ``` -For this problem, you will use 10,619 health claims amounts from a fictitious sample of households. These data are in a single column of the text file [`claims.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/mle/claims.txt) in the online book repository data folder `data/mle/`. This file is a comma separated text file with no labels. Health claim amounts are reported in US dollars. For this exercise, you will need to use the generalized beta family of distributions shown in {numref}`Figure %s ` of Section {ref}`SecMLE_GBfam`. +For this problem, you will use 10,619 health claims amounts from a fictitious sample of households. These data are in a single column of the text file [`claims.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/mle/claims.txt) in the online book repository data folder `data/mle/`. 
This file is a comma separated text file with no labels. Health claim amounts are reported in US dollars. For this exercise, you will need to use the generalized beta family of distributions shown in {numref}`Figure %s ` of Section {ref}`SecMLE_GBfam`. You may want to use the [`distributions.py`](https://github.com/OpenSourceEcon/CompMethods/blob/main/code/mle/distributions.py) module in the [`./code/mle/`](https://github.com/OpenSourceEcon/CompMethods/blob/main/code/mle/) folder of the GitHub repository for this online book. 1. Calculate and report the mean, median, maximum, minimum, and standard deviation of monthly health expenditures for these data. Plot two histograms of the data in which the $y$-axis gives the percent of observations in the particular bin of health expenditures and the $x$-axis gives the value of monthly health expenditures. Use percentage histograms in which the height of each bar is the percent of observations in that bin. In the first histogram, use 1,000 bins to plot the frequency of all the data. In the second histogram, use 100 bins to plot the frequency of only monthly health expenditures less-than-or-equal-to \$800 ($x_i\leq 800$). Adjust the frequencies of this second histogram to account for the observations that you have not displayed ($x_i>800$). That is, the heights of the histogram bars in the second histogram should not sum to 1 because you are only displaying a fraction of the data. Comparing the two histograms, why might you prefer the second one? -2. Using MLE, fit the gamma $GA(x;\alpha,\beta)$ distribution to the individual observation data. Use $\beta_0=Var(x)/E(x)$ and $\alpha_0=E(x)/\beta_0$ as your initial guess. These initial guesses come from the property of the gamma (GA) distribution that $E(x)=\alpha\beta$ and $Var(x)=\alpha\beta^2$. Report your estimated values for $\hat{\alpha}$ and $\hat{\beta}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}(\hat{\theta})$. Plot the second histogram from part (1) overlayed with a line representing the implied histogram from your estimated gamma (GA) distribution. -3. Using MLE, fit the generalized gamma $GG(x;\alpha,\beta,m)$ distribution to the individual observation data. Use your estimates for $\alpha$ and $\beta$ from part(2), as well as $m=1$, as your initial guess. Report your estimated values for $\hat{\alpha}$, $\hat{\beta}$, and $\hat{m}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}$. Plot the second histogram from part (1) overlayed with a line representing the implied histogram from your estimated generalized gamma (GG) distribution. -4. Using MLE, fit the generalized beta 2 $GB2(x;a,b,p,q)$ distribution to the individual observation data. Use your estimates for $\alpha$, $\beta$, and $m$ from part (3), as well as $q=10,000$, as your initial guess. Report your estimated values for $\hat{a}$, $\hat{b}$, $\hat{p}$, and $\hat{q}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}$. Plot the second histogram from part(1) overlayed with a line representing the implied histogram from your estimated generalized beta 2 (GB2) distribution. -5. Perform a likelihood ratio test for each of the estimated in parts (2) and (3), respectively, against the GB2 specification in part (4). This is feasible because each distribution is a nested version of the GB2. The degrees of freedom in the $\chi^2(p)$ is 4, consistent with the GB2. 
Report the $\chi^2(4)$ values from the likelihood ratio test for the estimated GA and the estimated GG distributions. -6. Using the estimated GB2 distribution from part (4), how likely am I to have a monthly health care claim of more than \$1,000? How does this amount change if I use the estimated GA distribution from part (2)? +2. Using MLE, fit the gamma $GA(x|\alpha,\beta)$ distribution to the individual observation data. Use $\beta_0=Var(x)/E(x)$ and $\alpha_0=E(x)/\beta_0$ as your initial guess. These initial guesses come from the property of the gamma (GA) distribution that $E(x)=\alpha\beta$ and $Var(x)=\alpha\beta^2$. Report your estimated values for $\hat{\alpha}$ and $\hat{\beta}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}(\hat{\theta})$. Plot the second histogram from part (1) overlaid with a line representing the implied histogram from your estimated gamma (GA) distribution. +3. Using MLE, fit the generalized gamma $GG(x|\alpha,\beta,m)$ distribution to the individual observation data. Use your estimates for $\alpha$ and $\beta$ from part (2), as well as $m=1$, as your initial guess. Report your estimated values for $\hat{\alpha}$, $\hat{\beta}$, and $\hat{m}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}$. Plot the second histogram from part (1) overlaid with a line representing the implied histogram from your estimated generalized gamma (GG) distribution. +4. Using MLE, fit the generalized beta 2 $GB2(x|a,b,p,q)$ distribution to the individual observation data. Use your estimates for $\alpha$, $\beta$, and $m$ from part (3), as well as $q=10,000$, as your initial guess. Report your estimated values for $\hat{a}$, $\hat{b}$, $\hat{p}$, and $\hat{q}$, as well as the value of the maximized log likelihood function $\ln\mathcal{L}$. Plot the second histogram from part (1) overlaid with a line representing the implied histogram from your estimated generalized beta 2 (GB2) distribution. +5. Perform a likelihood ratio test for each of the estimated distributions in parts (2) and (3), respectively, against the GB2 specification in part (4). This is feasible because each distribution is a nested version of the GB2. The degrees of freedom in the $\chi^2(p)$ are 4, consistent with the GB2. Report the $\chi^2(4)$ values from the likelihood ratio test for the estimated GA and the estimated GG distributions. +6. Using the estimated GB2 distribution from part (4), how likely am I to have a health care claim of more than \$1,000 in a given month? How does this amount change if I use the estimated GA distribution from part (2)? ```{exercise-end} ``` diff --git a/docs/book/struct_est/SMM.md b/docs/book/struct_est/SMM.md index d36ee03..e00f9ba 100644 --- a/docs/book/struct_est/SMM.md +++ b/docs/book/struct_est/SMM.md @@ -1418,9 +1418,9 @@ print(results4_1) :tags: ["hide-input", "remove-output"] # Plot the histogram of the data -count, bins, ignored = plt.hist(data, 30, density=True, - edgecolor='black', linewidth=1.2) - +count, bins, ignored = plt.hist( + data, 30, density=True, edgecolor='black', linewidth=1.2, label='Data' +) plt.xlabel('Total points') plt.ylabel('Percent of scores') plt.xlim([0, 550]) # This gives the xmin and xmax to be plotted" @@ -1673,7 +1673,6 @@ ax.set_title('Criterion function for values of mu and sigma') ax.set_xlabel(r'$\mu$') ax.set_ylabel(r'$\sigma$') ax.set_zlabel(r'Crit. func.') -plt.tight_layout() plt.show() ```
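The health-claims MLE exercise updated above asks for a gamma (GA) fit by maximum likelihood with moment-based starting values. The following is a minimal illustrative sketch of that step only; it assumes a local copy of `claims.txt` and uses `scipy.stats.gamma` directly rather than the chapter's `distributions.py` module, and the variable names are placeholders.

```python
# Illustrative sketch only: fit GA(x|alpha, beta) to the claims data by MLE,
# using the moment-based initial guesses described in part (2) of the exercise.
import numpy as np
import scipy.stats as sts
import scipy.optimize as opt

data = np.loadtxt("claims.txt", delimiter=",")  # assumes a local copy of data/mle/claims.txt

def neg_log_lik_ga(params, xvals):
    # Negative log likelihood of GA(x|alpha, beta), parameterized so that
    # E(x) = alpha * beta and Var(x) = alpha * beta**2 (scipy's scale is beta)
    alpha, beta = params
    return -sts.gamma.logpdf(xvals, a=alpha, scale=beta).sum()

beta_0 = data.var() / data.mean()   # beta_0 = Var(x) / E(x)
alpha_0 = data.mean() / beta_0      # alpha_0 = E(x) / beta_0
results = opt.minimize(
    neg_log_lik_ga, np.array([alpha_0, beta_0]), args=(data,),
    method="L-BFGS-B", bounds=((1e-10, None), (1e-10, None)),
)
alpha_MLE, beta_MLE = results.x
print("alpha_MLE =", alpha_MLE, " beta_MLE =", beta_MLE,
      " max log likelihood =", -results.fun)
```

The GG and GB2 fits in parts (3) and (4) follow the same pattern with more parameters, and the likelihood ratio statistic in part (5) is then $2\left(\ln\mathcal{L}_{GB2}-\ln\mathcal{L}_{GA}\right)$, compared against the $\chi^2(4)$ critical value the exercise specifies.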
diff --git a/docs/book/struct_est/intro.md b/docs/book/struct_est/intro.md index 03d77ee..bf5e59e 100644 --- a/docs/book/struct_est/intro.md +++ b/docs/book/struct_est/intro.md @@ -2,6 +2,10 @@ (Chap_StructEstIntro)= # Introduction to Structural Estimation +> "You keep using that word. I do not think it means what you think it means." (Inigo Montoya, *The Princess Bride*) + +The term "structural estimation" has been the source of debate in the economics profession. [TODO: Insert some of the debate here.] + The material for the chapters in this Structural Estimation section was initially developed in the Structural Estimation course I taught in the Masters in Computational Social Science program at the University of Chicago from 2017 to 2020.[^MACSScourses] A good place to start in describing structural estimation is the definition of a {term}`model`. diff --git a/docs/book/struct_est/paper.md b/docs/book/struct_est/paper.md new file mode 100644 index 0000000..99c9156 --- /dev/null +++ b/docs/book/struct_est/paper.md @@ -0,0 +1,111 @@ + +(Chap_StructEstPaper)= +# Writing a Structural Estimation Paper + +TODO: Finish this section. The content for this section will be taken from my course slides on [creating a structural estimation paper proposal](https://github.com/rickecon/StructEst_W20/blob/master/Projects/ProposalPresent.pdf), and my slides on how to write the following sections of the paper: [data description](https://github.com/rickecon/StructEst_W20/blob/master/Projects/DataSection_slides.pdf), [model description](https://github.com/rickecon/StructEst_W20/blob/master/Projects/ModelDescr_slides.pdf), [estimation section](https://github.com/rickecon/StructEst_W20/blob/master/Projects/EstimResults_slides.pdf), and [conclusion/intro/abstract](https://github.com/rickecon/StructEst_W20/blob/master/Projects/IntroAbsConcl_slides.pdf). + + +(SecStructEstPaperSections)= +## Sections of a structural estimation project + +TODO: Include discussion from project sections and order of project slide in [creating a structural estimation paper proposal](https://github.com/rickecon/StructEst_W20/blob/master/Projects/ProposalPresent.pdf) slides. + + +(SecStructEstPaperSect_Data)= +### Data description + +See [data description](https://github.com/rickecon/StructEst_W20/blob/master/Projects/DataSection_slides.pdf) slides. + + +(SecStructEstPaperSect_Model)= +### Model description + +See [model description](https://github.com/rickecon/StructEst_W20/blob/master/Projects/ModelDescr_slides.pdf) slides. + + +(SecStructEstPaperSect_Est)= +### Estimation + +See [estimation section](https://github.com/rickecon/StructEst_W20/blob/master/Projects/EstimResults_slides.pdf) slides. + + +(SecStructEstPaperSect_Concl)= +### Conclusion, intro, abstract + +See [conclusion/intro/abstract](https://github.com/rickecon/StructEst_W20/blob/master/Projects/IntroAbsConcl_slides.pdf) slides. + + +(SecStructEstPaperFind)= +## Where/how do I find a project? + +TODO: Include discussion from ending slides in [creating a structural estimation paper proposal](https://github.com/rickecon/StructEst_W20/blob/master/Projects/ProposalPresent.pdf) slides. Make sure to include discussion of replication versus original research. + + +(SecStructEstPaperExerc)= +## Exercises + +```{exercise} Create a structural estimation project proposal +:label: ExercStructEst_PaperProposal +:class: green + +Create a 5-minute slide presentation of a structural estimation project proposal. You can work alone.
However, I recommend you work in a group of (at most) two. The focus of your proposal presentation must be a research question. A good project will have a strong economic theory component. Structural estimation is taking economic theory directly to data. To estimate your model, your project must use GMM, MLE, SMM, or SMLE estimation that you code yourself. I have included a LaTeX Beamer slide template in the [`./code/StrEstPaper/LaTeXtemplates/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/code/StrEstPaper/LaTeXtemplates) folder of the GitHub repository for this online book if you want to do your slides in LaTeX. Your proposal should include the following requirements. + +1. Restrictions + * The focus of your proposal presentation must be a research question. No "methods for the sake of methods" projects. + * You are not allowed to use linear regressions *unless*: + * it is involved in an indirect inference estimation + * it is a small subroutine of a bigger model + +2. State the research question. + * What are you trying to learn by using this model? + * The question should be focused. A narrow question is usually better than a broad question. + +3. Describe the model (the data generating process, DGP) $F(x_t,z_t|\theta)=0$ + * What are the endogenous variables $x_t$? + * What are the exogenous variables $z_t$? + * What are the parameters $\theta$? + * Which parameters are estimated $\hat{\theta}_e$? + * Which parameters are calibrated $\bar{\theta}_c$? + * How does one solve the model given $\theta$? + * Equations are sufficient (e.g., econometric models) + * Analytical solution (e.g., behavioral models) + * Computational solution (e.g., macroeconomic models) + +4. Describe the proposed data source $X$ + * How available are the data? + * Can you show some initial descriptive statistics or visualizations? + +5. Describe your proposed estimation strategy $\hat{\theta}$ + * Why did you choose this estimation strategy over alternatives? + * How will you identify your parameters? + * MLE: Likelihood function + * GMM: What moments will you use? + +6. Proposal conclusion + * Restate your research question + * Outline your hopes and dreams for the project + * Identify potential shortcomings/alternatives +``` + +```{exercise} Structural estimation project paper +:label: ExercStructEst_Paper +:class: green + +Write a structural estimation paper based on your project proposal from {numref}`ExercStructEst_PaperProposal` using the examples and suggestions from this chapter. I have posted a LaTeX template for a paper in the [`./code/StrEstPaper/LaTeXtemplates/`](https://github.com/OpenSourceEcon/CompMethods/tree/main/code/StrEstPaper/LaTeXtemplates) folder of the GitHub repository for this online book if you want to do your paper in LaTeX. + +1. There is no minimum page requirement, but your paper should be no more than 20 pages long. You can put any extra information in a technical appendix that is not subject to the maximum page requirement. +2. Your paper should be focused on a research question and have a title, a clear indication of authors, a date, and an abstract. +3. You must perform a structural estimation in your paper using one of the following methods that you code yourself: GMM, MLE, SMM, or SMLE. +4. The body of your paper should have the following sections, and you should follow the examples and recommendations for those sections from the corresponding discussions in this chapter.
+ * Introduction + * Data description + * Model description + * Estimation + * Conclusion +``` + + +(SecStructEstPaperFootnotes)= +## Footnotes + +The footnotes from this chapter. diff --git a/images/gmm/Econ381Scores_SurfaceCrit4.png b/images/gmm/Econ381Scores_SurfaceCrit4.png new file mode 100644 index 0000000..cd50571 Binary files /dev/null and b/images/gmm/Econ381Scores_SurfaceCrit4.png differ diff --git a/images/gmm/Econ381scores_2MLEs.png b/images/gmm/Econ381scores_2MLEs.png new file mode 100644 index 0000000..ce82f92 Binary files /dev/null and b/images/gmm/Econ381scores_2MLEs.png differ diff --git a/images/gmm/Econ381scores_4mom2mom.png b/images/gmm/Econ381scores_4mom2mom.png new file mode 100644 index 0000000..2e6cfe8 Binary files /dev/null and b/images/gmm/Econ381scores_4mom2mom.png differ diff --git a/images/gmm/Econ381scores_SurfaceCrit1.png b/images/gmm/Econ381scores_SurfaceCrit1.png new file mode 100644 index 0000000..9182a56 Binary files /dev/null and b/images/gmm/Econ381scores_SurfaceCrit1.png differ diff --git a/images/gmm/Econ381scores_SurfaceCrit4_2.png b/images/gmm/Econ381scores_SurfaceCrit4_2.png new file mode 100644 index 0000000..19eb18f Binary files /dev/null and b/images/gmm/Econ381scores_SurfaceCrit4_2.png differ diff --git a/images/gmm/hist_inc.png b/images/gmm/hist_inc.png new file mode 100644 index 0000000..b8b2a60 Binary files /dev/null and b/images/gmm/hist_inc.png differ diff --git a/images/mle/Econ381scores_2truncs.png b/images/mle/Econ381scores_2truncs.png index 78ca072..a6e9b74 100644 Binary files a/images/mle/Econ381scores_2truncs.png and b/images/mle/Econ381scores_2truncs.png differ diff --git a/images/mle/Econ381scores_MLE.png b/images/mle/Econ381scores_MLE.png index 85371e7..aefae60 100644 Binary files a/images/mle/Econ381scores_MLE.png and b/images/mle/Econ381scores_MLE.png differ diff --git a/images/mle/Econ381scores_MLEconstr.png b/images/mle/Econ381scores_MLEconstr.png index 981d033..2e058f5 100644 Binary files a/images/mle/Econ381scores_MLEconstr.png and b/images/mle/Econ381scores_MLEconstr.png differ diff --git a/images/mle/Econ381scores_SurfaceLogLike.png b/images/mle/Econ381scores_SurfaceLogLike.png index 8e3993a..fec9387 100644 Binary files a/images/mle/Econ381scores_SurfaceLogLike.png and b/images/mle/Econ381scores_SurfaceLogLike.png differ diff --git a/images/mle/Econ381scores_SurfaceLogLikeZoom.png b/images/mle/Econ381scores_SurfaceLogLikeZoom.png index ae5f7c8..8d853c0 100644 Binary files a/images/mle/Econ381scores_SurfaceLogLikeZoom.png and b/images/mle/Econ381scores_SurfaceLogLikeZoom.png differ diff --git a/images/smm/Econ381scores_smm4_1.png b/images/smm/Econ381scores_smm4_1.png index e61145c..51ae5a9 100644 Binary files a/images/smm/Econ381scores_smm4_1.png and b/images/smm/Econ381scores_smm4_1.png differ diff --git a/images/smm/GBtree.png b/images/smm/GBtree.png deleted file mode 100644 index a578b05..0000000 Binary files a/images/smm/GBtree.png and /dev/null differ diff --git a/images/smm/hist_pdf_1.png b/images/smm/hist_pdf_1.png deleted file mode 100644 index db8e94d..0000000 Binary files a/images/smm/hist_pdf_1.png and /dev/null differ diff --git a/setup.py b/setup.py index 3bbe29a..660ca7e 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="CompMethods", - version="0.0.7", + version="0.1.0", author="Richard W. Evans", author_email="rickecon@gmail.com", long_description=readme,
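For the estimation-strategy item in the proposal exercise added in `paper.md` above ("GMM: What moments will you use?"), the following is a minimal sketch of the kind of two-moment GMM criterion a proposal might specify. The simulated normal data, the mean and variance moments, the identity weighting matrix, and the parameter names are placeholder assumptions for illustration only, not a prescribed approach.

```python
# Illustrative sketch (assumptions throughout): a two-moment GMM criterion that
# matches the data mean and variance to the mean and variance implied by a
# simple two-parameter model (here, a normal distribution).
import numpy as np
import scipy.optimize as opt

rng = np.random.default_rng(seed=25)
data = rng.normal(loc=400.0, scale=70.0, size=1000)  # placeholder data

def data_moments(xvals):
    # Moments computed from the data: mean and variance
    return np.array([xvals.mean(), xvals.var()])

def model_moments(mu, sigma):
    # Moments implied by the model; for a normal model they are (mu, sigma**2)
    return np.array([mu, sigma ** 2])

def gmm_criterion(params, xvals, W):
    # Quadratic form in the percent deviations of model moments from data moments
    mu, sigma = params
    err = (model_moments(mu, sigma) - data_moments(xvals)) / data_moments(xvals)
    return err @ W @ err

W = np.eye(2)  # identity weighting matrix for a first-step estimate
results = opt.minimize(
    gmm_criterion, np.array([300.0, 50.0]), args=(data, W),
    method="L-BFGS-B", bounds=((None, None), (1e-10, None)),
)
print("mu_GMM, sigma_GMM =", results.x)
```

In an actual proposal, the moments, the model solution method, and the weighting matrix would come from the research question and the model described in items 3 through 5 of the exercise.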