\documentclass[a4paper,12pt,fleqn]{article}
% The parameter in [..] can be beamer or handout
% Based on file
% $Header: /cvsroot/latex-beamer/latex-beamer/solutions/generic-talks/generic-ornate-15min-45min.en.tex,v 1.4 2004/10/07 20:53:08 tantau Exp $
% This file is a solution template for:
% - Giving a talk on some subject.
% - The talk is between 15min and 45min long.
% - Style is ornate.
% Copyright 2004 by Till Tantau <tantau@users.sourceforge.net>.

% When replacing figures, put a % at the end of the line
% to prevent a linebreak being read!


\usepackage{natbib}

\usepackage{rotating}
\usepackage{longtable, lscape}
\usepackage{threeparttablex}
%\usepackage[top=2.5cm, bottom=2.5cm, left=2cm , right=1.8cm]{geometry}
% geometry may have difficulties combining with hyperref
\usepackage[pdftex,dvipsnames]{color}

\usepackage{enumitem}
\usepackage{appendix}
\usepackage{framed}

\usepackage{times}
%\usepackage[T1]{fontenc}
% Or whatever. Note that the encoding and the font should match. If T1
% does not look nice, try deleting the line with the fontenc.
% The following redefines of amsmath are suggested by the hyperref README file.
\usepackage{amsmath}
\let\equation\gather
\let\endequation\endgather
\usepackage{amssymb}
%\usepackage[bookmarksopen=false]{hyperref}
% in the newer version of pdfscreen, hyperref can be loaded first with its own parameters
%\usepackage{hyperref}
\usepackage[pdfstartview={},pdftex,bookmarksopen]{hyperref}%,colorlinks}
%\usepackage[pdfstartview={},
%            pdftex,
%            plainpages=false]{hyperref}  % ,colorlinks


\usepackage{algorithm}
\usepackage{algorithmic}

\setlength{\bibsep}{0.01in}
\renewcommand\bibsection{\section{\refname}}

\newcommand{\firsttabitem}{\hspace{4mm} $\bullet$ \hspace{1mm}}
\newcommand{\tabitem}{\\ \\ \hspace{4mm} $\bullet$ \hspace{1mm}}

\makeatletter
\newenvironment{frindentation}[2]
{\par \setlength{\leftmargin}{#1}       \setlength{\rightmargin}{#2}
  \advance\linewidth -\leftmargin       \advance\linewidth -\rightmargin
  \advance\@totalleftmargin\leftmargin  \@setpar{{\@@par}}%
  \parshape 1 \@totalleftmargin         \linewidth \ignorespaces
  \begin{minipage}[h]{\linewidth}\begin{framed}}{\end{framed}\end{minipage}\par}
\newenvironment{frindent}[3]
{\par \setlength{\leftmargin}{#1}       \setlength{\rightmargin}{#2}
  \advance\linewidth - \leftmargin       \advance\linewidth -\rightmargin
  \advance\linewidth - #3
  \advance\@totalleftmargin\leftmargin  \@setpar{{\@@par}}%
  \parshape 1 \@totalleftmargin         \linewidth \ignorespaces
  \begin{minipage}[h]{\linewidth}\begin{framed}}{\end{framed}\end{minipage}\par}
\newenvironment{indentation}[2]
{\par \setlength{\leftmargin}{#1}       \setlength{\rightmargin}{#2}
  \advance\linewidth -\leftmargin       \advance\linewidth -\rightmargin
  \advance\@totalleftmargin\leftmargin  \@setpar{{\@@par}}%
  \parshape 1 \@totalleftmargin         \linewidth \ignorespaces}{\par}
\makeatother



\newcommand{\Reals}{\mbox{I}\hspace{-.07ex}\mbox{R}}
\newcommand{\E}{\mbox{E}}
\renewcommand{\P}{\mbox{P}}
\newcommand{\se}{\mbox{s.e. }}
\newcommand{\var}{\mbox{var}}
\newcommand{\Cov}{\mbox{Cov}}
\newcommand{\logit}{\mbox{logit}}
\newcommand{\uh}{\underline{h}}
\newcommand{\tuh}{\tilde{\uh}}
\newcommand{\neqsum}[3]
{\, \sum_{\stackrel{\scriptstyle #1 = 1}{\scriptstyle #2 \neq #3}}^g
\,}

\newcommand{\equat}[1]{$#1$}
\newcounter{savenumi}
\newcounter{savenumi2}
\newcounter{savenumi3}
\newcounter{savenumi4}

\renewcommand{\baselinestretch}{1.1}
\newcommand{\cc}{\color[named]{SeaGreen}}
\newcommand{\itc}{\color[rgb]{0.1,0.6,0.3}}  % itemcolor
\newcommand{\siec}{\color[named]{Sienna}} % Siena color (RawSiena dvipsnames)
\newcommand{\emec}{\color[rgb]{0.3,0.1,0.0}}  %  \color[named]{Emerald}}


\newcommand{\cdiamond}{{\color[named]{MidnightBlue}\diamond}}
\newcommand{\cbullet}{{\color[named]{MidnightBlue}$\bullet$}}
\newcommand{\cques}{{\color[named]{MidnightBlue}?`}}
\newcommand{\rast}{{\color[named]{Red}$\ast$}}
\newcommand{\gast}{{\color[named]{Green}$\ast$}}

\newcommand{\rtar}{{\itc $\Rightarrow$}}
\newcommand{\qques}{\raisebox{1.2ex}{\rotatebox{180}
                    {\color[named]{ForestGreen}\bf\large ?}}}
\newcommand{\excl}{{\color[named]{ForestGreen}\bf\large !}}
% or OliveGreen
\newcommand{\ttimes}{\,\times\,}

\setlength{\bibsep}{0.1em}
\renewcommand\bibsection{\section{\refname}}


\setlength{\parskip}{1.5ex plus0.5ex minus0.5ex}
\setlength{\parindent}{0ex}
\hyphenpenalty=8000
%\tolerance=1000

\newcommand{\sn}{{\scriptstyle N}}
\renewcommand{\sb}{{\scriptstyle B}}
\newcommand{\extraver}{{$\phantom{\Big(}$}}
\newcommand{\extraverr}{{$\phantom{\big(}$}}

\renewcommand{\th}[1]{$\theta_{#1}$}
\newcommand{\ga}[1]{$\gamma_{#1}$}
\newcommand{\be}[1]{$\beta_{#1}$}
\newcommand{\maxr}{\textit{maxbeh}_r}

\newcommand{\vit}{\theenumi}

\newcommand{\mcc}[2]{\multicolumn{#1}{c}{#2}}
\newcommand{\mcp}[2]{\multicolumn{#1}{c|}{#2}}

\newcommand{\equa}[1]{\[#1\]}

\newcommand{\separationb}{\\[0.5ex]\hline\rule{0pt}{2ex}}
\newcommand{\separationg}{\\[0.5ex]\arrayrulecolor{grey}\hline\arrayrulecolor{black}
                         \rule{0pt}{2ex}}

\newcommand{\nm}[1]{\textsf{\small #1}}
\newcommand{\nnm}[1]{\textsf{\small\textit{#1}\ }}
\newcommand{\nmm}[1]{\nnm{#1}}
\newcommand{\R}{{\sf R }}
\newcommand{\Rn}{{\sf R}}
\newcommand{\ms}{\textsl{\textsf{\small ms}}} % ministep
\newcommand{\sfn}[1]{\textsf{#1}} % Siena function

\newcommand{\remark}[1]{{\textcolor[named]{Red}{#1}}}
%\newcommand{\adda}[1]{{\textcolor[named]{Red}{#1}}}  % additions
%\newcommand{\addc}[1]{{\textcolor[named]{MidnightBlue}{#1}}}  % clarifications
\newcommand{\adda}[1]{{#1}}  % additions
\newcommand{\addc}[1]{{\textcolor[named]{Black}{#1}}}  % clarifications
\newcommand{\addaa}[1]{{\textcolor[named]{Black}{#1}}}  % additions
\newcommand{\addca}[1]{{\textcolor[named]{Black}{#1}}}  % clarifications
\newcommand{\addab}[1]{{\textcolor[named]{Black}{#1}}}  % additions
\newcommand{\addcb}[1]{{\textcolor[named]{Black}{#1}}}  % clarifications
\newcommand{\addcc}[1]{{\textcolor[named]{Black}{#1}}}
\newcommand{\addcd}[1]{{\textcolor[named]{Black}{#1}}}
\newcommand{\addce}[1]{{\textcolor[named]{Black}{#1}}}
\newcommand{\addcf}[1]{{\textcolor[named]{Black}{#1}}}

\hyphenation{Snij-ders Duijn Huis-man Steg-lich Schwein-ber-ger
                Zeg-ge-link siena-Data-Create short-Name short-Names
             multi-level inter-action coef-fi-cient para-meter para-meters people ex-ample
             reci-pro-city
             know-ledge pos-sible multi-para-meter mini-step mini-steps GWESPFF GWESPBB}
\setlength{\parskip}{1.5ex plus0.5ex minus0.5ex}
\setlength{\parindent}{0ex}



\title{Siena algorithms}

\author{Tom A.B.\ Snijders }

\begin{document}

\maketitle

\tableofcontents
\newpage

This paper gives a sketch of the main algorithms used in RSiena.
It is meant as background material for understanding the code of
RSiena.

For the Generalized Method of Moments (`GMoM'), there is the
separate text \textsf{Changes\_RSiena\_GMoM.pdf}.

\iffalse
Changes in the version of May 17, 2011:\\
the \emph{Connect} procedure now avoids the creation of
incompatibilities due to  \nnm{higher},
\nnm{disjoint}, or \nnm{atleastone} requirements.

Changes in the version of February 11, 2011:\\
creation effect distinguished from endowment effect.

Changes in the version of January 3, 2011:\\
An erroneous change in the version of November 17, 2010,
 in \textit{MH\_InsPermute} and  \textit{MH\_DelPermute}, was redressed.

Changes in the version of November 19, 2010:\\
correction of \textit{MH\_DelPermute} (the correction
of November 17 was defined incorrectly).\\
Some changes in Section \ref{S_14.2} about changes in the rate functions
and associated scores.

Changes in the version of November 17, 2010:\\
suppression of some unimportant superfluous operations in \textit{MH\_InsPermute};
correction of \textit{MH\_DelPermute}. These changes are signaled \remark{in red}.\\
Also, the last paragraph of Section \ref{S_14.1} was changed,
and a lot of Section \ref{S_14.2} (further explanation).

The changes in the version of October 30, 2010, are corrections in part B
of procedures \textit{MH\_InsMis} and \textit{MH\_DelMis}.

The change in the version of August 28, 2010, is a clarification
at the start of Chapter 9.

The additions in the version of May 30, 2010, pertain
to the following.
\begin{enumerate}
\item The earlier, incomplete and incorrect, procedure \textit{MH\_RandomMis}
      was dropped and replaced by the two procedures \textit{MH\_InsMis}
      and \textit{MH\_DelMis} -- which are much more in line with the
      other existing procedures anyway.
\item Very small changes to correct errors in procedure  \textit{MH\_CancelDiag}.
\item Calculation of the new CCPNumber in \textit{MH\_InsPermute}.
\item Small changes to correct errors in procedures  \textit{MH\_InsPermute}
     and  \textit{MH\_DelPermute}.
\end{enumerate}

The additions in the version of May 11, 2010, pertain to the
following issues.
\begin{enumerate}
\item In the procedures \textit{MH\_InsPermute} and \textit{MH\_DelPermute},
      there was an error (because it was not certain that  \textit{MH\_InsPermute}
      increased the number of CCPs by 1),
      and a difficulty because also the permutation could
      change the number of CCPs, which would require complicated calculations
      for the acceptance probability. This was corrected by an extra
      requirement on the ministep inserted, and a restriction of the length
      of the permuted interval of ministeps, so that the procedure indeed
      creates one new CCP and otherwise cannot change the number of CCPs.\\
      This also required a parallel change  in \textit{MH\_DelPermute}.
\item The calculation of the score function for rate parameters in the
      likelihood-based estimation was drastically simplified,
      in a way which also has the advantage of being easily extended
      to the general case of state-dependent rate functions.
\item In the likelihood-based estimation, some extra checks in cases
      of conditions \nnm{uponly}, \nnm{downonly}, \nnm{higher},
      \nnm{disjoint}, and \nnm{atleastone} were added,
      which earlier had been overlooked.
\end{enumerate}
\fi

\section{Notation}

\adda{Logarithms (denoted $\log$) are natural logarithms.}

Symbols given \nnm{in italic sf font} refer to the names
of variables used in the \R or C++ code.

\emph{Generic symbols for variables}

There are $R_N$ networks and $R_B$ behavior variables.\\
We require $R_N + R_B \geq 1$; if the current structure of RSiena
requires this, then we require $R_N \geq 1$ (but if it is easy to
work with $R_N = 0, R_B \geq 1$ then it would be nice to permit this.)
\begin{tabbing}
$i, j$ \hspace*{1em} \=  \hspace*{6em} \=  actors.\\[1ex]
$m$ \> \> index for time period from $t_{m-1}$ to $t_m$ ($m = 2, \ldots, M$).  \\[1ex]
$M$ \> \nnm{observations}   \> total number of observations\\[1ex]
$x$ \> \> all $R_N$ networks jointly (one outcome).\\[1ex]
$z$ \> \> all $R_B$ behaviors jointly (one outcome).\\[1ex]
$y$ \> \> state: all networks and behaviors jointly (one outcome).\\[1ex]
$W$ \> \> variable with values $N$ or $B$, indicating \\
    \> \> whether something refers to network or behavior.\\[1ex]
$r$ \> \> index number of networks or behaviors,\\
    \> \> ranging from 1 to $R_W$ .\\[1ex]
missing \> \> missingness indicators \\
        \> \>  for ordered triples $(i,j,r)$ referring to networks $r$ \\
        \> \> and for ordered pairs $(i,r)$ referring to behaviors;\\
        \> \> values are F (\nnm{False}) and T (\nnm{True});\\
        \> \> if T, then further specifications are Start / End / Both,\\
        \> \> referring to the observation period from $t_{m-1}$ to $t_m$ .\\[1ex]
$\maxr$ \> \> maximum of the range of the $r^{\text{th}}$ behavior variable.\\[1ex]
$^N$ \> \> as superscript: refers to network dynamics.\\[1ex]
$^B$ \> \> as superscript: refers to behavior dynamics.\\[1ex]
\end{tabbing}
\medskip

\emph{Changing variables (outcomes)}

\begin{tabbing}
$^{\rm{obs}}$ \hspace*{1em} \=  \hspace*{6em} \= as superscript: refers to observed values.\\[1ex]
$\theta$ \> \nnm{theta} \> vector of all statistical parameters. \\[1ex]
$p$  \>  \nnm{pp}  \> dimension of $\theta$.\\[1ex]
$J$ \> \>  simulated data score function (vector of partial derivatives of log-likelihood) \\
     \>  \>    ($p$-vector).\\[1ex]
$t$ \hspace*{3em} \> \> time.\\[1ex]
$N^{(r)}_{ij}$ \> \> dummy tie variable indicating $ i \stackrel{r}{\rightarrow} j $ for $r^{\text{th}}$ network.\\[1ex]
$B^{(r)}_{i}$ \> \> behavior variable for $r^{\text{th}}$ behavior for actor $i$.
\end{tabbing}
\medskip

Replacing an index by + denotes summation over this index.\\
Toggling a number $a$ in $\{0, 1\}$ means replacing $a$ by $1-a$.
\medskip


\iffalse
earlier:
\begin{tabbing}
$i, j$ \hspace*{1em} \=  \hspace*{6em} \=  actors.\\[1ex]
$m$ \> \> index for time period from $t_{m-1}$ to $t_m$ ($m = 2, \ldots, M$).  \\[1ex]
$M$ \> \nnm{observations}   \> total number of observations\\[1ex]
$x$ \> \> all networks jointly.\\[1ex]
$z$ \> \> all behaviors jointly.\\[1ex]
$r$ \> \> index number of networks or behaviors.\\[1ex]
$^N$ \> \> as superscript: refers to network dynamics.\\[1ex]
$^B$ \> \> as superscript: refers to behavior dynamics.\\[1ex]
$^{\rm{obs}}$ \> \> as superscript: refers to observed values.\\[1ex]
$W$ \> \> variable with values $N$ or $B$.\\[1ex]
$\theta$ \> \nnm{theta} \> vector of all statistical parameters. \\[1ex]
$p$  \>  \nnm{pp}  \> dimension of $\theta$.\\[1ex]
$J$ \> \>  simulated data score function (vector of partial derivatives of log-likelihood) \\
     \>  \>    ($p$-vector).\\[1ex]
\end{tabbing}
\medskip


\emph{Changing variables (outcomes)}

\begin{tabbing}
$t$ \hspace*{3em} \= time.\\[1ex]
$N^{(r)}_{ij}$ \> dummy tie variable indicating $ i \stackrel{r}{\rightarrow} j $ for $r^{\text{th}}$ network.\\[1ex]
$B^{(r)}_{i}$ \> behavior variable for $r^{\text{th}}$ behavior for actor $i$.
\end{tabbing}
\medskip

Replacing an index by + denotes summation over this index.\\
Toggling a number $a$ in $\{0, 1\}$ means replacing $a$ by $1-a$.
\medskip
\fi

\emph{Functions}

\begin{tabbing}
$\lambda^N(r,i,x,z)$ \hspace*{3em} \= rate function, network $r$.\\
                                  \> (0 for inactive actors)\\[1ex]
$\lambda^B(r,i,x,z)$ \hspace*{3em} \> rate function, behavior $r$.\\
                                  \> (0 for inactive actors)\\[1ex]
$f^N(r,i,x,z)$ \hspace*{3em} \> evaluation function, network $r$.\\[1ex]
$f^B(r,i,x,z)$ \hspace*{3em} \> evaluation function, behavior $r$.\\[1ex]
$g_e^N(r,i,j,x,z)$ \hspace*{3em} \> endowment function, network $r$.\\[1ex]
$g_c^N(r,i,j,x,z)$ \hspace*{3em} \> creation function, network $r$.\\[1ex]
$g_e^B(r,i,x,z)$ \hspace*{3em} \> endowment function, behavior $r$.\\[1ex]
$g_c^B(r,i,x,z)$ \hspace*{3em} \> creation function, behavior $r$.\\[1ex]
$\Delta f^N(r,i,j,x,z)$ \hspace*{3em} \> change in $f^N(r,i,x,z)$
                                by toggling $N^{(r)}_{ij}$.\\[1ex]
$\Delta f^B(r,i,v,x,z)$ \hspace*{3em} \> change in $f^B(r,i,x,z)$
                                by changing $B^{(r)}_i$ to $B^{(r)}_i + v$.\\[1ex]
$ \sim E(\lambda)$ \> generate random variable according to exponential distribution\\
                   \> with parameter $\lambda$ (note: expected value $1/\lambda$).\\[1em]
\end{tabbing}
%\medskip

\iffalse
earlier
\begin{tabbing}
$\lambda^N(r,i,x,z)$ \hspace*{3em} \= rate function, network $r$.\\
                                  \> (0 for inactive actors)\\[1ex]
$\lambda^B(r,i,x,z)$ \hspace*{3em} \> rate function, behavior $r$.\\
                                  \> (0 for inactive actors)\\[1ex]
$f^N(r,i,x,z)$ \hspace*{3em} \> evaluation function function, network $r$.\\[1ex]
$f^B(r,i,x,z)$ \hspace*{3em} \> evaluation function, behavior $r$.\\[1ex]
$g^N(r,i,j,x,z)$ \hspace*{3em} \> endowment function function, network $r$.\\[1ex]
$g^B(r,i,x,z)$ \hspace*{3em} \> endowment function, behavior $r$.\\[1ex]
$\Delta f^N(r,i,j,x,z)$ \hspace*{3em} \> change in $f^N(r,i,x,z)$
                                by toggling $N^{(r)}_{ij}$.\\[1ex]
$\Delta f^B(r,i,v,x,z)$ \hspace*{3em} \> change in $f^B(r,i,x,z)$
                                by changing $B^{(r)}_i$ to $B^{(r)}_i + v$.\\[1ex]
$ \sim E(\lambda)$ \> generate random variable according to exponential distribution\\
                   \> with parameter $\lambda$ (note: expected value $1/\lambda$).\\[1em]
\end{tabbing}
\fi

Note. Whether the endowment function makes sense for behavior variables with a range
of more than two values, is doubted. But we keep it included anyway, for the moment.
\medskip

The \R convention is followed of denoting an assignment statement by $a \leftarrow b$,
meaning that the variable $a$ gets the value $b$.


\section{Outline of model dynamics / simulation algorithm}
\label{S_sim}

The tie-based model is defined as a continuous-time Markov chain by the following
algorithm for generating the next change in the outcome.
This is formulated here for the case that the state space includes networks as well as behavior.
If there are no behavior variables $B$, then the steps referring to these variables
can simply be dropped.
In the code this is function \nm{simstats0c}.

To estimate derivatives of expected values of statistics with respect to the
parameters, the score function method
\citep{SchweinbergerSnijders07a}
is used in the default method to estimate standard errors.
This is indicated by `SF only' and
can be skipped if the
finite differences (`FD') option, which also employs common random numbers,
is used to estimate standard errors.

For each network variable numbered $r$ the following logical (boolean)
variables are defined at the moment of data entry:
\begin{itemize}
\item \nnm{uponly}($r$) = $\big\{\text{for all }i, j, m:
                         x^{(r)\,\rm{obs}}_{ij}(t_{m})  \leq  x^{(r)\,\rm{obs}}_{ij}(t_{m+1}) \big\}$;
\item \nnm{downonly}($r$) = $\big\{\text{for all }i, j, m:
                         x^{(r)\,\rm{obs}}_{ij}(t_{m})  \geq  x^{(r)\,\rm{obs}}_{ij}(t_{m+1}) \big\}$;
\end{itemize}
For each ordered pair of network variables numbered $r$ and $r' \neq r$,
we define the following logical (boolean) variables:
\begin{itemize}
\item \nnm{higher}($r, r'$) = $\big\{\text{for all }i, j, m:
                         x^{(r)\,\rm{obs}}_{ij}(t_{m})  \geq  x^{(r')\,\rm{obs}}_{ij}(t_{m}) \big\}$;
\item  \nnm{disjoint}($r, r'$) = $\big\{\text{for all }i, j, m:
                       \min\{  x^{(r)\,\rm{obs}}_{ij}(t_{m}), \,
                           x^{(r')\,\rm{obs}}_{ij}(t_{m}) \} = 0 \big\}$;
\item \nnm{atleastone}($r, r'$) = $\big\{\text{for all }i, j, m:
                       \max\{  x^{(r)\,\rm{obs}}_{ij}(t_{m}), \,
                           x^{(r')\,\rm{obs}}_{ij}(t_{m}) \} = 1 \big\}$.
\end{itemize}
\addce{Analogous definitions \nnm{uponly}, \nnm{downonly},
\nnm{higher} can be made for behaviour variables.}


\emph{Model for microstep}


\begin{enumerate}
\item Initialize  time at $t=0$; initialise networks and behaviors $x, z$ at
      their observations at wave $m-1$.\\
      SF only: initialise the score function at $J_m = 0$.
\item \label{itemstart}
      Current time, networks, behaviors, denoted by $t, x, z$.
\item For all $r$, generate $\Delta t^N_r \sim E(\lambda^N(r,+,x,z))$.
\item For all $r$, generate $\Delta t^B_r \sim E(\lambda^B(r,+,x,z))$.
\item Let $W, r$ be the variable for which $\Delta t^W_r = \min_r\{\Delta t^N_r, \Delta t^B_r \}$.\\
      If $W = N$, goto \ref{itemx}; if $W = B$, goto \ref{itemz}.\\
      (\emph{Note}. An alternative, mathematically equivalent, is to choose\\
      $(W, r)$ with probabilities proportional to $\lambda^W(r,+,x,z)$ and\\
      only for this $W, r$ generate $\Delta t^W_r \sim E(\lambda^+(+,+,x,z))$.\\
      This is more efficient but the gain in computation time must be negligible.)
\item Choose $i$ with probabilities $\lambda^W(r,i,x,z) / \lambda^W(r,+,x,z)$.
\item Set $t = t + \Delta t^W_r$. (\emph{time step})
\item SF only:
      set
      \[
      J_m \,=\, J_m \,+\,
         \frac{\partial \ln \big(\lambda^W(r,i,x,z)/\lambda^+(+,+,x,z)\big)}
               {\partial \theta} \,+\,
         \frac{\partial \ln \lambda^W(r,i,x,z) }{ \partial \theta} \ .
      \]
      (Note: first added term for generating $W, r, i$; second term for $t$. )
\item \label{itemx}
      Define $C$ as the set of $j$ for which $N_{ij}^{(r)}$ is allowed to change.\\
      This is the set of all $j \in \{1, \ldots, n\}$ from which are excluded
      all $j \neq i$ for which at least one of the following hold:
      \begin{enumerate}
      \item $N_{ij}^{(r)} $ is structurally determined;
      \item \nnm{uponly}($r$) and $N_{ij}^{(r)} = 1$;
      \item \nnm{downonly}($r$) and $N_{ij}^{(r)} = 0$;
      \item for some $r' \neq r$, \nnm{higher}($r, r'$) and $N_{ij}^{(r)} = N_{ij}^{(r')} = 1$;
      \item for some $r' \neq r$, \nnm{higher}($r', r$) and $N_{ij}^{(r)} = N_{ij}^{(r')} = 0$;
      \item for some $r' \neq r$, \nnm{disjoint}($r, r'$) and $N_{ij}^{(r)} = 0, N_{ij}^{(r')} = 1$;
      \item for some $r' \neq r$, \nnm{atleastone}($r, r'$) and $N_{ij}^{(r)} = 1, N_{ij}^{(r')} = 0$.
      \end{enumerate}
      Obviously, in many cases, there are never any excluded $j$; and if there
      is only one dependent network variable, the four last conditions
      are never satisfied.\\
      If $C$ has one element, this must be $i$; then go to \ref{itemstart}.\\
      (The following steps in this item then are vacuous, so they can be skipped.)\\
      If $C$ is empty, this is an error, and the program must stop with an error message.

      For all $j \in C$, calculate $h_j = \Delta f^N(r,i,j,x,z)$, and $h_i = 0$.\\
      For all $j \in C$ with $N^{(r)}_{ij} = 1$, calculate $h_j = h_j - g_e^N(r,i,j,x,z)$.\\
      For all $j \in C$ with $N^{(r)}_{ij} = 0$, calculate $h_j = h_j + g_c^N(r,i,j,x,z)$.\\
      Choose $j \in C \cup \{i\}$ with probabilities
      \begin{equation}
         \pi_j = \frac{\exp(h_j) }{ \sum_k \exp(h_k) }               \label{pij}
      \end{equation}
      SF only: set
      $J_m = J_m + \partial h_j / \partial \theta \,-\, \sum_k \pi_k\, \partial h_k / \partial \theta  $.\\
      If $j \neq i$, toggle $N^{(r)}_{ij}$. (\emph{network step})\\
      Goto \ref{itemstart}.
\newpage
\item \label{itemz}
      Let $C$ be the set of $v \in \{-1, 1\}$ \\
      for which $B^{(r)}_i + v$ is
      within the range of $B^{(r)}_i$.\\
      For all $v \in C$, calculate $h_v = \Delta f^B(r,i,v,x,z)$, and $h_0 = 0$.\\
      If $-1 \in C$, calculate $h_{-1} = h_{-1} - g_e^B(r,i,x,z)$.\\
      If $+1 \in C$, calculate $h_{+1} = h_{+1} + g_c^B(r,i,x,z)$.\\
      Choose $v \in C \cup \{0\}$ with probabilities
      \begin{equation}
         \pi_v = \frac{  \exp(h_v) }{ \sum_u \exp(h_u) }              \label{piv}
      \end{equation}
      SF only: set
      $J_m = J_m + \partial h_v / \partial \theta \,-\, \sum_u \pi_u \, \partial h_u / \partial \theta  $.\\
      Add $v$ to $B^{(r)}_{i}$. (\emph{behavior step})\\
      Goto \ref{itemstart}.
\end{enumerate}
\medskip

\emph{Stopping criterion}

\begin{enumerate}
\item In the unconditional estimation option, microsteps continue until
      $t \geq 1$. \\
      Note that, by convention, time duration between waves is set to be unity.\\
      SF only: set \\
      $J_m = J_m - (1 - t^{\rm{last}})
           \big(\partial \ln \lambda^+(+,+,x,z) / \partial \theta\big) $,\\
      where $t^{\rm{last}}$ is the last generated value of $t$ before $t$ exceeded 1.
\item In the conditional estimation option, if the conditioning variable
      is network $N^{(r)}$, microsteps continue until
      \[
      \sum_{i,j} \mid N^{(r)}_{ij} - x^{(r)\,\rm{obs}}_{ij}(t_{m-1}) \mid \geq
       \sum_{i,j} \mid x^{(r)\,\rm{obs}}_{ij}(t_{m})  - x^{(r)\,\rm{obs}}_{ij}(t_{m-1}) \mid \ ,
      \]
      where the sum is over all tie variables that are not structurally fixed
      at $t_{m-1}$ or $t_m$ (note that
      it is possible that tie variables are structurally fixed but have different subsequent values).\\
      If the conditioning variable
      is behavior $B^{(r)}$, microsteps continue until
      \[
      \sum_{i} \mid B^{(r)}_{i} - z^{(r)\,\rm{obs}}_{i}(t_{m-1}) \mid \geq
       \sum_{i} \mid z^{(r)\,\rm{obs}}_{i}(t_{m})  - z^{(r)\,\rm{obs}}_{i}(t_{m-1}) \mid \ ,
      \]
      where the sum is over all actors that are not structurally inactive
      at $t_{m-1}$ or $t_m$.
\end{enumerate}
\medskip

\emph{Score function}
\smallskip

The generated statistics $S$ can be written as $S = \sum_{m=2}^M S_m$, where $S_m$ is calculated
in consequence of the simulation of the process in the period from $t_{m-1}$ to $t_m$.
Denote the value of $J$ generated in this period by $J_m$.
To use the score function method, we calculate
\begin{equation}
%  <\!SJ\!> \,=\, \sum_{m=2}^M  (S_m - s_m^{\rm{obs}})J_m' \ .    \label{SJ}
  <\!SJ\!> \,=\, \sum_{m=2}^M  S_m \, J_m' \ .    \label{SJ}
\end{equation}
This is a $p \times p$ matrix,
and an estimate for $\partial E_{\theta}S/\partial \theta$.\\
(Or do we work with $\sum_{m=2}^M  (S_m - s_m^{\rm{obs}}) J_m'$
for numerical accuracy?)

The decomposition into the $M-1$ periods is kept because it allows a more efficient
variance reduction (see further down).

(Mathematical note: for simulations taking place according to parameter \th{},
$E_{\theta} J_m = 0$.
We will later subtract a value $s_m J_m '$
for an `almost constant' $s_m$; this does not affect the expected value,
but leads to a considerable variance reduction;
see \citet{SchweinbergerSnijders07a}.)

\section{Outline of Robbins-Monro algorithm for MoM and ML}
\label{S_RobMon}

This section is based on the appendix of
\citet{Snijders01}, and updated
to include the algorithm changes that were incorporated after 2001.
The implementation of the algorithm in RSiena is function
\nm{robmon}, and has a number of additional
details to improve convergence.

This function is used for ML as well as MoM estimation.
One difference is that for MoM, estimation statistics are used
as described in Section~\ref{S_stats}, while for ML,
the statistics are the complete-data score functions.
Another difference is that the covariance matrix of the estimator
(which implies the standard errors) is estimated differently.
In the current section, the description of the covariance
matrix of the estimator is for the MoM.
The description of the covariance matrix of the ML estimator is
given in Section~\ref{S_MLRM}.

The purpose of the algorithm is to approximate the solution of the
moment equation
\begin{equation}
  \E_\theta S = s\, ,   \label{mom_eq0}
\end{equation}
where $s = s^{\rm{obs}}$, the observed value.
The solution is denoted by $\theta_0$.
The algorithm is a multivariate version of the Robbins-Monro (1951) algorithm.
It uses the idea of Polyak (1990) and Ruppert (1988)
to employ a diagonal matrix $\tilde{D}$ in the iteration step (\ref{step})
\begin{equation}
 \hat{\theta}_{N+1} \,=\, \hat{\theta}_N \, - \, a_N\, {\tilde{D}}^{-1} \, (S_N - s)~,   \label{step}
\end{equation}
 and estimate
the solution by partial averages of $\hat{\theta}_N$ rather than the
last value; and it uses the idea of Pflug (1990) to let the values of $a_N$ remain
constant if the average products of successive values $(S_N - s)(S_{N-1}-s)$
are positive, since this suggests that the process still is drifting toward its
limit value.
However, the specification used here deviates from Pflug's proposal by requiring,
for the premature decrease of $a_N\,$,
that for {\em each} coordinate the partial sum of the product of successive values
be negative, rather than requiring this only for the sum over the coordinates.
Further, the number of steps for which $a_N$ is constant is bounded between a lower
and an upper limit to ensure that $a_N$ is of order $N^{-c}$.

Under the option \nnm{doubleAveraging}, the iteration step is
\begin{equation}
 \hat{\theta}_{N+1} \,=\, \bar{\theta}_N \, - \, a_N\, N\,{\tilde{D}}^{-1} \, (\overline S_N - s)~,   \label{step.double}
\end{equation}
where
\[
  \bar{\theta}_N \,=\, \frac1N \sum_{n \leq N} \hat\theta_n \ , \hspace{1em}
  \overline S_N \,=\, \frac1N \sum_{n \leq N} s_n \ ,
\]
implementing a proposal of \citet{bather1989} studied by
\citet{SchwabeWalk96}.

Whether the algorithm yields an estimate that indeed solves
the moment equation (\ref{mom_eq0}) to a satisfactory degree of precision
is checked in the `third phase' of the algorithm below.

The reason for incorporating the matrix ${\tilde{D}}$ is to achieve better compatibility
between the scales of $S$ and of $\theta$.
The diagonal elements of ${\tilde{D}}$ are defined as the estimated values of the derivatives
$\partial \E_{\theta}(S_k) / \partial \theta_k$ where $\theta$ is
at its initial value.
To see that this leads to compatibility of the scales of $S$ and $\theta$
note that in the extreme case where $\var(S_k) = 0$ and the diagonal elements of ${\tilde{D}}$
are equal to
$\partial \E_{\theta}(S_k) / \partial \theta_k$,
(\ref{step}) for $a_N = 1$ is just the iteration step of the Newton-Raphson
algorithm applied to each coordinate of $S$ separately.
Thus, beginning the algorithm with $a_N$ in the order of magnitude of 1
will imply that the initial steps have an approximately right order of magnitude.

The results of Polyak and Ruppert do not point exclusively
to diagonal matrices; other positive definite matrices could also be used.
Therefore, the option is available instead of a diagonal matrix
to use a partial diagonalization; this uses the parameter
\nnm{diagonalize} set in \nm{sienaModelCreate}.

The number of dimensions of \th{} and of $S$ is denoted by $p$
and the initial value is denoted \th{1}.
`Generating $ S \sim \theta $' means to simulate the model
according to parameter value $\theta$ and calculate the statistics $S$.

The estimation of derivatives has two options: finite differences (`FD')
and score function (`SF').
SF is more efficient and unbiased
\citep{SchweinbergerSnijders07a}
and therefore is the default,
FD is available for some models for which
the derivatives of the log-likelihood needed for SF have not yet been
worked out.

The FD option is based on disturbing the current parameter values
by adding the value $\epsilon_j$, and using common random numbers.
Because of the discrete nature of the simulated
statistics, a very small $\epsilon_j$ will yield simulated values that
with high probability are equal to the values obtained without
the disturbance. This is undesirable
(see \citet{SchweinbergerSnijders07a}).
Good values of $\epsilon_j$ must be such that with rather
high probability (say, more than .5) the simulated values are
not identical to those obtained without the disturbance.


Symbols given \nnm{in italic sf font} refer to the names
of variables used in the \R code.

The standard initial value is calculated in function
\nnm{getNetworkStartingVals()} in file \nnm{effects.r}.
This uses an adapted version of (11.41) in \citet{Snijders05}.
The adaptation is a precision-based weighting of multiple periods
in case $M \geq 3$ (the vector \nnm{prec} used in the function
is a measure of precision).


The algorithm consists of three phases.

\begin{enumerate}
\item In this phase a small number $n_1$ of steps are made to estimate\\
      \nnm{dfra} = $D(\theta_1) =
                \big(\partial \E_\theta(S) / \partial \theta\big)
                       \mid_{\theta = \theta_1} $.\\
      This estimate is used to define ${\tilde{D}}$.
      Denote by $e_j$ the $j$th unit vector in $p$ dimensions.

     Initialise ${{\rm{Sum}}}_{d} = 0_{p \times p}$,
           ${{\rm{Sum}}}_{S} = 0_{p \times 1}$.\\
     For SF, initialise additionally\\
              ${\rm{Sum}}_{Sm} = 0_{p \times 1}$ and
              ${\rm{Sum}}_{Jm} = 0_{p \times 1}$ for $m = 2, \ldots, M$.\\
     For $N = 1$ to $n_1$, do the following.
     \begin{description}
     \item[(FD)] Generate
     \begin{align*}
       \nnm{fra} = S & \sim  \theta_1  \\
        S_{j} & \sim  \theta_1 + \epsilon_j e_j \ (j = 1, \ldots, p),
     \end{align*}
      where all these $p+1$ random vectors
      use a common random number stream to make them strongly positively
      dependent and where $\epsilon_j$ are suitable constants.\\
      Compute the difference quotients
      \[
      \nnm{sdf} = d_{j} = \epsilon_j^{-1} (S_{j} - S)~;
      \]
      for small values of $\epsilon_j$ the expected value of the matrix
      $d = (d_{1}, ..., d_{p})$ approximates $D(\theta_1)$.
      However, $\epsilon_j$ must be chosen not too small because otherwise
      the variances of the $d_{j}$ become too large.\\
      Update
            \begin{align*}
            {\rm{Sum}}_{S}  &= {\rm{Sum}}_{S} + S \\
            {\rm{Sum}}_{d}  &= {\rm{Sum}}_{d} + d \\
            \end{align*}

      \item[(SF)] Generate
           \[
           \nnm{fra} = S \sim \theta_1
           \]
               its components being $S_m \, (m = 2, \ldots, M)$
               (see `Score Function' above),
               the complete-data score functions
               $J_{m}$ ($m = 2, \ldots, M$),\\
               and $d = <\!SJ\!>$ according to (\ref{SJ}).\\
               Update
               \begin{align*}
               {\rm{Sum}}_{S}  &= {\rm{Sum}}_{S} + S \\
               {\rm{Sum}}_{d}  &= {\rm{Sum}}_{d} + d \\
               {\rm{Sum}}_{Sm}  &= {\rm{Sum}}_{Sm} + S_m, \ m = 2, \ldots, M \\
               {\rm{Sum}}_{Jm}  &= {\rm{Sum}}_{Jm} + J_m, \ m = 2, \ldots, M \\
               \end{align*}
     \end{description}

     The differences $\nnm{fra} - s$ are stored as $\nnm{sf}$ in procedure
     \nnm{doPhase1or3Iterations}.

     At the end of Phase 1, calculate the following results:
      \begin{enumerate}
      \item
      Estimate $E_{\theta_1} S$  by
      \[
      \bar{s} = \frac{{\rm{Sum}}_{S}}{n_1} \ .
      \]
      \item In \nnm{CalculateDerivative} : \
      Estimate $D(\theta_1)$ by
      \begin{align*}
      \text{FD:    }   \hat{D} &= \frac{{\rm{Sum}}_{d}}{n_1} \\
      \text{SF:     }  \hat{D} &= \frac{{\rm{Sum}}_{d}}{n_1} -
                         \frac{\sum_{m=2}^M {\rm{Sum}}_{Sm}{\rm{Sum}}_{Jm}}{n_1^2} \ .
      \end{align*}
      \item Partially (or entirely) diagonalized matrix
      \[
         \tilde{D} = \nnm{diagonalize}\times\text{diag}(\hat{D})
               \,+\, (1-\nnm{diagonalize})\times \hat{D}
      \]
      where \nnm{diagonalize} is set in function \nm{sienaAlgorithmCreate},
      with default value 1.
      \item Componentwise regression coefficients of statistics $S$ on scores
            $\sum_m J_m$, i.e., for each coordinate $j$,
            \[
            \nnm{RegrCoef}_j \,=\, \frac{\widehat{\text{cov}}(S_j, \sum_m J_{mj})}
                                   {\widehat{\text{var}}(S_j)}
            \]
            from the values generated in Phase 1.\\
            (For purely descriptive purposes, the corresponding correlation
            coefficients are also calculated.)
      \item Make one partial estimated Newton-Raphson step,
      \[
       {\hat{\theta}} = \theta_1 -
         a_1 {\hat{D}}^{-1} \left( \bar{s} - s \right)\, .
      \]
      where
      \[
      \nnm{targets} = s = \text{ observed values} .
      \]
      \end{enumerate}

\item[Phase 2.]
    \emph{Note 1: I cannot find anything about phase2.0 in the current code -- ts 23-05-13.}\\
    \emph{Note 2: the use of observedPos and observedNeg apparently was discontinued.}\\
    The boolean variable \nnm{phase2.0} differentiates between having or not
    having a subphase 2.0.
    Such a subphase was introduced in version R-Forge 1.1-220 (August 2012).\\
    If \nnm{phase2.0} then $k_{\rm{min}} = 0$, else $k_{\rm{min}} = 1$.\\
     Repeat for $k = k_{\rm{min}}, \ldots, k_{\rm{max}}$ (subphases):\\
     function \nnm{proc2subphase}
     \begin{enumerate}
     \item Initialise \nnm{nit} = $N = 0$,
        ${\rm{Sum}}_{\hat\theta} = 0_{p \times 1}$,  ${\rm{Sum}}_\Delta = 0_{p \times 1}$,
        $S_{\rm{prev}} = 0_{p \times 1}$, \\
        \nnm{ac} = $\rm{AC} = 0_{p \times p},
        \nnm{observedPos = observedNeg}  = \text{FALSE}_{p \times 1}$.
     \item Generate
           \[
           \nnm{fra} = S \sim \theta
           \]
           or, for multiple processes, as the average of \nnm{int}
           independent replicates of such variables;\\
           if \nnm{dolby}, then also generate the sum of scores $\sum_m J_m$
           and calculate
            \[
            \nnm{fra} \,=\, \nnm{fra} - \nnm{regrCoef}*\Big(\sum_m J_m\Big)
            \]
           (componentwise multiplication); this serves for reducing the
           variance while not affecting the expected value.
    \item  Update
            \begin{align*}
              {\rm{Sum}}_\Delta & =  {\rm{Sum}}_\Delta + (S-s) \\
             \text{If \nnm{doubleAveraging},}\ \hat{\theta} &\,=\,
               {\rm{Sum}}_{\hat\theta}/N \,-\, a_N\, \tilde{D}^{-1} \, {\rm{Sum}}_\Delta \\
             \text{else}\ \hat{\theta} &\,=\,
               \hat{\theta} \,-\, a_N\, \tilde{D}^{-1} \, (S - s) \\
             N & =  N+1 \\
            {\rm{Sum}}_{\hat\theta} & =  {\rm{Sum}}_{\hat\theta} + \hat\theta \\
            \text{if $N \geq 2$, then }
            \rm{AC} & =  \rm{AC} + (S - s)(S_{\rm{prev}} - s)' \\
            \nnm{observedPos}_j &=
                \nnm{observedPos}_j \text{ or } \nnm{fra}_j \geq 0 \
                    \text{ (for all } j)\\
            \nnm{observedNeg}_j &=
                \nnm{observedNeg}_j \text{ or } \nnm{fra}_j \leq 0 \
                    \text{ (for all } j)\\
            S_{\rm{prev}} &= S \ .
            \end{align*}
            (updates for \nnm{observedPos} and \nnm{observedNeg}
            relevant for $k=0$ only)
     \item  Stopping rule for subphase $k=0$:\\
             If $N \geq n_{2k}^+$ or
            $\nnm{observedPos = observedNeg}  = \text{TRUE}_{p \times 1}$
            then goto (b).
            \medskip

            Stopping rule for subphase $k \geq 1$:\\
             If $N \geq n_{2k}^+$ or
             ($N \geq n_{2k}^-$ and $\max_k \rm{AC}_{kk} \leq 0$), then\\
            $\big\{$ update $\hat\theta \,=\, N^{-1} {\rm{Sum}}_{\hat\theta}$ ;
                   set $a_k = a_k \times \text{reductionfactor} \big\}$ ;
            goto (b).\\
            (But if \nnm{phase2.0} then the update to $\hat\theta$ is done
            only for $k \geq 2$.  )
     \end{enumerate}
     In the code, \nnm{theta} = $\theta$, \nnm{gain} = $a_N$, \nnm{ac} = AC,
     \nnm{thav} = ${\rm{Sum}}_{\hat\theta}$, \nnm{nit} = $N$,\\
     \nnm{n2min} = $n_{2k}^-$, \nnm{n2max} = $n_{2k}^+$.
\item[Phase 3.]
      Phase 3 is used only for the estimation of $D(\theta)$ and
      $\Cov(\hat\theta)$,
      and as a check for the (approximate) validity of (\ref{mom_eq0}).
      The value of $\hat{\theta}$ is left unchanged in this phase
      and is equal to the value obtained after the last subphase of phase 2.
      The procedure is mainly as in phase 1.

     Initialise ${{\rm{Sum}}}_{d} = 0_{p \times p}$,
           ${{\rm{Sum}}}_{S} = 0_{p \times 1}$,
           ${{\rm{SumSq}}}_{S} = 0_{p \times p}$.\\
     For SF, initialise additionally
              ${\rm{Sum}}_{Sm} = 0_{p \times 1}$ and
              ${\rm{Sum}}_{Jm} = 0_{p \times 1}$ for $m = 2, \ldots, M$.\\
     For $N = 1$ to $n_3$, do the following.
     \begin{description}
     \item[(FD)] Generate
     \begin{align*}
       \nnm{fra} {} = S & \sim \theta  \\
         S_{j} & \sim \theta + \epsilon_j e_j \ (j = 1, \ldots, p),
     \end{align*}
      where all the $p+1$ random vectors
      use a common random number stream to make them strongly positively
      dependent and where $\epsilon_j$ are suitable constants.
      Compute the difference quotients
      \[
      \nnm{sdf} = d_{j} = \epsilon_j^{-1} (S_{j} - S) \ .
      \]
      Update
            \begin{align*}
            {\rm{Sum}}_{S}  \phantom{Sq} &= {\rm{Sum}}_{S} + S \\
            {\rm{SumSq}}_{S}  &= {\rm{SumSq}}_{S} + S\,S' \\
            {\rm{Sum}}_{d}  \phantom{Sq} &= {\rm{Sum}}_{d} + d \\
            \end{align*}
      \item[(SF)] Generate
           \[
           \nnm{fra} = S \sim \theta
           \]
               its components being $S_m \, (m = 2, \ldots, M)$
               (see `Score Function' above),
               the complete-data score functions
               $J_{m}$ ($m = 2, \ldots, M$),\\
               and $d = <\!SJ\!>$ according to (\ref{SJ}).\\
               Update
               \begin{align*}
                {\rm{Sum}}_{S}   &= \ {\rm{Sum}}_{S} + S \\
                {\rm{SumSq}}_{S} &= \ {\rm{SumSq}}_{S} + S\,S' \\
                {\rm{Sum}}_{d}   &= \ {\rm{Sum}}_{d} + d \\
                {\rm{Sum}}_{Sm}  &= \ {\rm{Sum}}_{Sm} + S_m, \ m = 2, \ldots, M \\
                {\rm{Sum}}_{Jm}  &= \ {\rm{Sum}}_{Jm} + J_m, \ m = 2, \ldots, M \\
               \end{align*}
     \end{description}

     At the end of Phase 3, calculate the following results:
      \begin{enumerate}
      \item
           Estimate $E_{\hat\theta} S$ and $\Cov_{\hat\theta} S$ by
           \[
           \bar{s} = \frac{{\rm{Sum}}_{S}}{n_3} \ , \hspace{1em}
           \Sigma = \frac{{\rm{SumSq}}_{S}}{n_3} \,-\, \bar{s}\,\bar{s}' \ .
           \]
      \item
           To check (approximate) validity of (\ref{mom_eq0}) compute the
           $t$-ratios for convergence,
           \begin{equation}
           \nnm{tstat} = t_j = \frac{\bar{s}_j - s^{\rm{obs}}_j}{\sigma_j} \ ,
                                \label{tstat}
           \end{equation}
            where $\sigma_j$ is the square root of the $j$th diagonal element
           of $\Sigma$.
      \item In \nnm{CalculateDerivative3} : \
           Estimate $D(\hat\theta)$ by
      \begin{align*}
      \text{FD:    }   \hat{D} &= \frac{{\rm{Sum}}_{d}}{n_3} \\
      \text{SF:     }  \hat{D} &= \frac{{\rm{Sum}}_{d}}{n_3} -
                         \frac{\sum_{m=2}^M {\rm{Sum}}_{Sm}{\rm{Sum}}_{Jm}}{n_3^2} \ .
      \end{align*}
      \item Estimate the covariance matrix of $\hat\theta$ by
            \begin{equation}
            \Cov (\hat\theta) \,=\,  {\hat D}^{-1}  \Sigma {\hat D }^{' -1} \ .
                      \label{eq:cov}
            \end{equation}
            The standard errors are the square roots of the diagonal elements
            of $\Cov (\hat\theta)$.
      \item For possible later use with the \nnm{prevAns} option, recalculate the
            componentwise regression coefficients
            \[
            \nnm{RegrCoef}_j \,=\, \frac{\widehat{\text{cov}}(S_j, \sum_m J_{mj})}
                                   {\widehat{\text{var}}(S_j)}
            \]
            (all $j$) from the values generated in Phase 3.
      \end{enumerate}

\end{enumerate}


This algorithm contains various constants that can be adapted so as to achieve
favorable convergence properties.
Experience with various data sets led to the following values.

The number of steps in Phase 1 is
\[
\nnm{n1} =  n_1 = 7 + 3p \ .
\]
In the \nnm{dolby} option, at least 50 steps are taken in Phase 1.\\
The minimum number of steps in subphase $2.k$ is
\[
\nnm{n2minimum[1]} = n_{2k}^- = ((2.52)^k)\ttimes(7+p)
\]
 which is meant to approximate
$n_{2k}^- = 2^{4(k+2)/3}(7+p)$;
the maximum number is
\[
\nnm{n2maximum[1]} = n_{2k}^+ = n_{2k}^- + 200 \ .
\]
For multiple processes, we use
\[
  n_{2k}^- = ((2.52)^{k-1})\ttimes \max \{5, (7+p)\ttimes 2.52/\nnm{int}\}
\]
where \nnm{int} is the number of processes.
These bounds $n_{2k}^-$ and $n_{2k}^+$ are determined so that
$N^{3/4} a_N$ tends to a finite positive limit.\\
For large $p$ they are rather conservative (i.e., unnecessarily large).\\
The default number of steps in phase 3 in the SF option is \nnm{n3} = $n_3 = 1000$.
For the FD option, 500 is a good default.

The default number of subphases is \nnm{nsub} = 4;
more or fewer subphases could be used
to obtain higher or lower precision, but 4 appears to be a good choice.\\
The initial value of $a_N$ in phase 2 is \nnm{firstg} = 0.2,
and for multiple processes $0.2 \ttimes \sqrt{\nnm{int}}$.\\
The reductionfactor at the end of subphases in phase 2 is
for MoM estimation parameter \nnm{reduceg}
set by \nnm{sienaAlgorithmCreate} (default 0.5), but for ML estimation
it is 0.25.

The values of \nnm{epsilon} = $\epsilon_j$ in the FD option
are chosen initially as 0.1,
but in Phase 1 a check is made and if
the $j$th coordinate of $d - d_j$ is exactly 0 for all or most
of the simulations then $\epsilon_j$ is adaptively increased.
The variability obtained
by the use of small values of $\epsilon_j$ is more serious than the bias obtained
by the use of a large value.
An ideal value would be to have $\epsilon_j$ slightly less than the
standard error of $\hat\theta_j$. However, this is known only
after the estimation has finished. (Of course in many cases there
have been done earlier estimations, and the information obtained
from them might be used for this purpose.)

\subsection{prevAns: using the previous answer}

The \nnm{prevAns} option in \nnm{siena07} does the following.
\begin{enumerate}
\item Using the function \nnm{updateTheta}, for the requested effects
   the initial parameter values are taken from the previous
   answer object.
\item If the specifications of the previous answer and the current
  effects object correspond, then the following objects are taken from
  Phase 3 of the previous answer, and used to substitute for the calculations
  done at the end of Phase 1:\\
  \nnm{dfra};
  matrix \nnm{dinv} which is used to calculate matrix \nnm{D};
  \nnm{sf};
  \nnm{regrCoef};
  \nnm{regrCor}. \\
  This substitution is determined by the flag \nnm{haveDfra}.
\end{enumerate}

\subsection{Convergence criterion}

Up to version 1.1-284, the proposed convergence criterion focused on
the $t$-ratios for convergence \nnm{tstat} defined in (\ref{tstat}),
of which the maximum absolute value
\[
    \nnm{tmax} \,=\,  \max_j \bigl\lvert \nnm{tstat}_j \bigr\rvert
\]
 should be less than 0.10.
This was supported by simulations of the estimators using this
convergence criterion, for which expected values and coverage rates of
confidence intervals were good.
Since then it has appeared that the convergence criterion should be improved,
and the following conclusion was reached.

The \emph{overall maximum convergence ratio} should be used as an additional
convergence criterion. It is defined as the maximum $t$-ratio for convergence for
any linear combination of the parameters,
\begin{equation}
           \nnm{tconv.max} \,=\, \max_{b}
           \left\{ \frac{b' \big(\bar{s} - s^{\rm{obs}}\big)}
           {\sqrt{b' \, \Sigma\, b}} \right\} \ , \label{tstatmax}
\end{equation}
where $\bar{s}$ is the average simulated vector of statistics and $s^{\rm{obs}}$
is its observed value.
This is equal to (by the Cauchy--Schwarz inequality)
\begin{equation}
 % \max_{b} \left\{ \frac{b' \big(\bar{s}_j - s^{\rm{obs}}\big)}
 %          {\sqrt{b' \Sigma b}} \right\} \,=\,
  \max_{c} \left\{ \frac{c' \, \Sigma^{-1/2}  \big(\bar{s} - s^{\rm{obs}}\big)}
           {\sqrt{c'c\,}} \right\} \,=\,
          \sqrt{ \big(\bar{s} - s^{\rm{obs}}\big)'\,
           \Sigma^{-1}  \big(\bar{s} - s^{\rm{obs}}\big) } \ .
\end{equation}

The definition implies that
\[
    \nnm{tconv.max} \, \geq \, \nnm{tmax}   \ .
\]
Further, the vector $b^*$ of weights that has the largest convergence $t$-ratio
(i.e., the largest value of the expression in braces in (\ref{tstatmax})) is
\begin{equation}
  b^* \,=\, \Sigma^{-1/2}c^* \,=\, \Sigma^{-1/2} \, \Sigma^{-1/2}  \big(\bar{s} - s^{\rm{obs}}\big)
  \,=\, \Sigma^{-1}  \big(\bar{s} - s^{\rm{obs}}\big) \ .
\end{equation}
In case of difficult convergence, the latter vector of weights may be used
to diagnose which is the linear combination giving most trouble
to the algorithm.
It may be even more instructive to study the weights of standardized statistics
(i.e., statistics divided by their standard deviation).

These can be obtained from RSiena, if the answer object is called \texttt{ans}, as
\begin{verbatim}
(solve(ans$msf) %*% apply(ans$sf,2,mean)) * sqrt(diag(ans$msf))
\end{verbatim}
A more readable representation may be given by
\begin{verbatim}
round((solve(ans$msf) %*% apply(ans$sf,2,mean)) * sqrt(diag(ans$msf)), 5)
\end{verbatim}


A study was made, using several data sets and model specifications,
in which estimations were first run until $ \nnm{tmax} \leq 0.10$,
and then continued (with \nnm{prevAns}) until $ \nnm{tconv.max} \leq 0.20$
and then on until $ \nnm{tconv.max} \leq 0.15$.
After each of these three endpoints, the estimate was retained.
This led to a set of estimates $\check{\theta}_i$ each with their estimated
covariance matrices $\check{\Sigma}_i$ and values $\nnm{tmax}_i$
and $\nnm{tconv.max}_i$.

To summarize these estimations and covariance matrices, only those
$i$ were used for which $\nnm{tconv.max}_i \leq 0.10$,
to obtain robust estimates
(elementwise) of the mean $\check{\theta}_i$, and of the mean
$\check{\Sigma}_i$; these are denoted $\bar\theta$
and $\bar\Sigma$. It was checked that $\bar\Sigma$ still was positive
definite.
The quality of all estimates $\check\theta_i$ then was assessed by the
squared Mahalanobis-type distances
\[
    d_i \,=\, (\check\theta_i - \bar\theta)' \, {\bar\Sigma}^{-1} \,
                (\check\theta_i - \bar\theta) \ .
\]
Regarding $\bar\theta$ as the true estimate, this measures
how well $\check\theta_i$ approximates the true estimate, and therefore
can be regarded as a measure of convergence.



\begin{figure}
\hfill
\includegraphics*[scale=0.29]{tmax.png}
\includegraphics*[scale=0.29]{tconvmax.png}
\hfill\hfill
\caption{Distances $d_i$ as a function of  \nnm{tmax} (left) and \nnm{tconv.max} (right)
\label{distances}}
\end{figure}
% TestsConvergence.r

This led to plots such as those in Figure~\ref{distances}
(obtained for a model with some highly correlated
effects, for which the algorithm was expected to encounter
some difficulties).
We see clearly that, although the great majority of distances
for $\nnm{tmax}_i \leq 0.10$ are small, some distances
are too large. By contrast, the distances are well approximated
by a function of $\nnm{tconv.max}_i$, and for   $\nnm{tconv.max}_i \leq 0.25$
all deviations are small.
This supports the rule to require  $\nnm{tconv.max}_i \leq 0.25$
or (on the safe side) $\nnm{tconv.max}_i \leq 0.20$ as a signal of convergence.

\subsection{Some remarks about GMoM and sienacpp}

The Generalized Method of Moments (`\emph{GMoM}')
as described in \citet{ASS2015} was first implemented in \nnm{sienacpp()},
now also in \nnm{RSiena}.

The following indicates how some of the matrices in
the description of the algorithm in \citet{ASS2015}
can be obtained from a \nnm{sienaFit} object \nnm{ans}
produced by \nnm{sienacpp()}.

The dimension of the parameter is $P$ = \texttt{ans\$pp},
of the statistics $Q$ = \texttt{ans\$qq}, with $Q \geq P$.

First, let
\begin{verbatim}
fit <- ans$sienafit[[1]]
\end{verbatim}
To get some understanding of what is in this object,
look for \texttt{rifySienaFit} in file \nnm{RInterface.cpp}.

The matrix \texttt{z\$sf = fit\$phase3\_statistics} is a sample in Phase 3
from the distribution of $S^\ast$.
Hence $\Cov(S^\ast)$ is estimated by
\begin{verbatim}
    cov0 <- cov(fit$phase3_statistics)
\end{verbatim}
which is given as \texttt{ans\$msf}.
As indicated in (8) of \citet{ASS2015},
the inverse  $W = \big(\Cov(S^\ast)\big)^{-1}$  is used :
\begin{verbatim}
    W <- solve(cov0)
\end{verbatim}
The $Q \times P$ matrix of derivatives $\Gamma$ is
estimated by \texttt{t(fit\$gamma)}.
For the matrix $B$ we use \texttt{B1} as defined by
\begin{verbatim}
    B0 <- t(fit$gamma) %*% W
    B1 <- solve(diag(sqrt(rowSums(B0*B0)))) %*% B0
\end{verbatim}
Hence the covariance matrix of $\hat\theta$ under the GMoM
can be obtained as
\begin{verbatim}
    D0 <- B1 %*% fit$gamma
    D0inv <- solve(D0)
    cov2 <- B1 %*% cov0 %*% t(B1)
    covtheta <- D0inv %*% cov2 %*% t(D0inv)
\end{verbatim}

For the GMoM, the $t$-ratios for convergence are calculated
for the default (main) statistics for the effects.
The maximum convergence ratio is calculated for all linear
combinations of $B S^\ast$.
Therefore, the maximum convergence ratio refers to the
result of the estimation algorithm, and is not
necessarily larger than the absolute values of
all  $t$-ratios for convergence. When some of the latter are large,
this is an indication of poor model fit.


\section{Statistics for MoM}
\label{S_stats}

The statistics used for the MoM are proposed in
\citet{Snijders01} and \citet{SnijdersEA07}.


\subsection{Multi-group projects}

In the following sections, statistics are often added for all periods
from $m=2$ to $m=M$. For multi-group data sets the situation is different.
Such data sets can be implemented by `glueing' the data sets
as one sequence after each other.
This is done by the function \nm{sienaGroupCreate}.
Another implementation is by considering the data set as nested,
with periods nested in groups.
In the sequential implementation, we can denote statistics for period
$m$ by $S_m$; in the nested implementation, statistics for period
$m$ in group $g$ are denoted by $S_{gm}$.
Denote the number of groups by $G$, and the number of waves for
group $g$ by $M_g$; and the cumulative sums of these by
\[
    C_g \,=\, \sum_{h=1}^g M_h \ .
\]

In the sequential implementation, summations of the form
\[
\sum_{m=2}^M S_m
\]
should be replaced by
\[
\sum_{g=1}^G \sum_{m=C_{g-1}+1}^{C_g} S_m \ .
\]
In the nested implementation they should be replaced by
\[
\sum_{g=1}^G \sum_{m=2}^{M_g} S_{gm} \ .
\]


\subsection{Rate function for networks}

For an effect in the rate function
\begin{equation}
 \lambda^X(\alpha^X, i, y) \,=\, \rho^X_m \,
      \exp\Big(\sum_k \alpha^X_k s^X_{ki}(y) \Big) \ ,
               \label{ratef}
\end{equation}
the statistic for the method of moments to estimate $\alpha_k^X$ is
\begin{equation}
\sum_{m=2}^M  \sum_i \bigg\{ s^X_{ki}\big(y(t_{m-1})\big) \,
         \sum_j \bigl\lvert x_{ij}(t_m) - x_{ij}(t_{m-1}) \bigr\rvert  \bigg\} \ .
               \label{ratestat}
\end{equation}

In the unconditional method of moments, the statistic to estimate
$\rho^X_m$ is
\begin{equation}
 \sum_i \sum_j \bigl\lvert x_{ij}(t_m) - x_{ij}(t_{m-1}) \bigr\rvert  \ .
               \label{rhostat}
\end{equation}
In the conditional method of moments, this statistic is used for
the stopping criterion for the simulations in period $m-1$.

\subsection{Evaluation function for networks}

For an effect in the evaluation function
$s_{ik}^X(x,z)$, the change statistic or change contribution is defined by
\begin{equation}
\Delta_{kij}^X(x,z) = s_{ik}^X(x^{(+ij)},z) - s_{ik}^X(x^{(-ij)},z) \label{Delta}
\end{equation}
where $x^{(+ij)}$ is $x$ to which the tie $i \rightarrow j$ has been added, and
 $x^{(-ij)}$ is $x$ from which the tie $i \rightarrow j$ has been deleted.
 (Note that this definition implies that (\ref{Delta}) is not affected by the value
 of $x_{ij}$ in  $x$ as used in the left hand side.)

The quantity introduced in the beginning, called $\Delta f^N(r,i,j,x,z)$,
is a linear combination of the change statistics:
\begin{equation}
\Delta f^N(r,i,j,x,z) = \pm  \sum_k \beta^{X(r)}_k \Delta_{kij}^{X(r)}(x,z) \label{eq:Deltaf}
\end{equation}
where $\beta^{X(r)}_k$ is the appropriate element of the parameter vector $\theta$,
and where $\pm = +1$ if the toggle means that tie  $i \rightarrow j$ is added,
while $\pm = -1$ if the toggle means that this tie is dropped.

In the C++ code, the change contribution is the function
\nnm{calculateContribution()},
of which the basic instance is defined in \nnm{NetworkEffect()}
and specific instances in all functions defining specific effects.

The statistic used for estimation, also called the target statistic,
is defined as follows.
To be explicit, denote all changing covariates (monadic and/or dyadic) by $v$,
with value $v(t_m)$ for wave $m$, and all constant covariates by $w$.
The sum of the effect over all actors is defined by
\begin{equation}
 s_{k}^X(x,z,v,w) = \sum_i s_{ik}^X(x,z,v,w) \ .        \label{fitstat0}
\end{equation}
If there is only one network as dependent variable then there is no $z$
so we can write $s_{k}^X(x,v,w)$.
The target statistic then is
\begin{subequations}
   \label{fitstat}
\begin{equation}
    \sum_{m=2}^M s_{k}^X(x(t_m),v(t_{m-1}),w) \ .        \label{fitstatX}
\end{equation}
Note that $v$ is taken at wave $m-1$, because for changing covariates
the assumption is that the value observed at wave $m-1$ remains valid
up to just before wave $m$.\\
If there are two dependent variables, one network and one behavior,
then the target statistics for the network are
\begin{equation}
    \sum_{m=2}^M s_{k}^X(x(t_m),z(t_{m-1}), v(t_{m-1}),w) \ .        \label{fitstatXZ}
\end{equation}
and for the behavior
\begin{equation}
    \sum_{m=2}^M s_{k}^Z(x(t_{m-1}),z(t_{m}), v(t_{m-1}),w) \ .        \label{fitstatZX}
\end{equation}
\end{subequations}
Note the cross-lagged way of using the waves in these equations.
This is explained in \citet{SnijdersEA07}.

If the number of  dependent variables is more than one for other
configurations of dependent variables, like multiple networks
or a network with multiple behaviors,
the same cross-lagged principle is used: for wave $m \in \{2, \ldots, M\}$,
the dependent variable is taken as observed in wave $m$ and all
explanatory (`independent') variables as observed in wave $m-1$.

In the C++ code, the summand in (\ref{fitstat}) is the
function \nnm{evaluationStatistic()}.
It is used in \nnm{StatisticCalculator.cpp}.
It is defined in \nnm{NetworkEffect.cpp} and  \nnm{BehaviorEffect.cpp}
as the sum of \nnm{egoStatistic(i)} over all actors $i$,
which are the terms in (\ref{fitstat0}).
For \nnm{NetworkEffect.cpp}, these are computed  as
\begin{equation} \label{sum_tiestat}
 \text{egoStatistic}(i) = \sum_j x_{ij}\,\text{tieStatistic}(i,j) \ .
\end{equation}
Note that these are virtual functions, and will be refined in specific network
effects, which are descendants of class \nnm{NetworkEffect}.
It is necessary to redefine either \nnm{egoStatistic} or \nnm{tieStatistic}.

For main effects, the redefined \nnm{tieStatistic} is not used
if there also is a redefined \nnm{egoStatistic}.
However, for network interaction effects of dyadic and ego effects,
defined in Sections~\ref{S_dyadic} and~\ref{S_ego}, 
the \nnm{tieStatistics} are calculated 
as the products of the corresponding main
effects (this happens in \nnm{NetworkInteractionEffect.cpp}). 
Therefore, to be able to use a network effect in interactions, 
it is necessary to redefine the \nnm{tieStatistic}. 

The ``number of distances equal to 2'' effect is an example of an effect
where decomposition (\ref{sum_tiestat}) is not straightforward,
and the \nnm{egoStatistic()}
is defined using \nnm{initializeStatisticCalculation} and
\nnm{cleanupStatisticCalculation}.
The basic instances of all these functions also are defined in \nnm{NetworkEffect()},
and specific instances of \nnm{egoStatistic()}
or \nnm{tieStatistic()} are defined in all functions defining specific effects.

A special case of the construction is given by so-called generic effects,
see the documentation in \nnm{classdesign}.

Another construction uses the function \nnm{statistic()}; see
\nnm{NetworkEffect.cpp}:\\
'A convenience method for implementing statistics for both evaluation and
endowment function. It assumes that the statistic can be calculated by
iterating over ties $(i,j)$ of a network $Y$ and summing up some terms
$s_{ij}(X)$ with respect to another network $X$, namely,
$s(X,Y) = \sum_{(i,j) \in Y} s_{ij}(X)$.
For evaluation function, $X = Y$.
For endowment function, $X$ is the initial network of the period, and $Y$ is the
network of ties that have been lost during the network evolution.'

In \nnm{NetworkEffect.cpp}, \nnm{creationStatistic()} uses
\nnm{endowmentStatistic()};
\nnm{endowmentStatistic()} uses \nnm{statistic()};
\nnm{statistic()} uses \nnm{egoStatistic()}, and the latter uses
\nnm{tieStatistic()}.
If one of these is redefined in a given model or model class,
then this chain is broken and the lower elements in the chain
will be replaced by the redefinitions.

Preprocessing and postprocessing is possible using the functions
\nnm{initializeStatisticCalculation()} and
\nnm{cleanupStatisticCalculation()}. For examples, see
\nnm{balanceEffect}.
\nnm{InStructuralEquivalenceEffect} also is an example of
interesting constructions in this respect.

The estimation statistics for the creation and endowment effects
are calculated by applying the effect statistic to the network
of gained ties, and the network of lost ties, respectively.
This happens in functions \\
\nnm{StatisticCalculator::calculateNetworkCreationStatistics} and \\
\nnm{StatisticCalculator::calculateNetworkEndowmentStatistics}.


\subsection{Actor statistics}

\nnm{StatisticCalculator::StatisticCalculator} has an argument
\nnm{returnActorStatistics}, with the result that all calls of \nnm{evaluationStatistic},
\nnm{endowmentStatistic} and \nnm{creationStatistic} also return the contributions by actor.

Functions \nnm{getTheActorStatistics} and \nnm{getActorStatistics} use\\
\nnm{returnActorStatistics}=\texttt{TRUE}.

\nnm{siena07setup.cpp} has a non-exported function
\nnm{getTargetActorStatistics} that is used in \nnm{getTargets}.

In \nnm{simstats.c}, if \nnm{returnActorStatistics}
is present with value \texttt{TRUE} for the \nnm{sienaAlgorithm} object,
the \nnm{actorStatistics} are returned as \texttt{ans[[10]]}.
This possibility is not documented.

Currently, the \nnm{sienaAlgorithm} object never is given a component\\
\nnm{returnActorStatistics}.


\subsection{Interaction effects for networks}

User-defined interactions for network change are defined as follows.

Consider two network effects $ s_{ia}^X(x,z)$ and $ s_{ib}^X(x,z)$.
Denote their change statistics = change contributions by
\[
\Delta_{aij}^X(x,z) \text{  and  }  \Delta_{bij}^X(x,z) \ .
\]
\iffalse
Assume that their statistics used for MoM estimation are
\begin{equation}
\sum_i s_{ia}^X(x,z) = \sum_{i,j} x_{ij} \, \Delta_{aij}^X(x,z)  \label{int_cond1}
\end{equation}
and similarly for effect $b$; note that (\ref{int_cond1}) does not always hold,
it is a basic condition for an effect to be included in an interaction.
\fi

Then the interaction is defined by the change contribution
\begin{equation}
\Delta_{a \times b,ij}^X(x,z) = \Delta_{aij}^X(x,z) \times  \Delta_{bij}^X(x,z) \ . \label{int_change}
\end{equation}
\iffalse
and the MoM estimation statistic
\begin{equation}
 s_{a \times b}^X(x,z) = \sum_{i,j} x_{ij}
                      \Big( \Delta_{aij}^X(x,z)  \times  \Delta_{bij}^X(x,z) \Big)  \ . \label{int_fit}
\end{equation}
\fi

For interactions between three effects it is analogous,
with change statistic
\[
\Delta_{a \times b \times c,ij}^X(x,z) = \Delta_{aij}^X(x,z) \times \Delta_{bij}^X(x,z)
                                             \times \Delta_{cij}^X(x,z) \ .
\]
\iffalse
and MoM estimation statistic
\[
 \sum_{i,j} x_{ij} \Big( \Delta_{aij}^X(x,z)  \times  \Delta_{bij}^X(x,z)
                                             \times \Delta_{ij} s_{c}^X(x,z)  \Big)  \ .
\]
\fi

The question is, when does this make sense, and what is the appropriate
MoM estimation statistic?
The change statistic and the estimation statistic must hang together according to
(\ref{Delta}) and (\ref{fitstat}), as mentioned above.
For user-defined interactions to be properly defined by (\ref{int_change}),
we must indicate the estimation statistic to be used
and prove that this satisfies (\ref{Delta}) and (\ref{fitstat}).

Sufficient conditions for user-defined interactions are
the following.

\subsubsection{Dyadic effects}
\label{S_dyadic}

An effect is defined to be \emph{dyadic} if it can be written as
\begin{equation}
 s_{ik}^X(x,z) = \sum_j x_{ij}\, c_{kij}(x,z) \        \label{dyadic}
\end{equation}
where $c_{kij}(x,z)$ is independent of $x_{i\ast}$ defined as the
row $x_{i\ast} = (x_{i1}, \ldots, x_{in})$.\\
For a dyadic effect we have
\[
 \Delta_{kij}^X(x,z) = c_{kij}(x,z)
\]
and
\[
   s_{k}^X(x,z) = \sum_{ij} x_{ij}\, c_{kij}(x,z) \ .
\]

The interaction between two dyadic effects is defined by
\begin{equation}
 s_{a \times b,i}^X(x,z) = \sum_j x_{ij}\, c_{aij}(x,z)\, c_{bij}(x,z)   \ .  \label{dyadic_inter}
\end{equation}
We then have
\begin{align*}
 \Delta_{a \times b,ij}^X(x,z) &= c_{aij}(x,z)\, c_{bij}(x,z)   \\
   s_{a \times b}^X(x,z) &= \sum_{ij} x_{ij}\, c_{aij}(x,z)\, c_{bij}(x,z) \ .
\end{align*}
This indeed satisfies (\ref{Delta}) and (\ref{fitstat}).
The same holds for interactions between three (or more) dyadic effects.
One could say that the interaction between two dyadic effects is again
a dyadic effect.

\subsubsection{Ego effects}
\label{S_ego}

An effect is defined to be an \emph{ego} effect if it can be written as
\begin{equation}
 s_{ik}^X(x,z) = \sum_j x_{ij}\, c_{ki}(x,z) \        \label{ego}
\end{equation}
where $c_{ki}(x,z)$ is independent of $x_{i\ast}$ defined above,
and independent of $j$ (as the notation indicates).
(This definition implies that an ego effect is also a dyadic effect.)
\\
For an ego effect we have
\[
 \Delta_{kij}^X(x,z) = c_{ki}(x,z)
\]
and
\[
   s_{k}^X(x,z) = \sum_{ij} x_{ij}\, c_{ki}(x,z) \ .
\]
An interaction between an ego effect $a$ and any effect $b$ is defined by
\begin{equation}
 s_{a \times b,i}^X(x,z) =  c_{ai}(x,z) \, s_{ib}^X(x,z)  \ .        \label{ego_inter}
\end{equation}
This definition implies
\begin{align*}
 \Delta_{a \times b,ij}^X(x,z) &= c_{ai}(x,z)\, \Delta_{bij}^X(x,z)   \\
   s_{a \times b}^X(x,z) &= \sum_{i}  c_{ai}(x,z)\, s_{ib}^X(x,z) \ .
\end{align*}
Given that effect $b$ satisfies (\ref{Delta}) and (\ref{fitstat}),
this interaction also satisfies (\ref{Delta}) and (\ref{fitstat}).
The same holds for interactions between two ego effects and
any third effect, because the interaction between two ego effects is again
an ego effect.

Whether effects are ego effects or not is defined separately on the R side
and the C++ side; it is not transferred from R to C++.
In C++, whether an effect is an ego effect is used only in
\nnm{NetworkInteractionEffect.cpp} for the calculation of the
estimation function.
This uses the property that ego effects always have a \nnm{tieStatistic()},
and that this is independent of alter.

\subsubsection{Contextual effects}

Some effects that do not satisfy the conditions defining ego or dyadic effects
are defined as `ego' or `dyadic' anyway, because they are meant to be used
as contextual effects in interactions, where the `context' is meant to
represent the conditions under which ego makes the choice in the ministep.
They are represented as elementary effects (see below).
Such effects are indicated by the suffix ``\_ego'' or ``\_dya'' in the
\nnm{shortName}.

\subsubsection{Implementation}

For the implementation of network interaction effects, see function class
\nnm{NetworkInteractionEffect.cpp}.
A dyadic or ego effect should be defined with a \nnm{tieStatistic()}
rather than an \nnm{egoStatistic()}.



\subsection{Interaction effects for behavior}

Interaction effects can be defined for those behavior effects
where the effect is defined as a product of the behavior itself ($z_i$)
and something independent of $z_i$ itself, although it may depend
on the other dependent variables and even on $z_j$ for $j \neq i$. In mathematical
terms, for effects defined as
\begin{equation}
  s_{ik}^Z(x,z) = z_i\, s_{ik}^{Z0}(x,z)   \label{beh_eff_standard}
\end{equation}
where $s_{ik}^{Z0}(x,z)$ is a function not depending on $z_i$.
Most effects are of this kind;
if I am right, all except for the quadratic tendency effect
and effects involving similarity with respect to $Z$.

Note that this concerns effects for the same behavior variable $Z$.
(I drop the index $r$ here.)

For such effects, the change contribution is $s_{ik}^{Z0}(x,z)$ and
the evaluation statistic is (\ref{beh_eff_standard}).


The interaction of two such effects, with indices $k_1$ and $k_2$
(where it is allowed that $k_1 = k_2$) is defined by the
evaluation statistic
\begin{equation}
  s_{i(k_1 \circ k_2)}^Z(x,z) = z_i\, s_{ik_1}^{Z0}(x,z) \, s_{ik_2}^{Z0}(x,z)  \label{beh_eff_int}
\end{equation}
(I just used a circle $\circ$ to denote the interaction)
and the change contribution
\[
   s_{ik_1}^{Z0}(x,z) \, s_{ik_2}^{Z0}(x,z)  \ .
\]

??????? RSiena is written in terms of an arbitrary allowed change. I have coded
the change as
\[
  \textrm{difference}  * s_{ik_1}^{Z0}(x,z) \, s_{ik_2}^{Z0}(x,z)  \ .
\]

I hope this is correct. I have multiplied together the terms for each ego
and then divided by the value or difference until only one value or difference
is retained.



For interactions between three effects it is just the same.
The same effects qualify, the evaluation statistic is
\[
  s_{i(k_1 \circ k_2 \circ k_3)}^Z(x,z) = z_i\, s_{ik_1}^{Z0}(x,z) \, s_{ik_2}^{Z0}(x,z)\, s_{ik_3}^{Z0}(x,z)
\]
and the change contribution
\[
   s_{ik_1}^{Z0}(x,z) \, s_{ik_2}^{Z0}(x,z) \, s_{ik_3}^{Z0}(x,z)  \ .
\]



\subsection{Effects: interactionType}

The effects object has a variable (column) that is called
\texttt{interactionType}.
See the manual.
For network effects, the interaction type is \texttt{"ego"},
\texttt{"dyadic"}, or \texttt{""} (blank);
for behaviour effects, it is \texttt{"OK"} or \texttt{""}.
This indicates, using the rules above, whether an interaction
is allowed. This is checked in the internal RSiena function
\nnm{fixUpEffectNames}. Since \texttt{"ego"} is stronger
than \texttt{"dyadic"}, the former is used when the property is satisfied.

For elementary network effects (see the manual)
the change statistics are not derived
from an evaluation function. Therefore the requirement
(\ref{Delta}) does not apply.  Elementary effects have
\texttt{interactionType} set to \texttt{"dyadic"}.

\subsection{Weighted effects for behavior}

Another concept, which is similar to interaction, is
weighted effects. This is defined for effects
involving a network $X$. Again I drop the index $r$.

The effect can be weighted by an actor variable;
which can be an actor covariate as well as a dependent actor behavior
(the same behavior as the one under consideration or a different one).
Denote this variable by $V$.
The weighting is carried out by multiplying each $x_{ij}$
(but not other tie variables; only ties with $i$
as a sender)
by $v_j$; thus, whereas in the interactions of the preceding subsection
we were working with $v_i$, we now work with $v_j$.
(The weighted indegree effect below is an exception to this general description.)
This multiplication is done in the change contribution
as well as the evaluation statistic.

Let me give the following examples (going through the list
in the manual).

\begin{enumerate}
\addtocounter{enumi}{2}
 \item {\em weighted average similarity effect}, defined by the
 weighted average of centered similarity scores ${\rm sim}^z_{ij}$ between $i$
 and the other actors $j$ to whom he is tied,\\
 \[
 s^{\rm beh}_{i\vit \circ V}(x) =
   \frac{\sum_j v_j\, x_{ij} ({\rm sim}^z_{ij} - \widehat{{\rm sim}^z})}{\sum_j v_j \,x_{ij}} \ ;
   \]
 (and 0 if $\sum_j v_j\ x_{ij} = 0$)  ;

 \item {\em weighted total similarity effect}, defined by the
 weighted sum of centered similarity scores ${\rm sim}^z_{ij}$ between $i$
 and the other actors $j$ to whom he is tied,\\
 $s^{\rm beh}_{i\vit \circ V}(x) = \sum_j v_j\, x_{ij} ({\rm sim}^z_{ij} - \widehat{{\rm sim}^z}) $ ;

 \item {\em weighted indegree effect}, \\
 by an exception to the rule this could be defined as
 $s^{\rm beh}_{i\vit \circ V}(x) = z_i \sum_j v_j\,x_{ji}\,$;

 \item {\em weighted outdegree effect}, \\
 $s^{\rm beh}_{i\vit \circ V}(x) = z_i \sum_j v_j \,x_{ij} $ ;

\addtocounter{enumi}{1}

 \item
 {\em weighted average similarity $\times$ reciprocity effect}, defined by the
 sum of centered similarity scores ${\rm sim}^z_{ij}$ between $i$
 and the other actors $j$ to whom he is reciprocally tied,
 \[
 s^{\rm beh}_{i\vit \circ V}(x) =
 \frac{ \sum_j v_j \, x_{ij} x_{ji} ({\rm sim}^z_{ij} - \widehat{{\rm sim}^z})}
     {\sum_j v_j \, x_{ij} x_{ji}} \ ;
\]

 (and 0 if $\sum_j v_j \, x_{ij} x_{ji} = 0$) ;

 \item {\em weighted total similarity $\times$ reciprocity effect}, defined by the
 sum of weighted centered similarity scores ${\rm sim}^z_{ij}$ between $i$
 and the other actors $j$ to whom he is reciprocally tied,\\
 $s^{\rm beh}_{i\vit \circ V}(x) =  \sum_j v_j\, x_{ij} x_{ji} ({\rm sim}^z_{ij} -
                                \widehat{{\rm sim}^z}) $.

\medskip

\hspace{-2em}
Let us omit the popularity-interaction effects,
   which already contain a weight.

 \addtocounter{enumi}{4}


 \item {\em weighted average alter effect}, defined by the product of $i$'s
 behavior multiplied by the average behavior of his alters (a kind
 of ego-alter behavior covariance),
 \[
 s^{\rm beh}_{i\vit \circ V}(x) =  z_i \, \frac{  \sum_j v_j\,x_{ij}\, z_j }
                                { \sum_j v_j \,x_{ij} }
  \]
 (and the mean behavior, i.e. $0$, if the ratio is 0/0) ;

 \item {\em weighted average reciprocated alter effect}, defined by the product of $i$'s
 behavior multiplied by the average behavior of his reciprocated alters,
 \[
 s^{\rm beh}_{i\vit \circ V}(x) =  z_i \, \frac{\sum_j v_j \, x_{ij}\, x_{ji}\, z_j }
                                {\sum_j v_j\, x_{ij}\, x_{ji} }
\]
 (and 0 if the ratio is 0/0) ;

\addtocounter{enumi}{3}

 \item {\em weighted reciprocated degree effect}, \\
 $s^{\rm beh}_{i\vit \circ V}(x) = z_i \sum_j v_j \, x_{ij}\,x_{ji} $ .

\end{enumerate}

\subsection{Calculation of cross-lagged statistics in C++}

For coevolution models, evaluation effects of one dependent variable on another
dependent variable (such effects are called `mixed effects')
are estimated in the Method of Moments by cross-lagged
statistics \citep[][formulae 26 and 27]{SnijdersEA07}.

Evaluation effects are calculated in\\
\texttt{StatisticCalculator::calculateNetworkEvaluationStatistics} and
in the first part of\\
\texttt{StatisticCalculator::calculateBehaviorStatistics}.

In these functions, two states of the entire data set (with all dependent
variables and covariates) are used:
\smallskip

\texttt{lpPredictorState} is the state of everything at the start of the wave;\\[0.3em]
\texttt{pCurrentLessMissingsEtc} is the state of everything
at the end of the simulations, changed in a way that deals with missingness
of data and structurally fixed values
(this is documented in \texttt{missingsEtc.pdf}).
\smallskip

\big(Naming conventions in \texttt{siena{$\backslash$}src} are that private variable
names start with the letter \texttt{l} -- for  \texttt{local} --
and names of pointer variables have the first or second (after \texttt{l})
letter \texttt{p}.\big)\\

In \texttt{StatisticCalculator::calculateNetworkEvaluationStatistics},
the simulated state of this dependent network in
\texttt{pCurrentLessMissingsEtc}
is used to replace the component of \texttt{lpPredictorState}
corresponding to this dependent network.
The modified \texttt{lpPredictorState} is then used to initialize
all evaluation effects.

\subsection{Contemporaneous statistics for GMoM}

Note that the use of the variable \texttt{pSimulatedState} in the C++ code
always is for the GMoM. It indicates that the simulated state
instead of the preceding state should be used
for the evaluation statistics.

Estimation of co-evolution models by the Generalized Method of Moments (`GMoM')
operates as follows. For estimating the mixed effects,
in which one dependent variable
is being explained by another dependent variable,
the Method of Moments uses
cross-lagged estimation statistics, as discussed in the
preceding section.
\citet{ASS2019} proposed a GMoM estimator in which the cross-lagged
statistics are supplemented with contemporaneous statistics.

GMoM statistics are effects with \texttt{type=gmm}.
Contemporaneous effects for co-evolution models
all have shortNames ending in \texttt{\_gmm}.

Computing the contemporaneous statistics is achieved in \\
\texttt{StatisticCalculator::calculateNetworkGMMStatistics}
by initializing the \texttt{gmm} effects using function
 \texttt{effect::initialize} with five parameters
 (for other effects they have four), adding
parameter\\
\texttt{State * pSimulatedState}.
This initialization is prepared in classes \texttt{NetworkEffect}
and \texttt{BehaviorEffect} by defining \texttt{initialize} as virtual functions
\begin{verbatim}
void NetworkEffect::initialize(const Data * pData,
     State * pState, State * pSimulatedState,
     int period, Cache * pCache)
\end{verbatim}
and
\begin{verbatim}
void BehaviorEffect::initialize(const Data *pData,
     State *pState, State *pSimulatedState,
     int period, Cache *pCache)
\end{verbatim}
A similar construction is used for generic effects in
class \texttt{GenericNetworkEffect}.

These virtual functions \texttt{initialize} then are redefined for
the contemporaneous \texttt{gmm}
statistics by making \texttt{effect::evaluationStatistic} depend
on the contemporaneous instead of the cross-lagged statistics.
This is done, for the currently implemented contemporaneous \texttt{gmm}
statistics, in effect classes \texttt{CovariateDependentNetworkEffect},\\
\texttt{NetworkDependentBehaviorEffect}, and
\texttt{NetworkAlterFunction}.
Because of the inheritance property this needs to be done only once
in each chain of classes, and will work for all effect classes inheriting
from these three.

The actual contemporaneous \texttt{gmm} statistics then are defined
by modifying the corresponding effects of \texttt{type=eval}
such that a constructor is added with the parameter \texttt{bool simulatedState},
and the five-parameter version of  \texttt{effect::initialize} is added.
The same is done for functions defining generic effects.

Examples of this can be seen in effect classes \\
\texttt{CovariateEgoEffect}, \texttt{CovariateSimilarityEffect},\\
and \texttt{OutTieFunction}.
These then are specified as \texttt{gmm} statistics in
\texttt{effectFactory} by using them with \texttt{simulatedState=true}.

\newpage
\section{Rate effects}

Rate effects are currently of three types,
defined by the \texttt{rateType} column of \texttt{allEffects.csv}.
The values are \texttt{"structural"}, \texttt{"diffusion"}, \texttt{"covariate"}, or \texttt{NA}.

Function  \nnm{DependentVariable::initializeRateFunction}
in  \nnm{Dependentvariable.cpp}
initializes the rate function; here also the functions\\
 \nnm{model/effects/StructuralRateEffect.h} and
 \nnm{model/effects/DiffusionRateEffect.h}
are used.

Functions \nnm{StatisticCalculator::calculateNetworkRateStatistics} and
\nnm{StatisticCalculator::calculateBehaviorRateStatistics}
in \nnm{StatisticCalculator.cpp}
calculate the rate functions.

The \texttt{RateX} effect is defined in a funny way.
If you look for the string \texttt{"RateX"}, it occurs nowhere in the cpp directory.
It is identified by being the only rate effect of type \texttt{"covariate"},
together with the dependent variable name and the explanatory variable,
given by the parameter \texttt{"interaction1"}.


\newpage
\section{Likelihood-based calculations:
         \protect\newline Chain structures}

This algorithm follows the definitions in
\citet{SnijdersEA10a}.
The notation also is taken from that paper.

The basic data structure
for likelihood-based calculations
is called a \emph{chain}.
This is a sequence of changes that can take one (`observed') value
of $y$ to a next one.

To allow later generalization to valued networks as easily as possible,
we define a condition $D$ (for dichotomous) that is defined on the
level of variables (networks or behavioral variables);
in our current system $D$ is \nnm{True} for networks and \nnm{False} for
behavioral variables, but this can be different in future uses.


One change is called a \nnm{ministep}, denoted \ms, and is defined as:
\begin{equation}
    \ms \ = (w,i,j,r,d,\nnm{pred},\nnm{succ}, \adda{\nnm{lOptionSetProb}, }
                    \nnm{lChoiceProb}, \nnm{rRate})    \label{ministep}
\end{equation}
where
\begin{tabbing}
$w$ (`aspect') $\phantom{abcdefghij}$ \= = \= `network' or `behavior' (abbreviated to $N$ -- $B$ );\\
$i$ (`actor') \> = \> actor if $w$ = B, sending actor if $w$ = N;\\
$j$ (`actor') \> = \> meaningless 0 if $w$ = B, receiving actor if $w$ = N;\\
$r$ (`variable number') \> = \> number of variable ($1 \leq r \leq R_w$ );\\
$d$ (`difference') \> = \> meaningless 0 if $D$, amount of change if not $D$\\
   \> \>   (where $D $ depends on $w, r$);\\
   \> \>    currently we require $d \in \{-1, 0, 1\}$, but at some \\
   \> \>    later moment exceptions to this rule may be allowed;\\
\nnm{pred} (`predecessor') \> = \> pointer to preceding ministep;\\
\nnm{succ} (`successor') \> = \> pointer to next (succeeding) ministep;\\
\adda{
\nnm{lOptionSetProb} (`log OptionSet probability')} \\
\> = \> \adda{log probability of making a ministep of this OptionSet,}\\
    \> \>  \adda{ where the OptionSet is defined below as $(w,i,r)$;}\\
\adda{
\nnm{lChoiceProb}     (`log choice probability')} \\
\> = \> \adda{ log probability of making a ministep of this choice,}\\
    \> \>  \adda{ where the choice is $(j,d)$, given that $(w,i,r)$;}\\
\nnm{rRate} (`reciprocal rate') \> = \> reciprocal of aggregate (summed) rate function \\
   \> \>      immediately before this ministep.
\end{tabbing}
To indicate the components/fields of a ministep we use the notation
$\ms.w, \ms.i$, etc.\\
\addc{The precise definitions of \nnm{lOptionSetProb}, \nnm{lChoiceProb},
and \nnm{rRate} are
given below in the specification of function \textit{StepProb }.}
\addca{The values $(w,i,j,r,d)$ may also be called the coordinates of the ministep.}

In Siena 3, $d$, \nnm{pred} and \nnm{succ} are called \textit{difh},
\textit{predh} and \textit{such};
and the program uses rates instead of reciprocal rates, but this was
implemented only very incompletely anyway.

The ministep is practically the same as what is called a microstep
in Section~\ref{S_sim}, but used here in a more
precise way. The difference in terminology is not intentional.
\\
The log probability and reciprocal rate depend not only on the
chain and the ministep, but also on the initial state $y$ or $y(t_{m-1})$
valid before the start of the chain; and on the model specification and
model parameters.
Their computation is done by procedure \textit{StepProb} described in
Section~\ref{S_prob}.

The interpretation is that a ministep operates on (i.e., changes)
outcome $y$ as implemented by the following function.
\begin{enumerate}
\item \textit{ChangeStep}$(y, \ms)$  transforms state $y$ as follows,\\
      where $ \ms \ = (w,i,j,r,d, ...)$;\\
      \addaa{This can also be denoted  \textit{ChangeStep}$(y, (w,i,j,r,d))$;}
      \begin{itemize}
      \item if $w$ = N and $i \neq j$, change $N^{(r)}_{ij}$ to $1 - N^{(r)}_{ij}$;
      \item if $w$ = B,  change $B^{(r)}_{i}$ to $B^{(r)}_{i} + d$.
      \end{itemize}
\end{enumerate}
\addca{The inverse operation is very simple:\\
in general, \textit{Inverse$\big($ChangeStep}$(y, (w,i,j,r,d))\big)$ =
             \textit{ChangeStep}$(y, (w,i,j,r,-d))$;\\
in particular, \textit{Inverse$\big($ChangeStep}$(y, (N,i,j,r,0))\big)$ =
                 \textit{ChangeStep}$(y, (N,i,j,r,0))$.}\\
The definition of \textit{ChangeStep}
implies that only those values of $d$ are allowed that do not lead
$B^{(r)}_{i}$ outside of the bounds of this variable.
I think this should not always be checked, except perhaps in a test phase,
but the creation and transformation of ministeps should contain
checks that ensure this condition.

\textit{ChangeStep} is called a lot, so it will be helpful
to implement it in a very fast way.

The \textit{\adda{Option}} of a ministep is defined as
(Network, $i,\, j,\, r$) for Network ministeps,
and as (Behavior, $i, \, r$) for Behavior ministeps.
This defines the variable changing by the ministep.
\adda{Recall that $j$ is meaningless for $w=B$.
In general, we can define the options of a ministep as $(w,i,j,r)$.}
\adda{\textit{Option} is called `kind' in Siena 3.}

The \adda{\textit{OptionSet}} of a ministep is defined as
$(w,i,r)$.
This defines the choice situation / option set for the ministep.


This definition also means that network ministeps with $i = j$ and behavior ministeps
with $d = 0$ have no effect on the outcome. Such ministeps are permitted,
and are called \emph{diagonal} ministeps.

A \emph{chain} from observation $y(t_{m-1})$
to observation $y(t_m)$ is a sequence of ministeps $\ms_1 , \ms_2 , ..., \ms_T$
which, when applied sequentially, transform $y(t_{m-1})$ into $y(t_{m})$.
We then say that the chain \emph{connects}  $y(t_{m-1})$ to $y(t_m)$.
For $M$ observations, therefore, we require a sequence of $M-1$ chains.

For a sequence of ministeps $\ms_1 , \ms_2 , ..., \ms_T$
we define the following functions.
For disregarded values of the ministep (depending on whether it is a N or B
ministep) we use the wildcard symbol *.
\begin{enumerate}[resume]
\item \textit{NetworkNumber}$(i,j,N,r,S)  =  \,
             \sharp\{ s \mid 1 \leq s \leq S, \textit{\adda{Option}}(\ms_s) = (N,i,j,r)  \} $.\\
     \addc{In words, this is the number of ministeps, up to and including ministep number $S$,
      which imply a change in tie variable $(i,j)$ for Network $r$.}
\item \textit{BehSum}$(i,B,r,S)  = \,   \Sigma_{s=1}^S \, (\ms_s.d_s)\,
                      I\{ \textit{\adda{Option}}(\ms_s) = (B,i,*,r) \} $\\
      where $I$ is the indicator function defined as $I(A) = 1$ if $A$ is \nnm{True}
      and 0 if $A$ is \nnm{False}.\\
     \addc{In words, this is the partial sum, ending at ministep number $S$,
      of the $d$ (difference) values of all ministeps by actor $i$ for Behavior $r$.}
\end{enumerate}

If the outcomes $y(t_{m-1})$ and $y(t_m)$ are completely defined
(without any missing data) then the requirements on this sequence are as follows.

\emph{Networks} : (since changes are defined as toggles)\\
For all $i, j, r$ with $1 \leq r \leq R_N$, $i \neq j$,
\begin{subequations}
\begin{equation}
N^{(r)}_{ij}(t_{m-1}) = N^{(r)}_{ij}(t_m) \ \Leftrightarrow \
       \text{\textit{NetworkNumber}}(i,j,N,r,T) \text{ is even };
 \end{equation}
\addc{which is equivalent to
\begin{equation}
N^{(r)}_{ij}(t_{m-1}) = 1 - N^{(r)}_{ij}(t_m) \ \Leftrightarrow \
       \text{\textit{NetworkNumber}}(i,j,N,r,T) \text{ is odd }.
 \end{equation}
}
\label{netrec}
\end{subequations}
\emph{Behavior} : (since changes are defined as increments)\\
For all $i, r$ with $1 \leq r \leq R_B$,
\begin{align}
B^{(r)}_{i}(t_{m-1})  {} &
+  \! \text{ \textit{BehSum}}(i,B,r,T) = B^{(r)}_{i}(t_m)  \label{behrec1} \\
\text{and} &  \nonumber \\
1 \leq B^{(r)}_{i}(t_{m-1})   {} &+  \! \text{ \textit{BehSum}}(i,B,r,S)
       \leq  \maxr   \text{ for all } 1 \leq S < T.   \label{behrec2}
\end{align}


\addaa{For each option there is a missingness indicator
\[
\textit{mis}(w,i,j,r)
\]
which is \nnm{True} or \nnm{False}, depending on whether in at least one of the two
end points of the chain, $y(t_{m-1})$ or $y(t_m)$, the corresponding variable
$N^{(r)}_{ij}$ or $B^{(r)}_{i}$ is missing. The use of these indicators is that
restrictions (\ref{netrec}) and  (\ref{behrec1}) are not required for the missing data.
For missing behavior data, however,
condition (\ref{behrec2}) still is required to ensure that the variable remains within range.
\begin{enumerate}[resume]
\item The number of Network options with missing values is defined as\\
      \[
        \textit{NumMisNet} =
     \sum_{i=1}^n \sum_{\stackrel{\scriptstyle j = 1}{\scriptstyle j \neq i}}^n
                                        I\{\textit{mis}(N,i,j,r) \} \ ,
      \]
      where $I\{\nnm{True}\} = 1$ and  $I\{\nnm{False}\} = 0$ .
\item The number of Behavior options with missing values is defined as\\
      \[
        \textit{NumMisBeh} =
               \sum_{i=1}^n    I\{\textit{mis}(B,i,*,r) \}  \ .
      \]
\end{enumerate}
}

It must be noted that missing data are not handled in the best possible way
in the likelihood-based procedures in Siena 3, and this
is done differently here.
Therefore, results for likelihood-based procedures in Siena 3 and RSiena
will be different.

Classes of functions are required which do the following:
\begin{enumerate}
\item Create and transform chains.
\item Calculate probabilities related to chains.
\item Store chains: read from and write to file.
\end{enumerate}

\section{Likelihood-based calculations:
         \protect\newline Create and transform chains}

\subsection{Data types}
\begin{enumerate}
\item  Ministep. See (\ref{ministep}).\\
       The \emph{\adda{Option}} of a ministep is (Network, $i,\, j,\, r$) for Network ministeps,
       and (Behavior, $i, \, r$) for Behavior ministeps.
       Note that this defines the variable that is being changed by the ministep.\\
       Note that in Siena 3 this is called the \emph{rKind} (\emph{restricted Kind}),
       and the `Kind' there also includes the value $d$.
       A ministep \ms \ is \emph{diagonal} if it is of \adda{Option} (Network, $i,\, j,\, r$)
       with $i = j$, or of \adda{Option} (Behavior, $i, \, r$) with $\ms.d = 0$.
\item Chain. This is a sequence of ministeps connected by the pointers
      \nnm{pred} and \nnm{succ},
      with a \nnm{first} and \nnm{last} element.
      The \nnm{first} and \nnm{last} elements are dummies, i.e., they are of a special Option
      and OptionSet
      $(\textit{Extreme}, 0, 0, 0)$ which implies no change:\\
      $\textit{ChangeStep}(y,\nnm{first}) = \textit{ChangeStep}(y,\nnm{last}) = y$.\\
      Section \ref{S_struct} on structurally fixed values gives an exception to this rule,
      however, for the \nnm{last} element.\\
      The \nnm{first} and \nnm{last} elements are used just to have handles
      for the start and end of the chain.
      Of course, \nnm{first}.\nnm{pred} =\nnm{last}.\nnm{succ} = nil.\\
      \addaa{Or perhaps it is more convenient to define \nnm{first}.\nnm{pred} = \nnm{first}
      and \nnm{last}.\nnm{succ} = \nnm{last}.}\\
      The \nnm{first} and \nnm{last} ministeps are not \textit{diagonal}.\\
      \\
      The connection implies that if $\ms_a$ and $\ms_b$ are two ministeps
      with \\
      $\ms_b.\nnm{pred} = \ms_a$, then $\ms_a.\nnm{succ} = \ms_b$.
      The \nnm{first} element has a nil \nnm{pred}, and the \nnm{last} element
      has a nil \nnm{succ}.
\end{enumerate}

\subsection{Functions}

In Siena 3, I have defined the ministep type with various other pointers and attributes
useful for navigating in the chain.
These are functions of the chain, and including them in the ministep type
is for the purpose of computational efficiency.
These functions are the following. They are defined as functions of the ministep
in a given chain.
\addaa{They are not important in themselves, but might be useful for updating
the variables relating to CCPs (see below). }

\begin{enumerate}
\item \textit{\adda{nrOption}}. The total number of ministeps in the chain of the same \adda{Option}.
\item \nnm{predOption}.
     Pointer to the last earlier (`preceding') ministep of the same \adda{Option},
     \addaa{and \nnm{first} if such a ministep does not exist.}\\
                         Called \textit{predhrkind} in Siena 3.
\item \nnm{succOption}.
    Pointer to the first later (`succeeding') ministep of the same \adda{Option}
    \addaa{and \nnm{last} if such a ministep does not exist.}\\
                         Called \textit{suchrkind} in Siena 3.
\end{enumerate}

The chain defines an order relation (binary function) of ministeps
in an obvious way, representing the time order in which the ministeps take place.
Where there is a possibility of confusion,
this is called the \emph{chain order}.

\begin{enumerate}
\item $\ms_a < \ms_b$ if there is a sequence $\ms_1, ..., \ms_K$ ($K \geq 0$) of ministeps
      such that $\ms_a.\nnm{succ} = \ms_1, \ms_b.\nnm{pred}= \ms_K$,
      and $\ms_k.\nnm{succ} = \ms_{k+1}$
      for all $k$, $1 \leq k \leq K-1$.
\addaa{
\item For $\ms_a < \ms_b$, we denote by $\textit{length}(\ms_a, \ms_b)$
      the number of ministeps from $\ms_a$ to $\ms_b$,
      including these end points,
      which is the value $K+2$ according to the preceding definition.
\item For $\ms_a < \ms_b$, we denote by $[\ms_a, \ms_b]$
      the interval of ministeps from $\ms_a$ to $\ms_b$,
       i.e., the sequence
      $\ms_a, \ms_1, ..., \ms_K, \ms_b$
      of the definition in (1).\\
      The interval $[\ms_a, \ms_a]$ is defined as the ministep $\ms_a$.
\item In the obvious way, we define recursively
      \begin{align}
        \ms.\nnm{succ}^0 &=  \ms  \\
        \ms.\nnm{succ}^{k+1} &=  \big(\ms.\nnm{succ}^k\big).\nnm{succ} \ .
      \end{align}
      Thus, \textit{length}$(\ms, \ms.\nnm{succ}^{k}) = k+1$ for $k \geq 0$.
}
\end{enumerate}

An ordered pair of ministeps
\addaa{$(\ms_a, \ms_b)$ with $\ms_a < \ms_b$}
is called a CCP (\emph{consecutive canceling pair})
if they are of the same \adda{Option}, not \textit{diagonal},
cancel each other's effect (see next sentence),
have no other ministep of the same \adda{Option} in between,
and there is at least one ministep of a
different \adda{Option} in between
(i.e., \textit{length}$(\ms_a, \ms_b) \geq 3$), and \emph{neither ministep is
  missing at start or end of the interval}.
Two non-diagonal ministeps $\ms_a$ and $\ms_b$ cancel each other's effect
if the following hold: either they are both of the same \adda{Option}
(Network, $i,\, j,\, r$) (then they cancel because they toggle the same
binary variable), or both are of the same \adda{Option} (Behavior, $i, \, r$)
and $\ms_a.d + \ms_b.d = 0$.

For example, if the chain contains a total of three ministeps
$\ms_a, \ms_b, \ms_c$
of the \adda{Option} (Network, 1, 2, 1), with $\ms_a < \ms_b < \ms_c$,
and none of which are each other's immediate predecessors/successors,
then $(\ms_a, \ms_b)$ and $(\ms_b, \ms_c)$ are CCP's.

\addca{The reason for this definition is to use it later in
defining changes in the chain,
such that each change has a unique (i.e., exactly one) inverse operation.
Adding a CCP to a chain will not lead to violations of (\ref{netrec}, \ref{behrec1}),
although it may lead to violation of (\ref{behrec2}), which therefore
must be separately checked.
There is a one-to-one correspondence between the set of all operations of
dropping a CCP from a chain, and the set of all operations of
adding the two elements of a CCP to the chain
as immediate predecessors of two ministeps $\ms_a < \ms_b$
for which $\ms_a$ is not the \nnm{first} element, and there
is no ministep $\ms_c$ with $\ms_a < \ms_c < \ms_b$ of the same \textit{Option}
as $\ms_a$ and $\ms_b$, and which do not lead to violation of  (\ref{behrec2}).
All this is elaborated later, and given here only as a motivation for this definition.
}

Basic functions of the chain are the following.\\
\adda{Since these are frequently used, they should be stored
and updated when the chain is changed; this is done in the \textit{Update} function.}\\
\adda{The condition \textit{SimpleRates} is defined below.}
\begin{enumerate}
%\item \textit{Valid}, a boolean indicating whether the chain connects
%      $y(t_{m-1})$ to $y(t_m)$, and the log probabilities and rates
%      have been calculated.
\item \textit{TotNumber}, the number of ministeps of the chain, excluding the \nnm{first};\\
      so an empty chain consisting only of the \nnm{first} and \nnm{last} ministeps
      with \\
      \nnm{first}.\nnm{succ} = \nnm{last} has $\textit{TotNumber} = 1$.
\item \textit{DiagNumber}, the number of \textit{diagonal} ministeps of the chain.
\item \textit{CCPNumber}, the number of CCP's in the chain.
\addaa{
\item \textit{ChainNumMisNet}, the number of ministeps of
     some  \textit{Option} $(N,i,j,r)$ for which \textit{mis}$(N,i,j,r)$ is true.
\item \textit{ChainNumMisBeh}, the number of ministeps of
     some  \textit{Option} $(B,i,*,r)$ for which \textit{mis}$(B,i,*,r)$ is true.
}
\addab{
\item \textit{ChainNumInitMis}, the number of
     \textit{Options} $(w,i,j,r)$ for which
     the initial value $N_{ij}^{(r)}(t_{m-1})$
     or $B_{i}^{(r)}(t_{m-1})$ is a missing value.
}
\adda{
\item Used only if (not \textit{SimpleRates}):\\
      \textit{mu} = $\sum_{s=2}^{T-1} \ms_s.\nnm{rRate}$, where $T = $ \textit{TotNumber}.\\
      Note that the sum is over all ministeps in the chain except the two extremes
      (\nnm{first} and \nnm{last}).
\item Used only if (not \textit{SimpleRates}):\\
      \textit{sigma2} = $\sum_{s=2}^{T-1} (\ms_s.\nnm{rRate})^2$.
}
\end{enumerate}
%Note that \textit{mu} and \textit{sigma2} are used only if not \textit{SimpleRates}.
\adda{The chain can be denoted by
\[
  \ms_0, \ms_1, \ms_2, \ldots, \ms_\textit{TotNumber} \ ,
\]
in which $\ms_0$ and $\ms_\textit{TotNumber}$ are the extreme elements.
}


The following functions may be defined on the \adda{Option}s of ministeps:
\addaa{They are not important in themselves, but might be useful for updating
the variables relating to CCPs. }
\begin{enumerate}
\item \textit{NumberOption}$(w,i,j,r)$, the number of ministeps of the chain
      of \adda{Option} $(w,i,j,r)$.\\
      For the network ministeps this will be a sparse matrix, in the sense that for large networks
      most of the values \textit{NumberOption}$(N,i,j,r)$ will be 0.\\
      This is called \textit{NumberrKind} in Siena 3.
\item \textit{Multiple}$(w,i,j,r)$ = \nnm{True} if \textit{NumberOption}$(w,i,j,r) \geq 2$
      and \nnm{False} if \textit{NumberOption}$(w,i,j,r) \leq 1$.\\
      We say that an \adda{Option} can be multiple or non-multiple.
\end{enumerate}


\subsection{Operations}

Basic operations on chains are the following.
Of course they have to guarantee the consistency of all the
derived variables and pointers.
The consistency of the log probabilities and reciprocal rates
is treated separately (when it is needed), see Section~\ref{S_prob}.
\begin{enumerate}
\item \emph{Create} an empty chain consisting only
       of the elements (\nnm{first}, \nnm{last}).
%      with \textit{Valid} = \nnm{False}.
%\item \emph{Validate}, check that the chain connects $y(t_{m-1})$ to $y(t_m)$
%      and if so, calculate the log probabilities and rates
%      and set \textit{Valid} to \nnm{True}.
\item \emph{InsertBefore}$(\ms, w,i,j,d,r)$ :\\
      for a currently existing ministep $\ms \neq $  \nnm{first},
      insert the ministep
      with values $(w,i,j,d,r)$ between \ms.\nnm{pred} and \ms.
\item \emph{Delete}  a ministep, and link up its predecessor and successor.
\item \emph{RandomElement}      :\\
      draw a random ministep from the chain,
      excluding the \nnm{first} element;
      note that the probabilities are 1/\textit{TotNumber}.
\item \emph{RandomElementNotAfter(\ms)}      :\\
      draw a random ministep from the chain,
      among the elements after the \nnm{first} element up to and including
      ministep \ms;
\item \emph{Connect} : construct randomly a chain that connects
      two outcomes $y(t_{m-1})$ and $y(t_m)$ .\\
      This is done by repeatedly applying \emph{RandomElement}
      and \emph{InsertBefore}.
      If there are no \nnm{higher}, \nnm{disjoint}, or
      \nnm{atleastone} relations between the various different networks
      (in particular if $R_N = 1$), the following procedure can be used.
      \begin{tabbing}
      For all \= $R_N$ networks:\\
         \> For all \= $(i,j), i \neq j$:\\
          \> \> if $N^{(r)}_{ij}(t_{m-1}) \neq N^{(r)}_{ij}(t_m)$, \\
            \> \> then \textit{InsertBefore}
                               (\textit{RandomElement}, $N,i,j,0,r$);\\
      For all \= $R_B$ behaviors: \\
         \> For all \= $i$:\\
          \> \> Define $D = B^{(r)}_{i}(t_{m}) - B^{(r)}_{i}(t_{m-1})$;\\
            \> \> if $D > 0$, then $D$ times
                \textit{InsertBefore}(\textit{RandomElement}, $B,i,0,1,r$);\\
            \> \> if $D < 0$, then $-D$ times
               \textit{InsertBefore}(\textit{RandomElement}, $B,i,0,-1,r$).
       \end{tabbing}
       However, if there are any \nnm{higher}, \nnm{disjoint}, or
       \nnm{atleastone} relations in force, then inserting the changes
       at randomly chosen places can lead to violations of these
       requirements. Inserted network changes that might lead to
       such violations therefore must take account of
       earlier inserted changes for the same $(i,j)$.
       In such cases the connections for the networks can be made
       by the following procedure.
       Here, `incompatible' refers to the incompatibility
       arising from the \nnm{higher}, \nnm{disjoint}, or \nnm{atleastone}
       requirements.
      \begin{tabbing}
      For $ r $ \= running from 1 to $R_N$ :\\
         \> For all \= $(i,j), i \neq j$:\\
          \> \> if $N^{(r)}_{ij}(t_{m-1}) \neq N^{(r)}_{ij}(t_m)$,
                define by $\mathcal R$ the set of networks $r'$,
                       $1 \leq r' < r$, \\
          \> \> for which $ N^{(r)}_{ij}(t_m)$ is incompatible
                with $ N^{(r')}_{ij}(t_{m-1})$.\\
          \> \> (For these $r'$ it must hold that
                 $N^{(r')}_{ij}(t_{m-1}) \neq N^{(r')}_{ij}(t_m)$, \\
          \> \> because $ N^{(r)}_{ij}(t_m)$ must be compatible
                with $ N^{(r')}_{ij}(t_{m})$;\\
          \> \> therefore the following requires the change in option ($N,i,j,0,r$) to \\
          \> \> take place   before the changes in options ($N,i,j,0,r'$) for $r' \in \mathcal R$.)\\
          \> \> Let \ms \ be the first ministep among all inserted ministeps \\
          \> \> of option ($N, i,\, j,\, r'$) for $r' \in \mathcal R$;\\
            \> \> then \textit{InsertBefore}
                          (\textit{RandomElementNotAfter(\ms)}, $N,i,j,0,r$).
      \end{tabbing}
\end{enumerate}

\adda{
The \emph{Connect} procedure yields a chain connecting
the two outcomes $y(t_{m-1})$ and $y(t_m)$ which has minimum length.
}



The following random draws are not always possible, since the sets
from which a random element is drawn, may be empty.
(It may be noted, however, that usually the set will be non-empty.)
\addaa{ \emph{RandomMultipleOption}  and  \emph{RandomCCPOption} are dropped!
This will be simpler and probably at least as efficient as Siena 3.  }
\begin{enumerate}[resume]
\item \emph{RandomDiagonal}      :\\
      draw a random \textit{diagonal} ministep from the chain;
      note that the probabilities are 1/\textit{DiagNumber}.
\addaa{
\item \emph{RandomCCP}      :\\
      draw a random CCP $(\ms_a, \ms_b)$ from the chain;
      note that the probabilities are 1/\textit{CCPNumber}.
\item \emph{RandomMisNet}      :\\
      draw a random ministep $\ms_a$ from the chain
      of which the \textit{Option} $(w,i,j,r)$ satisfies
      $w=N$ and \textit{mis}$(N,i,j,r)$.\\
      Note that the probabilities are 1/\textit{ChainNumMisNet}.
\item \emph{RandomMisBeh}      :\\
      draw a random ministep $\ms_a$ from the chain
      of which the \textit{Option} $(w,i,j,r)$ satisfies
      $w=B$ and \textit{mis}$(B,i,*,r)$.\\
      Note that the probabilities are 1/\textit{ChainNumMisBeh}.
}
\addab{
\item \emph{RandomInitMis}      :\\
      draw a random \textit{Option} $(w,i,j,r)$
      for which
      the initial value $N_{ij}^{(r)}(t_{m-1})$
      or $B_{i}^{(r)}(t_{m-1})$ is a missing value.\\
      Note that the probabilities are 1/\textit{ChainNumInitMis}.
}
\end{enumerate}


\newpage
\section{Likelihood-based calculations:
     \protect\newline Calculate probabilities related to chains}
\label{S_prob}

\adda{An important special case is the case of state-constant rate functions, i.e., rate functions
$\lambda^W(r,i,y)$ depending only on $W$, $r$, and $i$ but not on $y$,
and therefore not changing as a consequence of the simulations.
This is important also because the majority of users will use
state-constant rate functions.
In the case of state-constant rate functions, everything related to $\lambda$ needs to be
calculated only when parameters are changed.\\
Denote this by the Boolean \textit{ConstantRates}.
}

\adda{We define a special Boolean condition \textit{SimpleRates}.
The default is to let \\
\textit{SimpleRates} = \textit{ConstantRates}, but this
may be changed by the user (not in the gui).
In practice it will be changed only for the purposes of algorithm comparison.
}

Ministeps are interpreted as changes in the chain (procedure \textit{ChangeStep}).
These changes are made with certain probabilities, and the
rate of change has a certain value when the ministep is going to be made.
The probabilities and rates depend on the state immediately before the ministep;
this depends in turn on the state at the start of the chain, and the
sequence of ministeps before the current ministep.
For a ministep \ms \ in the chain with a given initial state $y$
(say, $y = y(t_{m-1})$), the state obtaining before \ms \
can be defined recursively as follows (where \textit{ChangeStep} is treated
as a function with states as outcomes).
\begin{enumerate}
\item $\textit{StateBefore}(\nnm{first}) = y$.
\item $\textit{StateBefore}(\ms.\nnm{succ}) = \textit{ChangeStep}(\textit{StateBefore}(\ms),\ms)$.
\end{enumerate}
Thus, the state before \ms \ is obtained by repeatedly applying \textit{ChangeStep}:
\addaa{\[
\textit{StateBefore}(\ms) =
      \textit{ChangeStep}^{\displaystyle ( \textit{\normalsize length}(\nnm{first},\ms)\displaystyle -1)}(y_{\text{initial}})
\]}
(where the superscript indeed means raising the operator to a power,
i.e., executing it repeatedly).

The log-probabilities and rates are defined by the following procedure.
For the mathematical symbols $\pi_j, \pi_v, \lambda(...), J_m$, see
the notation of Section~\ref{S_sim} where the microstep/ministep
is treated for the purpose of simulation of the model,
and where the same ingredients are used.

\addaa{The functions \textit{StepProb1} to \textit{StepProb3}
are often used one after the other,
and utilizing this will lead to efficiency gains.}


\begin{enumerate}
\item \addaa{ \textit{StepProb1}$(\textsf{input}\ y, w,i,r; \textsf{output}\ rr, lospr)$;\\
      this calculates the aggregate rate for current state $y$
      and returns $rr$ as its reciprocal:
      \[
      rr \leftarrow 1/\lambda^+(+,+,y) \ .
      \]
      It calculates the probability of getting the \textit{OptionSet} $(w,i,r)$
      and returns it as $lospr$:
      \[
      lospr \leftarrow    \log\left(\frac{\lambda^w(r,i,y)}{\lambda^+(+,+,y)}\right)
      \]
      If \textit{ConstantRates} these are trivial look-up operations
      (the values then depend only on the parameters included in the functions $\lambda$,
      not on $y$).}

      \addaa{Note that in some cases this must be done for all $(w,i,r)$, sometimes
      only for one value of $(w,i,r)$. When it is done for all cases,
      this is denoted\\
       \textit{StepProb1}$(y, *; rr, lospr*)$, \\
       and then $lospr*$
      is an output array of suitable dimensions.}
\item \addaa{\textit{StepProb2}$(\textsf{input}\ y, w,i,j,r,d; \textsf{output}\ rr, lospr, lcpr)$;\\
      After doing the same as in \textit{StepProb1}$(y, w, i, r; rr, lospr)$,\\
      this calculates for the current state $y$, conditional on the
      assumption that a ministep of \adda{OptionSet} $(w,i,r)$ is made,
      the log of the conditional probability that this will be the ministep
      with value $(w,i,j,r,d)$;\\
      for $w=N$ (network) this is $lcpr \leftarrow \log(\pi_j)$ using $\pi_j$ defined in (\ref{pij});\\
      for $w=B$ (behavior) this is $lcpr \leftarrow \log(\pi_d)$
      using $\pi_v$ defined in (\ref{piv}).\\
      Output variables $ rr, lospr, lcpr$ are the \nnm{rRate}, \nnm{lOptionSetProb}, and\\
      \nnm{lChoiceProb} of this ministep.}

      \addaa{Note that for a given $(w,i,r)$, in some cases this must be done for all $j$ (if $w=N$)
      or $d$ (if $w=B$), in other cases only for one value of $j$ or $d$.
      If it is done for all $j$ or $d$, the notation is\\
      \textit{StepProb2}$(y, w,i,*,r,*; rr, lospr, lcpr*)$\\
       and again $lcpr*$ is an array
      of suitable dimension.\\
      If the function is denoted \textit{StepProb2}$(y, \ms; rr, lospr, lcpr)$, this is the same as
      \textit{StepProb2}$(y, w,i,j,r,d; rr, lospr, lcpr)$ with the coordinates $w,i,j,r,d$
      for the ministep filled in.}
\item \textit{StepProb3}$(\textsf{input}\ y, w,i,j,r,d; \textsf{output}\  rr, sc)$;\\
      This calculates $rr$ as in
      \textit{StepProb1}$(y, w,i,r; rr, lospr)$;\\
      in addition, it calculates the contribution of this ministep to the
      score function, which in Section~\ref{S_sim}
      is what is added (as described there for three occasions) to $J_m$,
      and stores this in the $p$-vector $sc$.
      \addcd{Output $lospr$ and $lcpr$ dropped from \textit{StepProb3} because not needed.}
\end{enumerate}

While operating on the chain, it is important to keep the
log probabilities and rates up to date. This requires the following procedure.
It updates only part of the chain, and is applied when it is known
that the earlier and later parts do not need to be updated.

\begin{enumerate}[resume]
\item \adda{
       \textit{Update}($\ms$) for a ministep \ms:\\
       Update \textit{TotNumber},  \textit{DiagNumber}, \textit{CCPNumber}, \textit{ChainNumMisNet},\\
       \textit{ChainNumMisBeh}.\\
       Use \textit{StepProb2} to update the log probabilities and rates for ministep \ms .\\
       If not \textit{SimpleRates}:
       Update the values of \textit{mu} and \textit{sigma2}.
       }
\item \textit{Update}($\ms_a, \ms_b$) for ministeps $\ms_a < \ms_b$:\\
       Update \textit{TotNumber},  \textit{DiagNumber}, \textit{CCPNumber}, \textit{ChainNumMisNet},\\
       \textit{ChainNumMisBeh}.\\
       Use \textit{StepProb2} to update the log probabilities and rates for all ministeps
       from $\ms_a$ to $\ms_b$ (i.e., all ministeps between these two in the chain order,
       including these two ministeps themselves).\\
       If not \textit{SimpleRates}:
       Update the values of \textit{mu} and \textit{sigma2}.

       This is called \textit{UpdateRateslprobs} in Siena 3.
\end{enumerate}

\adda{In many cases, \textit{StepProb*} has been called just before \textit{Update},
so that the log probabilities and rates are known already and
the expensive procedure \textit{StepProb*} does not have to be called again.}

\addaa{Depending on the implementations, the auxiliary variables for working with CCPs
must also be suitably updated.}

\section{Likelihood-based calculations:
         \protect\newline Metropolis-Hastings steps}

A basic required functionality is to simulate from the distribution
of chains that connect $y(t_{m-1})$ to $y(t_m)$,
given the model specification and model parameters.
This is done by repeated application of Metropolis Hastings steps.
These are of the following types, with associated probabilities.
The probabilities are constants with default values that can be changed
by the very experienced user.
\begin{enumerate}
\item \textit{MH\_InsertDiag}\\
      (called \textit{MH\_TryInsertDiag} in Siena 3), associated probability \textit{pridg}.
\item \textit{MH\_CancelDiag}\\
      (called \textit{MH\_TryCancelDiag} in Siena 3), associated probability \textit{prcdg}.
\item \textit{MH\_Permute}, associated probability \textit{prper}.
\addaa{
\item \textit{MH\_InsPermute}, associated probability \textit{pripr}.
\item \textit{MH\_DelPermute}, associated probability \textit{prdpr}.
}
\addcf{
\item \textit{MH\_InsMis}, associated probability \textit{prims}.
\item \textit{MH\_DelMis}, associated probability \textit{prdms}.
\item \textit{MH\_RandomMis} dropped version May 30.
}
\end{enumerate}


\addaa{
Function \textit{MH\_DelPermute} also uses internal probabilities \textit{prmin}
and \textit{prmib}.
If the number of actors always is $n$, and all networks are one-mode,
these could have the default values
\begin{align}
  \textit{prmin} & \, = \frac{\textit{NumMisNet}}{\textit{NumMisNet} + R_N\, n(n-1)} \\[0.5em]
  \textit{prmib} & \, = \frac{\textit{NumMisBeh}}{\textit{NumMisBeh} + R_B\, n}
\end{align}
In the general case, $R_N\, n(n-1)$ would be replaced by the total number of
dyadic tie variables, and $R_B\, n$ by the total number of individual
behaviour variables.
(The reasoning is as follows.
In procedure \textit{MH\_DelPermute} this probability serves to balance
changes in missings with changes in CCPs, and the
total available number of variables/options
for which there could be CCPs is $R_N\, n(n-1)$ and $R_B \,n$, respectively.)}

\newpage
\addaa{The definitions of these procedures have three parts:
\begin{itemize}
\item[A.] Choose the proposal.
\item[B.] Calculate the probability (usually \textit{pra}) for this proposal.
\item[C.] With probability \textit{pra} carry it out in practice.
\end{itemize}
The earlier (version before February 1) description only contained parts B and C.
What were the input parameters for those earlier versions now are calculated in part A,
and therefore now only have an internal role.
The only remaining input parameter is $c_0$, the maximal order of the permutations.
}

\addaa{
Functions \textit{MH\_InsertDiag} and \textit{MH\_CancelDiag} are each other's inverses.
Similarly, \textit{MH\_InsPermute} and \textit{MH\_DelPermute} are each other's inverses.
Function \textit{MH\_Permute} is the inverse of another \textit{MH\_Permute},
for a suitable other permutation.
}
\addcf{Functions \textit{MH\_InsMis} and \textit{MH\_DelMis}
are each other's inverses.
}


\addaa{
Function \textit{MH\_Permute} basically is part of the two functions
\textit{MH\_InsPermute} and \textit{MH\_DelPermute}. Including it in those functions
is done for computational efficiency (most of the calculations have to be done anyway).
}

The variable below called \textit{KappaFactor} denotes the factor with which the
variable called $\kappa$ in
\citet{SnijdersEA10a}
(equation (16) for the  \textit{SimpleRates} case, and (21) else)
has to be multiplied if the \textit{MH} step is accepted.\\
\addcf{The correspondence for the   \textit{SimpleRates} case is as follows:\\
$n\alpha_1(t_2-t_1)$ in (16) is $\lambda^+(+,+,y) = 1/rr$ here;\\
$R$ in (16) is $\textit{TotNumber} -1$ here.}

As notation I use the \R convention of denoting an assignment statement by $a \leftarrow b$,
i.e., the variable $a$ gets the value $b$.

\subsection{Diagonal Insert}


The function
\textit{MH\_InsertDiag}$(%\textsf{input}\ \ms, w,i,r;
\textsf{output}\ \textit{pra, accept})$
is roughly described as follows.
The interpretation is that
the proposal is made to insert a \textit{diagonal} element of \adda{\textit{OptionSet}}
$(w,i,r)$ immediately before a ministep \ms;
according to a random decision with probability \textit{pra},
computed within the function,
this proposal is put into effect (yielding \textit{accept} = \nnm{True})
or not (yielding \textit{accept} = \nnm{False}).

\newpage
\addaa{
Part \emph{A}:
\begin{enumerate}
\item \ms\ $ \leftarrow $ \textit{RandomElement}
\item $y \leftarrow $ \textit{StateBefore}(\ms)
\item \textit{StepProb1}$(y, *; \textsf{output}\ rr, lospr*)$
\item With probabilities defined by $\exp(lospr*)$ choose \textit{OptionSet} $(w,i,r)$.\\
      If $i$ is not active, or $w=B$ and $B^{(r)}(i)$ is structurally fixed, then exit.
\end{enumerate}
Note: the proposal probability here is
\[
\frac{ \exp(lospr)}{\textit{TotNumber}}
\]
which is used below in the definition of \textit{pra}.
}

Part \emph{B}:
\begin{enumerate}[resume]
\item \textit{StepProb2}$(y, w,i,i,r,0 ;  rr, lospr, lcpr)$\\
      \addca{($lospr$ was already calculated above)}
\item If \textit{SimpleRates}, let
      \addcd{
      \begin{equation}
      \textit{KappaFactor} \leftarrow \frac{1}{rr \times \textit{TotNumber}}
      \end{equation}
      }\\
      else
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2} + rr^2} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu}  - rr \right)^2}{2\,(\textit{sigma2} + rr^2)}  \right) \ .
      \end{equation}
\item
    \begin{equation}
     \textit{pra}  \leftarrow \textit{KappaFactor} \ttimes \exp( lcpr) \ttimes
                                   \frac{\textit{TotNumber}\ttimes\textit{prcdg}}
                                    {(\textit{DiagNumber}+1) \ttimes\textit{pridg}}
    \end{equation}
    \addaa{Check the use of $lospr$ and $ lcpr$, which may be different
    from the earlier version.}\\
    Note: the proposal probability and the new chain probability both include
    factors $\exp(lospr)$ which cancel out.\\
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}

Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
      \begin{enumerate}
      \item \textit{InsertBefore}$(\ms,w,i,i,r,0)$\\
      \addaa{Earlier the order was wrong: it said $w,i,i,0,r$ instead of $w,i,i,r,0$. }
      \item \textit{Update}(\ms)
      \end{enumerate}
\end{enumerate}

\newpage
\subsection{Diagonal Delete}

The function
\textit{MH\_CancelDiag}$(%\textsf{input}\ \ms;
\textsf{output}\ \textit{pra, accept})$
is roughly described as follows.\\
The interpretation is that
the proposal is made to delete a \textit{diagonal} ministep \ms;
according to a random decision with probability \textit{pra},
computed within the function,
this proposal is put into effect (yielding \textit{accept} = \nnm{True})
or not (yielding \textit{accept} = \nnm{False}).

\addaa{
Part \emph{A}:
\begin{enumerate}
\item \ms\ $ \leftarrow $ \textit{RandomDiagonal}
\end{enumerate}
Note: the proposal probability here is
\[
\frac{ 1 }{\textit{DiagNumber}}
\]
which is used below in the definition of \textit{pra}.
}

Part \emph{B}:
\begin{enumerate}[resume]
\item $rr \leftarrow $ \ms.\nnm{rRate}
\item If \textit{SimpleRates}, let
      \addcd{
      \begin{equation}
      \textit{KappaFactor} \leftarrow  rr \,\times\, (\textit{TotNumber} - 1)
      \end{equation}
      }\\
      else
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2} - rr^2} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu}  + rr \right)^2}{2\,(\textit{sigma2} - rr^2)}  \right) \ .
      \end{equation}
\item
     \begin{align}
     \textit{pra}  \leftarrow \  & \textit{KappaFactor} \ttimes
                    \exp(-\, \ms.\nnm{lChoiceProb} )
                    \ttimes
              \frac{\textit{DiagNumber}\ttimes \textit{pridg}}
                                    {(\textit{TotNumber}-1)\ttimes\textit{prcdg}} \ .
    \end{align}
%        (The earlier formula was, erroneously,)
%     \begin{align}
%     \textit{pra}  \leftarrow \  & \textit{KappaFactor} \ttimes
%                    \exp(-\, \ms.\nnm{lChoiceProb} )
%                    \ttimes
%              \frac{\textit{DiagNumber}\ttimes \textit{pridg}}
%                                    {(\textit{TotNumber}+1)\ttimes\textit{prcdg}} \ .
%    \end{align}
\\
    Note: the proposal probability and the new chain probability both include
    factors $\exp(\ms.\nnm{lOptionSetProb})$ which cancel out.\\
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}
Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
      \begin{enumerate}
      \item  \textit{Delete}(\ms)
      \item  If (not \textit{SimpleRates}), let
           \begin{enumerate}
           \item \textit{mu} $\leftarrow \textit{mu} - rr$ \ ;
           \item \textit{sigma2} $\leftarrow \textit{sigma2} - rr^2 $ \ .
           \end{enumerate}
      \end{enumerate}
\end{enumerate}


\subsection{Permute}
\addaa{In this section, various formulations are changed but the main content is the same;
from the next section to Section \ref{S_struct}, the text is new.
This is not indicated by colour any more.}

A rough description of the function\\
\textit{MH\_Permute}$(\textsf{input}\ c_0; \textsf{output}\ \textit{pra, accept})$
is as follows.\\
The input parameter $c_0$ is a relatively small integer --
in Siena 3 it is determined adaptively with a maximum value of 40. \\
Within the function,
if $\ms_a.\nnm{succ}^{(c_0-1)} < \nnm{last}$
then $c = c_0$, else $c$ is truncated to $\textit{length}[\ms_a, \nnm{last}]-1$;
and $\ms_b = \ms_a.\nnm{succ}^{(c-1)}$.
Thus, $\ms_a$ and $\ms_b$ are two non-extreme ministeps with  $\ms_a < \ms_b$
and $c = \textit{length}[\ms_a, \ms_b] \leq c_0$.
Further, \textit{perm} is a permutation of the numbers
$1, 2, \ldots, c$.\\
The proposal made is to permute the $c$ ministeps in the interval $[\ms_a, \ms_b] $
by \textit{perm};
according to a random decision with probability \textit{pra},
computed within the function,
this proposal is put into effect (yielding \textit{accept} = \nnm{True})
or not (yielding \textit{accept} = \nnm{False}).


Part \emph{A}:
\begin{enumerate}
\item repeat $\ms_a \leftarrow $ \textit{RandomElement} until $\ms_a \neq \nnm{last}$.
\item $c \leftarrow \min \{c_0, \textit{length}(\ms_a, \nnm{last}) - 1\}$.\\
      If $c = 1$ then exit.
\item Let \textit{perm} be a random permutation of the numbers $1$ to $c$,\\
      and denote $\ms_b = \ms_a.\nnm{succ}^{(c-1)}$.
\item $y \leftarrow  \textit{StateBefore}(\ms_a)$
\item For all $r, 1 \leq r \leq R_B$, check (\ref{behrec2}) for the permuted chain. \\
      This condition needs to be checked
      here only for ministeps from $\ms_a$ to $\ms_b$.\\
\addce{ For any variable $(w,r)$ that is involved in a condition of the kind
      \nnm{higher}, \nnm{disjoint}, or \nnm{atleastone},  similarly check these conditions
      for the potential new chain.}\\
      If at least one of these conditions is not satisfied, exit.
\end{enumerate}
The inverse of the proposal is a proposal of exactly the same kind. The
proposal probability is
\[
   \frac1{(\textit{TotNumber}-1) \ttimes c!}
\]
but this needs not be used, since the proposal probability is the same as the
probability of the inverse proposal.

Part \emph{B}:
\begin{enumerate}[resume]
\item \[ \textit{sumlprob} \leftarrow \sum_{s=1}^{c}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{lChoiceProb} + \nnm{lOptionSetProb}) \ ;
      \]
      If (not \textit{SimpleRates}), then below we use
      the values \textit{mu} and \textit{sigma2}; \\
      these refer to the current chain.
      \[
      \textit{sumlprob\_new}  \leftarrow 0 \ ;
      \]
      \[
      \textit{mu\_new}  \leftarrow \textit{mu} -  \sum_{s=1}^{c}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate}) \ ;
      \]
      \[
      \textit{sigma2\_new}  \leftarrow \textit{sigma2} -  \sum_{s=1}^{c}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate})^2 \ ;
      \]
\item Note that still $y =  \textit{StateBefore}(\ms_a)$ as was assigned above.\\
      For $1 \leq s \leq c$ \ denote by $\textit{Coordinates}_s$ the values
      $(w,i,j,r,d)$ of $\ms_a.\nnm{succ}^{s-1}$.\\
      For $s$ running from 1 to $c$, do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \textit{Coordinates}_{\textit{perm}(s)};
                                            rr_s, lospr_s, lcpr_s)$;
      \item \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_s + lospr_s $ ;
      \item If (not \textit{SimpleRates}), then
        \begin{enumerate}
        \item  \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr_s $ ;
        \item  \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_s)^2  $ ;
        \end{enumerate}
      \item \textit{ChangeStep}$(y, \textit{Coordinates}_{\textit{perm}(s)})$;
      \end{enumerate}
\item If \textit{SimpleRates}, let
      \begin{equation}
      \textit{KappaFactor} \leftarrow 1
      \end{equation}
      else
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2\_new}} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu\_new} \right)^2}{2\ttimes\textit{sigma2\_new}}  \right) \ .
      \end{equation}
\item \begin{equation}
     \textit{pra}  \leftarrow \textit{KappaFactor} \ttimes \exp(\textit{sumlprob\_new} - \textit{sumlprob})
    \end{equation}
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}
Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then permute the chain from $\ms_a$ to $\ms_b$ by \textit{perm},\\
      and  \textit{Update}$(\ms_a, \ms_b)$.
\end{enumerate}

\subsection{Insert -- Permute}

The function
\textit{MH\_InsPermute}
$(\textsf{input}\ c_0; % \ms_a, \ms_b, c, \textit{perm}, w,i,j,r,d ;
   \textsf{output}\ \textit{misdat, pra, accept})$
is defined as follows.\\
The input parameter $c_0$ is a relatively small integer --
in Siena 3 it is determined adaptively with a maximum value of 40.

First a rough description is given.
\medskip

First a vector of coordinates $(w_0,i_0,j_0,r_0,d_0)$ is selected.
The output variable \textit{misdat} indicates whether option
$(w_0,i_0,j_0,r_0)$ is missing (see function \textit{mis} defined above).

In the regular case, where \textit{misdat} = \nnm{False},
the proposal is made to insert the non-diagonal coordinates  $(w_0,i_0,j_0,r_0,d_0)$ before
a random ministep $\ms_a$
and insert $(w_0,i_0,j_0,r_0,-d_0)$ before some ministep $\ms_b$
with $\ms_a < \ms_b$,
such that the two inserted ministeps will be a CCP.
This requires the following:\\
$\blacktriangleright \textbf{(C1)}\ \Big((w_0 = N) \Rightarrow i_0 \neq j_0\Big)$ and
     $\Big((w_0 = B) \Rightarrow d_0 \neq 0\Big)$;\\
%$\blacktriangleright $ there is at least one ministep between $\ms_a$ and $\ms_b$
%(i.e., $\textit{length}(\ms_a,\ms_b) \geq 3$);\\ omitted 10-03-10
%     \addcc{Condition omitted }\\
$\blacktriangleright \textbf{(C2)}\  $ there are no ministeps of  the type  $(w_0,i_0,j_0,r_0)$ in the
interval  $[\ms_a, \ms_b.\nnm{pred}]$.

\addce{
Since we must be calculating various probabilities anyway, we shall
use the opportunity also to propose permuting an interval $[\ms_a, \ms_e]$
of ministeps; the length of this interval is $c$.
However, we do not wish to risk creating extra CCPs by doing so,
as this would require more complicated counting
for the calculation of acceptance probabilities.
Therefore, first we have a provisional value of $c$;
if the interval of $c$ ministeps starting from $\ms_a$ contains
two ministeps of the same \textit{Option}, then $c$ is decreased
to a value such that it is certain that permuting the interval
of this length starting from $\ms_a$ will not affect the
number of CCPs.
This truncation of $c$ uses the auxiliary ministep $\ms_f$.
}\\
If $c \geq 2$,
\textit{perm} is a permutation of the numbers $ 1, 2, \ldots, c$,
and the proposal includes permuting the ministeps in the interval
$[\ms_a, \ms_a.\nnm{succ}^{(c-1)}]$ by \textit{perm}.

If  \textit{misdat} = \nnm{True} and \emph{the link is missing at the end of the
period}, the proposal is made to insert the non-diagonal ministep
$(w_0,i_0,j_0,r_0,d_0)$ before
a random ministep $\ms_a$, and permute the $c$ ministeps starting with $\ms_a$,
where again $c$ is $c_0$ truncated to the number of available places.

If  \textit{misdat} = \nnm{True} and \emph{the link is not missing at the end of
  the period} do nothing.

According to a  probability \textit{pra},
the proposal is put into effect (yielding \textit{accept} = \nnm{True})
or not (yielding \textit{accept} = \nnm{False}).

The steps taken in the function are as follows.

Part \emph{A}:
\begin{enumerate}
\item Repeat $\ms_a \leftarrow $ \textit{RandomElement} \ until
%       $\ms_a.\nnm{succ} < \nnm{last}$.  changed 10-03-10
       $\ms_a < \nnm{last}$.
       %\ \addcc{Changed }
\item $y \leftarrow  \textit{StateBefore}(\ms_a) $
\item \textit{StepProb1}$(y, *;  rr, lospr*)$ \\
       With probabilities defined by $\exp(lospr*)$ choose \textit{OptionSet} $(w_0,i_0,r_0)$,\\
       \addce{under the condition that  $(w_0,i_0,r_0) \neq
       \textit{OptionSet}(\ms_a)$.}\\
\emph{We just choose one and if same var and actor as $\ms_a$ we exit.
Not sure how to express it in your notation.
I set $pr_2$ to 1 below because of this}\\
      If $i_0$ is not active, exit.\\
      \addce{
      If \nnm{uponly} or \nnm{downonly} holds for variable $(w_0, r_0)$,
      then exit.\\
      $pr_2 \leftarrow \ 1 - \exp(lospr\! *\! (\textit{Option}(\ms_a))) \ . $\\
      By $lospr*(\textit{Option}(\ms_a))$ is denoted the element
      of the array $lospr*$ giving the log-probability
      of the \textit{OptionSet} of the ministep $\ms_a$.
      }
\item \textit{StepProb2}$( y, w_0,i_0,*,r_0,*;  rr, lospr, lcpr*)$ \\
      With probabilities defined by $\exp(lcpr*)$: \\
      if $w_0=N$ choose $j_0$ and let $d_0 = 0$,
      if $w_0=B$ choose $d_0$ and let $j_0=0$.\\
      If $(w_0,i_0,j_0,r_0,d_0)$ is diagonal then exit.\\
      If $w_0=N$ and $N^{(r_0)}(i_0,j_0)$ is structurally fixed,
      or $w_0=B$ and $B^{(r_0)}(i_0)$ is structurally fixed, then exit.\\
\addce{ If $w_0 = B$ and the chain after inserting $(w_0,i_0,j_0,r_0,d_0)$ before $\ms_a$
      would not satisfy (\ref{behrec2}) any more at $\ms_a$, then exit.}

      Denote the log-probability of the realized choice by $lcpr$;\\
      note that also $lospr$ now is the log-probability of the  option set choice
      realized in the preceding step, so it was already calculated earlier.
\item $\textit{misdat}  \leftarrow \textit{mis}(w_0,i_0,j_0,r_0)$

\item \begin{enumerate}
      \item If (not \textit{misdat}):
             \begin{enumerate}
              \item             Let $\ms_d$ be \\
                the first ministep in the chain after $\ms_a$
                of \textit{Option} $(w_0,i_0,j_0,r_0)$; \\
                or the \nnm{last} ministep
                if there is no ministep after $\ms_a$ of this \textit{Option}.\\
                This can be determined using the pointer \nnm{succOption}.
             \item    $\textit{ChoiceLength} \leftarrow \textit{length}(\ms_a, \ms_d ) - 1$
  %              \   \addcc{Changed }
                \\ \addcc{Condition (iii) omitted }
  %           \item             If $\textit{ChoiceLength} \leq 0$ then exit.
             \end{enumerate}
            (Note that not going beyond this ministep $\ms_d$,
            and $\ms_a$ having an \textit{OptionSet} different
            from $(w_0, i_0, r_0)$,
            implies that \textbf{(C2)} will be satisfied.)
      \item If \textit{misdat}:\\
        If ministep is not missing at the end of the period, exit. Otherwise,
        $\textit{ChoiceLength} \leftarrow 1 $
      \end{enumerate}
\item \begin{enumerate}
       \item  If (not \textit{misdat}):\\
%              Let $\ms_b$ a random ministep in the interval $[\ms_a.\nnm{succ}^2, \ms_d ]$.\\
               Let $\ms_b$ a random ministep in the interval $[\ms_a.\nnm{succ}, \ms_d ]$.
 %              \  \addcc{Changed } \\
              Note that the number of choices here is \textit{ChoiceLength}.
       \item  If \textit{misdat}:\\
              $\ms_b \leftarrow \nnm{last}$
       \end{enumerate}
\item \begin{enumerate}
      \item $\textit{ThisLength} \leftarrow \textit{length}(\ms_a, \ms_b) -1$
      \item $c \leftarrow \min\{c_0, \textit{ThisLength} \}$\\
        \addce{(Note that $c \geq 1$; if $c=1$, then the permutation
        applied below is trivial, and the permutation as well as the checks
        involved can be skipped, because they have no effect.)
      \item If $\textit{ThisLength} \leq c_0$, \\
            then $\ms_g \leftarrow  \ms_b.\nnm{pred}$,\\
             else $\ms_g \leftarrow \ms_a.\nnm{succ}^{c}$ .
      \item
      If the interval $[\ms_a, \ms_g]$ contains
      any pair of two non-diagonal ministeps of the same \textit{Option}:
      \begin{enumerate}
      \item  define $\ms_f$ as the last ministep in $[\ms_a,  \ms_g]$
             such that all \textit{Options} of non-diagonal ministeps in $[\ms_a, \ms_f]$
             are distinct;
      \item  $c \leftarrow \min\{c, \textit{length}([\ms_a, \ms_f]) - 1 \}$.
      \item[ ] (The permutation below will then affect only ministeps strictly before $\ms_f$;
             this will ensure that the permuted ministeps, together with $\ms_f$,
             all have distinct \textit{Options} and therefore the permutation cannot
             affect the number of CCPs.)
      \end{enumerate}
      }
      \item $\ms_e \leftarrow \ms_a.\nnm{succ}^{(c-1)}$
      \end{enumerate}
%\item \remark{At this place the erroneous change in the version of November 17, 2010
%      is dropped again.}
\item Let \textit{perm} be a random permutation of the numbers $1$ to $c$.
\item \addce{
      For all variables $(w,r)$ involved in a condition of the kind
      \nnm{higher}, \nnm{disjoint}, or \nnm{atleastone},
      check these conditions for the chain
      changed as follows:\\
        $(w_0,i_0,j_0,r_0,d_0)$ inserted before $\ms_a$,\\
      if (not \textit{misdat}): $(w_0,i_0,j_0,r_0,-d_0)$ inserted before $\ms_b$, \\
       and  the interval
      $[\ms_a, \ms_e]$ permuted according to \textit{perm}. \\
      This needs to be checked
      here only for ministeps from $\ms_a$ to $\ms_b$.}\\
      If at least one of these conditions is not satisfied, exit.
\end{enumerate}
The proposal probability is
\[
%  \frac{\exp{(lospr + lcpr)}}{(\textit{TotNumber}-2)\ttimes \textit{ChoiceLength} \ttimes (c!)}
  \frac{\exp{(lospr + lcpr)}}{pr_2 \ttimes(\textit{TotNumber}-1)\ttimes \textit{ChoiceLength} \ttimes (c!)}
\]


This is used in \textit{pra} below.\\
We also need the probability of the inverse proposal
(except for the factor $c! $ which cancels),
and therefore calculate the following.
\begin{enumerate}[resume]
\item This item is used only if not \textit{misdat} :\\
      if $\textit{NumberOption}(w_0,i_0,j_0,r_0) = 0$ :
      \[
        \textit{NewCCPNumber} \leftarrow \textit{CCPNumber} + 1 \ ;
      \]
      if $\textit{NumberOption}(w_0,i_0,j_0,r_0) \geq 1 $ and $w_0 = N$ :
      \begin{enumerate}
      \item if $\ms_a.\nnm{pred}$ in the original chain
         (before the insertion) is of \textit{Option} $(w_0,i_0,j_0,r_0)$
         then
          \[
            \textit{NewCCPNumber} \leftarrow \textit{CCPNumber} + 1 \ ,
           \]
      \item else
          \[
            \textit{NewCCPNumber} \leftarrow \textit{CCPNumber} + 2 \ ;
           \]
      \end{enumerate}
      if $\textit{NumberOption}(w_0,i_0,j_0,r_0) \geq 1$  and $w_0 = B$ :
      \begin{frindentation}{2em}{2em}
      if \{ there is a ministep of option $(w_0,i_0,j_0,r_0)$
      before $\ms_a$, and the last such ministep is not $\ms_a.\nnm{pred}$ \}, \\
      define $d_-$ as the $d$-coordinate of the last such ministep,
      and $d_- \leftarrow 0$  otherwise;\\
      if there is a ministep of option $(w_0,i_0,j_0,r_0)$
      after $\ms_b$, \\
      define $d_+$ as the $d$-coordinate of the first such ministep,
      and $d_+ \leftarrow 0$  otherwise; and let
      \begin{multline*}
             \textit{NewCCPNumber}  \leftarrow  \\
         \hspace{2em}     \textit{CCPNumber} + I\{d_- \times d_0 = -1\}
                        +  I\{d_+ \times d_0 = +1 \}
      \end{multline*}
      where $I\{A\} = 1$ if $A$ is true and 0 otherwise.
      \end{frindentation}
\emph{We find \textit{NewCCPNumber} by insertions, calculate $pr_1$, and then
remove the insertions}
\item
      \begin{align*}
     & \text{if (not } \textit{misdat}): && \ pr_1 \leftarrow
                \frac{1 - \textit{prmin} - \textit{prmib}}{\textit{NewCCPNumber} } \ ; \\[1em]
     &  \text{if } \textit{misdat} \text{ and } w_0 = N:   && \ pr_1 \leftarrow
                              \frac{\textit{prmin}}{\textit{ChainNumMisNet}+1 } \ ; \\[1em]
     &  \text{if } \textit{misdat} \text{ and } w_0 = B:  && \ pr_1 \leftarrow
                 \frac{\textit{prmib}}{\textit{ChainNumMisBeh}+1 } \ .
      \end{align*}
(only the non-\textit{misdat} case was changed).
\end{enumerate}

Part \emph{B}:
\begin{enumerate}[resume]
\item
\[ \textit{sumlprob} \leftarrow \sum_{s=1}^{\textit{ThisLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{lChoiceProb} + \nnm{lOptionSetProb}) \ ;
      \]
      If (not \textit{SimpleRates}), then below we use
      the values \textit{mu} and \textit{sigma2}; \\
      these refer to the current chain.
\vspace*{-0.3em}
      \[
      \textit{sumlprob\_new}  \leftarrow 0 \ ;
      \]
      \[
      \textit{mu\_new}  \leftarrow \textit{mu} -  \sum_{s=1}^{\textit{ThisLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate}) \ ;
      \]
      \[
      \textit{sigma2\_new}  \leftarrow \textit{sigma2} -  \sum_{s=1}^{\textit{ThisLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate})^2 \ ;
      \]
\item Note that still $y =  \textit{StateBefore}(\ms_a)$ as was assigned above.\\
      \textit{StepProb2}$(y,w_0,i_0,j_0,r_0,d_0;  rr_0, lospr_0, lcpr_0)$;\\
        \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_0 + lospr_0 $ ;\\
      If (not \textit{SimpleRates}), then
      \begin{enumerate}
      \item \textit{mu\_new} $ \leftarrow  \textit{mu\_new} + rr_0 $;
      \item \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_0)^2  $.
      \end{enumerate}
      \textit{ChangeStep}$(y, (w_0,i_0,j_0,r_0,d_0))$
\item For $1 \leq s \leq \textit{ThisLength}$ \
      denote by $\textit{Coordinates}_s$ the values
      $(w,i,j,r,d)$ of $\ms_a.\nnm{succ}^{s-1}$.
\item For $s$ running from 1 to $c$, do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y,
          \textit{Coordinates}_{\textit{perm}(s)}; rr_s, lospr_s, lcpr_s)$ \ ;
      \item  \textit{sumlprob\_new} $ \leftarrow
          \textit{sumlprob\_new} + lcpr_s + lospr_s $ \ ;
      \item If (not \textit{SimpleRates}), then
        \begin{enumerate}
        \item \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr_s $\ ;
        \item \textit{sigma2\_new} $ \leftarrow
              \textit{sigma2\_new}  + (rr_s)^2  $ \ ;
        \end{enumerate}
      \item\textit{ChangeStep}$(y,
         \textit{Coordinates}_{\textit{perm}(s)})$ \ .
      \end{enumerate}
\item %\remark{(Here also an erroneous change in version 17 Nov 2010
      %was dropped, January 3, 2011.)} \\
      For $s$ running from $c+1$ to \textit{ThisLength} do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \textit{Coordinates}_{s}; rr_s, lospr_s, lcpr_s)$
      \item \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_s + lospr_s $
      \item If (not \textit{SimpleRates}), then
        \begin{enumerate}
        \item \textit{mu\_new} $ \leftarrow  \textit{mu\_new} + rr_s $
        \item \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_s)^2  $
        \end{enumerate}
      \item \textit{ChangeStep}$(y, \textit{Coordinates}_{s})$
      \end{enumerate}
\item if (not \textit{misdat}):
      \begin{enumerate}
      \item \textit{StepProb2}$(y,w_0,i_0,j_0,r_0,-d_0; rr_0, lospr_0, lcpr_0)$
      \item \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_0 + lospr_0 $
      \item If (not \textit{SimpleRates}), then
         \begin{enumerate}
         \item  \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr_0 \ $ ;
         \item \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_0)^2 \ $ .
         \end{enumerate}
      \item \textit{ChangeStep}$(y, (w_0,i_0,j_0,r_0,-d_0))$ \\
            Note that at this point, $y$ has been transformed to
            \textit{StateBefore}$(\ms_b)$ of the current (`old') chain.
      \end{enumerate}
\item If \textit{SimpleRates}, then
      \begin{enumerate}
      \item if (not \textit{misdat}):
        \[
        \textit{KappaFactor} \leftarrow \frac{1}{rr^2
                      \times \textit{TotNumber } \times(\textit{TotNumber } + 1)}
        \]
      \item if \textit{misdat}:
        \[
        \textit{KappaFactor} \leftarrow \frac{1}{rr \times \textit{TotNumber } }
        \]
      \end{enumerate}
(This was changed 30-05-10; earlier, the lines were, erroneously, as follows:)
      \begin{enumerate}
      \item if (not \textit{misdat}):
        \[
        \textit{KappaFactor} \leftarrow \frac{1}{rr^2
                      \times(\textit{TotNumber } + 1) \times(\textit{TotNumber } + 2)}
        \]
      \item if \textit{misdat}:
        \[
        \textit{KappaFactor} \leftarrow \frac{1}{rr \times(\textit{TotNumber } + 1) }
        \]
      \end{enumerate}
      else (i.e., if not \textit{SimpleRates})
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2\_new}} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu\_new} \right)^2}{2\ttimes\textit{sigma2\_new}}  \right) \ .
      \end{equation}
\item \addce{
      \begin{align}
     \textit{pra}  \leftarrow &
        \textit{KappaFactor} \ttimes \exp(\textit{sumlprob\_new} - \textit{sumlprob}) \\
      & \ttimes  \frac{prdpr \ttimes pr_1 \ttimes pr_2 \ttimes(\textit{TotNumber}-1)\ttimes \textit{ChoiceLength} }
                     {pripr \ttimes \exp{(lospr + lcpr)}}
    \end{align}
    }\\
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}
Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
       \begin{enumerate}
       \item insert $(w_0,i_0,j_0,r_0,d_0)$ before $\ms_a$;
       \item if (not \textit{misdat}), insert $(w_0,i_0,j_0,r_0,-d_0)$ before $\ms_b$;
       \item permute the chain from $\ms_a$ to $\ms_a.\nnm{succ}^{c-1}$ by \textit{perm};
       \item  \textit{Update} the chain for the changed part.
       \end{enumerate}
\end{enumerate}

\subsection{Delete -- Permute}
\label{S_delper}

The function
\textit{MH\_DelPermute}
$(\textsf{input}\ c_0;%  \ms_a, \ms_b, c, \textit{perm} ;
   \textsf{output}\ \textit{misdat, $w_0$, pra, accept})$
is defined as follows.\\
The input parameter $c_0$ is a relatively small integer --
in Siena 3 it is determined adaptively with a maximum value of 40.

First a rough description is given.

At the start, there is a choice between deleting a ministep
for a missing data variable, reflected by \textit{misdat} = \nnm{True}; or deleting a CCP,
reflected by \textit{misdat} = \nnm{False}.
Although this seems to be quite different, still it has been
combined in one procedure because the overlap in the
missing and non-missing cases is so large.

If \textit{misdat} = \nnm{False} (the regular case),
the proposal is made to delete two ministeps
$\ms_a$ and $\ms_b$ that together are a CCP.\\
Then the number $c$ is provisionally $\min\{c_0, \textit{length}(\ms_a,\ms_b)-2\}$.
\\
If \textit{misdat} = \nnm{True},
the proposal is made to delete one ministep
for a missing data variable, $\ms_a$.\\
Then provisionally $c = \min\{c_0, \textit{length}(\ms_a,\nnm{last})-2\}$.\\
\addce{Like in \textit{MH\_InsPermute}, a check is made for the
existence of several ministeps of the same \textit{Option} in the
interval of $c$ ministeps after $\ms_a$; if this is the case,
then $c$ is decreased just like in  \textit{MH\_InsPermute}.}

In both cases, the output parameter $w_0$ is the aspect
(Network or Behavior) of ministep $\ms_a$.
The reason for having this output is the possibility to tune
the values of the probabilities \textit{prmin} and  \textit{prmib}.

In addition, if $c \geq 2$,
\textit{perm} is a permutation of the numbers $ 1, 2, \ldots, c$,
and the proposal includes permuting the ministeps in the interval
$[\ms_a.\nnm{succ}, \ms_a.\nnm{succ}^{c}]$ by \textit{perm}.

According to a probability \textit{pra},
the proposal is put into effect (yielding \textit{accept} = \nnm{True})
or not (yielding \textit{accept} = \nnm{False}).

The steps taken in the function are as follows.

Part \emph{A}:
\begin{enumerate}
\item With probability \textit{prmin} + \textit{prmib}
      let $\textit{misdat} \leftarrow \nnm{True}$,\\
      else $\textit{misdat} \leftarrow \nnm{False}$.
\item
\vspace*{-0.4em}
\begin{enumerate}
      \item if (not \textit{misdat}):
            \begin{enumerate}
            \item If \textit{CCPNumber} = 0, then exit.
            \item $(\ms_a, \ms_b) \leftarrow $ \textit{RandomCCP}
%            \addce{
%            \item  If $\ms_a$ and $\ms_a.\nnm{succ}$ have the same
%                   \textit{OptionSet}, then exit.
%            }  unnecessary; then (\ms_a, \ms_b) would not be a CCP.
            \item $\textit{ThisLength} \leftarrow \textit{length}(\ms_a, \ms_b)$
            \end{enumerate}
      \item if \textit{misdat}:
            \begin{enumerate}
            \item  With probability
                   \[
                      \frac{\textit{prmin}}{\textit{prmin} + \textit{prmib}}
                   \]
                   let $w_0 \leftarrow N$,  else  $w_0 \leftarrow B$.
            \item  if $w_0 = N$, then
                    \begin{enumerate}
                    \item if \textit{ChainNumMisNet} = 0, then exit.
                    \item $\ms_a \leftarrow $ \textit{RandomMisNet}
                    \end{enumerate}
            \item  if $w_0 = B$, then
                    \begin{enumerate}
                    \item if \textit{ChainNumMisBeh} = 0, then exit.
                    \item $\ms_a \leftarrow $ \textit{RandomMisBeh}
                    \end{enumerate}
            \item $\textit{ThisLength} \leftarrow \textit{length}(\ms_a, \nnm{last})$
            \item If $\ms_a.\nnm{succ} = \nnm{last}$ (i.e., \textit{ThisLength} = 2), exit.\\
%            \remark{This condition was added in the version of November 17, 2010,
%                    and corrected with the addition of \nnm{succ}
%                    in the version of November 18, 2010,
%                    and is needed in view of the requirement $\ms_a < \nnm{last}$
%                    in step 1.\ of procedure \textit{MH\_InsPermute}.}
            \end{enumerate}
      \end{enumerate}
\vspace*{-1em}
\item  $c \leftarrow \min\{c_0, \textit{ThisLength} - 2\}$.\\
       Note that in the case (not \textit{misdat}) the definition of a CCP implies $c \geq 1$.\\
      If \textit{misdat}, the `exit' in the preceding step implies that $c \geq 1$.\\
%      \remark{This was not so in the version before November 17, 2010.}
      \item If (($\textit{ThisLength}-2 \leq c_0$) and (not \textit{misdat})), \\
            then $\ms_g \leftarrow  \ms_b.\nnm{pred}$,\\
             else $\ms_g \leftarrow \ms_a.\nnm{succ}^{(c+1)}$ .\\
             ($\ms_g$ is an upper bound to the interval of ministeps
             that will be permuted.)
      \item If the interval $[\ms_a.\nnm{succ}, \ms_g]$ contains
             any pair of two non-diagonal ministeps of the same \textit{Option}:
      \begin{enumerate}
      \item  define $\ms_f$ as the last ministep in $[\ms_a.\nnm{succ},  \ms_g]$
             such that all \textit{Options} of non-diagonal ministeps
             in $[\ms_a.\nnm{succ}, \ms_f]$ are distinct;
      \item  $c \leftarrow \min\{c, \textit{length}([\ms_a.\nnm{succ}, \ms_f]) - 1 \}$.
      \item[ ] (The permutation below will then affect only ministeps strictly before $\ms_f$;
             this will ensure that the permuted ministeps, together with $\ms_f$,
             all have distinct \textit{Options} and therefore the permutation cannot
             affect the number of CCPs.)
      \end{enumerate}
\item Let \textit{perm} be a random permutation of the numbers $1$ to $c$.
\item $y \leftarrow  \textit{StateBefore}(\ms_a)$ .
\item For all variables $(w,r)$ involved in a condition of the kind
      \nnm{higher}, \nnm{disjoint}, or \nnm{atleastone},
      check these conditions for the chain
      with   $\ms_a$ and $\ms_b$ deleted, and with
      $\ms_a.\nnm{succ}$ to $\ms_a.\nnm{succ}^c$  permuted by \textit{perm}.\\
      This needs to be checked
      here only for ministeps from $\ms_a.\nnm{succ}$ to $\ms_b.\nnm{pred}$.\\
      If at least one of these conditions is not satisfied, exit.
\end{enumerate}
To calculate the proposal probabilities of this proposal and the
inverse proposal, used in $pra$ below, we need to calculate the following.
\begin{enumerate}[resume]
\item
      \begin{align*}
      \text{if (not } \textit{misdat}):\ \ & pr_1 \leftarrow
   \frac{1 - \textit{prmin} - \textit{prmib}}{\textit{CCPNumber} } \ ; \\[1em]
       \text{if } \textit{misdat} \text{ and } w_0 = N:\  \ & pr_1 \leftarrow
                   \frac{\textit{prmin}}{\textit{ChainNumMisNet} } \ ; \\[1em]
       \text{if } \textit{misdat} \text{ and } w_0 = B:\ \ & pr_1 \leftarrow
                 \frac{\textit{prmib}}{\textit{ChainNumMisBeh} } \ .
      \end{align*}
 \item If (not \textit{misdat}):
      \begin{enumerate}
      \item let $\ms_d$ be \\
          the first ministep in the chain after $\ms_b$
          of the same \textit{Option} $(w_0,i_0,j_0,r_0)$; \\
          or the \nnm{last} ministep
          if there is no ministep after $\ms_b$ of this \textit{Option}.\\
          This can be determined using the pointer \nnm{succOption}.
      \item       Let $\textit{ChoiceLength} \leftarrow \textit{length}(\ms_a, \ms_d ) - 3$.\\
      Note that \textit{ChoiceLength}$ \geq 1$.\\
      The information we need in the following is only \textit{ChoiceLength},
      we can forget about $\ms_d$ itself.
      \end{enumerate}
\item If \textit{misdat}, $\textit{ChoiceLength} \leftarrow 1$ .
\end{enumerate}
The probability of this proposal is
\[
\frac{pr_1}{c!} \ .
\]

Part \emph{B}:
\begin{enumerate}[resume]
\item If \textit{misdat} then $ \textit{MaxLength} \leftarrow c$, else
      $ \textit{MaxLength} \leftarrow \textit{ThisLength}$.\\
%      \remark{This is new in the version of November 17, 2010. The earlier version
%       could for \textit{misdat} request the
%      (\nnm{lChoiceProb} + \nnm{lOptionSetProb}) of the \nnm{last} ministep.}
\item \[ \textit{sumlprob} \leftarrow \sum_{s=1}^{\textit{MaxLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{lChoiceProb} + \nnm{lOptionSetProb}) \ ;
      \]
      If (not \textit{SimpleRates}), then below we use
      the values \textit{mu} and \textit{sigma2}; \\
      these refer to the current chain.
      \[
      \textit{sumlprob\_new}  \leftarrow 0 \ ;
      \]
      \[
      \textit{mu\_new}  \leftarrow \textit{mu} -  \sum_{s=1}^{\textit{MaxLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate}) \ ;
      \]
      \[
      \textit{sigma2\_new}  \leftarrow \textit{sigma2} -  \sum_{s=1}^{\textit{MaxLength}}
             (\ms_a.\nnm{succ}^{s-1}).(\nnm{rRate})^2 \ ;
      \]
\item \[
      lpr_0 \leftarrow \ms_a.(\nnm{lChoiceProb} + \nnm{lOptionSetProb})
      \]
      (This is part of the log probability for the reverse proposal.)
\item Note that still $y =  \textit{StateBefore}(\ms_a)$ as was assigned above.
      \[
      (w_2, i_2, j_2, r_2, d_2) \leftarrow (\ms_a.\nnm{succ}).\textit{Coordinates} \ ;
      \]
      \[
      \textit{StepProb1}(y,w_2,i_2,r_2;rr_2,lospr_2)\ ;
      \]
      \[
      pr_2 \leftarrow 1 - \exp(lospr_2)
      \]
\emph{$pr_2$ is set to 1 as for insert permute.}
      ($pr_2$ also is part of the log probability for the reverse proposal;
      the others of these numbers with subscript 2 are not used any more.)
\item For $1 \leq s \leq \textit{ThisLength}-2$ \
      denote by $\textit{Coordinates}_s$ the values
      $(w,i,j,r,d)$ of $\ms_a.\nnm{succ}^{s}$.\\
%      \remark{Erroneous change in version 17 Nov 2010 dropped, 3 Jan 2011.}
\item Note that still $y =  \textit{StateBefore}(\ms_a)$ as was assigned above.\\
      For $s$ running from 1 to $c$, do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \textit{Coordinates}_{\textit{perm}(s)};
                                            rr_s, lospr_s, lcpr_s)$
      \item
        \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_s + lospr_s $
      \item If (not \textit{SimpleRates}), then
         \begin{enumerate}
          \item \textit{mu\_new} $ \leftarrow  \textit{mu\_new} + rr_s  $
          \item \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_s)^2 $
         \end{enumerate}
      \item \textit{ChangeStep}$(y, \textit{Coordinates}_{\textit{perm}(s)})$
      \end{enumerate}

\item  % \remark{Erroneous change in version 17 Nov 2010 dropped, 3 Jan 2011.}\\
      For $s$ running from $c+1$ to $\textit{ThisLength} -2$ do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \textit{Coordinates}_{s};
                                            rr_s, lospr_s, lcpr_s)$
      \item
        \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_s + lospr_s $
      \item If (not \textit{SimpleRates}), then
         \begin{enumerate}
         \item  \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr_s  $
         \item   \textit{sigma2\_new} $ \leftarrow \textit{sigma2\_new}  + (rr_s)^2 $
         \end{enumerate}
      \item \textit{ChangeStep}$(y, \textit{Coordinates}_{s})$;\\
            note that at this point, if (not \textit{misdat}), $y$ has been transformed to
            \textit{StateBefore}$(\ms_b)$ of the current (`old') chain.
      \end{enumerate}
\item If \textit{SimpleRates}, then
\addcf{
      \begin{enumerate}
      \item if (not \textit{misdat}):
        \[
        \textit{KappaFactor} \leftarrow rr^2
                      \times ( \textit{TotNumber }-1) \times(\textit{TotNumber } -2)
        \]
      \item if \textit{misdat}:
        \[
        \textit{KappaFactor} \leftarrow rr \times ( \textit{TotNumber } -1)
        \]
      \end{enumerate}
(This was changed 30-05-10; earlier, the lines were, erroneously, as follows:)
      \begin{enumerate}
      \item if (not \textit{misdat}):
        \begin{equation}
        \textit{KappaFactor} \leftarrow rr^2
                      \times \textit{TotNumber } \times(\textit{TotNumber } -1) \nonumber
        \end{equation}
      \item if \textit{misdat}:
        \begin{equation}
        \textit{KappaFactor} \leftarrow rr \times \textit{TotNumber }  \nonumber
        \end{equation}
      \end{enumerate}
}
      else (i.e., if not \textit{SimpleRates})
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2\_new}} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu\_new} \right)^2}{2\ttimes\textit{sigma2\_new}}  \right) \ .
      \end{equation}
\iffalse
\item \begin{align}
     \textit{pra}  \leftarrow \ &
        \textit{KappaFactor} \ttimes \exp(\textit{sumlprob\_new} - \textit{sumlprob}) \\
%          & \ \ttimes  \frac{pripr \ttimes \exp(lpr_0)}{prdpr \ttimes pr_1 \ttimes \textit{TotNumber}
           & \ \ttimes  \frac{pripr \ttimes \exp(lpr_0)}{prdpr \ttimes pr_1 \ttimes
                                      pr_2 \ttimes   ( \textit{TotNumber}+1)
                                           \ttimes \textit{ChoiceLength} } \ ;
    \end{align}
\emph{TotNumber+1 replaced by positionchoice here: TotNumber -3  or -2 for
misdat}

    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\fi
\end{enumerate}

Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
       \begin{enumerate}
       \item permute the chain from $\ms_a$ to $\ms_a.\nnm{succ}^{c-1}$ by \textit{perm};
       \item delete $\ms_a$;
       \item if (not \textit{misdat}), delete $\ms_b$;
       \item  \textit{Update} the chain for the changed part.
       \end{enumerate}
\end{enumerate}

\subsection{Randomize Initial Missings: Insert}
\label{S_RMI}
\addcf{This section is new in the version of May 30, 2010.}

Two functions are used to randomize values of variables for which the
value at the start of the period, element of $y(t_{m-1})$,
is missing. These are \textit{MH\_InsMis} and \textit{MH\_DelMis}.
They are called only when there is at least
one missing value in $y(t_{m-1})$.

Introducing a natural Bayesian element in this otherwise frequentist
procedure, these procedures utilize \emph{prior probability distributions}
for the unobserved initial variables.
For the very first observation $y(t_1)$,
a simple specification of these prior distributions is as follows
(using $m=1$).
\begin{itemize}
\item All unobserved variables are prior independent.
\item[N.] For the network variables, $N_{ij}^{(r)}(t_{m-1})$
      has a prior distribution with probability\\
      $P\{N_{ij}^{(r)}(t_{m-1}) = 1\} = \textit{net\_prior}(r,i,j)$.
\item[B.] For the behavior variables, $B_{i}^{(r)}(t_{m-1})$
      has a prior distribution with probabilities\\
      $P\{B_{i}^{(r)}(t_{m-1}) = v\} = \textit{beh\_prior}(r,i,v)$
      for $v$ in the permitted range of this variable.
\end{itemize}
The simplest and presumably very adequate way is to let this depend
only on the dependent variable, $N^{(r)}$ or $B^{(r)}$:
\begin{align*}
 \textit{net\_prior}(r,i,j) &=
   \text{ observed density of network } r, \text{ observation } m-1, \\
 \textit{beh\_prior}(r,i,.) &=
   \text{ observed distribution of behavior } r, \text{ observation } m-1 \ .
\end{align*}

For $m \geq 3$ the best solution would be to do the simulations of the $m-1$
chains in sequence, and utilize the simulated last version of the preceding
period as the first version of the following period.
This does not go well, I presume, with the current architecture
of RSiena which further
needs no communication between simulation of different periods.
A simple way out is to use the priors above here also
(i.e., for general $m$).
I think this will be very reasonable, unless there are a lot of
missing data or the interest of the analysis is precisely in the missings.

The function
\textit{MH\_InsMis}
$( \textsf{output}\  w_0,i_0,j_0,r_0,\, \textit{pra}, \textit{accept})$
is defined as follows.
First a rough description is given.

The initial state of the chain is denoted $y_{\text{init}}$;
this must be equal to $y(t_{m-1})$ except for the coordinates
for which there is an initial missing value, and these
coordinates are being changed in this procedure.

A random selection is made among the coordinates (options) for which
the initial value is missing.
This yields the values $(w_0,i_0,j_0,r_0)$.\\
For this variable, the proposal is made to insert
a ministep  $(w_0,i_0,j_0,r_0,d_0)$
before some existing ministep $\ms_a$;
this $\ms_a$ is randomly chosen, under the constraint
that there are no earlier ministeps of option  $(w_0,i_0,j_0,r_0)$;
the proposal also includes transforming the initial value
$y_{\text{init}}$
by the opposite change $(w_0,i_0,j_0,r_0,-d_0)$
(which amounts to changing  the current value of
$N_{i_0j_0}^{(r_0)}(t_{m-1})$ if $w_0 = N$
or $B_{i_0}^{(r_0)}(t_{m-1})$ if $w_0 = B$)
so that from $\ms_a$ onward, the chain is the same as it used to be.

The steps taken in the function are as follows.

Part \emph{A}:
\begin{enumerate}
\item If \textit{ChainNumInitMis} $ = 0$, then exit.
\item  $ (w_0,i_0,j_0,r_0)  \leftarrow  \emph{RandomInitMis}$ ; \\
      if $ w_0 = B$, then
        \begin{enumerate}
        \item     $ pr_1   \leftarrow  0.5$ ,
        \item $  d_0   \leftarrow \text{ random choice in } \{-1, +1\}  $ ;
        \end{enumerate}
        else
        \begin{enumerate}
        \item     $ pr_1   \leftarrow  1 $ ,
        \item $  d_0   \leftarrow 0 $ .
        \end{enumerate}
\item   If $w_0 = B$, and \emph{subtracting (rather than adding), as this is
    what is actually done.} $d_0$ to
        the current value of $B^{(r)}_{i_0}(t_{m-1})$
        (which is one of the coordinates in $y_{\text{init}}$)
         would lead to a value of this variable outside of its
         permitted range, then
        \begin{enumerate}
        \item     $ pr_1   \leftarrow  1 $ ,
        \item $  d_0   \leftarrow -d_0 $ ,
        \item $ \textit{reversed} \leftarrow $ true  ,
        \end{enumerate}
        else\\
        $ \textit{reversed} \leftarrow $ false .
\item Let $\ms_b$ be \\
      the first ministep in the chain
      of \textit{Option} $(w_0,i_0,j_0,r_0)$; \\
      or the \nnm{last} ministep
      if there is no ministep of this \textit{Option}.\\
      (Above there has been mention somewhere of the pointer \nnm{succOption};
      I do not know if something like that has been implemented,
      but clearly some of the same machinery might be used here.)
\item $\textit{ChoiceLength} \leftarrow \textit{length}(\nnm{first}, \ms_b ) - 1$
\item Let $\ms_a$ a random ministep in the interval
     $[\nnm{first}.\nnm{succ}, \ms_b]$.\\
     (Thus, $\ms_b$ also is permitted.)\\
     Note that the number of choices here is $\textit{ChoiceLength} \geq 1$.
\newpage
\item If variable $(w_0, r_0)$ is involved in any of the conditions
      \nnm{uponly}, \nnm{downonly}, \nnm{higher}, \nnm{disjoint},
      or \nnm{atleastone}, then :
      \begin{frindentation}{2em}{1em}
      If changing $y_{\text{init}}$
      by $(w_0,i_0,j_0,r_0,-d_0)$ while  inserting a ministep
      of option $(w_0,i_0,j_0,r_0,d_0)$ immediately before $\ms_a$ would
      lead to a violation of at least
      one of these conditions, then:
      \begin{frindentation}{2em}{1em}
        \begin{enumerate}
        \item if \textit{reversed} or ($w_0 = N$) then exit, else
         \begin{align*}
          d_0 & \leftarrow  -d_0 \\
          pr_1 & \leftarrow  1 \ ;
         \end{align*}
        \item If for this opposite value of $d_0$, changing $y_{\text{init}}$
           by $(w_0,i_0,j_0,r_0,-d_0)$ while  inserting a ministep
           of option $(w_0,i_0,j_0,r_0,d_0)$ immediately before $\ms_a$ would also violate
           one or more of these conditions, then exit.
        \end{enumerate}
      \end{frindentation}
      \end{frindentation}
\end{enumerate}
The proposal probability here is
\[
\frac{pr_1}{\textit{NumInitMis} \times \textit{ChoiceLength}} \ .
\]
This is used in \textit{pra} below.

Part \emph{B}:
\begin{enumerate}[resume]
\item
   \begin{align*}
       \textit{sumlprob} \ & \leftarrow \ \sum_{\ms = \nnm{first.succ}}^{\ms_a.\nnm{pred}}
             \ms.(\nnm{lChoiceProb} + \nnm{lOptionSetProb}) \ ; \\[1em]
       \textit{sumlprob\_new} \ & \leftarrow  \ 0 \ ;   \\[1em]
      \textit{mu\_new} \  & \leftarrow \ \textit{mu} \ -
                \sum_{\ms = \nnm{first.succ}}^{ \ms_a.\nnm{pred}}
                        (\ms.\nnm{rRate}) \ ; \\[1em]
      \textit{sigma2\_new} \ & \leftarrow \ \textit{sigma2} \ -
                \sum_{\ms = \nnm{first.succ}}^{\ms_a.\nnm{pred}}
                          (\ms.\nnm{rRate})^2 \ .
      \end{align*}
\item % \remark{here an error was corrected in items 9 and 11, version 30/10/2010.}\\[1em]
      $y \leftarrow y_{\text{init}}$ ;\\[1em]
      Let $pr_2$ be the prior probability
      (see \textit{net\_prior} and \textit{beh\_prior} discussed above)
      of value $y(w_0,i_0,j_0,r_0)$
      and $pr_3$ the prior probability of the value when
      this variable is changed according to ministep
      $(w_0,i_0,j_0,r_0,-d_0)$ ;\\[1em]
      \textit{ChangeStep}$(y, (w_0,i_0,j_0,r_0,-d_0))$ (now we have the new initial value);\\[1em]
      \textit{StepProb1}$(y,w_0,i_0,j_0,r_0;  rr_0, lospr_0)$;\\[1em]
      If (not \textit{SimpleRates}), then
      \begin{enumerate}
      \item \textit{mu\_new} $ \leftarrow  \textit{mu\_new} + rr_0 $ ,
      \item \textit{sigma2\_new} $ \leftarrow
           \textit{sigma2\_new}  + (rr_0)^2  $ ;
      \end{enumerate}
\item For \ms \ running from $\nnm{first}.\nnm{succ}$ to $\ms_a.\nnm{pred}$ do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \ms.\textit{Coordinates}; rr, lospr, lcpr)$,
            where \ms.\textit{Coordinates} are
            the coordinates of ministep \ms  ;
      \item  \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr + lospr $
      \item If (not \textit{SimpleRates}), then
        \begin{enumerate}
        \item \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr $\ ;
        \item \textit{sigma2\_new} $ \leftarrow   \textit{sigma2\_new}  + (rr)^2  $ \ .
        \end{enumerate}
      \item\textit{ChangeStep}$(y, \ms)$
      \end{enumerate}
\item \begin{enumerate}
      \item \textit{StepProb2}$(y,w_0,i_0,j_0,r_0,d_0; rr_0, lospr_0, lcpr_0)$
      \item \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr_0 + lospr_0 $
      \end{enumerate}
\item If \textit{SimpleRates} (which implies $rr = rr_0$), then
      \begin{enumerate}
      \item  \begin{equation}
        \textit{KappaFactor} \leftarrow \frac{1}{rr_0 \times \textit{TotNumber } } \nonumber
        \end{equation}
      \end{enumerate}
      else (i.e., if not \textit{SimpleRates})
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2\_new}} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu\_new} \right)^2}{2\ttimes\textit{sigma2\_new}}  \right) \ .
      \end{equation}
\item  \begin{align}
     \textit{pra}  \leftarrow & \
        \textit{KappaFactor} \ttimes \exp(\textit{sumlprob\_new} - \textit{sumlprob}) \\
      & \ \ttimes  \frac{\textit{prdms} \ttimes \textit{ChoiceLength} \ttimes pr_3}
                        {\textit{prims} \ttimes pr_1 \ttimes pr_2}  \ ;
    \end{align}
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}
Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
       \begin{enumerate}
       \item       \textit{ChangeStep}$( y_{\text{init}} , (w_0,i_0,j_0,r_0,-d_0))$
       \item insert $(w_0,i_0,j_0,r_0,d_0)$ before $\ms_a$;
       \item  \textit{Update} the chain for the changed part.
       \end{enumerate}
\end{enumerate}

\subsection{Randomize Initial Missings: Delete}
\label{S_RMD}
\addcf{This section is new in the version of May 30, 2010.}

Two functions are used to randomize values of variables for which the
value at the start of the period, element of $y(t_{m-1})$,
is missing. These are \textit{MH\_InsMis} and \textit{MH\_DelMis}.
They are called only when there is at least
one missing value in $y(t_{m-1})$.
This section specifies \textit{MH\_DelMis}.
The same prior distributions
$\textit{net\_prior}(r,i,j)$ and
$\textit{beh\_prior}(r,i,v) $ are used as in the preceding section.

The function
\textit{MH\_DelMis}
$( \textsf{output}\  w_0,i_0,j_0,r_0,\, \textit{pra}, \textit{accept})$
is defined as follows.
First a rough description is given.

The initial state of the chain is denoted $y_{\text{init}}$;
this must be equal to $y(t_{m-1})$ except for the coordinates
for which there is an initial missing value, and one of these
coordinates may be changed in this procedure.

A random selection is made among the coordinates (options) for which
the initial value is missing.
This yields the output values $(w_0,i_0,j_0,r_0)$.\\
If no ministep of this option exists, then nothing happens.
If ministeps of this option do exist,
the proposal is made to
delete the first ministep of this option;
given that the ministep has the coordinates  $(w_0,i_0,j_0,r_0,d_0)$,
the proposal is combined with
transforming the initial value
$y_{\text{init}}$
by this change $(w_0,i_0,j_0,r_0,d_0)$
(which amounts to changing  $N_{i_0j_0}^{(r_0)}(t_{m-1})$ if $w_0 = N$
or $B_{i_0}^{(r_0)}(t_{m-1})$ if $w_0 = B$)
so that after the to-be-deleted ministep,
the chain will be the same as it used to be.

The quantities \textit{ChoiceLength} and $pr_1$ are computed
because they play a role in the proposal probability
for the reverse proposal, and hence in the acceptance
probability of this proposal.

The steps taken in the function are as follows.

\newpage
Part \emph{A}:
\begin{enumerate}
\item If \textit{ChainNumInitMis} $ = 0$, then exit.
\item  $ (w_0,i_0,j_0,r_0)  \leftarrow  \emph{RandomInitMis}$ .
\item Let $\ms_a$ be
      the first ministep in the chain
      of \textit{Option} $(w_0,i_0,j_0,r_0)$,
      and \\
      $d_0 \leftarrow \ms_a.d$ \ . \\
      (Above there has been mention somewhere of the pointer \nnm{succOption};
      I do not know if something like that has been implemented,
      but clearly some of the same machinery might be used here.)
\item Let $\ms_b$ be
      the second ministep in the chain
      of \textit{Option} $(w_0,i_0,j_0,r_0)$; and the \nnm{last} ministep
      if there is no second of this option;\\
       $\textit{ChoiceLength} \leftarrow
                \textit{length}(\nnm{first}, \ms_b ) - 2$.\\
      ($\ms_b$ is not used further.)
\item   If $w_0 = B$, and adding $2 d_0$ to $B^{(r)}_{i_0}(t_{m-1})$
         would not lead to a value of this variable outside of its
         permitted range, then
         \[
              pr_1   \leftarrow  0.5  ,
         \]
        else
       \[
            pr_1   \leftarrow  1  \ .
        \]
\item If variable $(w_0, r_0)$ is involved in any of the conditions
      \nnm{uponly}, \nnm{downonly}, \nnm{higher}, \nnm{disjoint},
      or \nnm{atleastone}:
      \begin{enumerate}
      \item       If changing $y_{\text{init}}$
             by $(w_0,i_0,j_0,r_0,d_0)$ while deleting ministep $\ms_a$ would
             lead to a violation of this condition, then exit.
      \item       If $pr_1 = 0.5$ and changing $y_{\text{init}}$
        by $(w_0,i_0,j_0,r_0,2 d_0)$ while simultaneously
        replacing ministep $\ms_a$ by a ministep with coordinates
        $(w_0,i_0,j_0,r_0,-d_0)$  would
      lead to a violation of this condition,
      then:
      \[
       pr_1  \leftarrow  1 \ .
      \]
      \end{enumerate}
\end{enumerate}
The proposal probability here is
\[
\frac{1}{\textit{NumInitMis}} \ .
\]
This is used in \textit{pra} below.

\newpage
Part \emph{B}:\\
%\remark{here an error was corrected in items 8 and 9, version 30/10/2010.}
\begin{enumerate}[resume]
\item \label{item7}
   \begin{align*}
       \textit{sumlprob} \ & \leftarrow \ \sum_{\ms = \nnm{first.succ}}^{\ms_a}
             \ms.(\nnm{lChoiceProb} + \nnm{lOptionSetProb}) \ ; \\[1em]
       \textit{sumlprob\_new} \ & \leftarrow \ 0 \ ;   \\[1em]
      \textit{mu\_new}  \ & \leftarrow \ \textit{mu} \ -
                \sum_{\ms = \nnm{first.succ}}^{ \ms_a}
                        (\ms.\nnm{rRate}) \ ; \\[1em]
      \textit{sigma2\_new} \ & \leftarrow \ \textit{sigma2} \ -
                \sum_{\ms = \nnm{first.succ}}^{\ms_a}
                          (\ms.\nnm{rRate})^2 \ .
      \end{align*}
\vspace*{-1em}
\item  $y \leftarrow y_{\text{init}}$ \ ;\\[1em]
      Let $pr_2$ be the prior probability
      (see \textit{net\_prior} and \textit{beh\_prior} discussed above)
      of value $y(w_0,i_0,j_0,r_0)$
      and $pr_3$ the prior probability of the value when
      this variable is changed according to ministep
      $(w_0,i_0,j_0,r_0,d_0)$ \ ;\\[1em]
      \textit{ChangeStep}$(y, (w_0,i_0,j_0,r_0,d_0))$ \ ; \\[1em]
 \textit{StepProb1}$(y,w_0,i_0,j_0,r_0;  rr_0, lospr_0)$;\\
 \emph{Krists had added lospr0 and lcpr0 to sumlprobnew. I removed this.}\\[1em]
     If (not \textit{SimpleRates}), then
      \begin{enumerate}
      \item \textit{mu\_new} $ \leftarrow  \textit{mu\_new} + rr_0 $ \ ,
      \item \textit{sigma2\_new} $ \leftarrow
                   \textit{sigma2\_new}  + (rr_0)^2  $ \ .
      \end{enumerate}
\item For \ms \ running from $\nnm{first}.\nnm{succ}$ to $\ms_a.\nnm{pred}$ do:
      \begin{enumerate}
      \item \textit{StepProb2}$(y, \ms.\textit{Coordinates}; rr, lospr, lcpr)$, \\
            where \ms.\textit{Coordinates} are
            the coordinates of ministep \ms \ ;
      \item  \textit{sumlprob\_new} $ \leftarrow \textit{sumlprob\_new} + lcpr + lospr
      $ \;
      \item If (not \textit{SimpleRates}) and ($\ms < \ms_a.\nnm{pred}$), then
        \begin{enumerate}
        \item \textit{mu\_new} $ \leftarrow \textit{mu\_new} + rr $\ ,
        \item \textit{sigma2\_new} $ \leftarrow   \textit{sigma2\_new}  + (rr)^2  $ \
        ;
        \item[comment : ] the reciprocal rate calculated for $\ms = \ms_a.\nnm{pred}$
                          is, in the proposed new chain, \nnm{rRate} for $\ms_a.\nnm{succ}$,
                          which is unchanged and has not been subtracted in step~\ref{item7}.
        \end{enumerate}
      \item\textit{ChangeStep}$(y, \ms)$ \ .
      \end{enumerate}
\item If \textit{SimpleRates} (which implies $rr = rr_0$), then
      \begin{equation}
        \textit{KappaFactor} \leftarrow  rr_0 \ttimes ( \textit{TotNumber }-1)  \nonumber
        \end{equation}
      else (i.e., if not \textit{SimpleRates})
      \begin{equation}
      \textit{KappaFactor} \leftarrow \sqrt{ \frac{\textit{sigma2}}{\textit{sigma2\_new}} }
                  \ttimes \exp\left( \frac{\left(1 - \textit{mu} \right)^2}{2\ttimes\textit{sigma2}}
                   - \frac{\left(1 - \textit{mu\_new} \right)^2}{2\ttimes\textit{sigma2\_new}}  \right) \ .
      \end{equation}
\item
 \begin{align}
     \textit{pra}  \leftarrow &
        \textit{KappaFactor} \ttimes \exp(\textit{sumlprob\_new} - \textit{sumlprob}) \\
      & \ \ttimes  \frac{ \textit{prims} \ttimes pr_1 \ttimes pr_3 }
                     { \textit{prdms} \ttimes \textit{ChoiceLength} \ttimes pr_2 }  \ ;
    \end{align}
    if (\textit{pra} $ > 1$), then  \textit{pra} $ \leftarrow 1 $.
\end{enumerate}
Part \emph{C}:
\begin{enumerate}[resume]
\item With probability \textit{pra} let \textit{accept} $\leftarrow$ \nnm{True}, else
      \textit{accept} $\leftarrow$ \nnm{False}.
\item If \textit{accept}, then
       \begin{enumerate}
       \item       \textit{ChangeStep}$( y_{\text{init}} , (w_0,i_0,j_0,r_0,d_0))$
       \item delete ministep $\ms_a$;
       \item  \textit{Update} the chain for the changed part.
       \end{enumerate}
\end{enumerate}


\section{Likelihood-based calculations:
         \protect\newline Simulation complete-data log-likelihoods}
\label{S_colol}

The simulation of the complete-data log-likelihood
for a given parameter $\theta$ is done as follows (cf.\ Section 3.4 in
\citet{SnijdersEA10a}). Separate chains are made for all periods
$m = 1, \ldots, M-1$. This chapter describes what is done
for each period.

\begin{enumerate}
\item The chain is initialized by the procedure \emph{Connect} and
      by setting
      \[
      y_{\text{init}}\leftarrow y(t_{m-1}) \ .
      \]
\item After this initialization, a burn-in is necessary (only once, at the start of the
      likelihood-based MCMC process, i.e., only for the very first
      value of $\theta$ employed;
      search for ``previous value'' in \citet{SnijdersEA10a}).
\item A large number of Metropolis Hastings steps is used
      to transform the current chain to a new chain which can be regarded
      as a random draw from the conditional distribution of chains
      given the observed data, for the current parameter value $\theta$.
\item For this chain, the complete-data log-likelihood is calculated.
\end{enumerate}

\subsection{Burn-in}

The burn-in procedure here is different from the one in Siena 3
(the difference is not important).

As a pre-burn-in it is good to insert, immediately after
\emph{Connect}, some \textit{diagonal} steps and some mutually canceling pairs of steps
as long as this increases the likelihood of the chain.
This is done as follows, using functions  \textit{MH\_InsertDiag} and \textit{MH\_InsPermute}
explained below.
\begin{enumerate}[resume]
\item Repeat \textit{MH\_InsertDiag}(\textit{pra, accept}) until 5 times `\textit{not accept}'.
\item Repeat \textit{MH\_InsPermute}(\textit{1, misdat, pra, accept}) until 5 times `\textit{not accept}'.
\end{enumerate}
The number 5 is arbitrary but reasonable.

After the pre-burn-in \textit{nummax} Metropolis Hastings steps are made
as described in the following section,
where \textit{nummax} is a suitable number.
I suggest \textit{nummax} = 500 for the moment; we may experiment
with other values.

To keep users at ease it will be good to have the gui
(if it is being used) display the message
`Burn-in iterations' during the burn-in phase.

\subsection{Metropolis Hastings steps}

The Metropolis Hastings step is a probabilistic choice
between the following procedures.

\begin{enumerate}
\item With probability  \textit{pridg}, make  step \textit{MH\_InsertDiag};
\item with probability  \textit{prcdg}, make  step \textit{MH\_CancelDiag};
\item with probability  \textit{prper}, make  step \textit{MH\_Permute};
\item with probability  \textit{pripr}, make  step \textit{MH\_InsPermute};
\item with probability  \textit{prdpr}, make  step \textit{MH\_DelPermute};
\addcf{
\item with probability  \textit{prims}, make step \textit{MH\_InsMis};
\item with probability  \textit{prdms}, make step \textit{MH\_DelMis};
}
\end{enumerate}



\noindent
\addcf{
Probabilities  \textit{prims} and  \textit{prdms}
will be 0 if \textit{ChainNumInitMis} = 0.}\\
Evidently, \textit{pridg} + \textit{prcdg} + \textit{prper}
+ \textit{pripr} + \textit{prdpr} + \textit{prims} + \textit{prdms} = 1,
and I suppose that it makes sense to try working with
\textit{pridg} = \textit{prcdg}, \textit{pripr} = \textit{prdpr},
and \textit{prims} = \textit{prdms}.

The probabilities of the various Metropolis Hastings steps
are parameters of the algorithm.
I believe that in Siena 3, the following values were used:\\
\textit{pridg} = \textit{prcdg} = 0.05;\\
\textit{prper} = 0.3;\\
\textit{pripr} = \textit{prdpr} = 0.3;\\
\textit{prims} = \textit{prdms} = 0.

\addce{If for all dependent variables \nnm{uponly} or \nnm{downonly} holds,
then \textit{pripr} = \textit{prdpr} = 0.}

\addcf{
If \textit{ChainNumInitMis} $> 0$, the
probabilities \textit{prims} and \textit{prdms} associated with the steps
\textit{MH\_InsMis} and \textit{MH\_DelMis}
could have the default value
\[
\textit{prims} = \textit{prdms}
   = \frac{\textit{ChainNumInitMis}}{2(R_N\, n(n-1) \,+\, R_B \,n)} \
,
\]
truncated to some range between, say, .001 and .05}.
(These values will still require further experimentation!)

The integer number $c_0 \geq 0$,
 used in procedures
\textit{MH\_Permute}, \textit{MH\_InsPermute}, and \textit{MH\_DelPermute}
to define the length of the sequence of ministeps to be permuted,
is a parameter of the algorithm and is determined adaptively.
It depends on the period.
(In Siena 3 this is stored in the variable \textit{numm}.)
The heuristic idea is that if proposals for permuting longer sequences
(which make larger steps in the outcome space but imply more work) have
a lower probability to be accepted, then we aim at an acceptance rate
of about 0.5; if proposals with longer sequences have a higher
acceptance probability, then we can propose the permutation of long sequences.
In all cases $c_0$ is bounded between $c_\text{min} = 2$ and $c_\text{max} = 40$.
(The constants 2 and 40 are parameters meant to be touched only
for algorithmic fine-tuning by very experienced users.)
$c_0$ can be initialised at 20 for all periods.
The adaptive procedure is as follows.\\
\medskip

\noindent
After each of the procedures
\textit{MH\_Permute}, \textit{MH\_InsPermute}, and \textit{MH\_DelPermute}:
\begin{itemize}
\item[`up'] If \textit{accept}, then $c_0 \leftarrow c_0 + 0.5$,\\
            but $c_0$ is not allowed to exceed $\min \{c_{\text{max}}, \textit{TotNumber}-1\}$ .
\item[`down'] If \textit{not accept}, then $c_0 \leftarrow c_0 - 0.5$,\\
            but $c_0$ is not allowed to become less than $c_\text{min}$.
\end{itemize}

This adaptive choice is suggested by the Robbins-Monro algorithm
as a way to aim at an acceptance rate of 0.5 under the conditions
and constraints mentioned above.
It is felt that the complications for the convergence of
the Metropolis Hastings algorithm that are caused by this adaptation
are of such a minor nature that they can safely be ignored.
However, it is important to check the algorithm and tune the
adaptation; in particular if $c_0$ would hover close to $c_\text{min}$
this might be a sign that something is not well.

The number of Metropolis Hastings steps until the next
recorded complete-data log-likelihood (with a Robbins-Monro step,
or for a Bayesian random update of the parameters)
for period $m-1$ is
\begin{equation}
\begin{split}
  \textit{NumStep} = &
\textit{MultiplicationFactor} \ttimes \\
   & \big(\textit{TotalDistance(Network)} + \textit{TotalDistance(Behavior)} \big)
          \label{NumStep}
\end{split}
\end{equation}
where
      \[
      \textit{TotalDistance(Network)} =
      \sum_{r=1}^{R_N} \sum_{i,j} \bigl\lvert x^{(r)\,\rm{obs}}_{ij}(t_{m})  - x^{(r)\,\rm{obs}}_{ij}(t_{m-1}) \bigr\rvert \ ,
      \]
      where the sum is over all tie variables that are not structurally fixed
      at $t_{m-1}$ or $t_m$ (note that
      it is possible that tie variables are structurally fixed but have different subsequent values)
and
      \[
\textit{TotalDistance(Behavior)} =
        \sum_{r=1}^{R_B} \sum_{i} \bigl\lvert z^{(r)\,\rm{obs}}_{i}(t_{m})  - z^{(r)\,\rm{obs}}_{i}(t_{m-1}) \bigr\rvert \ ,
      \]
      where the sum is over all actors that are not structurally inactive
      at $t_{m-1}$ or $t_m$.

The \textit{MultiplicationFactor} must be tuned by the user, based mainly on
information about the autocorrelations of generated function values (scores) in
Phase 3. A reasonable default value is 5.0, but often (mostly?) higher values
will be necessary. For the Robbins-Monro methods, autocorrelations should
preferably be less than 0.4, and a warning may be issued in the output file if
this is not the case.

For diagnostic output, it will be good to have information about
the proportion of accepted Metropolis Hastings proposals
of all the different step types for all the $R_N + R_B$ different
outcome variables (networks and behaviors); about the distribution
of $c_0$; and about the autocorrelations of the generated scores
in Phase 3. This information should be on the sienaFit object,
but only the autocorrelations should be reported by default;
the rest should be available on request.

\subsection{Score functions}

After the \textit{NumStep} Metropolis-Hastings steps of the preceding
subsection, the score function must be calculated for the resulting chain.
This can be done separately for each of the periods, yielding $J_m$
for $m = 2, \ldots, M$ as defined in Section~\ref{S_sim}.
This is elaborated in this section.

%There are two cases in view of a complication with respect to the rate function.
%This is related to the extra factor in the likelihood
%represented as  $\kappa$  in formula (15) in \citet{SnijdersEA10a}.

\iffalse
In the ``simple case"
the score function is the derivative of the log of (16) in
\citet{SnijdersEA10a},
which here is
\begin{equation}
  -n_\text{act} + \frac{\textit{TotNumber}-1}{\rho}   \ . \label{kappa1}
\end{equation}
where $\textit{TotNumber} - 1 = \textit{NumberVar} $ is the number of ministeps for this
variable.
Below we shall use a different expression, however, which is the linear
approximation to this function.
\fi

The score function for the parameters of the rate function
here is different from what is specified in
\textsf{simstats0c}.
The reason is that for the likelihood-based calculations,
the `complete data' is the embedded chain (without the time increments),
whereas in  \textsf{simstats0c}
the complete chain, including the time increments (`tau'), is used.
In \textsf{simstats0c} the contributions to the score function
are calculated immediately after each ministep.
Working with only the embedded chain is more efficient, because part
of the variability is integrated out; the advantage for the
likelihood-based calculations is that there are no complications springing
from changing dimensionality of the continuous part of the outcome space.
For rate functions depending on the state $y$, working with the
embedded chain entails the complications associated with the constant denoted $\kappa$
and its consequence \textit{KappaFactor}.
\bigskip
\\
\hspace*{\parindent}
First consider the case where the rate function does not depend on the
state $y = (x,z)$, although it may depend on the actor $i$,
and it will depend  (if $R_N + R_B > 1$) on the dependent variable labeled by $(w,r)$.
The rate function then is expressed by
\begin{equation}
 \lambda^w(\theta, r,i, y) = \rho^w_r \, \exp(\alpha^w_{r} s^w_{ri} )
               \label{tkappa5}
\end{equation}
(where $\alpha^w_{r} s^w_{ri}$ is the inner product of these two vectors).
Thus, there is a basic rate parameter $\rho^w_r$ for each  dependent
variable labeled $(w,r)$; in addition there may be actor-dependent variables $s^w_{ri}$ that
affect the rate for dependent variable $(w,r)$, but the parameters $\alpha^w_{r}$
are distinct for distinct dependent variables.

All sums over actors $i$ in the following are over the active actors.

Denote the number of ministeps of \textit{OptionSet} $(w,i,r)$
by $T^w_{ri}$ and $T^w_r = \sum_i T^w_{ri}$.
Note that $\sum_{w,r} T^w_r = \textit{TotNumber}-1$.
The score functions are given for the basic rate parameters by
\begin{equation}
 \frac{\partial\, \text{complete data log-likelihood}}{\partial \rho^w_r} \, =
 \, \frac{T^w_r}{\rho^w_r} - \sum_{\text{active } i}
             \exp(\alpha^w_{r} s^w_{ri}) \  , \label{tscore2}
\end{equation}
which for cases with \emph{only} a basic rate parameter
$ \lambda^w(\theta, r,i, y) = \rho^w_r $,
and constant number of active actors $n_{\text{act}}$, reduces to
\begin{equation}
\frac{\partial\, \text{complete data log-likelihood}}{\partial \rho^w_r} =
  \frac{T^w_r}{\rho^w_r}  - n_{\text{act}}  \  ; \label{tscore3}
\end{equation}
and, denoting
the index number of the covariate for the rate function
by $h$, the score functions for the other rate parameters are
\begin{align}
  \frac{\partial \, \text{complete data log-likelihood}}{\partial \alpha^w_{rh}}
  &=                                                        \label{tscore4}
   \sum_{\text{active } i} s^w_{rih} \big(T^w_{ri} -  \rho^w_r \,
                   \exp(\alpha^w_{r} s^w_{ri} )  \big) \\
   &= \sum_{\text{active } i}  s^w_{rih} \big(T^w_{ri} -  \lambda^w(\theta, r,i, y)   \big)
               \ . \nonumber
\end{align}
More background is given in Appendix~\ref{S_A1}.
\vspace{\parskip} \\
\hspace{\parindent}  % Just a \par does not agree with addce.
In the general case, where rates can depend on the state $y$,
we use a heuristic approximation which is the direct generalization
of the above.
Suppose the rate function is given by
\begin{equation}
 \lambda^w(\theta, r,i, y) = \rho^w_r \, \exp(\alpha^w_{r} s^w_{ri}(y) )
               \label{tkappa6}
\end{equation}
which is just like (\ref{tkappa5}) except that now the variables
$s^w_{ri}(y)$ are functions of $y$ and not only of $(w,r,i)$.
Then we must sum over the ministeps, denoted here by
$m = 1, \ldots, \textit{TotNumber}-1$, instead of over the
actors.
By $\Delta^w_{ri}(m)$ we denote the indicator function
of the event that in ministep $m$, actor $i$ makes a ministep
in variable $(w,r)$; in terms of the earlier sections,
the event that the \textit{OptionSet} of ministep $m$ is $(w,i,r)$;
we define $\Delta^w_{ri}(m) = 1$  if this event is true,
and 0 if it is false.
Thus, $\sum_m \Delta^w_{ri}(m) = T^w_{ri}$ and
$\sum_{w,r,i} \Delta^w_{ri}(m) = 1$.
The state before ministep $m$ is denoted $y_m$.
\medskip
\\
\hspace*{\parindent}
The score functions now are given for the basic rate parameters by
\begin{equation}
\begin{split}
& \frac{\partial\, \text{complete data log-likelihood}}{\partial \rho^w_r} \ =\\
& \hspace{3em}    \frac{T^w_r}{\rho^w_r} -
   \frac{1}{\textit{TotNumber}-1} \sum_{m=1}^{\textit{TotNumber}-1}
     \sum_{\text{active } i}
        \exp(\alpha^w_{r} s^w_{ri}(y_m)) \  , \label{tscore5}
\end{split}
\end{equation}
and for the other rate parameters they are
\begin{multline}
  \frac{\partial \, \text{complete data log-likelihood}}{\partial \alpha^w_{rh}}
  = \\
    \sum_{m=1}^{\textit{TotNumber}-1}
     \sum_{\text{active } i}
       s^w_{rih}(y_m) \Bigg(\Delta^w_{ri}(m) -
      \frac{ \rho^w_r \, \exp(\alpha^w_{r} s^w_{ri}(y_m) )}{\textit{TotNumber}-1} \,  \Bigg)
       \\
   =    \sum_{m=1}^{\textit{TotNumber}-1}
     \sum_{\text{active } i}
     s^w_{rih}(y_m) \Bigg( \Delta^w_{ri}(m) - \,
    \frac{\lambda^w(\theta, r,i, y_m)}{\textit{TotNumber}-1} \,  \Bigg) \ . \label{tscore6}
\iffalse
    =   \\
    \Bigg(  \sum_{m=1}^{\textit{TotNumber}-1}
     \sum_{\text{active } i}
     s^w_{rih}(y_m) \, \Delta^w_{ri}(m) \Bigg) -
      \Bigg(  \frac{1}{\textit{TotNumber}-1}  \sum_{m=1}^{\textit{TotNumber}-1}
     \sum_{\text{active } i}
           s^w_{rih}(y_m) \,  \lambda^w(\theta, r,i, y_m) \,  \Bigg)
               \ .
\fi
\end{multline}
This reduces to the equations above in case that the functions
$s^w_{ri}(y)$ are independent of $y$, so there is nothing against
using these last two equations in all cases.


\iffalse
In the ``other case", the formula is more complicated.
There was an implementation for constant rate functions of multiple dependent
variables (the simplest case of the ``other case")
in Siena 3, which consisted of just applying (\ref{kappa1}) to all variables
but with $\textit{NumberVar}(W,r)$ for the particular variable;
I have not checked whether this leads to approximately the same results as what
follows, but it might not, and then Siena 3 would be incorrect
(there have been cases with divergence of the estimate for the rate parameter,
and perhaps this is the cause).

The background of what follows is given in Appendix~\ref{S_A1}.

Internally if the number of dependent variables $R_N + R_B$ is greater than 1,
the rate parameters must be defined as follows (differently from what is done
externally in the output):
\begin{itemize}
\item By $\rho(W,r)$ denote the basic rate parameter for dependent variable
      $(W,r)$.
\item Assuming that always $R_N \geq 1$, denote $\rho = \rho(N,1) $.
\item Denote $\alpha(W,r) = \rho(W,r)/\rho$ for $\{W=N, r \geq 2\}$
      and for $W=B$.
\end{itemize}

The parameter $\theta$ used in the formula for Metropolis Hastings steps
and likelihood calculations must include the elements $\rho$ and
$\alpha(W,r)$ for $\{W=N, r \geq 2\}$;
for the output, results must be transformed back to $\rho(W,r)$.
In particular, score functions as calculated in \textit{StepProb3}
should use derivatives with respect to $\alpha(W,r)$, not $\rho(W,r)$
for $\{W=N, r \geq 2\}$ and $W=B$.

In the Metropolis Hastings steps
the parameter $\rho$ has no effect on the proposal probabilities
depending on $lospr, lcpr$,
but it does have an effect on the acceptance probabilities
through the \textit{KappaFactor}.

Note that \textit{StepProb3} calculates $rr$ as the reciprocal of
\[
   \lambda^+(\theta, +,+, y) = \sum_{r=1}^{R_N} \sum_i \lambda^N(\theta,r,i,y)
           +  \sum_{r=1}^{R_B} \sum_{\text{active } i} \lambda^B(\theta,r,i,y)
\]
and $sc$ as the score function for all parameters except $\rho$.

The algorithm for the score function can be described by the following steps.
\begin{enumerate}
\item $\ms \leftarrow \nnm{first}$
\item $\ms \leftarrow \ms.\nnm{succ}; $ \label{i_continue1}\\
      if (\ms = \nnm{last}), then stop.
\item $\textit{Score} \leftarrow 0 ;\ \textit{mu0} \leftarrow 0;\ \textit{sigma02} \leftarrow 0$
\item $ \text{Get coordinates: }\  (w,i,j,r,d) \leftarrow \ms.\textit{Coordinates}$
\item $y \leftarrow \textit{StateBefore}(\ms)$
\item $\textit{StepProb3}(y,w,i,j,r,d,rr,sc)$
\item $ \textit{Score} \leftarrow \textit{Score} + sc$
\item if ``other case'', then
       $\textit{mu0} \leftarrow \textit{mu0} + \rho \times rr$ \label{i_mu0}
\item if ``other case'', then $\textit{sigma02} \leftarrow
          \textit{sigma02} + (\rho \times rr)^2$  \label{i_sig}
\item go to \ref{i_continue1}
\end{enumerate}

After the termination of this algorithm, the vector \textit{Score}
contains the score function for the parameters except for $\rho$.
The numbers \textit{mu0} and \textit{sigma02}
then are $\mu_0$ and $\sigma^2_0$ of Appendix~\ref{S_A1}.

In the ``simple case", we calculate
\begin{enumerate}
\item[\ref{i_mu0}.] $\textit{mu0}  \leftarrow
         \frac{\displaystyle \textit{TotNumber} - 1 }{\displaystyle  n_\text{act}}$
\item[\ref{i_sig}.] $\textit{sigma02}  \leftarrow
          \frac{\displaystyle \textit{TotNumber} - 1 }{\displaystyle n_\text{act}^2}$
\end{enumerate}
(This result would also have been obtained by carrying out the ``other case" computations,
but I suppose that would have been less efficient.)

The score function for $\rho$ then is given by
\begin{equation}
           \frac{ \textit{mu0} - \rho}{\textit{sigma02}} \ , \label{score3}
\end{equation}
cf.\ (\ref{score2}).
(In the ``simple case", this is the first-order Taylor approximation to (\ref{kappa1})
in the point $\rho = \textit{mu0}$.)

The total score function $J_m$ for this period then is obtained by putting
together (\ref{score3}) and \textit{Score}.
\fi

In Phase 3 of the Robbins-Monro algorithm, we need something extra, viz.,
the complete-data observed information matrix, which is the
$p \times p$ matrix of minus the second partial derivatives
of the log-likelihood function.
Denote this by $H_m$.


\section{Likelihood-based calculations:
         \protect\newline Robbins-Monro algorithm}
\label{S_MLRM}

The Robbins-Monro algorithm as above is applied, but now to the statistic
defined as the score function $S = \sum_{m=2}^M J_m$.
Note that the equation to be solved here is
\[
 \E_{\theta} S = 0 \ ,
\]
so the role of the observed value $s$  in the MoM is now taken by the number 0.

The estimated covariance matrix of the ML estimator is
\[
\Big(\hat\Sigma_\text{complete} - \hat\Sigma_\text{missing} \Big)^{-1}
\]
where $\hat\Sigma_\text{complete}$ is the estimated complete data information
matrix while $\hat\Sigma_\text{missing} $ is the estimated information
matrix for the missing data (cf.\ (25) in \citet{SnijdersEA10a}).
$\hat\Sigma_\text{complete}$ is obtained as the average of the generated
$\sum_{m=2}^M H_m$ over all Phase-3 iterations,
and $\hat\Sigma_\text{missing} $ is obtained as the covariance matrix
of the generated score functions in Phase 3, i.e., $\Sigma$ in (a)
of Phase 3 of Section~\ref{S_RobMon}.

My experience with this estimated covariance matrix is mixed,
as the difference between two positive definite matrices sometimes
turns out to be not positive semi-definite itself.
In such cases having a long Phase 3 will improve things.
It will be worthwhile to think about a better way of
estimating $\Cov(\hat\theta)$.

\addce{
In the Robbins-Monro algorithm we might later experiment with an adaptation,
in which after the Robbins-Monro update step we
solve for $\rho$ using
the score function for $\rho$  --
consider (\ref{tscore2}), (\ref{tscore3}), and (\ref{tscore5}).
(I am not sure that this works well, as this
might conflict with the stochasticity of the Robbins-Monro algorithm.)
}

\section{Likelihood-based calculations:
         \protect\newline Store chains}
\label{S_store}

For communication with users and with other programs,
it is necessary to have a way of reading chains from files
and writing them to files.
Chains also have to be communicated to R.

For writing, I propose to have two ways of writing them.
Always, one line for each ministep, in their natural order.
Numbers within lines are separated by a separator that can be a space, a tab,
or a comma followed by a space.
\begin{enumerate}
\item brief: the line gives $w, r, i, j, d$ in this order (note $r$ comes second).
\item long: the line gives $w, r, i, j, d, \nnm{rRate}, \nnm{lOptionSetProb},
             \nnm{lChoiceProb} $ in this order.
\end{enumerate}

\section{Likelihood-based calculations:
         \protect\newline Structurally fixed values}
\label{S_struct}

If $N^{r}(i,j)$ is structurally fixed and $N^{r}(i,j)(t_{m-1}) = N^{r}(i,j)(t_m)$,
then the chain for period $m$ must not contain any ministeps of Option (Network, $i,j,r$).\\
If $B^{r}(i)$ is structurally fixed and $B^{r}(i)(t_{m-1}) = B^{r}(i)(t_m)$,
then the chain for period $m$ must not contain any ministeps of Option (Behavior, $i,*, r$).

For the variables that are structurally fixed but
have values at $t_m$ different from their
values at $t_{m-1}$, the principle is that these changes are enforced
either directly before the \nnm{last} ministep, or as part of the \nnm{last} ministep
(whichever is the simpler or more elegant;
I think the latter). These changes
do not contribute anything to probabilities or rates; this can be implemented
formally by omitting them from sums or by defining \nnm{lChoiceProb} = 0
and \nnm{rRate} = 0.
If there are several such variables, the order in which these changes are enforced
does not matter (and is inconsequential).

\section{Meta-analysis }
\label{S_meta}

Results from several independent network data sets can be combined
in a meta-analysis according to the method of
\citet{SnijdersBaerveldt03}, who applied the method of \citet{Cochran54}
(also described by \citet{HedgesOlkin85}) to this type of analysis.
This section also elaborates some further methods.

Suppose we have $N$ independent network data sets, in which the
same sets of covariates are used, and that were analyzed
by the same model specification. The meta-analysis is done for
each parameter separately.
Thus, for this explanation we
focus on any coordinate of the parameter vector,
and denote this coordinate by \th{}.
From the $j$th data set we obtain the estimate $\hat\theta_j$
with standard error $s_j$.
The model postulates that
\begin{equation}
 \hat\theta_j = \theta_j + E_j,
\end{equation}
where $\theta_j$ for $j = 1, \ldots, N$ is an i.i.d.\ sample
from a distribution with mean $\mu_\theta$ and variance
$\sigma^2_\theta$; and $E_j$ is independent of $\theta_j$
and has mean 0 and standard deviation $s_j$.
Thus, in this analysis we ignore the error in the estimation
of the error variance $\var(E_j)$.
The purpose of the meta-analysis is
estimating and testing $\mu_\theta$ and $\sigma^2_\theta$.

What we observe from data set $j$ is not \th{j} but
the estimate ${\hat{\theta}}_j\,$. This is a random variable
with mean $\mu_\theta$ and variance $\sigma^2_\theta + s_j^2\,$.

\subsection{Preliminary and two-step estimator}

Here we give the unbiased estimator for $\sigma^2_\theta$ and
a two-stage estimator for the mean $\mu_\theta$ that were presented
in \citet{SnijdersBaerveldt03}, following \citet{Cochran54}.

A preliminary unbiased estimator for $\mu_\theta$ is given by
\begin{equation}
{\hat{\mu}}_\theta^{\mbox{\tiny OLS}} = \frac{1}{N}\, \sum_j {\hat{\theta}}_j \ .
         \label{muols}
\end{equation}
This estimator does not take into account the fact that the error variances
$s_j^2$ may be different.
This implies that, although it is unbiased, the estimator may be inefficient.
Its standard error is
\begin{equation}
 \mbox{s.e.}\left( {\hat{\mu}}_\theta^{\mbox{\tiny OLS}} \right) =
    \sqrt{\frac{1}{N} \left( \sigma^2_\theta + {\bar{s}}^2 \right) }
\end{equation}
where
\begin{equation}
{\bar{s}}^2 =  \frac{1}{N} \sum_j s^2_j
\end{equation}
is the \emph{average error variance}.
An unbiased estimator for the variance $\sigma^2_\theta$ is
\begin{equation}
 {\hat{\sigma}}^{2, \mbox{\tiny OLS}}_\theta =
   \frac{1}{N-1} \sum_j \left( {\hat{\theta}}_j -
                         {\hat{\mu}}^{\mbox{\tiny OLS}}_\theta \right)^2
               \, - \, {\bar{s}}^2  \ .          \label{sigmahat}
\end{equation}
In words, this is the \emph{observed variance} of the
estimates minus the \emph{average error variance}.
If this difference yields a negative value, it will be good to truncate it to 0.

Given that the latter estimator has been calculated, it can be used for an improved
estimation of $\mu_\theta$, viz., by the weighted least squares
(WLS) estimator
\begin{equation}
 {\hat{\mu}}_\theta^{\mbox{\tiny WLS}} =
       \frac{ \sum_j \big( {\hat{\theta}}_j / ({\hat{\sigma}}^{2, \mbox{\tiny OLS}}_\theta
                    + s^2_j ) \big) }
            { \sum_j \big( 1/({\hat{\sigma}}^{2, \mbox{\tiny OLS}}_\theta + s^2_j ) \big)} \ .
                                               \label{muwls}
\end{equation}
This is the `semi-weighted mean' of \citet{Cochran54},
treated also in \citet{HedgesOlkin85}, Section 9.F.
Its standard error can be calculated as
\begin{equation}
 \mbox{s.e.}\big( {\hat{\mu}}_\theta^{\mbox{\tiny WLS}} \big) =
     \frac {1}
     {\sqrt{ \sum_j 1/({\hat{\sigma}}^{2, \mbox{\tiny OLS}}_\theta + s^2_j ) } } \ .  \label{se1}
\end{equation}

\subsection{Maximum likelihood estimator}
\label{S_metamle}

The maximum likelihood estimator (MLE)
under the assumption that the $\hat\theta_j$ are independent and normally
distributed (note that this is an assumption about their marginal
distributions, not their distributions conditional on
the true values $\theta_j$)
is defined by two equations.
The first is the equation for $\hat\mu$ given ${\sigma}^2 $:
\begin{equation}
 {\hat{\mu}} \,=\,
       \frac{ \sum_j \big( {\hat{\theta}}_j / (\sigma^2
                    + s^2_j ) \big) }
            { \sum_j \big( 1/({\sigma}^2 + s^2_j ) \big)} \ .
                                               \label{mu2}
\end{equation}
The second is the requirement that the profile log-likelihood for $\sigma^2$
is maximized. This profile log-likelihood is given by
\begin{equation}
  p(\sigma^2) \,=\, - \,  \frac12 \sum_j \log\big(\sigma^2 + s_j^2\big) \,-\,
         \frac12 \sum_j \frac{\big(\hat\theta_j - \hat\mu\big)^2}{\sigma^2 + s_j^2} \ .
\end{equation}
As a first step to maximize this, the derivative
and second derivative can be computed; here
it should be kept in mind that $\hat\mu = \hat\mu(\hat\sigma^2)$ is given as
a function of $\hat\sigma^2$ in (\ref{mu2}) -- however, that part cancels out
in the derivative
so forgetting this might still yield the correct answer. Further it is
convenient to work with the function $c_j(\sigma^2) = 1/(\sigma^2 + s_j^2)$
and note that $dc_j/d\sigma^2 = - c_j^2$. The result is
\begin{align}
\frac{d\, p(\sigma^2)}{d\,\sigma^2} \ &= \
     - \, \frac12 \sum_j \frac{1}{\sigma^2 + s_j^2} \,+\,
     \frac12 \sum_j \frac{(\hat\theta_j - \hat\mu)^2}{(\sigma^2 + s_j^2)^2}
                                     \label{dpf} \\
\frac{d^2\, p(\sigma^2)}{d\,(\sigma^2)^2} \ &= \
     - \, \frac12 \sum_j \frac{1}{\big(\sigma^2 + s_j^2\big)^2} \,-\,
           \sum_j \frac{(\hat\theta_j - \hat\mu)^2}{(\sigma^2 + s_j^2)^3} \ .
                     \label{dpf2}
\end{align}
Thus, one way to compute the MLE is to iterate the two steps:
\begin{enumerate}
\item Compute $\hat\mu$ by (\ref{mu2})
\item Solve ${d\, p(\sigma^2)}/{d\,\sigma^2} = 0$ using definition (\ref{dpf}).
\end{enumerate}
Another way is to iterate the two steps:
\begin{enumerate}
\item Compute $\hat\mu$ by (\ref{mu2})
\item One Newton-Raphson step (or two):
\begin{equation}
      \sigma^2_{\text{new}} \,=\, \sigma^2 \,+\,
        \frac{\sum_j c_j (c_j d_j^2 - 1)}
         { \, \sum_j c_j^2 (2 c_j d_j^2 + 1) \, }
\end{equation}
where
\[
  c_j \,=\, \frac{1}{\sigma^2 + s_j^2} \ , \
         d_j \,=\, \hat\theta_j - \hat\mu \ .
\]
\end{enumerate}

The results of this iteration scheme will be denoted by
${\hat{\mu}}_\theta^{\mbox{\tiny IWLS}}$ and
${\hat{\sigma}}_\theta^{2, \mbox{\tiny IWLS}}$
(IWLS for \emph{iteratively reweighted least squares}),
but the name ML could equally well be used.

The standard error
of  $\hat\mu_\theta^{\mbox{\tiny IWLS}}$ can be calculated as
\begin{equation}
 \mbox{s.e.}\big( {\hat{\mu}}_\theta^{\mbox{\tiny IWLS}} \big) =
     \frac {1}
     {\sqrt{ \sum_j 1/({\hat{\sigma}}^{2, \mbox{\tiny IWLS}}_\theta + s^2_j ) } } \ .  \label{se2}
\end{equation}

\subsection{Testing}

(This section again follows \citet{SnijdersBaerveldt03}.)

For testing $\mu_\theta$ and $\sigma^2_\theta$,
it is assumed that the parameter estimates ${\hat{\theta}}_j$
conditional on $\theta_j$
are approximately normally distributed with mean \th{j} and variance $s^2_j$.
The first null hypothesis to be tested is that the effects are 0 in all groups.
This can be tested by the test statistic
\begin{equation}
 T^2 = \sum_j \left( \frac{{\hat{\theta}}_j}{s_j} \right)^2     \label{T^2}
\end{equation}
which has an approximate $\chi^2$ distribution with $N$ degrees of freedom
under the null hypothesis.
The null hypothesis that the mean effect $\mu_\theta$ is zero can be tested
on the basis of the $t$-ratio
\begin{equation}
 t_{\mu_\theta} =   \frac{{\hat{\mu}}_\theta}
                    { \mbox{s.e.}\big( {\hat{\mu}}_\theta \big) }
\end{equation}
which has approximately a standard normal distribution
under the null hypothesis.
Finally, the null hypothesis that the variance of the effects $\sigma^2_\theta$
is zero can be tested using the test statistic
\begin{equation}
 Q = T^2 - {\tilde{t}}^2                                  \label{Q}
\end{equation}
where
\begin{equation}
 \tilde{t} = \frac{\sum_j {\hat{\theta}}_j / s^2_j }
                  {  \sqrt{ \sum_j 1/s^2_j  } }         \label{ttilde}
\end{equation}
and $Q$ has, under the null hypothesis, approximately a chi-squared
distribution with $N-1$ degrees of freedom.

\subsection{Fisher combination of $p$-values}
Fisher's (1932) procedure for combination of independent $p$-values
is applied both to left-sided and right-sided $p$-values. In this way,
we are able to report tests for both the following testing problems:
\begin{align*}
 H_0^{(R)}: \ \ & \theta_j \leq 0 \ \ \mbox{ for all } j; \\
 H_1^{(R)}: \ \ & \theta_j > 0 \ \ \mbox{ for at least one } j .
\end{align*}
Significance is interpreted here as evidence that in \emph{some}
(at least one) data set, the parameter $\theta_j$ is positive.
\begin{align*}
 H_0^{(L)}:\ \ & \theta_j \geq 0 \ \ \mbox{ for all } j; \\
 H_1^{(L)}:\ \ & \theta_j < 0 \ \ \mbox{ for at least one } j .
\end{align*}
Significance is interpreted here as evidence that in \emph{some}
(at least one) data set, the parameter $\theta_j$ is negative.

Note that it is very well possible that both one-sided combination tests
are significant: then there is evidence for
some positive and some negative effects.

The procedure operates as follows.
Calculate $p_j^+$ and $p_j^-$, being the right and left one-sided
$p$-values:
\begin{align*}
   p_j^+ & = 1 - \Phi\left(\frac{\hat\theta_j}{s_j}\right) \\
   p_j^- & =  \Phi\left(\frac{\hat\theta_j}{s_j}\right) \ ,
\end{align*}
where $\Phi$ is the c.d.f.\ of the standard normal distribution.
The Fisher combination statistics are defined as
\begin{align*}
  C^+ & = - 2\, \sum_{j=1}^N \ln\big(p_j^+\big) \\
  C^- & = - 2\, \sum_{j=1}^N \ln\big(p_j^-\big) \ .
\end{align*}
Both of these must be tested in a $\chi^2$ distribution with
$2\,N$ degrees of freedom.

\subsection{Combinations of score-type tests}

It is possible that for a parameter, score-type tests are given
instead of estimates. Then these score-type tests can be
combined also in a Fisher procedure.
This is done just as above; but now for $p$-values obtained from
the standard normal variates obtained as a result from the score-type test.
Of course this makes sense only if the tested null values are all the same
(usually 0).

\subsection{Further regression analyses}

The data frame of values $(\hat\theta_j, s_j),\, j = 1, \ldots, N$ is made
available for further analysis, possibly extended by other variables $x$,
for analysis according to the model
\begin{equation}
  \hat\theta_j \sim \mathcal{N}\big(x_j'\beta,\, \sigma^2 + s_j^2\big),
           \hspace{2em} \text{ independent for } j = 1, \ldots, N.
\end{equation}
Note that the IWLS estimates of Section~\ref{S_metamle}
are the estimates under such a model
if $x_j' \beta$ is comprised of just a constant term.

IWLS/ML regression analysis here can be carried out by
iteration of the two steps mentioned above, but now the step (\ref{mu2})
is replaced by a weighted least squares analysis with weights being normalised
versions of
\[
  w_j \,=\, \frac{1}{\sigma^2 + s_j^2} \ .
\]


\subsection{Differences in model specification}

In practice, it can happen that a set of data sets is being
offered for a meta-analysis in which the model specifications
are not identical. An example is the case where one of the
independent variables has variance 0 in some data sets
(e.g.: an analysis of networks in schools, with pupils' sex
as an independent variable; there may be some all-girls or
all-boys schools).

This then must be noted in the output; and the data sets combined
as if this parameter here has an estimate 0 but with an infinite
standard error -- in other words, this parameter should be ignored
for this data set;
and this data set should not add to the degrees of freedom
for this particular parameter.


\newpage
\section{Models for Dynamics of Non-directed Networks }
\label{S_nondir}

For notational simplicity only, this section assumes that there is only one network.
This is not a restriction, it merely means that we can denote
$x_{ij}$ instead  of $x^{(r)}_{ij}$.

In this section it is assumed that the network is
non-directed, i.e., ties have no directionality:
$X_{ij} = X_{ji}$ holds by necessity, and the
tie variables $X_{ij}$ and $X_{ji}$ are treated
as being one and the same variable. This is the case
in many types of tie, such as mutual collaboration or agreement.
Ties now are indicated by $i \leftrightarrow j$.

\subsection{Two-sided Choices}
For modeling non-directed networks, it is necessary to make
assumptions about the negotiation or coordination between
the two actors involved in the creation or termination of a tie.
We present several models, all based on a two-step process of
opportunity and choice, and making different assumptions
concerning the combination of choices between the two actors
involved in a tie.

For the opportunity, or timing, process, two options are presented.
\begin{itemize}
\item[1.] \emph{One-sided initiative}:
          One actor $i$ is selected and gets the opportunity to make a change,
          based on rate function $\lambda_{i}(x; \alpha, \rho_m)$.
\item[2.] \emph{Two-sided opportunity}:
          An ordered pair of actors $(i,j)$ (with $i \neq j$) is selected
          and gets the opportunity
          to make a new decision about the existence of a tie between them.\\
          This is based on pairwise rate functions
          denoted $\lambda_{ij}(x; \alpha, \rho_m)$.
          The waiting time until the next opportunity for change
          by any pair of actors has the exponential
          distribution
          with parameter
          \[\lambda_{\rm{tot}} = \sum_{i \neq j} \lambda_{ij}(x; \alpha, \rho_m) \ .
          \]
          The probability that the next opportunity for change is for pair $(i,j)$ is given by
          \begin{equation}
           \P\{ \text{Next opportunity for change is for pair } (i, j) \} = \,
            \frac{\lambda_{ij}(x; \alpha, \rho_m)}{\lambda_{\rm{tot}}(x; \alpha, \rho_m)} \ .
          \end{equation}
          For the moment, we have only implemented the case where $\lambda_{ij}$ is a product
          \[
          \lambda_{ij}(x; \alpha, \rho_m) = \lambda_{i}(x; \alpha, \rho_m) \lambda_{j}(x; \alpha, \rho_m) \ ,
          \]
          so that
          \begin{equation}
           \lambda_{\rm{tot}} = \Big( \sum_{i} \lambda_{i}(x; \alpha, \rho_m)\Big)^2
                         \, - \,  \sum_{i} \big(\lambda_{i}(x; \alpha, \rho_m)\big)^2 \ .
          \end{equation}
\end{itemize}

The choice process is modeled as one of three options
D(ictatorial), M(utual) and C(ompensatory).
We now define, for graphs $x$ and $i \neq j$, by $x^{(+ij)}$ the graph
which is identical to $x$ in all tie variables except possibly
for the tie between $i$ and $j$,
and to which the tie $i \leftrightarrow j$
is added if it was not already there:
$x^{(+ij)}_{ij} = 1$.
For the non-directed case, $x^{(\pm ij)}$ is defined
by analogy to the definition above:
it is the graph identical to $x$
except that the indicator for the non-directed
tie $i \leftrightarrow j$ has been toggled:
$x^{(\pm ij)}_{ij} = x^{(\pm ij)}_{ji} = 1 - x_{ij} = 1 - x_{ji}$.
Thus if $x_{ij} = 0$ then $x^{(+ ij)} = x^{(\pm ij)}$;
if $x_{ij} = 1$ then $x^{(+ ij)} = x$.

In all cases assumption (7.) as defined for the directed case is retained,
and assumption (8.) is replaced as indicated below.
\begin{enumerate}
\item[D.] \emph{Dictatorial}:
      One actor can impose a decision about a tie on the other.\\
      Like in the directed case, actor $i$ selects the (myopically)
      best toggle of a single tie variable
      $X_{ij}$ given the objective function $f_i(x; \beta)$
      plus a random disturbance,
      and actor $j$ just has to accept.
      Combined with the two opportunity options,
      this yields the following cases.
      \begin{itemize}
      \item[8.D.1.] (alias A-1 alias AFORCE) \\
      The probability that the tie variable changed is $X_{ij}$,
      so that the network $x$ changes into $x^{(\pm ij)}$, is given by
      \begin{equation}
        p_{ij}(x, \beta) = \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
         {\sum_{h=1}^n \exp\big(f_i(x^{(\pm ih)}; \beta)\big)}  \ .
                                                         \label{eq:acbD1}
      \end{equation}
      \item[8.D.2.] (alias B-1 alias BFORCE) \\
      The probability that
      network $x$ changes into $x^{(\pm ij)}$, is given by
      \begin{equation}
        p_{ij}(x, \beta) = \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
         {\exp\big(f_i(x; \beta)\big) + \exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
                                       \ .         \label{eq:acbD2}
      \end{equation}
      \end{itemize}
\item[M.] \emph{Mutual}:\\
      Both actors must agree for a tie between them to exist,
      in line with Jackson and Wolinsky (1996).
      \begin{itemize}
      \item[8.M.1.] (alias A-2 alias AAGREE) \\
      In the case of one-sided initiative, actor $i$ selects the best
      possible choice, with probabilities (\ref{eq:acbD1}).
      If currently $x_{ij} = 0$
      so that this means creation of a new tie $i \leftrightarrow j$, this is
      proposed to actor $j$, who then accepts according to a binary choice
      based on objective function $f_j(x; \beta)$, with
      acceptance probability
      \[
      \P\{j \text{ accepts tie proposal}\} =
                   \frac{\exp\big(f_j(x^{(+ij)}; \beta) + \beta^\text{b} \big)}
        {\exp\big(f_j(x; \beta)\big) + \exp\big(f_j(x^{(+ij)}; \beta) + \beta^\text{b} \big)} \ ,
      \]
      where $ \beta^\text{b} $ is an offset; this is a fixed parameter,
      given as \texttt{UniversalOffset} in \nm{sienaAlgorithmCreate},
      and is not estimated.\\
      If the choice by $i$ means termination of an existing tie,
      the proposal is always put into effect.
      Jointly these rules lead to the following probability
      that the current network $x$ changes into $x^{(\pm ij)}$:
      \begin{equation}
      \begin{split}
      &  p_{ij}(x, \beta) =
          \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
         {\sum_{h=1}^n \exp\big(f_i(x^{(\pm ih)}; \beta)\big)}\\
       & \hspace{3em} \times
        \left(\frac{\exp\big(f_j(x^{(+ ij)}; \beta) + \beta^\text{b}  \big)}
          {\exp\big(f_j(x; \beta)\big) + \exp\big(f_j(x^{(+ ij)}; \beta) + \beta^\text{b} \big)}
                         \right)^{1-x_{ij}}
          \ .   \label{eq:acbM}
      \end{split}
      \end{equation}
      (Note that the second factor comes into play only if $x_{ij} = 0$,
      which implies $x^{(+ ij)} = x^{(\pm ij)}$.)
      \item[\hspace*{3em}8.M.2.] (alias B-2 alias BAGREE) \\
      In the case of two-sided opportunity, actors $i$ and $j$
      both reconsider the value of the tie variable $X_{ij}$.
      Actor $i$ proposes a change (toggle) with probability (\ref{eq:acbD2})
      and actor $j$ similarly.
      If currently there is no tie, $x_{ij} = 0$, then the tie is created
      if this is proposed by both actors, which has probability
      \begin{subequations}
      \begin{flalign}
        & p_{ij}(x, \beta) =   \label{eq:pM2}         \\
        & \ \frac{\exp\big(f_i(x^{(+ ij)}; \beta)\big)}
         {\Big(\exp\big(f_i(x; \beta)\big) + \nonumber
                           \exp\big(f_i(x^{(+ ij)}; \beta)\big)\Big)} \times\\
        & \hspace{4em} \frac{\exp\big(f_j(x^{(+ ij)}; \beta)\big)}
         {\Big(\exp\big(f_j(x; \beta)\big) +
                                \exp\big(f_j(x^{(+ ij)}; \beta)\big)\Big)}  \ .
            \nonumber
      \end{flalign}
      If currently there is a tie, $x_{ij} = 1$, then the tie is terminated
      if one or both actors wish to do this, which has probability
      \begin{align}
        & p_{ij}(x, \beta) =    1 \, -      \label{eq:qM2} \\
        &      \ \Biggl\{ \frac{\exp\big(f_i(x; \beta)\big)}
         {\Big(\exp\big(f_i(x; \beta)\big) +
                       \exp\big(f_i(x^{(\pm ij)}; \beta)\big)\Big)} \times \nonumber \\
        &  \hspace{4em}  \frac{\exp\big(f_j(x; \beta)\big)}
         {\Big(\exp\big(f_j(x; \beta)\big) +
                       \exp\big(f_j(x^{(\pm ij)}; \beta)\big)\Big)} \Biggr\} \ .
          \nonumber
      \end{align}
      \end{subequations}
      \end{itemize}
\item[C.] \emph{Compensatory}: (alias B-3 alias BJOINT) \\
      The two actors decide on the basis of their combined interests.\\
      The combination with one-sided initiative is rather artificial here,
      and we only elaborate this option for the two-sided initiative.
      \begin{itemize}
      \item[8.C.2.]
      The binary decision about the existence of the tie $i \leftrightarrow j$
      is based on the objective function $f_i(x; \beta)+f_j(x; \beta)$.
      The probability that network $x$ changes into $x^{(\pm ij)}$,
      now is given by
      \begin{equation}
       p_{ij}(x, \beta) =  \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)
                                 + f_j(x^{(\pm ij)}; \beta)\big)}
                 {\exp\big(f_i(x; \beta) + f_j(x; \beta)\big) +
         \exp\big(f_i(x^{(\pm ij)}; \beta) + f_j(x^{(\pm ij)}; \beta)\big)} \ .
                                                              \label{eq:pC2}
      \end{equation}
      \end{itemize}
\end{enumerate}

The two model components, rate function and objective function,
can be put together by considering the transition rates.
Given that the only permitted transitions between networks are toggles of
a single tie variable, the transition rates can be defined as
\begin{equation}
  q_{ij}(x) = \lim_{\Delta t \downarrow 0}
         \frac{\P \{ X(t + \Delta t) = x^{(\pm ij)} \mid X(t) = x \}}
                                                {\Delta t}
\end{equation}
for $i \neq j$.
Note that this definition implies that the probabilities of toggling
a particular tie variable $X_{ij}$ in a short time interval
are approximated by
\[
  \P \{ X(t + \Delta t) = x^{(\pm ij)} \mid X(t) = x \} \approx
    q_{ij}(x)\, \Delta t\,  .
\]

In the derivation of the score functions
account must be taken of the fact that
toggling variable $X_{ij}$ is the same as toggling $X_{ji}$,
and the rules described above give different roles
for the first and the second actor in the pair $(i,j )$.
For the models with one-sided initiative, the transition rate is
\begin{equation}
  q_{ij}(x) = \lambda_i(x; \alpha, \rho_m)\,p_{ij}(x, \beta) +
              \lambda_j(x; \alpha, \rho_m)\,p_{ji}(x, \beta) \ , \label{q1}
\end{equation}
and for the models with two-sided opportunity
\begin{equation}
  q_{ij}(x) = \lambda_{ij}(x; \alpha, \rho_m)\,p_{ij}(x, \beta) +
              \lambda_{ji}(x; \alpha, \rho_m)\,p_{ji}(x, \beta) \ . \label{q2}
\end{equation}



\subsection{Score function for two-sided ties: objective function}
The score function for the complete data likelihood with respect to the
$\beta$ parameters is as follows. (Explained here in shorthand for readers
knowing theory and terminology from other papers.) It can be calculated as
the sum of the contributions from all the ministeps, where ministeps leading
to no change are also included. Therefore, we only need to give the
expression for the score function contribution for a single ministep leading
from $x$ to $x^{( \pm ij)}$; and the contribution for a ministep leading to
no change, but where the actor or pair of actors concerned in the ministep is
known.

We must realize that the phrase ``score function for the complete data likelihood''
is ambiguous, because we have liberty to define what we take as complete data;
perhaps `augmented data' would be a better term.
We can use any data augmentation that allows an easy calculation
of the score functions.
More extensive augmentation introduces extra variability and thereby
extra noise in the score function.
For example, in the models with one-sided initiative, if the data augmentation
includes the knowledge of who took the initiative, then effectively we are working
with the transition rates
\begin{equation}
  q_{ij}(x) = \lambda_i(x; \alpha, \rho_m)\,p_{ij}(x, \beta)
\end{equation}
instead of (\ref{q1}). We shall always do this.
This leads to simpler but somewhat noisier score functions.

In the following, the complete/augmented data always includes the knowledge
of the actor $i$ who takes the initiative,
and in the two-sided opportunity case the ordered pair $(i,j)$ who make the ministep.

For the case M.1 we shall give two different expressions
which differ in this way.

By $\mathcal A_-$ is denoted the set of potential alters in a given ministep
and we denote $\mathcal A = \mathcal A_- \cup \{i\}$ where $i$ is the actor making
the ministep, but signifying here `no change' so
alternatively this could be denoted by 0. In the default case, $\mathcal A = \{1, \ldots, n\}$;
if not all actors are active then $\mathcal A_-$ will contain only active actors;
but it will be potentially a smaller set if one of the conditions \nnm{uponly},
\nnm{downonly}, \nnm{higher}, \nnm{disjoint}, or \nnm{atleastone} holds.

Denote the change in objective function when toggling $x_{ij} $ by
\[
(\Delta \, f)_{ij}(x, \beta) = f_i\big(x^{(\pm ij)}; \beta\big) - f_i(x; \beta) \ ,
\]
which implies
\[
(\Delta \, f)_{ii}(x, \beta) = 0 .
\]
Further denote
\begin{equation}        p_{1ij}(x, \beta) =
                  \frac{\exp\big((\Delta f)_{ij}(x, \beta)\big)}
         {\sum_{h \in \mathcal A} \exp\big((\Delta f)_{ih}(x, \beta)\big)}  \label{eq:p1}
\end{equation}
and
\begin{equation}        p_{0ij}(x, \beta) =
                  \frac{\exp\big((\Delta f)_{ij}(x, \beta)\big)}
         {1 +  \exp\big((\Delta f)_{ij}(x , \beta)\big)}  \label{eq:p0}
          \ .
\end{equation}
In the following, we often use expressions such as
\[
\frac{\partial (\Delta\, f)_{ij}(x, \beta)}{\partial \beta_k} \ .
\]
Since the changes $\Delta\, f_{ij}(x, \beta)$ are linear combinations
(\ref{eq:Deltaf}), these partial derivatives are
the change contributions of the effects,
\begin{equation}
\frac{\partial (\Delta\, f)_{ij}(x, \beta)}{\partial \beta_k}  = s_{ki}(x^{(\pm ij)}) - s_{ki}(x) \ .
\end{equation}
This is denoted in shorthand by
\begin{equation}
s_{ijk}  = s_{ki}(x^{(\pm ij)}) - s_{ki}(x) \ ,  \label{eq:s}
\end{equation}
where the dependence on $x$ is omitted. Note that $s_{iik} = 0$.




\subsection{Dictatorial D.1 (alias A-1 alias AFORCE)}
Probability of change, see (\ref{eq:acbD1})
      \begin{equation}
        p_{ij}(x, \beta) = \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
         {\sum_{h \in \mathcal A} \exp\big(f_i(x^{(\pm ih)}; \beta)\big)}  =
%                  \frac{\exp\big((\Delta f)_{ij}(x, \beta)\big)}
%         {\sum_{h=1}^n \exp\big((\Delta f)_{ih}), \beta)\big)}
        p_{1ij}(x, \beta)
          \ .
      \end{equation}
This is just the same as the directed case.
Well-known calculus leads to the score function
\begin{subequations}
      \begin{align}
      \frac{\partial \log\, p_{ij}(x, \beta)}{\partial \beta_k}
      &=      \frac{\partial \log\, p_{1ij}(x, \beta)}{\partial \beta_k} \nonumber \\
      &=  \frac{\partial  (\Delta f)_{ij}(x, \beta)}{\partial \beta_k} -  \nonumber
           \sum_{h  \in \mathcal A}  p_{1ih}(x, \beta) \frac{\partial  (\Delta f)_{ih}(x, \beta)}{\partial \beta_k} \\
      &=  s_{ijk} -
           \sum_{h  \in \mathcal A}  p_{1ih}(x, \beta)\,s_{ihk} \ .
             \label{eq:cD1}
      \end{align}
This formula also applies to the case of no change $j = i$, where it yields
      \begin{align}
      \frac{\partial \log\, p_{ii}(x, \beta)}{\partial \beta_k}
      &=   -  \sum_{h \in \mathcal A}  p_{1ih}(x, \beta) \, s_{ihk} \ .
             \label{eq:cD2}
      \end{align}
\end{subequations}

\subsection{Dictatorial D.2 (alias B-1 alias BFORCE)}
Note that we condition here on the choice, in the opportunity process, of $i$
and $j$, in this order: $i$ is allowed to make a decision which then is
imposed on $j$. The probability of change is, see (\ref{eq:acbD2}),
      \begin{equation}
        p_{ij}(x, \beta) = \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
         {\exp\big(f_i(x; \beta)\big) + \exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
%                  = \frac{\exp\big((\Delta\, f)_{ij}(x, \beta)\big)}
%         {1 + \exp\big((\Delta\,f)_{ij}(x, \beta)\big)}
           = p_{0ij}(x, \beta) \ .
      \end{equation}
%Here $p_{ij} = p_{0ij}$.
Well-known calculus leads to the score function for change:
\begin{subequations}
      \begin{align}
      \frac{\partial \log\, p_{ij}(x, \beta)}{\partial \beta_k}  \nonumber
      &=      \frac{\partial \log\, p_{0ij}(x, \beta)}{\partial \beta_k}  \\
      &=      (1 - p_{0ij}(x, \beta))
        \frac{\partial  (\Delta f)_{ij}(x, \beta)}{\partial \beta_k} \nonumber \\
      &=      (1 - p_{0ij}(x, \beta))\, s_{ijk} \ ;
             \label{eq:cD2a}
      \end{align}
for no change:
      \begin{equation}
      \frac{\partial \log\, \big(1 - p_{ij}(x, \beta)\big)}{\partial \beta_k} =
          - p_{0ij}(x, \beta)
        \frac{\partial  (\Delta f)_{ij}(x, \beta)}{\partial \beta_k}
        =  - p_{0ij}(x, \beta)\, s_{ijk} \ .
             \label{eq:cD2b}
      \end{equation}
\end{subequations}



\subsection{Mutual M.1 (alias A-2 alias AAGREE) }
The probability of change is
      \begin{align}
        p_{ij}(x, \beta) &= \frac{\exp\big(f_i(x^{(\pm ij)}; \beta)\big)}
 {\sum_{h \in \mathcal A} \exp\big(f_i(x^{(\pm ih)}; \beta)\big)}  \nonumber \\
       & \hspace{2em} \times \left(\frac{\exp\big(f_j(x^{(+ ij)}; \beta)
             + \beta^\text{b} \big)}
                        {\exp\big(f_j(x; \beta)\big) +
    \exp\big(f_j(x^{(+ ij)}; \beta) + \beta^\text{b} \big)} \right)^{1-x_{ij}}
                                    \nonumber \\[1em]
       &= p_{1ij}(x, \beta) \, \big( p_{0ji}(x, \beta)  \big)^{(1-x_{ij})} \\
    & =  p_{1ij}(x, \beta)\, \big(x_{ij} + (1-x_{ij})p_{0ji}(x, \beta) \big)\ ,
          \label{eq:cM}
      \end{align}
where it may be noted that $x_{ij}$ is the current state (before the possible
change).

The outcome `no change' may be further refined into the following list
of possibilities:
\begin{itemize}
\item[[$i-\!\!\!$]]: $i$ decides not to propose any change.
\item[[$ij-\!\!\!$]]: ($i$ proposes to $j$ to add a tie, but this is rejected by $j$), for $j \in \mathcal A_-$.
\end{itemize}

The probability that there is no change is as follows
(somewhat arbitrarily we denote this here by $p_{i0}$)
\begin{equation}
  p_{i0}(x, \beta) =
  1 - \sum_{j \in \mathcal A_-}   p_{1ij}(x, \beta)\, \big(x_{ij} + (1-x_{ij})p_{0ji}(x, \beta) \big)\ .
\end{equation}
This can be decomposed into the sub-events as listed above, with probabilities
\begin{equation}
  p_{[i-]}(x, \beta) = p_{1ii}(x, \beta) \ ,
\end{equation}
\begin{equation}
  p_{[ij-]}(x, \beta) = (1-x_{ij})\big(1-p_{0ji}(x, \beta) \big)\ .
\end{equation}

For the score functions, we combine (\ref{eq:cD2a}) and (\ref{eq:cD1}).
\begin{subequations}
\noindent
The score function for a change is
      \begin{align}
      \frac{\partial \log\,  p_{ij}(x, \beta)}{\partial \beta_k} &=
           \frac{\partial  (\Delta f)_{ij}(x, \beta)}{\partial \beta_k} -          \nonumber
           \sum_{h \in \mathcal A}  p_{1ih}(x, \beta) \frac{\partial  (\Delta f)_{ih}(x, \beta)}{\partial \beta_k} \\
     &  \hspace{8em}  + (1 - x_{ij})(1 - p_{0ji})\frac{\partial  (\Delta f)_{ji}(x, \beta)}{\partial \beta_k} \nonumber \\
     &=   s_{ijk} -   \left( \sum_{h \in \mathcal A}  p_{1ih}(x, \beta) s_{ihk} \right) + (1 - x_{ij})(1 - p_{0ji})s_{jik} \ .
             \label{eq:cM2a}
      \end{align}
The score function for no change is a bit tedious,
\begin{align}
      \frac{\partial \log\, p_{i0}(x, \beta)}{\partial \beta_k} &=
     \frac{-1}{p_{i0}(x, \beta)} \sum_{j  \in \mathcal A_-}   \nonumber
           \bigg\{ x_{ij} \frac{\partial p_{1ij}}{\partial \beta_k} \\
   &  \hspace{8em}     + (1-x_{ij})\big( p_{0ji}\frac{\partial p_{1ij}}{\partial \beta_k}
    +    p_{1ij} \frac{\partial p_{0ji}}{\partial \beta_k} \big)     \bigg\}
\end{align}
where ${\partial p_{0ij}}/{\partial \beta_k}$ and
${\partial p_{1ij}}/{\partial \beta_k}$ must be substituted from (\ref{eq:cD2a}) and
(\ref{eq:cD1}).
I do not think this can be simplified to an important extent.

If instead of `no change' we work with the sub-events $[i-]$ = `$i$ wishes no change'
and $[ij-]$ = `$i$ proposed to add a tie which is rejected by $j$', the score functions are
\begin{equation}
      \frac{\partial \log\, p_{[i-]}(x, \beta)}{\partial \beta_k} =
         -  \sum_{h \in \mathcal A}  p_{1ih}(x, \beta) \, s_{ihk}
\end{equation}
as in (\ref{eq:cD2}), and
\begin{equation}
      \frac{\partial \log\, p_{[ij-]}(x, \beta)}{\partial \beta_k} =
         (1-x_{ij}) \bigg(s_{ijk} - \sum_{h \in \mathcal A}  p_{1ih}(x, \beta)\,s_{ihk}
                               - p_{0ji}(x, \beta)\, s_{jik}   \bigg)
\end{equation}
which is much simpler indeed.
\end{subequations}


\subsection{Mutual M.2 (alias B-2 alias BAGREE) }
\begin{subequations}
The probability of change is:\\
\noindent
if $x_{ij} = 0$,
\begin{equation}
  p_{ij}(x, \beta) = p_{0ij}(x, \beta)\,  p_{0ji}(x, \beta)
\end{equation}
and if $x_{ij} = 1$,
\begin{equation}
  p_{ij}(x, \beta) =  p_{0ij}(x, \beta) + p_{0ji}(x, \beta) - p_{0ij}(x, \beta)\,  p_{0ji}(x, \beta) \ .
\end{equation}
For the case  $x_{ij} = 1$, it is easier to start working with
the probability of no change,
\begin{equation}
  1 - p_{ij}(x, \beta) = \big( 1- p_{0ij}(x, \beta)\big) \big(1- p_{0ji}(x, \beta)\big)  \ .
\end{equation}
\end{subequations}
This gives the following four cases:
\begin{subequations}
\begin{enumerate}
\item $x_{ij} = 0$, change:
      \begin{equation}
        \frac{\partial \log\, p_{ij}(x, \beta)}{\partial \beta_k} =
%               p_{0ij}(x, \beta) \frac{\partial (\Delta\, f_{ij}(x, \beta))}{\partial \beta_k}
%               + p_{0ji}(x, \beta) \frac{\partial (\Delta\, f_{ji}(x, \beta))}{\partial \beta_k} \ ;
               \big(1- p_{0ij}(x, \beta)\big) s_{ijk} + \big(1-p_{0ji}(x, \beta)\big) s_{jik} \ ;
      \end{equation}
\item $x_{ij} = 0$, no change:
      \begin{equation}
      \begin{split}
     &   \frac{\partial \log\, (1 - p_{ij}(x, \beta))}{\partial \beta_k} = \\
     & \ \   - \, \frac{p_{ij}(x, \beta)}{1 - p_{ij}(x, \beta)} \,
%             \left(  p_{0ij}(x, \beta) \frac{\partial (\Delta\, f_{ij}(x, \beta))}{\partial \beta_k}
%               + p_{0ji}(x, \beta) \frac{\partial (\Delta\, f_{ji}(x, \beta))}{\partial \beta_k} \right) \ ;
        \big\{   \big(1- p_{0ij}(x, \beta)\big) s_{ijk} + \big(1-p_{0ji}(x, \beta)\big) s_{jik}\big\} \ ;
      \end{split}
      \end{equation}
\item $x_{ij} = 1$, change:
      \begin{equation}
        \frac{\partial \log\, p_{ij}(x, \beta)}{\partial \beta_k} =
          \frac{1 - p_{ij}(x, \beta)}{p_{ij}(x, \beta)} \,
%               \left( p_{0ij}(x, \beta) \frac{\partial (\Delta\, f_{ij}(x, \beta))}{\partial \beta_k}
%               + p_{0ji}(x, \beta) \frac{\partial (\Delta\, f_{ji}(x, \beta))}{\partial \beta_k} \right) \ ;
                \big( p_{0ij}(x, \beta) s_{ijk}  + p_{0ji}(x, \beta) s_{jik} \big) \ ;
      \end{equation}
\item $x_{ij} = 1$, no change:
      \begin{equation}
        \frac{\partial \log\, (1 - p_{ij}(x, \beta))}{\partial \beta_k} =
%               - p_{0ij}(x, \beta) \frac{\partial (\Delta\, f_{ij}(x, \beta))}{\partial \beta_k}
%               - p_{0ji}(x, \beta) \frac{\partial (\Delta\, f_{ji}(x, \beta))}{\partial \beta_k} \ .
               \ - p_{0ij}(x, \beta)\,s_{ijk}   - p_{0ji}(x, \beta)\, s_{jik} \ .
      \end{equation}
\end{enumerate}
\end{subequations}
Here also a further decomposition would be possible, following the two consecutive
choices by $i$ and $j$. However, in this case this leads to
more variability but not much more simplicity, so this is not elaborated now.



\subsection{Compensatory C (alias B-3 alias BJOINT) }
The probability of change is
      \begin{equation}
       p_{ij}(x, \beta) =  \frac{\exp\big((\Delta\,f)_{ij}(x, \beta) + (\Delta\,f)_{ji}(x, \beta)\big)}
                 {1 +  \exp\big((\Delta\,f)_{ij}(x, \beta) + (\Delta\,f)_{ji}(x, \beta)\big)} \ .
      \end{equation}
(Note that $x^{(\pm ij)} = x^{(\pm ji)}$, so indeed this is symmetric in $i$ and $j$.)\\
Here it is convenient to define
\begin{equation}       p_{2ij}(x, \beta) =
                  \frac{\exp\big((\Delta f)_{ij}(x, \beta) + (\Delta f)_{ji}(x, \beta)\big)}
         {1 +  \exp\big((\Delta f)_{ij}(x, \beta)+ (\Delta f)_{ji}(x, \beta)\big)}  \label{eq:p2}
          \ .
\end{equation}
\begin{subequations}
Well-known calculus leads to the score function:\\
\noindent
for change:
      \begin{align}
      \frac{\partial \log\, p_{ij}(x, \beta)}{\partial \beta_k}           \nonumber
      &=      \frac{\partial \log\, p_{2ij}(x, \beta)}{\partial \beta_k}  \\
      &=      \big(1 - p_{2ij}(x, \beta)\big)\,
        \frac{\partial  (\Delta f)_{ij}(x, \beta) + (\Delta f)_{ji}(x, \beta) }{\partial \beta_k} \nonumber \\
      &=      \big(1 - p_{2ij}(x, \beta)\big) (s_{ijk} + s_{jik}) \ ;             \label{eq:cCa}
      \end{align}
for no change:
      \begin{equation}
      \frac{\partial \log\, \big(1 - p_{ij}(x, \beta)\big)}{\partial \beta_k} =
          - p_{2ij}(x, \beta) \,
%        \frac{\partial  (\Delta f)_{ij}(x, \beta)+ (\Delta f)_{ji}(x, \beta)}{\partial \beta_k}  \ .
        ( s_{ijk} + s_{jik})  \ .
             \label{eq:cCb}
      \end{equation}
\end{subequations}



\subsection{Coding the score calculations}
The easiest way is to code the calculation of the scores
in one routine together with the
probabilistic choices; and decompose all choices into their components
(sub-choices).
For all five procedures this can be done
using the following three basic procedures.
Here $\mathcal A_-$ is the set of actors who are candidate alters for a given
ministep (varies between ministeps), $\mathcal A$ is
$\mathcal A_-$ to which an option `no change' has been added
(represented by $i$ = the actor making the ministep,  or 0), while
 $\mathcal K$ is the set indexing the parameters (always the same).

\noindent
\textbf{MultipleChoice}\,(\textsf{input:} $\pi, s$; \textsf{output:} $j$;  \textsf{in/output:} $c$).\\
\textit{Interpretation:}\\
$\pi$ = probability vector for outcomes $h \in \mathcal A$;\\
$s$ = array of change contributions $s_{hk}$ for $h \in \mathcal A, k \in \mathcal K$;\\
$j$ = choice made;\\
$c$ = vector of scores to which something will be added.\\
\textit{Body:}\\
Define $j$ as outcome of probabilistic choice according to $\pi$.\\
For all $k \in \mathcal K$ do $c_k  \leftarrow c_k + s_{jk} - \sum_{h \in \mathcal A} \pi_h s_{hk}$.\\
\textit{EndBody.}
\medskip

\noindent
\textbf{BinaryChoice}\,(\textsf{input:} $\pi_0, s_0$; \textsf{output:} $r$;  \textsf{in/output:}  $c$).\\
\textit{Interpretation:}\\
$\pi_0$ = probability (number between 0 and 1);\\
$s_0$ = vector of change contributions $s_{k}$ for $ k \in \mathcal K$;\\
$r$ = result $\in \{T, F\}$, where $T$ denotes True = Accept and $F$ denotes False = Reject;\\
$c$ = vector of scores to which something will be added.\\
\textit{Body:}\\
Define $r$ as outcome of probabilistic choice according to $\pi_0$.\\
For all $k \in \mathcal K$ do: \big(if $r$ then $c_k \leftarrow c_k + (1-\pi_0) s_{k}$,
else $c_k  \leftarrow c_k -\pi_0\,s_k $\big).\\
\textit{EndBody.}
\medskip

\noindent
\textbf{Change}\,(\textsf{input:} $ i,j$).\\
\textit{Interpretation:}\\
Toggle tie $i \leftrightarrow j$ .\\
\textit{Body:}\\
If $j \in \mathcal A_-$ then \big(set $x_{ij} \leftarrow 1-x_{ij},\, x_{ji} \leftarrow 1-x_{ji}$\big).\\
\textit{EndBody.}
\medskip

\noindent
The models can be coded as follows.
We use $p_{0ij}$, $p_{1ij}$, $s_{ijk}$ as defined in (\ref{eq:p0}, \ref{eq:p1}, \ref{eq:s}).

\begin{itemize}
\item[D.1 = A-1] Given $i$, use $\pi = p_{1i.}$, $s = s_{i..}$.\\
                 Apply \textbf{MultipleChoice}\,($\pi,s,j,c$).\\
                 If $j \neq i$, apply \textbf{Change}($i,j$).
\item[D.2 = B-1] Given $i$ and $j$ in this order,
                 use $\pi_0 = p_{0ij}$, $s_0 = s_{ijk}$.\\
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r,c$).\\
                 If $r$, apply \textbf{Change}($i,j$).
\item[M.1 = A-2] Given $i$, use $\pi = p_{1i.}$, $s = s_{i..}$.\\
                 Apply \textbf{MultipleChoice}\,($\pi,s,j,c$).\\
                 If $x_{ij} = 1$, apply \textbf{Change}($i,j$);\\
                 else
                 \begin{frindent}{2em}{1em}{10em}
                 Use $\pi_0 = p_{0ji}$, $s_0 = s_{jik}$;\\
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r,c$);\\
                 If $r$, apply \textbf{Change}($i,j$).
                 \end{frindent}
\item[M2 = B-2]  One implementation is as follows:\\
                  Given $i$ and $j$ in arbitrary order:\\
                 Use $\pi_0 = p_{0ij}$, $s_0 = s_{ijk}$.\\
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r_A,c$).\\
                 Use $\pi_0 = p_{0ji}$, $s_0 = s_{jik}$.\\
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r_B,c$).\\
                 If $(x_{ij} = 0$ and $r_A$ and $r_B$)
                 or $(x_{ij} = 1$ and \big($r_A$ or $r_B$)\big), apply \textbf{Change}($i,j$).

                 Another implementation, which integrates out over the double choice
                 and therefore has a less variable score function
                 and is preferable:\\
                 If $x_{ij} = 0$:
                 \begin{frindent}{2em}{1em}{6em}
                 Use $\pi_0 = p_{0ij}\,p_{0ji}$,
                 \[
                 s_0 = \frac{\big(1-p_{0ij}(x, \beta)\big)\, s_{ijk} + \big(1-p_{0ji}(x, \beta)\big)\, s_{jik}}
                             {1 - p_{0ij}(x, \beta)\,p_{0ji}(x, \beta)} \ ;
                 \]
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r,c$);\\
                 If $r$, apply \textbf{Change}($i,j$).
                 \end{frindent}
                 If $x_{ij} = 1$:
                 \begin{frindent}{2em}{1em}{6em}
                 Use $\pi_0 = 1 - \big(1 - p_{0ij}\big)\big(1-p_{0ji}\big) = p_{0ij} + p_{0ji} - p_{0ij}p_{0ji}$,
                 \[
                 s_0 = \frac{p_{0ij}(x, \beta)\, s_{ijk} + p_{0ji}(x, \beta)\, s_{jik}}
                             { p_{0ij}(x, \beta) + p_{0ji}(x, \beta) -  p_{0ij}(x, \beta) p_{0ji}(x, \beta)} \ ;
                 \]
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r,c$);\\
                 If $r$, apply \textbf{Change}($i,j$).
                 \end{frindent}
\item[C = B-3] Given $i$ and $j$ in arbitrary order,
                 use $\pi_0 = p_{2ij}$ as in (\ref{eq:p2}), $s_0 = s_{ijk} + s_{jik}$.\\
                 Apply \textbf{BinaryChoice}\,($\pi_0,s_0,r,c$).\\
                 If $r$, apply \textbf{Change}($i,j$).
\end{itemize}



\section{Score function: rate function}
We consider here two types of rate function, for the one-sided and
two-sided opportunity.

\subsection{One-sided opportunity}
 \label{S_14.1}

For the one-sided opportunity (D.1 and M.1) the rate of change is
\begin{equation}
\lambda_i = \rho\,\exp\Big(\sum_k \alpha_k z_{ik}\Big)  \label{eq:lambda1}
\end{equation}
where $z_{ik}$ is a vector which is allowed to depend on the network $x$.
A ministep by actor $i$,
when associated with an elapsed waiting time $t$, contributes
to the probability density of the entire process a factor
\[
\lambda_i e^{-t\,\lambda_+}
\]
where $\lambda_+ = \sum_i \lambda_i$ is the total rate of change.

Calculus shows that the score function with respect to any parameter
$\theta_k$ ($\alpha_k$ as well as $\rho$) is given by
\begin{equation}
    \frac{\partial }{\partial \theta_k} \log \Big( \lambda_i e^{-t\,\lambda_+} \Big)
  = \frac{1}{\lambda_i} \frac{\partial \lambda_i}{\partial \theta_k} \, - \,
  t  \frac{\partial \lambda_+}{\partial \theta_k} \ .
\nonumber %     \label{eq:scorelambda1}
\end{equation}
This yields the following contributions to the overall score function.

Since $\partial \lambda_i/\partial \rho = \lambda_i/\rho$, we get
\begin{equation}
    \frac{\partial }{\partial \rho} \log \Big( \lambda_i e^{-t\,\lambda_+} \Big)
  = \frac{1 - t \lambda_+}{\rho}  \ .   \label{eq:scorelambda1r}
\end{equation}
Since $\partial \lambda_i/\partial \alpha_k = z_{ik}\,\lambda_i$, we get
\begin{equation}
    \frac{\partial }{\partial \alpha_k} \log \Big( \lambda_i e^{-t\,\lambda_+} \Big)
  = z_{ik} \,-\, t \sum_{j} z_{jk}\lambda_j  \ ,   \label{eq:scorelambda1ak}
\end{equation}
where the $\sum_j$ is taken over the set of all active actors.

For variables $z_{jk}$ that are not changing during the simulations,
i.e., actor covariates that are not dependent behavior variables,
the term $\sum_{j} z_{jk}\lambda_j $
does not change during the simulations, unless there is composition change
and the set of active actors changes.
For such variables, it is efficient to calculate $\sum_{j} z_{jk}\lambda_j $
at the start of each period, and at each composition change,
but not at each ministep.



\subsection{Two-sided opportunity}
\label{S_14.2}

For the two-sided opportunity (D.2, M.2 and C) the pairwise rate of change is
the product of two actor-dependent rates,
\begin{equation}
\lambda_{ij} = \lambda_{0i}\lambda_{0j} =
   \rho^2\exp\Big(\sum_k \alpha_k (z_{ik} + z_{jk})\Big) \ . \label{eq:lambda2}
\end{equation}
(This may be extended later -- this is how it was implemented in Siena 3.)

To avoid double summations over actors in the case where some variables $X_{ij}$
are not allowed to change because of structurally missing values or
having reached an absorbing state,
we assume that the pairwise meetings take place
according to (\ref{eq:lambda2}) between all pairs $(i,j)$
of actors subject to the conditions that $i$ and $j$ are active, and $i \neq j$.
If a pair $(i,j)$ meets  which is not allowed to change anything,
then time will advance, so
there will be a contribution to the score function for the parameters of the
rate function, but further nothing happens.

Ministeps are made by all pairs of active actors $i$ and $j$ ($i \neq j$).
When the elapsed waiting time for a ministep is $t$, the meeting event contributes
to the probability density of the entire process a factor
\[
\lambda_{ij} e^{-t\,\lambda_{++}}
\]
where
\begin{equation}
\lambda_{++} = \sum_{i,j} \lambda_{ij} =
       \sum_{\text{active }i} \ \sum_{\text{active }j, j \neq i}
                     \lambda_{0i}\lambda_{0j}         \label{eq:lambda++}
\end{equation}
is the total rate of change,
the summation being over all pairs $(i,j)$ of active actors, $i \neq j$.


The pairwise meeting process accordingly proceeds as follows.
\begin{enumerate}
\item Define
      \begin{equation}
          \lambda_{0+} =   \sum_{\text{active }i} \lambda_{0i} \ .
      \end{equation}
\item Repeat
      \begin{enumerate}
       \item Choose $i$ among the active $i$ with probabilities
             \[
              \frac{\lambda_{0i}} {\lambda_{0+}} \ ,
              \]
       \item choose $j$ among the active $j$ with probabilities
             \[
              \frac{\lambda_{0j}} {\lambda_{0+}} \ ,
              \]
       \end{enumerate}
       until $i \neq j$.
\item Increase time by a value drawn from an exponential distribution with parameter
      (\ref{eq:lambda++}).
\item If $X_{ij}$ is allowed to change, then determine the new value according
      to its distribution; if $X_{ij}$ is not allowed to change, continue
      (one could say that $i$ and $j$ meet but do not change how they are related).
\end{enumerate}

The variable $X_{ij}$ is not allowed to change if:
\begin{tabbing}
\hspace*{3em} \=  \hspace*{3em} \=  ($X_{ij}$ is structurally fixed) \\
       \> or \> ($X_{ij}$ has reached an absorbing state because of a \nnm{$\ast$only} condition).
\end{tabbing}
Here \nnm{$\ast$only} stands for any of \nnm{uponly}, \nnm{downonly}, \nnm{higher},
\nnm{disjoint}, or \nnm{atleastone}.\\
(The last three conditions depend on other networks, and for those conditions
the word `absorbing' is perhaps not appropriate
because when the other network changes the variable $X_{ij}$ also might be allowed to change again.)
\\
(Variable $X_{ij}$ also cannot change if $i$ or $j$ is inactive, but this already is excluded
in the choice of $i$ and $j$.)

See below for a remark about the calculation of $\lambda_{++}$.

The rates $\lambda_{ij}$ may depend on variables changing during the simulations, like
in- or outdegrees or behavioral dependent variables. Then also $\lambda_{++}$ will change
as a consequence. In addition,  $\lambda_{++}$ can change during the simulations
even if the $\lambda_{ij}$ remain constant, namely,  by a composition change.

The development further is analogous to the case of one-sided opportunity.
The score function with respect to any parameter
$\theta_k$ ($\alpha_k$ as well as $\rho$) is given by
\[
    \frac{\partial }{\partial \theta_k} \log \Big( \lambda_{ij} e^{-t\,\lambda_{++}} \Big)
  = \frac{1}{\lambda_{ij}} \frac{\partial \lambda_{ij}}{\partial \theta_k} \, - \,
  t  \frac{\partial \lambda_{++}}{\partial \theta_k} \ .
\]


This yields the following contributions to the overall score function.

Since $\partial \lambda_{ij}/\partial \rho = 2 \lambda_{ij}/\rho$, we get
\begin{equation}
    \frac{\partial }{\partial \rho} \log \Big( \lambda_{ij} e^{-t\,\lambda_{++}} \Big)
  = 2\, \frac{1 - t \lambda_{++}}{\rho}  \ .   \label{eq:scorelambda2r}
\end{equation}
Since $\partial \lambda_{ij}/\partial \alpha_k = (z_{ik}+z_{jk})\,\lambda_{ij}$, we get
\begin{equation}
    \frac{\partial }{\partial \alpha_k} \log \Big( \lambda_{ij} e^{-t\,\lambda_{++}} \Big)
  = (z_{ik}+z_{jk}) \,-\, t \sum_{g,h} (z_{gk}+z_{hk})\lambda_{gh}  \ ,   \label{eq:scorelambda2ak}
\end{equation}
where the $\sum_{g,h}$ is taken over
all pairs $(g,h)$ where $g$ as well as $h$ are active and $g \neq h$.
This can change because of a composition change, but also because of a change in $z_{gk}+z_{hk}$
or in $\lambda_{gh}$,
which is possible if and only if  $\lambda_{gh}$ depends on endogenously changing variables.



\subsubsection{Remark about calculation of the double summations}
Some attention is needed for the terms
$\lambda_{++}$ and
$\sum_{g,h} (z_{gk}+z_{hk})\lambda_{gh} $,
as these are defined as summations over $n(n-1)$ terms,
where $n$ is the number of active actors.
We have
\begin{align*}
 \lambda_{++} &=  \sum_{\text{active } i} \ \sum_{\text{active } j, j \neq i} \lambda_{0i}\lambda_{0j} \\
              &=  \sum_{\text{active } i} \lambda_{0i} \ \sum_{\text{active } j, j \neq i} \lambda_{0j}
                  \,=\,   \sum_{\text{active } i} \lambda_{0i} \
                              \Bigg( \Big( \sum_{\text{active } j} \lambda_{0j} \Big) \ - \lambda_{0i} \Bigg) \\
              &=  \Big(\sum_{\text{active } i} \lambda_{0i} \Big)^2
                     \,-\,  \Big(\sum_{\text{active } i} \lambda_{0i}^2 \Big)
\end{align*}
and
\begin{align*}
\sum_{g,h} (z_{gk}+z_{hk})\lambda_{gh} \, &= \,
            \sum_{\text{active } i} \ \sum_{\text{active } j, j \neq i} (z_{ik}+z_{jk})\lambda_{0i} \lambda_{0j}\\
           \,   &= \, \Big( \sum_{\text{active } i,j}   (z_{ik}+z_{jk})\lambda_{0i} \lambda_{0j} \Big)
               \,-\, \Big(  \sum_{\text{active } i}   2 \, z_{ik} \lambda_{0i}^2   \Big)  \\
           \,   &= \,   2 \,  \Big(\sum_{\text{active } i} \lambda_{0i} \Big)
                         \Big(\sum_{\text{active } i} z_{ik} \lambda_{0i} \Big)
                     \,-\, 2\, \Big(\sum_{\text{active } i} z_{ik} \lambda_{0i}^2 \Big) \ .
\end{align*}
Thus, everything can be expressed as a combination of single summations.

The remark at the end of Section  \ref{S_14.1} applies here, too:
if the functions $\lambda_{0i}$
are not changing during the simulations,
i.e., either they do not depend on $i$ or only through
actor covariates that are not dependent behavior variables,
these sums do not change during the simulations, unless there is composition change
and the set of active actors changes.
For such model specifications, it is efficient to calculate the sums
at the start of each period, and at each composition change,
but not again at each ministep because they do not change.

For model specifications where the rate function depends on endogenously
changing variables, the sums will need to be calculated repeatedly.

\section{Modeltype for behavior}

This section describes the option \texttt{BehaviorModelType},
introduced in RSiena version 1.1-306.


The enumerated types  \texttt{NetworkModelType} and
\texttt{BehaviorModelType} are defined
in \texttt{DependentVariable.h} as

\noindent
\texttt{\small
enum NetworkModelType \{ NOTUSED, NORMAL, AFORCE, AAGREE,\\
\hspace{8em}                         BFORCE, BAGREE, BJOINT \};\\
enum BehaviorModelType \{ OUTOFUSE, RESTRICT, ABSORB \};
}

Functions \texttt{modelType()} and \texttt{modelType(type)}
are defined in  \\
\texttt{NetworkLongitudinalData.h},
which is a class covering one dependent network.

Functions \texttt{behaviorModelType()} and \texttt{behaviorModelType(type)}
are defined in
\texttt{BehaviorLongitudinalData.h},
which is a class covering one dependent behavioral variable.

The model types are transferred from these classes
to \texttt{NetworkVariable.h} and \texttt{BehaviorVariable.h}
where they have the accessor functions\\
\texttt{networkModelType} and
\texttt{behaviorModelType}.
This makes it possible to specify different model types
for each dependent network or behavioral variable.
The network model type then is used further in
\texttt{NetworkVariable.cpp}.
In  \texttt{BehaviorVariable.cpp}, the behavioral model type
then is used further through the function \texttt{behaviorModelTypeABSORB}.

\subsection{Behavior micro-step}
Whenever actor $i$ may make a change in variable $Z$,
she changes $z_{i}$ to the new value $v$ (changes can be $-1$, $0$, $+1$). \\
Denote the new vector by $z(i \leadsto v)$.
Change probabilities are given by\\
\[
p_{i}(v; \beta, z, x) = \frac{\exp(f(i,v))}
             {\displaystyle \sum_{u \in \mathcal C} \exp(f(i,u))}
\]
where
\[
f(i,v) = f^{Z}_i(\beta, z(i \leadsto v), x) \ ,
\]
$f_i^Z$ is the objective function of actor $i$ for behavior $Z$,
and $\mathcal C$ is the set of allowed changes:\\
$\{-1,0,1\}, \ \{0,1\}, \ \{-1,0\}$, \\
depending on whether $z_i$ currently is at a boundary of its range.

Thus, the range is restricted to the permissible values,
and the objective function is evaluated accordingly.
Therefore, this model option is called \texttt{RESTRICT}.

The new model option is called behavioral model type \texttt{ABSORB}.\\
It calculates, when $z_i$ currently is at the boundary of the range,
hypothetically, the objective function for
changing to the next value \emph{outside} the range;
but if this value is chosen, then it is \emph{absorbed}
into the range of $Z$.

The choice between these options
is specified by parameter \texttt{behModelType} in
\texttt{sienaAlgorithmCreate}.
This parameter can be given as an integer (1 or 2);
or, for several dependent behavioral variables requiring different model types,
as a named integer vector.

In the previously only available option, which is the case \texttt{RESTRICT},
if \\$z_i = z^- = \min\{\text{range}(Z)\}$,
the probabilities are
\[
p_{i}(z^- ; \beta, z^-, x) = \frac{\exp\big(f(i,z^- )\big)}
             { \exp\big(f(i,z^- )\big) + \exp\big(f(i,z^- + 1)\big)}
\]
and
\[
p_{i}(z^- + 1; \beta, z^-, x) = \frac{\exp\big(f(i,z^- + 1)\big) }
             {\exp\big(f(i,z^- )\big) + \exp\big(f(i,z^- + 1)\big)} \ .
\]

Similarly, if $z_i = z^+ = \max\{\text{range}(Z)\}$, the probabilities are

\[
p_{i}(z^+ ; \beta, z^+, x) = \frac{\exp\big(f(i,z^+ )\big)  }
             {\exp\big(f(i,z^+ - 1)\big) + \exp\big(f(i,z^+ )\big) }
\]
and
\[
p_{i}(z^+ - 1; \beta, z^+, x) = \frac{\exp\big(f(i,z^+ - 1)\big) }
             {\exp\big(f(i,z^+ - 1)\big) + \exp\big(f(i,z^+ )\big)} \ .
\]

For the new option, the case \texttt{ABSORB},
if $z_i = z^- = \min\{\text{range}(Z)\}$,
the probabilities are
\[
p_{i}(z^- ; \beta, z^-, x) = \frac{2\, \exp\big(f(i,z^- )\big)}
             {2\, \exp\big(f(i,z^- )\big) + \exp\big(f(i,z^- + 1)\big)}
\]
and
\[
p_{i}(z^- + 1; \beta, z^-, x) = \frac{\exp\big(f(i,z^- + 1)\big) }
             {2\, \exp\big(f(i,z^- )\big) + \exp\big(f(i,z^- + 1)\big)} \ .
\]

Similarly, if $z_i = z^+ = \max\{\text{range}(Z)\}$, the probabilities are

\[
p_{i}(z^+ ; \beta, z^+, x) = \frac{2\,\exp\big(f(i,z^+ )\big) }
             {\exp\big(f(i,z^+ - 1)\big)
             + 2\,\exp\big(f(i,z^+ )\big) }
\]
and
\[
p_{i}(z^+ - 1; \beta, z^+, x) = \frac{\exp\big(f(i,z^+ - 1)\big) }
             {\exp\big(f(i,z^+ - 1)\big)
             + 2\,\exp\big(f(i,z^+ )\big) } \ .
\]

\subsection{Score function}

To implement this requires more than changing the probabilities;
also the score function $J_\theta$ needs to be changed.
This is used for calculating
\[
    \frac{\partial \E_\theta Z}{\partial \theta} \,=\,
             \E_\theta \big\{ J_\theta \, Z \big\} \ .
\]



For a change in the behavior variable, define by $\Delta_{ik}(d, z)$
the change statistic for effect
$k$, actor $i$, current state $z$, difference $d$.

Define the change in the objective function by
\[
  \Delta f_i(d) \,=\, \sum_k \beta^Z_k \, \Delta_{ik}(d, z) \ .
\]
Note that $\Delta f_i(0) = 0$, which will be used repeatedly in the sequel.

If there are no boundary effects, change probabilities are defined by
\begin{equation}\label{pchange}
     p_i(d) \,=\, \frac{\exp\big(\Delta f_i(d)\big)}
                 { \sum_{d'=-1}^1 \exp\big(\Delta f_{i}(d')\big)} \ .
\end{equation}
The scores for changes in behavioral variables are
\begin{subequations}\label{sc1}
\begin{equation}
  J_k(d) \,=\, \frac{\partial}{\partial \beta_k} \log p^Z_{i}(d; z, \beta) \,=\,
                  \Delta_{ik}(d, z) \, - \, \overline{ \Delta_{ik}(. , z)}
\end{equation}
where
\begin{equation}
    \overline{\Delta_{ik}(. , z)} \,=\, \sum_{d=-1}^1  p_i(d) \,  \Delta_{ik}(d, z) \ .
\end{equation}
\end{subequations}


In the standard model (`\texttt{RESTRICT}'), for the boundary cases:\\
if the current state is at the minimum, we have
\begin{subequations}\label{p.r}
\begin{equation}
     \pi_i(-1)  \,=\,  0 \ , \hspace{1em}
     \pi_i(0)  \,=\, \frac{1}
                 { 1 \,+\, \exp\big(\Delta f_{i}(1)\big)} \ ,
\end{equation}
\begin{equation}
     \pi_i(1)  \,=\, \frac{ \exp\big(\Delta f_{i}(1)\big)}
                 { 1 \,+\, \exp\big(\Delta f_{i}(1)\big)} \ ;
\end{equation}
if the current state is at the maximum, we have
\begin{equation}
     \pi_i(-1)  \,=\, \frac{ \exp\big(\Delta f_{i}(-1)\big)}
                 { 1 \,+\, \exp\big(\Delta f_{i}(-1)\big)} \ ,
\end{equation}
\begin{equation}
     \pi_i(0)  \,=\, \frac{1}
                 { 1 \,+\, \exp\big(\Delta f_{i}(-1)\big)} \ ,\hspace{1em}
     \pi_i(1)  \,=\,  0 \ .
\end{equation}
\end{subequations}

The scores in the boundary cases still are given by (\ref{sc1})
but with $p_i(d) = \pi_i(d)$ given in (\ref{p.r}).


In the new model (`\texttt{ABSORB}'), the probabilities are:

\noindent
if the current state is at the minimum,
\begin{subequations}\label{p.a}
\begin{equation}
     \pi_i(-1)  \,=\, 0 \ , \hspace{1em}
     \pi_i(0)  \,=\, p_i(-1) \,+\, p_i(0)\ , \hspace{1em}  %\,=\, \frac{ \exp\big(\Delta f_{i}(-1)\big) \,+\, 1}
                % { \sum_{d=-1}^1 \exp\big(\Delta f_{i}(d)\big)} \ , \hspace{2em}
     \pi_i(1)  \,=\, p_i(1) \ ; %\frac{ \exp\big(\Delta f_{i}(1)\big)}
                 % { \sum_{d=-1}^1 \exp\big(\Delta f_{i}(d)\big)} \ ;
\end{equation}
and if the current state is at the maximum,
\begin{equation}
     \pi_i(-1)  \,=\,  p_i(-1) \ , \hspace{1em}  %\frac{ \exp\big(\Delta f_{i}(-1)\big)}
                %  { \sum_{d=-1}^1 \exp\big(\Delta f_{i}(d)\big)} \ ; \hspace{2em}
     \pi_i(0)  \,=\,  p_i(0) \,+\, p_i(1) \ ,  \hspace{1em} % \frac{1 \,+\, \exp\big(\Delta f_{i}(1)\big)}
                % { \sum_{d=-1}^1 \exp\big(\Delta f_{i}(d)\big)}  \ .
 \pi_i(1)  \,=\, 0 \ .
\end{equation}
\end{subequations}

To calculate the scores in the second model type (`\texttt{ABSORB}'),
for the boundary cases,
we may note that this is based on a multinomial regression model
with three options $\{-1, 0, 1\}$, of which the first two outcomes
are combined.
Consider the case for the left boundary.
The first two outcomes have the same value
$\Delta_{ik}(-1,z) = \Delta_{ik}(0,z) = 0$; the value is 0
because this option means no change.
A score function is a function of the sufficient statistic,
and for this 3-option statistical model the sufficient statistic corresponds
to the partition of the outcome space into  $\big\{\{-1, 0\}, \{1\} \big\}$.
Therefore the score function that we need is again (\ref{sc1}),
for the original probabilities $p_i$. However, since
$\Delta_{ik}(-1,z) = \Delta_{ik}(0,z) = 0$ and $\pi_i(1) = p_i(1)$,
it does not matter whether we calculate (\ref{sc1})
for $p_i$ or for $\pi_i$.

\newpage

\section{sienaBayes}

This is the start of work on documenting \nnm{sienaBayes}.\\
Incomplete, to be expanded.
\bigskip


\subsection{Initialization}

If a \nnm{prevBayes} object is supplied, this initialization phase is
skipped.

\begin{enumerate}
\item Unless \nnm{prevAns=NULL}:\\
     Estimate using MoM with multigroup option, i.e., under assumption
    of common parameter values, using only 2 subphases.
    This yields the \nnm{sienaFit} object \nnm{startupGlobal}.\\
    Defaults can be changed by parameters
    \nnm{initgainGlobal}, \nnm{initgainGroupwise}, and
    \nnm{initML}. \\
    Stop if some of the estimated non-rate parameters are larger than 40.
\item If $ \nnm{priorPrecFactor} > 0$, use a weighted mean of the parameter estimate
    in  \nnm{startupGlobal} and the prior mean.
    This weighted mean is called the Kelley estimator\footnote{After
    the psychometrician Truman Lee Kelley}. \\
    The weights are the inverses of the covariance matrices
    of the estimate in \nnm{startupGlobal} and of \{the prior
    for the global parameters, multiplied by \nnm{priorPrecFactor}\}.
    Names of variables related to this include the string \nnm{prec}.
\item
  For all groups separately, if $\nnm{initgainGroupwise} > 0$,
  one subphase of the Robbins-Monro algorithm for MoM is executed,
  starting from the overall estimate, with step size \nnm{initgain},
  to estimate the group-varying parameters.\\
 This provides initial values \nnm{initialEstimates} per group,
    and covariance matrices \nnm{proposalCov} for proposal distributions
    per group.\\
    Stop if some of the estimated non-rate parameters are larger than 40.
\item The scale of basic rate parameters can be easily modified,
   e.g., to the sqrt scale,
    by changing functions \nnm{trafo}, \nnm{antitrafo}, \nnm{devtrafo}.
\end{enumerate}



\section{Main algorithm}

What follows is very incomplete.\\
It is an older text by Ruth Ripley.
\bigskip

\begin{enumerate}
  \item Set up data in C as usual
  \item Create minimal chain and do burnin
  \item \nnm{improveMH}: Get scalefactors such that about 25 out of 100
  Bayesian proposals after single MH steps are accepted.
  See below for details of
      generation and probabilities for Bayesian proposals. Keep theta
      unchanged throughout this step. \label{item5}
\item Do a warming phase of \nnm{nwarm} iterations of some number of MH steps.
\item Repeat step \ref{item5}.
\item Do requested number of Bayesian iterations. The
  length of the ML ones are determined by the multiplication factor and the
  observed distance.
\end{enumerate}
\paragraph{Bayesian proposals}
\label{sec:prop}
\begin{algorithmic}
\FORALL {groups}
\STATE Create a mask to exclude basic rate effects for other periods than this
group.
\STATE Get a multivariate normal with mean 0 and  \nnm{proposalCov}
 $\times$ scale factor for this group
\STATE Calculate the proposal probability:
\begin{description}
\item[prior] Multivariate normal density for the parameters with mean 0
  and covariance as supplied in the input argument.
\item[chain] Add
\begin{enumerate}
\item sum of log probabilities of choice of variable/actor
\item sum of log choice probabilities
\item minus the sum of basic rate parameters times the
relevant number of actors
\item sum of log(basic rate) parameter times the
number of real steps in the chain for the corresponding variable.
\end{enumerate}
(If not constant rates, use mu and sigma from the normal approximation instead.)
Since chain does not change size, ignore the log factorial of chain length.
\end{description}
\STATE The log probability of acceptance is then new - old  of log prior +
log chain
\ENDFOR
\end{algorithmic}



\newpage
\begin{appendices}

\section{The contribution of the number of ministeps to the likelihood }
\label{S_A1}

In \citet{SnijdersEA10a},
a special role is played by the quantity
\begin{multline}
   \kappa\big(\theta, x^{(0)}, (i_1, j_1), \ldots, (i_T,j_T) \big)  \\
    = P_{\theta}\{\text{time}_{T} \leq t_2 < \text{time}_{T+1} \mid x^{(0)}, (i_1, j_1), \ldots, (i_T,j_T) \}  \ ,
                                                           \label{kappa2}
\end{multline}
defined there with equation number (15),
and given here with some notational changes to make it correspond better
with the current paper.
$T = \textit{TotNumber} - 1$ is the total number of `real' ministeps;\\
and $(i_t, j_t)$ indicates the option of the $t$-th network ministep.
This was already used above in the acceptance probabilities for the Metropolis
Hastings steps. We now elaborate its role for the score function.

The case is rather simple if the aggregate rate function
%For models with a constant actor-level rate function
\begin{equation}
    \sum_{\text{active } i} \bigg( \sum_{r=1}^{R_N} \lambda^N(\theta, r,i, y)
      + \sum_{r=1}^{R_B} \lambda^B(\theta, r,i, y) \bigg)       \label{rate1}
\end{equation}
%where $y(t)$ is the state at time $t$
is constant: i.e., independent of the state $y$ and of time;
the time must be mentioned because even if this aggregate rate
does not depend on $y$, a changing number of actors (changing composition)
could make the sum time-dependent.

Let us denote the number of active actors, when assumed
constant, by $n_{\text{act}}$, and
\begin{equation}
   \lambda_{\text{ave}} = \frac{1}{n_{\text{act}}} \sum_{\text{active } i} \bigg( \sum_{r=1}^{R_N} \lambda^N(\theta, r,i, y)
      + \sum_{r=1}^{R_B} \lambda^B(\theta, r,i, y) \bigg)     \ .  \label{rate2}
\end{equation}
Note that formally we assume that all time durations are unity,
$t_{m+1} - t_m = 1$.
Then the total number of `real' ministeps
$T$ has a Poisson distribution with parameter $n_{\text{act}}\lambda_{\text{ave}}$.

The latter also holds with changing composition,
provided that
\[
   \lambda^+(\theta, +,i, y) = \lambda_{\text{ave}}
\]
is independent of $i$ as well as $y$, and that we let $n_{\text{act}}$
denote the \emph{average} number of active actors over the time period
from $t_{m}$ to $t_{m+1}$.

In this case (constant aggregate rate function),
$\kappa$ is equal to
\begin{equation}
   \kappa\big(\theta, x^{(0)},(i_1, j_1), \ldots, (i_T,j_T) \big) =
           \exp(-n_{\text{act}} \lambda_{\text{ave}} ) \frac{(n_{\text{act}} \lambda_{\text{ave}} )^T}{T!} \ ,
                                                           \label{kappa4}
\end{equation}
cf.\ (16) in \citet{SnijdersEA10a}.


\subsection{Score functions for rate parameters}

If there is only a single dependent variable ($R_N + R_B = 1$) and
$\rho$ is the basic and only rate parameter,
then $\lambda_{\text{ave}} = \rho$ and
the score function for $\rho$ is
\begin{equation}
\frac{\partial \log(\kappa)}{\partial \rho} =
   -n_{\text{act}} + \frac{T}{\rho} \  . \label{score1}
\end{equation}
More generally, now suppose that $R_N$ and $R_B$
are arbitrary and the rate function is given by
\begin{equation}
 \lambda^w(\theta, r,i, y) = \rho^w_r \, \exp(\alpha^w_{r} s_{ri} )
               \label{kappa5}
\end{equation}
(where $\alpha^w_{r} s_{ri}$  is the inner product of these two vectors).
Thus, there is a basic rate parameter $\rho^w_r$ for each given dependent
variable labeled $(w,r)$; in addition there may be actor-dependent variables $s_{ri}$ that
affect the rate for dependent variable $(w,r)$, but the parameters $\alpha^w_{r}$
are distinct for distinct dependent variables.

Then for the `complete data' situation, the information is equivalent to
the information in the variables $T^w_{ri}$, indicating the
number of ministeps made of \textit{OptionSet} $(w,i,r)$.
Denote $T^w_r = \sum_i T^w_{ri}$.
The variables $T^w_{ri}$ have independent Poisson distributions
with parameter (\ref{kappa5}). Therefore the score functions
can be derived from the Poisson distribution.
The score functions are given
for the basic rate parameters by
\begin{equation}
\frac{\partial\, \text{complete data log-likelihood}}{\partial \rho^w_r} =
    \frac{T^w_r}{\rho^w_r} - \sum_i \exp(\alpha^w_{r} s_{ri}) \  , \label{score2}
\end{equation}
which for cases with \emph{only} a basic rate parameter
$ \lambda^w(\theta, r,i, y) = \rho^w_r $ reduces to
\begin{equation}
\frac{\partial\, \text{complete data log-likelihood}}{\partial \rho^w_r} =
  \frac{T^w_r}{\rho^w_r}  - n_{\text{act}}  \  ; \label{score3}
\end{equation}
and the score functions for the other rate parameters are
\begin{align}
\frac{\partial \, \text{complete data log-likelihood}}{\partial \alpha^w_{rh}}  & =
   \sum_i s_{rih} \big(T^w_{ri} - \rho^w_r \, \exp(\alpha^w_{r} s_{ri} )  \big)\\
  & = \sum_i s_{rih} \big(T^w_{ri} -  \lambda^w(\theta, r,i, y)  \big)
               \  . \label{score4}
\end{align}

\iffalse
\subsection{$\kappa$ in the general case}

\remark{This section is a leftover, and will be changed.
Please ignore for the moment.}

For general models, formula (21) in \citet{SnijdersEA10a} shows that $\kappa$ is
approximated by
\begin{equation}
\kappa\big(\theta, x^{(0)}, (i_1, j_1), \ldots, (i_T,j_T) \big)
   \approx    \frac{1}{\lambda(\theta, x^{(T)}) \sqrt{(2\pi \sigma^2(\theta))}}
              \, \exp\left( \frac{-(1 - \mu(\theta))^2}{2\sigma^2(\theta)}\right) \ .
                                   \label{kappa3}
\end{equation}
Here
$\lambda(\theta, x) =  \sum_i \lambda(\theta,i,x)$ (there are no $r$ or $z$ in
that paper);\\
$\mu(\theta)$ is the sum of the reciprocal rates valid \emph{before} the $T$
ministeps,
\begin{equation}
  \mu(\theta) \,=\, \sum_{t=0}^{T-1} \frac1{\lambda(\theta, x^{(t)})}  \label{kmu}
\end{equation}
and $\sigma^2(\theta)$ the sum of the reciprocal squared rates,
\begin{equation}
  \sigma^2(\theta) \,=\, \sum_{t=0}^{T-1} \frac1{\big(\lambda(\theta, x^{(t)})\big)^2} \
  . \label{ksigma2}
\end{equation}

These equations for the case of constant aggregate rate functions
now must be generalized;
note that in Siena 3 only constant rate functions are
considered.

First suppose that the overall rate function $ \lambda^+(\theta, +,+, y)$
is constant, i.e., independent of $y$ and of time during the simulations.
In practice this will mean that the actor-dependent
total rate functions are constant
\[
   \lambda^+(\theta, +,i, y) = \rho
\]
and the number of active actors is constant at $n_{\text{act}}$.
Then $R_N + R_B$ still has a Poisson distribution
with parameter $n_{\text{act}}\,\lambda$
and (\ref{kappa4}) still holds.
The parameter, however, now is not this $\rho$ but
the array of $\rho^w_r = \lambda^w(\theta, r, i, y)$ (assumed independent
of $i$ and $y$).
This means exactly the same can be done

We define
\begin{equation}
  \lambda^w(\theta,r,x,z) = \sum_i \lambda^w(\theta,r,i,x,z)
\end{equation}
where the sum is over all active actors (which may be a changing set, viz., if there is
change of composition).
We shall be using an approximation with the purpose to have no more complications
than is reasonable.

The approximation uses a parametrization
such that there is a multiplicative parameter for all rate functions:
the parameter $\theta$ can be split as $\theta = (\rho, \theta_0)$ and
\begin{equation}
  \lambda^w(\theta,r,x,z) = \rho\,  \lambda_0^w(\theta_0,r,x,z)
\end{equation}
where $\rho$ does not depend on $w$, $x$, or $z$, and is a parameter distinct from $\theta_0$
in the sense that $\rho$ and $\theta_0$ vary independently;
and $ \lambda_0^w(\theta_0,r,x,z)$ indeed do not depend on $\rho$,
nor do any other of the components of the model.
This implies
\[
 \mu(\theta) = \frac{\mu_0(\theta_0)}{\rho}
\]
and
\[
 \sigma^2(\theta) = \frac{\sigma^2_0(\theta_0)}{\rho^2}
\]
in evident notation.
The approximation is based on the intuition that although (\ref{kmu}) and
(\ref{ksigma2}) depend also on $\theta_0$,
their contribution through (\ref{kappa3}) to the score function
for $\theta_0$ will be negligible.
This might be wrong -- but I think it is right.

Note that all probabilities in the Metropolis Hastings steps depend on
rate functions as ratios of rate functions,
and therefore are independent of $\rho$.

We now can write
\begin{multline}
\kappa  \approx    \frac{1}{\rho\,\lambda_0(\theta_0, x^{(T)})
                \sqrt{2\pi \sigma_0^2(\theta_0)/\rho^2}}
              \, \exp\left( \frac{-\rho^2\,
              \big(1 -\mu_0(\theta_0)/\rho\big)^2}{2\sigma_0^2(\theta_0)}\right) \\
       =  \frac{1}{\lambda_0(\theta_0, x^{(T)})
                \sqrt{2\pi \sigma_0^2(\theta_0)}}
              \, \exp\left( \frac{-\big(\rho - \mu_0(\theta_0)\big)^2}{2\sigma_0^2(\theta_0)}\right)
\end{multline}
with the particularly nice contribution to the score function
\begin{equation}
   \frac{\partial \log(\kappa)}{\partial \rho} =
             \frac{ \mu_0(\theta_0) - \rho}{\sigma_0^2(\theta_0)} \ . \label{escore2}
\end{equation}

This expression might seem pretty different from (\ref{score1}).
However, it is not. In the case of $R_N=1, R_B=0$ and a constant rate function
with $n_{\text{act}}$ active actors, we have
\[
 \mu_0(\theta_0) = \frac{T}{n_{\text{act}}} \ ,\  \sigma_0^2(\theta_0) =
 \frac{T}{n_{\text{act}}^2} \ .
\]
Substituting this in (\ref{escore2}) yields
\[
  n_{\text{act}} - \frac{n_{\text{act}}^2\,\rho}{T}
\]
and this is precisely the first-order Taylor approximation
to (\ref{score1}) in the solution point $\rho = T/{n_{\text{act}}}$.
Since the reasoning leading to $\kappa$ in (\ref{kappa3}) is a normal
approximation, it is natural indeed that this should be the linear approximation.
\fi


\end{appendices}

\bibliographystyle{Chicago}
\bibliography{RSiena}

\end{document}

\newpage

\section*{Definition of a group of effects based on alter averages}

\emph{Version 2}\\

Addition to the manual, Section 13.1.1 (definition of network evaluation
effects), as addition to the list of effects for an actor-dependent covariate
$v_i$. This also applies for $v_i$ being a dependent actor variable
(note that this still is about network dynamics, in which
$v_i$ and its transformations play the role of independent variables).

The following group of effects uses an auxiliary variable $\breve v_i$ which
can be called ``alters' $v$-average''.
It is described as the average value of $v_j$ for those
to whom $i$ is tied, and is defined mathematically as
\[
  \breve v_i = \left\{\begin{array}{ll} \displaystyle
         \frac{\sum_j x_{ij}v_j}{x_{i+}}  &  \text{ if } x_{i+} > 0     \\
         0                                &  \text{ if } x_{i+} = 0  .
  \end{array}   \right.
\]
(Since $v$ is centered, the value of 0 in case $x_{i+} = 0$ is also the mean value
of the original variable.)\\
(It may be noted that this constructed variable $\breve v_i$
will not itself have a zero mean generally -- so be it.)

This value will have to be updated during the simulations!
Network changes will change $\breve v_i$; if $v_i$ is a dependent behaviour
variable, then behaviour changes will also change $\breve v_i$.

\emph{Improved names now.}

In the following list, there is no ego effect, because the ego effect
of $\breve v_i$ would be the same as the alter effect of $v_i$.

\begin{enumerate}
\setcounter{enumi}{46}
\item \emph{covariate - alter at distance 2},
      defined as the sum of alters' covariate-average over all actors
      to whom $i$ has a tie,
\[
 s^{\rm net}_{i\vit }(x) = \sum_j x_{ij} \breve v_j
\]

\item \emph{covariate - similarity at distance 2},
      defined as the sum of centered similarity
      values for alters' covariate-average between $i$ and all actors
      $j$ to whom $i$ has a tie,
\[
 s^{\rm net}_{i\vit }(x) = \sum_j x_{ij} ({\rm sim}(\breve v)_{ij}
  - \widehat{{\rm sim}(\breve v)}) \ ,
\]
 where the similarity scores ${\rm sim}(\breve v)_{ij}$ are defined as
\[
{\rm sim}(\breve v)_{ij}=
 \frac{\Delta-\vert \breve v_i - \breve v_j \vert}{\Delta} \ ,
\]
 while
 $\Delta=\max_{ij}\vert v_i - v_j \vert$ is the observed range of the
 \emph{original} covariate $v$, and\\
 $\widehat{{\rm sim}(\breve v)}$ is the
 \emph{observed} mean of all these similarity scores;
 this observed mean is defined by calculating the $\breve v_i$ values
 for each of the observations $t_1$ to $t_{M-1}$, and averaging
 these \\
 $(M-1)n(n-1)$ (or $(M-1)n(n-1)/2$) similarity values.
\end{enumerate}
If there are multiple networks, their roles can be crossed here --
the alters' covariate average is calculated
in turn, respectively, for each dependent network variable;
and this is then used as an effect respectively for each of the
dependent network variables -- giving a total of $2 R_N^2$ effects
if $R_N$ is the number of dependent network variables. At this moment
I do not care for all this generality, but I guess the request
could come up at a later stage, so perhaps it is efficient to include
the generality already now.

For the behavior dynamics (Section 13.2.1 of the manual) the following
effect would be added.

\begin{enumerate}
\setcounter{enumi}{20}
\item \emph{alter's covariate average } effect on behavior $z$,
      defined as the product of $i$'s behavior and
      $i$'s alters' covariate-average,
\\
 $s^{\rm beh}_{i\vit}(x) = z_i \, \breve v_i $.
\end{enumerate}
This is already included as the `average alter' effect in case $v_i$ happens
to be equal to $z_i$; now this is extended to $v_i$ different from $z_i$.

\end{document}

% From discussion between Tom and Ruth about constructing effects:


\section{Effects}

Effects are calculated as
\begin{equation}
  f_i(x) \,=\, \sum_k \text{AlterFunction}_k\,x_{ij}
\end{equation}

\section{Interactions}

Users can specify two-effect and three-effect interactions.

I think your construction indeed allows
what I call user-defined interactions.
The following specifies what I think is needed for this;
this includes two-effect but also three-effect interactions.
It is more or less the same as what is used in Siena 3.
I use the names RSiena and CSiena to refer to the R and C
parts of the program code.
Whether the following works exactly as it should
depends also on the precise way in which AlterFunction
is used in the calculation of statistics.
Could you give me the specification of that?

\begin{enumerate}


\item  RSiena: The dataframe of effects must contain
three extra columns ef1, ef2, ef3 which will be used
only for interaction effects. Let us call these effect specifiers.
They define the effects that are interacting: ef1 and ef2 are required;
if ef3 is undefined or zero, then it is a two-effect interaction,
if ef3 is defined then it is a three-effect interaction.
The values ef1, ef2, ef3 refer to the index number
of the effects (or whatever identifier is convenient;
I suppose the index number is a good way of referring to it).
\item  CSiena: Effects must have a tag ``kind'' with values
\{ego, dyadic, interaction, other\}.
Ego effects are those depending only on the value of ego
on an actor variable (actor covariate or dependent behavior variable).
Dyadic effects are those depending only on (ego, alter)
and not on other actors.
Interaction variables are the ones we are now defining.
\item  Four types of interaction are allowed:
A: ef1 is an ego effect, ef2 is not an interaction effect, ef3 is undefined.
B: ef1 and ef2 are ego effects, ef3 is defined and not an interaction effect.
C: ef1 and ef2 are dyadic effects, ef3 is undefined.
D: ef1, ef2, ef3 are dyadic effects.
\item  If RSiena passes a non-permitted combination of
effect specifiers to CSiena, then CSiena uses the value 0
for the effect and for the statistic, and transmits to RSiena
a message that the combination of effect specifiers is not allowed.
\item  For types A and C, CSiena uses ProductFunction applied
to the AlterFunction of effects with index numbers ef1 and ef2.
\item  For types B and D, CSiena uses TripleProductFunction
applied to the AlterFunction of effects with index numbers
ef1 and ef2 and ef3, where TripleProductFunction
is just the product of three factors.
\end{enumerate}
