<div style="font-size:12pt; font-family:garamond">
<div style="color: #2E5894; padding-left: 1.4em; padding-right:2.2em; padding-bottom:0.8em; padding-top:1em">[[Image:attention4.jpg|45px|left|link=]]
(If you are experiencing problems with the display of the mathematical formula, you can either try to use another browser, or use this link which should work smoothly: http://popix.lixoft.net)
</div>

<!-- some LaTeX macros we want to use: -->
$
<!-- Methods macros -->
\def\ieta{m}
 
\def\Meta{M}
 
\def\imh{\ell}
 
\def\ceta{\tilde{\eta}}
 
\def\cpsi{\tilde{\psi}}
 
\def\transpose{t}
 
\newcommand{\Dt}[1]{\partial_\theta #1}
 
\newcommand{\DDt}[1]{\partial^2_{\theta} #1}
 
\newcommand{\cov}[1]{\mbox{Cov}\left(#1\right)}
 
\def\jparam{j}
 
\newcommand{\Dphi}[1]{\partial_\phi #1}
 
\newcommand{\Dpsi}[1]{\partial_\psi #1}
 
\def\llike{\cal LL}
 
\def\tqpsii{\tilde{p}_{\psi_i}}
 
\def\tppsii{\tilde{\pmacro}}
 
\def\petai{\pmacro}
 
\def\pcetaiyi{\pmacro}
 
\def\pyietai{\pmacro}
 
\def\pcyietai{\pmacro}
 
\def\pphii{\pmacro}
 
\def\pyiphii{\pmacro}
 
\def\pcyiphii{\pmacro}
 
\def\pcphiiyi{\pmacro}
 
\def\bphi{\boldsymbol{\phi}}
 
<!-- .......... -->
 
 
 
\newcommand{\argmin}[1]{ \mathop{\rm arg} \mathop{\rm min}\limits_{#1} }
 
\newcommand{\argmax}[1]{ \mathop{\rm arg} \mathop{\rm max}\limits_{#1} }
 
\newcommand{\nominal}[1]{#1^{\star}}
 
\newcommand{\psis}{\psi{^\star}}
 
\newcommand{\phis}{\phi{^\star}}
 
\newcommand{\hpsi}{\hat{\psi}}
 
\newcommand{\hphi}{\hat{\phi}}
 
\newcommand{\teps}{\varepsilon}
 
\newcommand{\limite}[2]{\mathop{\longrightarrow}\limits_{\mathrm{#1}}^{\mathrm{#2}}}
 
\newcommand{\DDt}[1]{\partial^2_\theta #1}
 
 
 
\def\cpop{c_{\rm pop}}
 
\def\Vpop{V_{\rm pop}}
 
\def\iparam{l}
 
\newcommand{\trcov}[1]{#1}
 
 
 
\def\bu{\boldsymbol{u}}
 
\def\bt{\boldsymbol{t}}
 
\def\bT{\boldsymbol{T}}
 
\def\by{\boldsymbol{y}}
 
\def\bx{\boldsymbol{x}}
 
\def\bc{\boldsymbol{c}}
 
\def\bw{\boldsymbol{w}}
 
\def\bz{\boldsymbol{z}}
 
\def\bpsi{\boldsymbol{\psi}}
 
\def\bbeta{\beta}
 
 
 
 
 
\def\aref{a^\star}
 
\def\kref{k^\star}
 
\def\model{M}
 
\def\hmodel{m}
 
\def\mmodel{\mu}
 
\def\imodel{H}
 
\def\thmle{\hat{\theta}}
 
\def\ofim{I^{\rm obs}}
 
\def\efim{I^{\star}}
 
 
 
\def\Imax{\rm Imax}
 
\def\probit{\rm probit}
 
\def\vt{t}
 
\def\id{\rm Id}
 
\def\teta{\tilde{\eta}}
 
\newcommand{\eqdef}{\mathop{=}\limits^{\mathrm{def}}}
 
 
 
\newcommand{\deriv}[1]{\frac{d}{dt}#1(t)}
 
 
 
\newcommand{\pred}[1]{\tilde{#1}}
 
\def\phis{\phi{^\star}}
 
\def\hphi{\tilde{\phi}}
 
\def\hw{\tilde{w}}
 
\def\hpsi{\tilde{\psi}}
 
\def\hatpsi{\hat{\psi}}
 
\def\hatphi{\hat{\phi}}
 
\def\psis{\psi{^\star}}
 
\def\transy{u}
 
\def\psipop{\psi_{\rm pop}}
 
\newcommand{\psigr}[1]{\hat{\bpsi}_{#1}}
 
\newcommand{\Vgr}[1]{\hat{V}_{#1}}
 
 
 
\def\pmacro{\text{p}}
 
\def\py{\pmacro}
 
\def\pt{\pmacro}
 
\def\pc{\pmacro}
 
\def\pu{\pmacro}
 
\def\pyi{\pmacro}
 
\def\pyj{\pmacro}
 
\def\ppsi{\pmacro}
 
\def\ppsii{\pmacro}
 
\def\pcpsith{\pmacro}
 
\def\pcpsiiyi{\pmacro}
 
\def\pth{\pmacro}
 
\def\pypsi{\pmacro}
 
\def\pcypsi{\pmacro}
 
\def\ppsic{\pmacro}
 
\def\pcpsic{\pmacro}
 
\def\pypsic{\pmacro}
 
\def\pypsit{\pmacro}
 
\def\pcypsit{\pmacro}
 
\def\pypsiu{\pmacro}
 
\def\pcypsiu{\pmacro}
 
\def\pypsith{\pmacro}
 
\def\pypsithcut{\pmacro}
 
\def\pypsithc{\pmacro}
 
\def\pcypsiut{\pmacro}
 
\def\pcpsithc{\pmacro}
 
\def\pcthy{\pmacro}
 
\def\pyth{\pmacro}
 
\def\pcpsiy{\pmacro}
 
\def\pz{\pmacro}
 
\def\pw{\pmacro}
 
\def\pcwz{\pmacro}
 
\def\pw{\pmacro}
 
\def\pcyipsii{\pmacro}
 
\def\pyipsii{\pmacro}
 
\def\pcetaiyi{\pmacro}
 
\def\pypsiij{\pmacro}
 
\def\pyipsiONE{\pmacro}
 
\def\ptypsiij{\pmacro}
 
\def\pcyzipsii{\pmacro}
 
\def\pczipsii{\pmacro}
 
\def\pcyizpsii{\pmacro}
 
\def\pcyijzpsii{\pmacro}
 
\def\pcyiONEzpsii{\pmacro}
 
\def\pcypsiz{\pmacro}
 
\def\pccypsiz{\pmacro}
 
\def\pypsiz{\pmacro}
 
\def\pcpsiz{\pmacro}
 
\def\peps{\pmacro}
 
\def\petai{\pmacro}
 
 
 
\def\psig{\psi}
 
\def\psigprime{\psig^{\prime}}
 
\def\psigiprime{\psig_i^{\prime}}
 
\def\psigk{\psig^{(k)}}
 
\def\psigki{\psig_i^{(k)}}
 
\def\psigkun{\psig^{(k+1)}}
 
\def\psigkuni{\psig_i^{(k+1)}}
 
\def\psigi{\psig_i}
 
\def\psigil{\psig_{i,\ell}}
 
\def\phig{\phi}
 
\def\phigi{\phig_i}
 
\def\phigil{\phig_{i,\ell}}
 
 
 
 
 
\def\etagi{\eta_i}
 
\def\IIV{\Omega}
 
\def\thetag{\theta}
 
\def\thetagk{\theta_k}
 
\def\thetagkun{\theta_{k+1}}
 
\def\thetagkunm{\theta_{k-1}}
 
\def\sgk{s_{k}}
 
\def\sgkun{s_{k+1}}
 
\def\yg{y}
 
\def\xg{x}
 
 
 
\def\qx{p_x}
 
\def\qy{p_y}
 
\def\qt{p_t}
 
\def\qc{p_c}
 
\def\qu{p_u}
 
\def\qyi{p_{y_i}}
 
\def\qyj{p_{y_j}}
 
\def\qpsi{p_{\psi}}
 
\def\qpsii{p_{\psi_i}}
 
\def\qcpsith{p_{\psi|\theta}}
 
\def\qth{p_{\theta}}
 
\def\qypsi{p_{y,\psi}}
 
\def\qcypsi{p_{y|\psi}}
 
\def\qpsic{p_{\psi,c}}
 
\def\qcpsic{p_{\psi|c}}
 
\def\qypsic{p_{y,\psi,c}}
 
\def\qypsit{p_{y,\psi,t}}
 
\def\qcypsit{p_{y|\psi,t}}
 
\def\qypsiu{p_{y,\psi,u}}
 
\def\qcypsiu{p_{y|\psi,u}}
 
\def\qypsith{p_{y,\psi,\theta}}
 
\def\qypsithcut{p_{y,\psi,\theta,c,u,t}}
 
\def\qypsithc{p_{y,\psi,\theta,c}}
 
\def\qcypsiut{p_{y|\psi,u,t}}
 
\def\qcpsithc{p_{\psi|\theta,c}}
 
\def\qcthy{p_{\theta | y}}
 
\def\qyth{p_{y,\theta}}
 
\def\qcpsiy{p_{\psi|y}}
 
\def\qcpsiiyi{p_{\psi_i|y_i}}
 
\def\qcetaiyi{p_{\eta_i|y_i}}
 
\def\qz{p_z}
 
\def\qw{p_w}
 
\def\qcwz{p_{w|z}}
 
\def\qw{p_w}
 
\def\qcyipsii{p_{y_i|\psi_i}}
 
\def\qyipsii{p_{y_i,\psi_i}}
 
\def\qypsiij{p_{y_{ij}|\psi_{i}}}
 
\def\qyipsi1{p_{y_{i1}|\psi_{i}}}
 
\def\qtypsiij{p_{\transy(y_{ij})|\psi_{i}}}
 
\def\qcyzipsii{p_{z_i,y_i|\psi_i}}
 
\def\qczipsii{p_{z_i|\psi_i}}
 
\def\qcyizpsii{p_{y_i|z_i,\psi_i}}
 
\def\qcyijzpsii{p_{y_{ij}|z_{ij},\psi_i}}
 
\def\qcyi1zpsii{p_{y_{i1}|z_{i1},\psi_i}}
 
\def\qcypsiz{p_{y,\psi|z}}
 
\def\qccypsiz{p_{y|\psi,z}}
 
\def\qypsiz{p_{y,\psi,z}}
 
\def\qcpsiz{p_{\psi|z}}
 
\def\qeps{p_{\teps}}
 
\def\qetai{p_{\eta_i}}
 
 
 
\def\neta{n_\eta}
 
\def\ncov{M}
 
\def\npsi{n_\psig}
 
  
\def\beeta{\eta}

\def\logit{\rm logit}

\def\transy{u}

\def\so{O}

\newcommand{\prob}[1]{ \mathbb{P}\left(#1\right)}

\newcommand{\probs}[2]{ \mathbb{P}_{#1}\left(#2\right)}

\newcommand{\esp}[1]{\mathbb{E}\left(#1\right)}

\newcommand{\esps}[2]{\mathbb{E}_{#1}\left(#2\right)}

\newcommand{\var}[1]{\mbox{Var}\left(#1\right)}

\newcommand{\vars}[2]{\mbox{Var}_{#1}\left(#2\right)}

\newcommand{\std}[1]{\mbox{sd}\left(#1\right)}

\newcommand{\stds}[2]{\mbox{sd}_{#1}\left(#2\right)}

\newcommand{\corr}[1]{\mbox{Corr}\left(#1\right)}

\newcommand{\Rset}{\mbox{$\mathbb{R}$}}

\newcommand{\Yr}{\mbox{$\mathcal{Y}$}}

\newcommand{\like}{\cal L}

\newcommand{\repy}{y^{(r)}}

\newcommand{\brepy}{\boldsymbol{y}^{(r)}}

\newcommand{\vari}[3]{#1_{#2}^{{#3}}}

\newcommand{\dA}[2]{\dot{#1}_{#2}(t)}

\newcommand{\nitc}{N}

\newcommand{\itc}{I}

\newcommand{\vl}{V}

\newcommand{\tstart}{t_{start}}

\newcommand{\tstop}{t_{stop}}

\newcommand{\one}{\mathbb{1}}

\newcommand{\hazard}{h}

\newcommand{\cumhaz}{H}

\def\mlxtran{\text{MLXtran}}

\def\monolix{\text{Monolix}}
$


== Different representations of the same model ==

The description of a model requires variables such as observations $(y_i)$, individual parameters $(\psi_i)$, population parameters $\theta$, covariates $(c_i)$, etc.

Tasks to be performed (estimation, simulation, likelihood calculation, etc.) involve these variables. Algorithms used to perform these tasks can use different parameterizations, i.e., different mathematical representations of the same model. We will see that depending on the task, some mathematical representations are more suitable than others.

For the modeler, there exists a natural parametrization, involving a vector of individual parameters $\psi_i$ that have a physical or biological meaning (rate, volume, bioavailability, etc.). We will denote by ''$\psi$-representation'' the mathematical representation of the model which uses $\psi_i$:
 
  
 
{{EquationWithRef
|equation=<div id="eq:representation1"><math>
\pyipsii(y_i , \psi_i ; \theta, c_i) = \pcyipsii(y_i {{!}} \psi_i)\ppsii( \psi_i ; \theta, c_i).
</math></div>
|reference=(1) }}
  
When there exists a transformation $h: \Rset^d \to \Rset^d$ such that $\phi_i=h(\psi_i)$ is a Gaussian vector, we can equivalently use the ''$\phi$-representation'', which involves the transformed parameters (log-rate, log-volume, logit-bioavailability, etc.) and represents the joint distribution of $y_i$ and $\phi_i$:
  
 
{{EquationWithRef
|equation=<div id="eq:representation2"><math>
\pyiphii(y_i , \phi_i ; \theta, c_i) = \pcyiphii(y_i {{!}} \phi_i)\pphii( \phi_i ; \theta, c_i),
</math></div>
|reference=(2) }}
  
where $\phi_i = h(\psi_i) \sim {\cal N}( \mu(\beta,c_i) , \Omega)$ and $\theta=(\beta,\Omega)$.
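For instance (an illustrative choice of parameters, not imposed by the model above), suppose $\psi_i = (V_i, k_i, F_i)$, where $V_i$ is a volume, $k_i$ a rate and $F_i$ a bioavailability constrained to $(0,1)$. A natural transformation $h$ applies the log to the positive parameters and the logit to $F_i$:

{{Equation1
|equation=<math>
\phi_i = h(\psi_i) = \left( \log(V_i) , \log(k_i) , \logit(F_i) \right).
</math> }}

Assuming that $\phi_i$ is Gaussian then amounts to assuming lognormal distributions for $V_i$ and $k_i$, and a logit-normal distribution for $F_i$.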
  
There is yet another mathematical representation, which uses the vector of random effects $\eta_i$ to represent the model for the individual parameters:
  
 
{{Equation1
|equation=<math>\begin{eqnarray}
\phi_i &=& \mu(\beta,c_i) + \eta_i ,
\end{eqnarray}</math> }}
  
where $\eta_i \sim {\cal N}( 0 , \Omega)$. This ''$\eta$-representation'' leads to the joint distribution of $y_i$ and $\eta_i$:
 
  
 
{{EquationWithRef
|equation=<div id="eq:representation3"><math>
\pyietai(y_i , \eta_i ; \theta, c_i) = \pcyietai(y_i {{!}} \eta_i;\beta,c_i)\petai( \eta_i ; \Omega).
</math></div>
|reference=(3) }}
  
We can see that the fixed effects $\beta$ now appear in the conditional distribution of the observations. This has a strong impact on tasks such as estimation of the population parameters: a sufficient statistic for estimating $\beta$ derived from this representation will be a function of the observations $\by$, as opposed to the other representations, where the sufficient statistic is a function of the individual parameters $\bpsi$ (or equivalently, $\bphi$).
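To see why, here is a minimal sketch assuming the continuous observation model used in the examples below (any observation model conditioned on $\phi_i$ behaves the same way). Substituting $\phi_i = \mu(\beta,c_i) + \eta_i$ into $y_{ij} \sim {\cal N}(f(t_{ij},\phi_i) , a^2)$ gives, conditionally on $\eta_i$,

{{Equation1
|equation=<math>
y_{ij} \sim {\cal N}\left(f(t_{ij}, \mu(\beta,c_i) + \eta_i) , a^2\right),
</math> }}

so the conditional distribution of the observations depends explicitly on $\beta$ and $c_i$, while in the $\phi$-representation it depends on $\phi_i$ only.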
 
  
In the $\psi$-representation [[#eq:representation1|(1)]], if the model $\ppsii( \psi_i ; \theta, c_i)$ is not a regular statistical model (some components of $\psi_i$ may have no variability, or more generally $\Omega$ may not be positive definite), no sufficient statistic $S(\psi_i)$ for estimating $\theta$ exists. Thus, in these cases, estimation algorithms will not use representation [[#eq:representation1|(1)]], but another decomposition into regular statistical models. Examples 3 and 4 below illustrate such degenerate cases.
  
 
  
 
{{Example
|title=Some examples
|text=
<ol>
<li> Consider the following model for continuous data with a constant error model:
  
{{Equation1
|equation=<math>\begin{eqnarray}
y_{ij} &\sim& {\cal N}(f(t_{ij},\phi_i) ,a_i^2) \\
\phi_i &\sim& {\cal N}(\beta, \Omega) \\
a_i &\sim& p_a(\, \cdot \, ; \theta_a) .
\end{eqnarray}</math> }}
  
Here, the variance of the residual error is a random variable. The vector of individual parameters is $(\phi_i, a_i)$ and the vector of population parameters is $\theta=(\beta,\Omega,\theta_a)$. Assuming that $\Omega$ is positive definite, the joint model of $y_i$, $\phi_i$ and $a_i$ can be decomposed as a product of three regular models:

{{Equation1
|equation=<math>
\pyiphii(y_i , \phi_i, a_i ; \theta) = \pcyiphii(y_i {{!}} \phi_i ,a_i)\pphii( \phi_i ; \beta, \Omega)\pmacro(a_i ; \theta_a).
</math> }}
</li>
  
  
<br>
<li>Assume instead that the variance of the residual error is fixed for the whole population:
  
 
{{Equation1
|equation=<math>\begin{eqnarray}
y_{ij} &\sim& {\cal N}(f(t_{ij},\phi_i) ,a^2) .
\end{eqnarray}</math> }}
  
The vector of population parameters is now $\theta=(\beta,\Omega,a)$ and the joint model of $y_i$ and $\phi_i$ can be decomposed as

{{Equation1
|equation=<math>
\pyiphii(y_i , \phi_i ; \theta) = \pcyiphii(y_i {{!}} \phi_i ; a)\pphii( \phi_i ; \beta, \Omega).
</math> }}
</li>
  
  
<br>
<li>Suppose that some components of $\phi_i$ have no inter-individual variability. More precisely, let $\phi_i=(\phi_i^{(1)}, \phi_i^{(0)})$ and $\beta=(\beta_1,\beta_0)$, such that
  
 
{{Equation1
|equation=<math>\begin{eqnarray}
\phi_i^{(1)} &\sim& {\cal N}(\beta_1, \Omega_1) \\
\phi_i^{(0)} &=& \beta_0 ,
\end{eqnarray}</math> }}
  
and $\Omega_1$ is positive definite. Here, $\theta=(\beta_1,\beta_0,\Omega_1,a)$ and
  
 
{{Equation1
|equation=<math>
\pyiphii(y_i , \phi_i^{(1)} ; \theta) = \pcyiphii(y_i {{!}} \phi_i^{(1)} ; \beta_0, a)\pphii( \phi_i^{(1)} ; \beta_1, \Omega_1).
</math> }}
</li>
<br>
<li>Assume instead that $\phi_i = (\phi_{i,1}, \phi_{i,2})$, where
  
 
{{Equation1
|equation=<math>\begin{eqnarray}
\phi_{i,1} &=& \beta_1 + \omega_1\eta_i \\
\phi_{i,2} &=& \beta_2 + \omega_2\eta_i ,
\end{eqnarray}</math> }}
  
and $\eta_i \sim {\cal N}(0,1)$. Since $\phi_{i,1}$ and $\phi_{i,2}$ are perfectly correlated, the covariance matrix of $\phi_i$ is singular, and the joint distribution of $y_i$ and $\phi_i$ is not a regular model. Here, the useful model is the joint distribution of $y_i$ and $\eta_i$. We can, for instance, use the following $\eta$-representation:

{{Equation1
|equation=<math>
\pyietai(y_i , \eta_i ; \theta) = \pcyietai(y_i {{!}} \eta_i ;\theta)\petai( \eta_i),
</math> }}
  
where $\theta= (\beta_1,\beta_2, \omega_1,\omega_2,a)$.
</li>
</ol>
}}
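The last example is easy to check numerically. Here is a minimal, self-contained Python sketch (the numerical values and the linear choice $f(t,\phi_i) = \phi_{i,1} + \phi_{i,2}\, t$ are illustrative assumptions, not part of the model above): it simulates data through the $\eta$-representation and verifies that the covariance matrix of $\phi_i$ is singular, so that the joint model of $y_i$ and $\phi_i$ is not a regular statistical model.

<pre>
import numpy as np

rng = np.random.default_rng(0)

# Illustrative population parameters: theta = (beta1, beta2, omega1, omega2, a)
beta  = np.array([1.0, 0.5])          # fixed effects (beta1, beta2)
omega = np.array([0.3, 0.2])          # (omega1, omega2)
a     = 0.1                           # residual standard deviation
N     = 1000                          # number of individuals
t     = np.linspace(0.0, 2.0, 5)      # observation times t_{i1}, ..., t_{i5}

# eta-representation: a single standard normal random effect per individual
eta = rng.standard_normal(N)          # eta_i ~ N(0, 1)
phi = beta + np.outer(eta, omega)     # phi_i = beta + omega * eta_i, shape (N, 2)

# Illustrative structural model: f(t, phi_i) = phi_{i,1} + phi_{i,2} * t
f = phi[:, [0]] + phi[:, [1]] * t               # shape (N, 5)
y = f + a * rng.standard_normal(f.shape)        # y_{ij} ~ N(f(t_{ij}, phi_i), a^2)

# Cov(phi_i) is proportional to omega * omega', a rank-1 (singular) matrix:
# the joint model of (y_i, phi_i) is therefore not a regular statistical model.
Omega_hat = np.cov(phi.T)
print("estimated Cov(phi):\n", Omega_hat)
print("rank:", np.linalg.matrix_rank(Omega_hat))   # -> 1, not 2
</pre>

This is why the joint distribution of $y_i$ and $\eta_i$ is used instead: $\eta_i$ is a scalar standard normal variable, and its model is regular.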
  
  
 
<br>

== Some notation ==

We assume that the vector of population parameters $\theta$ takes its values in $\Theta$, an open subset of $\Rset^m$.
 
  
Let $f : \Theta \to \Rset$ be a twice differentiable function of $\theta$. We will denote by $\Dt{f(\theta)} = (\partial f(\theta)/\partial \theta_j, \ 1 \leq j \leq m)$ the gradient of $f$ (i.e., the vector of partial derivatives of $f$) and by $\DDt{f(\theta)} = (\partial^2 f(\theta)/\partial \theta_j\partial \theta_k, \ 1 \leq j,k \leq m)$ the Hessian of $f$ (i.e., the square matrix of second-order partial derivatives of $f$).
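As a purely illustrative example, take $m=2$ and $f(\theta) = \theta_1^2\, \theta_2$; then

{{Equation1
|equation=<math>
\Dt{f(\theta)} = \begin{pmatrix} 2\,\theta_1\,\theta_2 \\ \theta_1^2 \end{pmatrix}, \qquad
\DDt{f(\theta)} = \begin{pmatrix} 2\,\theta_2 & 2\,\theta_1 \\ 2\,\theta_1 & 0 \end{pmatrix}.
</math> }}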
  
</div>

{{Next
|link=The SAEM algorithm for estimating population parameters }}