library("readxl")
my_data <- read_excel("mydatax.xlsx")
m = my_data$Markup
hist(m, probability = TRUE, col = "lightgrey")
Minimum markup in the data \(m_{min}\):
(mmin = min(m))
## [1] 1.001387
Maximum markup in the data \(m_{max}\):
(mn = max(m))
## [1] 9.977231
Number of observations \(n\):
(nn = length(m))
## [1] 2457
We assume that firm productivities follow a Pareto distribution \(\phi\sim \mathcal{P}(\underline{\phi}, k)\) with CDF: \[\begin{equation} G_{P}(\phi) = 1 - \underline{\phi}^k \phi^{-k} \label{pareto} \end{equation}\]
Parameters of Pareto distribution: lower bound \(\underline\phi>0\) and shape \(k>0\).
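For reference, a minimal R implementation of this CDF (a sketch; the names `G_pareto` and `phimin` are ours and are not used elsewhere in the code):
G_pareto = function(phi, phimin, k){
  # Pareto CDF: zero below the lower bound phimin, 1 - (phimin/phi)^k above it
  ifelse(phi < phimin, 0, 1 - (phimin/phi)^k)
}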
The CDF of the markup distribution implied by the assumptions of Pareto productivity and CREMR demand \(p(x)=\frac{\beta }{x}\left( x-\gamma \right)^{\frac{\sigma-1}{\sigma}}\) is:
\[\begin{align} B(m)={}&1 - \underline{\phi}^k \left(\frac{1}{\beta}\frac{\sigma}{\sigma-1}\left(\frac{\frac{\sigma-1}{\sigma}m}{1-\frac{\sigma-1}{\sigma}m}\gamma\right)^{\frac{1}{\sigma}}\right)^{-k}\;\;\; \text{with} \;\;\; m \in \left[\underline{m}, \frac{\sigma}{\sigma - 1}\right]\\ ={}&1-\omega^{k}\left(\frac{(\sigma-1)m}{m+\sigma-\sigma m}\right)^{-\frac{k}{\sigma}} \end{align}\]
where \(\omega=\frac{\underline{\phi}\beta}{\gamma^{\frac{1}{\sigma}}}\frac{\sigma-1}{\sigma}=\left(\frac{(\sigma-1)\underline{m}}{\underline{m}+\sigma-\sigma \underline{m}}\right)^{\frac{1}{\sigma}}\)
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m) &= \frac{k\omega^k}{m(m+\sigma-m\sigma)}\left(\frac{(\sigma-1)m}{m+\sigma-\sigma m}\right)^{-\frac{k}{\sigma}}\\ &=\left(\frac{\underline{m}}{\underline{m}+\sigma-\sigma \underline{m}}\right)^{\frac{k}{\sigma}}\frac{k}{m(m+\sigma-m\sigma)}\left(\frac{m}{m+\sigma-\sigma m}\right)^{-\frac{k}{\sigma}} \end{align}\]
The log-likelihood function is:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))={}&\sum_{i = 1}^n\log\left(\left(\frac{\underline{m}}{\underline{m}+\sigma-\sigma \underline{m}}\right)^{\frac{k}{\sigma}}\frac{k}{m_i(m_i+\sigma-m_i\sigma)}\left(\frac{m_i}{m_i+\sigma-\sigma m_i}\right)^{-\frac{k}{\sigma}}\right)\\ ={}&n\left(\log k+\frac{k}{\sigma}\log\underline{m}-\frac{k}{\sigma}\log(\underline{m}+\sigma-\sigma\underline{m})\right)+\sum_{i = 1}^n\left(-\frac{\sigma+k}{\sigma}\log m_i+\frac{k-\sigma}{\sigma}\log(m_i+\sigma-m_i\sigma)\right) \end{align}\]
The log-likelihood function is monotonically increasing in \(\underline{m}\): its derivative with respect to \(\underline{m}\) is \(\frac{nk}{\underline{m}(\underline{m}+\sigma-\sigma\underline{m})}>0\). Hence \(\hat{\underline{m}}=m_{min}\).
Differentiating the log-likelihood with respect to \(k\) yields:
\[\begin{align} \frac{dL}{dk} &=n(\frac{1}{k}+\log\omega-\frac{1}{\sigma}\log(\sigma-1))+\sum_{i = 1}^n\left(-\frac{1}{\sigma}\log m_i+\frac{1}{\sigma}\log(m_i+\sigma-m_i\sigma)\right)\\ &\Rightarrow \hat k=\frac{\sigma}{\frac{1}{n}\sum_{i = 1}^n(\log m_i-\log(m_i+\sigma-m_i\sigma))+\log(\sigma-1)-\sigma\log\omega} \end{align}\]
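For a given \(\sigma\) (and with \(\hat{\underline{m}}=m_{min}\)), this yields \(\hat{k}\) in closed form. A small R sketch of this profile estimator (the helper name `k_hat_CREMR` is ours and is not used in the estimation below):
k_hat_CREMR = function(sigma, m){
  mmin = min(m)
  # omega implied by the boundary condition B(mmin) = 0
  omega = ((sigma-1)*mmin/(mmin+sigma-sigma*mmin))^(1/sigma)
  # closed-form MLE of k given sigma
  sigma/(mean(log(m)-log(m+sigma-m*sigma))+log(sigma-1)-sigma*log(omega))
}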
And differentiating the log-likelihood with respect to \(\sigma\) (holding \(\omega\) fixed) yields:
\[\begin{align} \frac{dL}{d\sigma} &=\frac{nk}{\sigma^2}\left(\log(\sigma-1)-\frac{\sigma}{\sigma-1}\right)+\sum_{i = 1}^n\left(\frac{k}{\sigma^2}\left(\log m_i-\log(m_i+\sigma-m_i\sigma)\right)-\frac{(k-\sigma)(m_i-1)}{\sigma(m_i+\sigma-m_i\sigma)}\right) \end{align}\]
There is no obvious closed-form solution for \(\hat{k}\) and \(\hat{\sigma}\) jointly, but we can optimize numerically.
We define the objective function (i.e. minus log-likelihood) for \(b(m)\) (to be minimized):
log_likelihood_b_CREMR = function(theta, m){
  k = theta[1]
  sigma = theta[2]
  # minus the log-likelihood of b(m), with the lower bound calibrated to mmin = min(m)
  -sum(log((k*(mmin/(mmin+sigma-sigma*mmin))^(k/sigma)/(m*(m+sigma-m*sigma)))*(m/(m+sigma-m*sigma))^(-k/sigma)))
}
For the first optimization, we use the following starting values. Let \(m_{max}\) denote the largest markup in the data; then
\[m_{max} < \frac{\sigma}{\sigma - 1},\]
or equivalently
\[\sigma < \frac{m_{max}}{m_{max} - 1}.\]
Since we also need \(\sigma>1\), we propose using
\[k_{\text{start}} = 2 \;\;\; \text{and} \;\;\; \sigma_{\text{start}} = \frac{1}{2} \left( 1 + \frac{m_{max}}{m_{max}-1} \right).\] The estimation of \(\sigma\) and \(k\) is then simply:
# Estimation (CREMR+Pareto)
(theta_startCREMR = c(2, 0.5*(1 + mn/(mn-1))))
## [1] 2.000000 1.055696
(estimCREMR = optim(par = theta_startCREMR, log_likelihood_b_CREMR, m = m))
## $par
## [1] 1.232891 1.111174
##
## $value
## [1] 3021.716
##
## $counts
## function gradient
## 87 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
The estimated parameters are therefore:
(kCREMR=estimCREMR$par[1])
## [1] 1.232891
(sigmaCREMR=estimCREMR$par[2])
## [1] 1.111174
(omegaCREMR=((sigmaCREMR-1)*mmin/(mmin+sigmaCREMR-sigmaCREMR*mmin))^(1/sigmaCREMR))
## [1] 0.1386922
To illustrate this estimation, we plot the empirical distribution (using a histogram) and the estimated pdf:
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = (kCREMR*((sigmaCREMR-1)*mmin/(mmin+sigmaCREMR-sigmaCREMR*mmin))^(kCREMR/sigmaCREMR)/(mm*(mm+sigmaCREMR-mm*sigmaCREMR)))*((sigmaCREMR-1)*mm/(mm+sigmaCREMR-mm*sigmaCREMR))^(-kCREMR/sigmaCREMR)
lines(mm, yy, col = 2, lwd = 2)
We calculate the Akaike Information Criterion (AIC): \(AIC=2p-2\log(\hat L)\) where \(p\) is the number of estimated parameters and \(\hat L\) is the maximum value of the likelihood function.
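Since this computation recurs for every model below, the formula can be wrapped in a one-line helper (the name `aic` is ours; the chunks below keep the inline form):
# AIC from the number of estimated parameters p and the maximized log-likelihood
aic = function(p, logLik){ 2*p - 2*logLik }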
#Three estimated parameters
pCREMR = 3
(AIC_CREMR = 2*pCREMR - 2*(-estimCREMR$value))
## [1] 6049.433
Assume the demand is linear \(p(x)=\alpha-\beta x\). Maximum output is \(\bar x=\frac{\alpha}{2\beta}\), and the markup is \(m(x)=\frac{\alpha-\beta x}{\alpha-2\beta x}\). The maximum markup is infinite: \(m(x)\to\infty\) as \(x\to\bar x\); the minimum markup is \(\underline{m}\).
The CDF of the markup distribution implied by the assumptions of Pareto productivity and linear demand is:
\[\begin{align} B(m) &=1-\left(\frac{2m-1}{\alpha\underline{\phi}}\right)^{-k} \;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right), \end{align}\]
where \(k > 0\), \(\underline{\phi}>0\), \(\alpha > 0\), and \(\underline{\phi}=\phi(\underline{m})=\frac{2\underline{m}-1}{\alpha}\).
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m) &= 2k(2\underline{m}-1)^k\left(2m-1\right)^{-k-1} \end{align}\]
The log-likelihood function is:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(2k(2\underline{m}-1)^k\left(2m_i-1\right)^{-k-1}\right)&=\sum_{i = 1}^n\left(\log(2k)+k\log(2\underline{m}-1)-(k+1)\log(2m_i-1)\right)\\ &=n\left(\log(2k)+k\log(2\underline{m}-1)\right)-(k+1)\sum_{i = 1}^n\log(2m_i-1) \end{align}\]
The log-likelihood function is monotonically increasing in \(\underline{m}\), hence \(\hat{\underline{m}}=m_{min}\).
Differentiating \(L\) with respect to \(k\), we obtain a closed-form expression for the MLE estimator of \(k\):
\[\begin{align} \frac{dL}{dk} &=\frac{n}{k}+n\log(2\underline{m}-1)-\sum_{i = 1}^n\log(2m_i-1)=0\Rightarrow \hat k=\frac{1}{\frac{1}{n}\sum_{i = 1}^n\log(2m_i-1)-\log(2\hat{\underline{m}}-1)} \end{align}\]
MLE estimation:
(omegaLIN=2*mmin-1)
## [1] 1.002774
(kkLIN = nn/(sum(log(2*m-1))-nn*log(omegaLIN)))
## [1] 1.001127
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
xx = 2*kkLIN*(omegaLIN^kkLIN)*(2*mm-1)^(-kkLIN-1)
lines(mm, xx, col = 3, lwd = 2)
We calculate the Akaike Information Criterion (AIC):
#Two estimated parameters
pLIN = 2
(AIC_LIN = 2*pLIN - 2*(sum(log(2*kkLIN*(omegaLIN^kkLIN)*(2*m-1)^(-kkLIN-1)))))
## [1] 6428.425
Assume the demand is LES \(p(x)=\frac{\delta}{x+\gamma}\). The markup is \(m(x)=\frac{x+\gamma}{\gamma}\). Maximum markup is again infinite: \(m(x)\to\infty\) as \(x\to\infty\).
The CDF of the markup distribution implied by the assumptions of Pareto productivity and LES demand is:
\[\begin{align} B(m) &=1-\left(\frac{\gamma}{\delta\underline{\phi}}m^2\right)^{-k} \;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right), \end{align}\]
where \(k > 0\), \(\underline{\phi}>0\), \(\gamma>0\), \(\delta > 0\), and \(\underline{\phi}=\frac{\gamma}{\delta}\underline{m}^2\).
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m)&=2k\left(\underline{m}\right)^{2k}m^{-2k-1} \end{align}\]
The log-likelihood function is:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(2k\left(\underline{m}\right)^{2k}m_i^{-2k-1}\right)&=\sum_{i = 1}^n\left(\log(2k)+2k\log\underline{m}-(2k+1)\log(m_i)\right)\\ &=n(\log(2k)+2k\log\underline{m})-(2k+1)\sum_{i = 1}^n\log(m_i) \end{align}\]
The estimation procedure is the same as for linear demand above. The log-likelihood function is monotonically increasing in \(\underline{m}\), hence \(\hat{\underline{m}}=m_{min}\).
Furthermore, we can get a closed-form solution for the MLE estimator of \(k\). Differentiate \(L\) with respect to \(k\):
\[\begin{align} \frac{dL}{dk} &=\frac{n}{k}+2n\log\underline{m}-2\sum_{i = 1}^n\log(m_i)=0\Rightarrow \hat k=\frac{1}{\frac{2}{n}\sum_{i = 1}^n\log(m_i)-2\log\hat{\underline{m}}} \end{align}\]
MLE estimation:
(omegaLES=mmin^2)
## [1] 1.002776
(kkLES = 1/((2/nn)*sum(log(m))-log(omegaLES)))
## [1] 0.7466883
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = 2*kkLES*omegaLES^(kkLES)*(mm)^(-2*kkLES-1)
lines(mm, yy, col = 4, lwd = 2)
We calculate the Akaike Information Criterion (AIC):
#Two estimated parameters
pLES = 2
(AIC_LES = 2*pLES - 2*(sum(log((2*kkLES*(omegaLES^(kkLES))*(m)^(-2*kkLES-1))))))
## [1] 6244.631
Assume the demand is translog \(x(p)=\frac{1}{p}(\gamma-\eta\log p)\). The markup is \(m(x)=1+W\!\left(\frac{e^{\frac{\gamma}{\eta}}}{\eta}x\right)\), where \(W\) denotes the Lambert W function. The maximum markup is again infinite: \(m(x)\to\infty\) as \(x\to\infty\).
The CDF of the markup distribution implied by the assumptions of Pareto productivity and translog demand is:
\[\begin{align} B(m) &=1-\left(\frac{e^{-1-\frac{\gamma}{\eta}}}{\underline{\phi}}me^{m}\right)^{-k} \;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right), \end{align}\]
where \(k > 0\), \(\underline{\phi}>0\), \(\gamma>0\), \(\eta > 0\), and \(\underline{\phi}=\phi(\underline{m})=\underline{m}e^{\underline{m}}e^{-1-\frac{\gamma}{\eta}}\).
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m) &= k\left(\underline{m}e^{\underline{m}}\right)^{k}(m+1)m^{-k-1}e^{-km} \end{align}\]
The log-likelihood function is:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(k\left(\underline{m}e^{\underline{m}}\right)^{k}(m_{i}+1)m_{i}^{-k-1}e^{-km_{i}}\right)&=\sum_{i = 1}^n\left(\log k+k\log\left(\underline{m}e^{\underline{m}}\right)+\log(m_{i}+1)-(k+1)\log m_{i}-km_{i}\right)\\ &=n\log k +nk\log\left(\underline{m}e^{\underline{m}}\right) +\sum_{i = 1}^n\log(m_{i}+1)-(k+1)\sum_{i = 1}^n\log m_{i}-k\sum_{i = 1}^n m_{i} \end{align}\]
The estimation procedure is the same as for the linear and LES demands above. The log-likelihood function is monotonically increasing in \(\underline{m}\), hence \(\hat{\underline{m}}=m_{min}\).
Then we can again solve for the MLE estimator of \(k\) in closed form. Differentiate \(L\) with respect to \(k\):
\[\begin{align} \frac{dL}{dk} &=\frac{n}{k}+n\log\left(\underline{m}e^{\underline{m}}\right)-\sum_{i = 1}^n\log m_{i}-\sum_{i = 1}^n m_{i}=0\Rightarrow \hat k=\frac{1}{\frac{1}{n}\sum_{i = 1}^n(m_{i}+\log m_{i})-\log\left(\hat{\underline{m}}e^{\hat{\underline{m}}}\right)} \end{align}\]
MLE estimation:
(omegaTLOG=mmin*exp(mmin))
## [1] 2.72583
(kkTLOG = 1/((1/nn)*sum(log(m)+m)-log(omegaTLOG)))
## [1] 0.4868533
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = kkTLOG*(omegaTLOG^(kkTLOG))*(1+mm)*mm^(-kkTLOG-1)*exp(-kkTLOG*mm)
lines(mm, yy, col = 5, lwd = 2)
We calculate the Akaike Information Criterion (AIC):
#Two estimated parameters
pTLOG = 2
(AIC_TLOG = 2*pTLOG - 2*(sum(log(kkTLOG*(omegaTLOG^(kkTLOG))*(1+m)*m^(-kkTLOG-1)*exp(-kkTLOG*m)))))
## [1] 6258.079
hist(m, probability = TRUE, col = "lightgrey", breaks = 20)
mm = seq(from = min(m), to = max(m), length.out = 10^4)
xx = 2*kkLIN*(omegaLIN^kkLIN)*(2*mm-1)^(-kkLIN-1)
lines(mm, xx, col = 3, lwd = 2)
vv = 2*kkLES*omegaLES^(kkLES)*(mm)^(-2*kkLES-1)
lines(mm, vv, col = 4, lwd = 2)
ww = kkTLOG*(omegaTLOG^(kkTLOG))*(1+mm)*mm^(-kkTLOG-1)*exp(-kkTLOG*mm)
lines(mm, ww, col = 5, lwd = 2)
yy = (kCREMR*((sigmaCREMR-1)*mmin/(mmin+sigmaCREMR-sigmaCREMR*mmin))^(kCREMR/sigmaCREMR)/(mm*(mm+sigmaCREMR-mm*sigmaCREMR)))*((sigmaCREMR-1)*mm/(mm+sigmaCREMR-mm*sigmaCREMR))^(-kCREMR/sigmaCREMR)
lines(mm, yy, col = 2, lwd = 2)
legend("topright", c("CREMR","Linear","LES","Translog"), col = 2:5, lwd = 2)
AIC ranking:
# install.packages("kableExtra")
library(kableExtra)
dt = data.frame(models = c("CREMR", "LES", "Translog", "Linear"), AIC = c(AIC_CREMR, AIC_LES, AIC_TLOG, AIC_LIN))
kable(dt)
| models | AIC |
|---|---|
| CREMR | 6049.433 |
| LES | 6244.631 |
| Translog | 6258.079 |
| Linear | 6428.425 |
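The ranking above was ordered by hand; it can also be produced programmatically, e.g. with:
kable(dt[order(dt$AIC), ], row.names = FALSE)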
Consider a Lognormal distribution \(\phi\sim \mathcal{LN}(\mu, s)\) with CDF: \[\begin{equation} G_{LN}(\phi) = \Phi\left(\frac{\log\phi-\mu}{s}\right) \label{lognormal} \end{equation}\]
Parameters of Lognormal distribution: \(\mu\in(-\infty,+\infty)\) and \(s>0\).
We assume that firm productivities follow a left-truncated version of \(G_{LN}(\phi)\):
\[\begin{equation} G_{LN}^{t}(\phi) = \begin{cases} 0 &\mbox{for } \phi <\underline\phi \\ \frac{G_{LN}(\phi)-G_{LN}(\underline\phi)}{1-G_{LN}(\underline\phi)} & \mbox{for } \underline\phi \leq \phi\leq +\infty\end{cases} \label{lognormalTrunc} \end{equation}\]
where \[\begin{align} \frac{G_{LN}(\phi)-G_{LN}(\underline\phi)}{1-G_{LN}(\underline\phi)} ={}&\frac{\Phi\left(\frac{\log \phi-\mu}{s}\right)-\Phi\left(\frac{\log \underline\phi-\mu}{s}\right)}{1-\Phi\left(\frac{\log \underline\phi-\mu}{s}\right)} \end{align}\]
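A minimal R implementation of this left-truncated CDF (a sketch; the names `G_ln_trunc` and `phimin` are ours):
G_ln_trunc = function(phi, phimin, mu, s){
  # left-truncated lognormal CDF: zero below phimin, rescaled lognormal CDF above
  G  = pnorm((log(phi)-mu)/s)
  G0 = pnorm((log(phimin)-mu)/s)
  ifelse(phi < phimin, 0, (G-G0)/(1-G0))
}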
For each of the four demand functions considered below, we start by estimating an untruncated version, which gives us useful starting values for the estimation of the truncated version.
As a starting point for the estimation, let us consider the case of the markup distribution implied by the assumptions of (untruncated) Lognormal productivity and CREMR demand \(p(x)=\frac{\beta }{x}\left( x-\gamma \right)^{\frac{\sigma-1}{\sigma}}\):
\[\begin{equation} B(m)=G_{LN}(\phi(m))=\Phi\left(\frac{\log\left(\frac{\frac{\sigma-1}{\sigma}m}{1-\frac{\sigma-1}{\sigma}m}\right)-\tilde\mu}{\sigma s}\right) \label{mCREMRLN} \end{equation}\]
where \(\tilde\mu=\mu-\sigma\left(\log\left(\frac{\sigma}{\sigma-1}\frac{\gamma^{\frac{1}{\sigma}}}{\beta}\right)\right)\).
Note that \(B(\underline{m})\neq0\), so this is not a well-defined CDF! This will be sorted out in the truncated case below. For the moment, let us ignore this problem and try to fit the model to the data.
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m) &=\frac{e^{-\frac{\left(\log\left(\frac{\sigma}{m+\sigma-m\sigma}-1\right)-\tilde\mu\right)^2}{2(\sigma s)^2}}}{\sqrt{2\pi}ms(m+\sigma-m\sigma)} \end{align}\]
The log-likelihood function is
\[\begin{align} L(\theta) &= \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(\frac{e^{-\frac{\left(\log\left(\frac{\sigma}{m_i+\sigma-m_i\sigma}-1\right)-\tilde\mu\right)^2}{2(\sigma s)^2}}}{\sqrt{2\pi}m_is(m_i+\sigma-m_i\sigma)}\right)\\ &=n(-\frac{1}{2}\log(2\pi)-\log(s))+\sum_{i = 1}^n\left(-\log(m_i)-\log(m_i+\sigma-m_i\sigma)-\frac{\left(\log(m_i(\sigma-1))-\log(m_i+\sigma-m_i\sigma)-\tilde\mu\right)^2}{2(\sigma s)^2}\right) \end{align}\]
We use the following transformations of \(\sigma\) and \(s\) to ensure that, in the optimization below, these parameters stay within the appropriate bounds, namely \(1<\sigma<\frac{m_{max}}{m_{max}-1}\) and \(s>0\):
\[\begin{equation} \sigma=u(z)=\frac{a-1}{\pi}\arctan(z)+\frac{a+1}{2} \end{equation}\] with \(a=\frac{m_{max}}{m_{max}-1}\), and
\[\begin{equation} s=v(\tilde{s})=e^{\tilde{s}} \end{equation}\]
(a=mn/(mn-1))
## [1] 1.111393
# maps the real line into (1, a), keeping sigma within its admissible bounds
u = function(z){
  ((a-1)/pi)*atan(z)+(a+1)/2
}
x <- seq(-100,100,0.1)
plot(x, u(x))
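The inverse mapping \(z=u^{-1}(\sigma)=\tan\left(\frac{\pi}{a-1}\left(\sigma-\frac{a+1}{2}\right)\right)\) converts a starting value for \(\sigma\) into one for \(z\); it is computed inline below, but as a helper (the name `uinv` is ours) it would read:
uinv = function(sigma){
  tan((pi/(a-1))*(sigma-(a+1)/2))
}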
We define the objective function (i.e. minus log-likelihood) for \(b(m)\) (to be minimized):
log_likelihood_b_CREMR_ln = function(theta, m){
  mu = theta[1]
  z = theta[2]    # sigma = u(z)
  ss = theta[3]   # s = exp(ss)
  -sum(log((exp(-((log((m*(u(z)-1))/(m+u(z)-m*u(z)))-mu)^2/(2*(exp(ss)*u(z))^2))))/(sqrt(2*pi)*m*exp(ss)*(m+u(z)-m*u(z)))))
}
# Initial estimation
(sigma_start=0.5*(1 + mn/(mn-1)))
## [1] 1.055696
(z_start=tan((pi/(a-1))*(sigma_start-(a+1)/2)))
## [1] 0
(theta_start = c(2, z_start, 1))
## [1] 2 0 1
(estimCREMR_ln = optim(par = theta_start, log_likelihood_b_CREMR_ln, method = "BFGS", m = m))
## $par
## [1] -6.1145127 -31.4177698 -0.5466739
##
## $value
## [1] 3788.063
##
## $counts
## function gradient
## 365 97
##
## $convergence
## [1] 0
##
## $message
## NULL
The estimated parameters are
(muCREMR_ln=estimCREMR_ln$par[1])
## [1] -6.114513
(zCREMR_ln=estimCREMR_ln$par[2])
## [1] -31.41777
(ssCREMR_ln=estimCREMR_ln$par[3])
## [1] -0.5466739
(sigmaCREMR_ln=u(zCREMR_ln))
## [1] 1.001128
(sCREMR_ln=exp(ssCREMR_ln))
## [1] 0.578872
This is what the fit looks like:
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = (exp(-((log((mm*sigmaCREMR_ln-mm)/(sigmaCREMR_ln+mm-sigmaCREMR_ln*mm))-muCREMR_ln)^2/(2*(sCREMR_ln*sigmaCREMR_ln)^2))))/(sqrt(2*pi)*mm*sCREMR_ln*(mm+sigmaCREMR_ln-mm*sigmaCREMR_ln))
lines(mm, yy, col = 2, lwd = 2)
AIC:
#Three estimated parameters
pCREMR_ln = 3
(AIC_CREMR_ln = 2*pCREMR_ln - 2*(-estimCREMR_ln$value))
## [1] 7582.125
As expected, the fit is not great, but this was only meant as an intermediate step. So now let us turn to the truncated case. The CDF of the markup distribution implied by the assumptions of truncated Lognormal productivity and CREMR demand is:
\[\begin{equation} B(m)=G_{LN}^{t}(\phi(m))= \begin{cases} 0 &\mbox{for } m <\underline{m} \\ \frac{G_{LN}(\phi(m))-G_{LN}(\underline\phi)}{1-G_{LN}(\underline\phi)} & \mbox{for } \underline{m} \leq m\leq \frac{\sigma}{\sigma-1} \end{cases} \label{mCREMRLN-c} \end{equation}\]
where \[\begin{align} \frac{G_{LN}(\phi(m))-G_{LN}(\underline\phi)}{1-G_{LN}(\underline\phi)}={}&\frac{G_{LN}(\phi(m))-G_{LN}(\phi(\underline{m}))}{1-G_{LN}(\phi(\underline{m}))}\\ ={}&\frac{\Phi\left(\frac{\log\left(\frac{\frac{\sigma-1}{\sigma}m}{1-\frac{\sigma-1}{\sigma}m}\right)-\tilde\mu}{\sigma s}\right)-\Phi\left(\frac{\log\left(\frac{\frac{\sigma-1}{\sigma}\underline{m}}{1-\frac{\sigma-1}{\sigma}\underline{m}}\right)-\tilde\mu}{\sigma s}\right)}{1-\Phi\left(\frac{\log\left(\frac{\frac{\sigma-1}{\sigma}\underline{m}}{1-\frac{\sigma-1}{\sigma}\underline{m}}\right)-\tilde\mu}{\sigma s}\right)} \end{align}\]
The pdf is
\[\begin{align} b(m) &=\frac{1}{1-\Phi\left(\frac{\log\left(\frac{\frac{\sigma-1}{\sigma}\underline{m}}{1-\frac{\sigma-1}{\sigma}\underline{m}}\right)-\tilde\mu}{\sigma s}\right)}\frac{e^{-\frac{\left(\log\left(\frac{\sigma}{m+\sigma-m\sigma}-1\right)-\tilde\mu\right)^2}{2(\sigma s)^2}}}{\sqrt{2\pi}ms(m+\sigma-m\sigma)} \end{align}\]
We calibrate the truncation point to the minimum markup from the data: \(\underline m=m_{min}\).
(mt=min(m))
## [1] 1.001387
We define the new objective function (i.e. minus log-likelihood) for \(b(m)\) (to be minimized) with the same arctan transformation:
log_likelihood_b_CREMR_lnt = function(theta, m){
  mu = theta[1]
  z = theta[2]   # sigma = u(z)
  s = theta[3]   # s enters as exp(s) to keep it positive
  -sum(log((1/(1-pnorm((log((mt*(u(z)-1))/(mt+u(z)-mt*u(z)))-mu)/(u(z)*exp(s)),0,1)))*(exp(-((log((m*(u(z)-1))/(m+u(z)-m*u(z)))-mu)^2/(2*(exp(s)*u(z))^2))))/(sqrt(2*pi)*m*exp(s)*(m+u(z)-m*u(z)))))
}
For starting values, we take the MLE estimators from the untruncated case:
# Initial estimation
(theta_start_t = c(muCREMR_ln, zCREMR_ln, ssCREMR_ln))
## [1] -6.1145127 -31.4177698 -0.5466739
(estimCREMR_lnt = optim(par = theta_start_t, log_likelihood_b_CREMR_lnt, m = m))
## $par
## [1] -49.982070 29.043672 1.800142
##
## $value
## [1] 3026.494
##
## $counts
## function gradient
## 347 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
The estimated parameters are
(muCREMR_lnt=estimCREMR_lnt$par[1])
## [1] -49.98207
(zCREMR_lnt=estimCREMR_lnt$par[2])
## [1] 29.04367
(ssCREMR_lnt=estimCREMR_lnt$par[3])
## [1] 1.800142
(sigmaCREMR_lnt=u(zCREMR_lnt))
## [1] 1.110173
(sCREMR_lnt=exp(ssCREMR_lnt))
## [1] 6.050505
(omegaCREMR_lnt=((mt*(sigmaCREMR_lnt-1))/(mt+sigmaCREMR_lnt-mt*sigmaCREMR_lnt))^(1/sigmaCREMR_lnt))
## [1] 0.1373217
(truncCREMR_lnt=pnorm((log((mt*(sigmaCREMR_lnt-1))/(mt+sigmaCREMR_lnt-mt*sigmaCREMR_lnt))-muCREMR_lnt)/(sigmaCREMR_lnt*sCREMR_lnt),0,1))
## [1] 1
(1/(1-pnorm((log((mt*(sigmaCREMR_lnt-1))/(mt+sigmaCREMR_lnt-mt*sigmaCREMR_lnt))-muCREMR_lnt)/(sigmaCREMR_lnt*sCREMR_lnt),0,1)))
## [1] 1.759906e+12
(1/(1-truncCREMR_lnt))
## [1] 1.759906e+12
(formatC(1/(1-truncCREMR_lnt), digits = 4, format = "f"))
## [1] "1759906067749.3147"
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = ((1/(1-pnorm((log((mt*(sigmaCREMR_lnt-1))/(mt+sigmaCREMR_lnt-mt*sigmaCREMR_lnt))-muCREMR_lnt)/(sigmaCREMR_lnt*sCREMR_lnt),0,1))))*(exp(-((log((mm*sigmaCREMR_lnt-mm)/(sigmaCREMR_lnt+mm-sigmaCREMR_lnt*mm))-muCREMR_lnt)^2/(2*(sCREMR_lnt*sigmaCREMR_lnt)^2))))/(sqrt(2*pi)*mm*sCREMR_lnt*(mm+sigmaCREMR_lnt-mm*sigmaCREMR_lnt))
lines(mm, yy, col = 2, lwd = 2)
We calculate the Akaike Information Criterion (AIC):
#Four estimated parameters
pCREMR_lnt = 4
(AIC_CREMR_lnt = 2*pCREMR_lnt - 2*(-estimCREMR_lnt$value))
## [1] 6060.988
Assume the demand is linear \(p(x)=\alpha-\beta x\). Let us again start with the untruncated case (for which we can get closed-form expressions of the MLE estimators).
The markup distribution implied by the assumptions of Lognormal productivity and linear demand is:
\[\begin{align} B(m) &=\Phi\left(\frac{\log\left(\frac{2m-1}{\alpha}\right)-\mu}{s}\right) \;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right), \end{align}\]
where \(\alpha > 0\) and \(s>0\).
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m) &=\sqrt{\frac{2}{\pi}}\frac{1}{s(2m-1)}e^{-\frac{(\log(2m-1)-\tilde\mu)^2}{2s^2}} \end{align}\]
where \(\tilde\mu=\mu+\log\alpha\).
The log-likelihood function is \[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))&=\sum_{i = 1}^n\log\left(\sqrt{\frac{2}{\pi}}\frac{1}{s(2m_i-1)}e^{-\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}}\right)\\ &=\sum_{i = 1}^n\left(\frac{1}{2}\log(\frac{2}{\pi})-\log(2m_i-1)-\log(s)-\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}\right)\\ &=n(\frac{1}{2}\log(\frac{2}{\pi})-\log(s))-\sum_{i = 1}^n\left(\log(2m_i-1)+\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}\right) \end{align}\]
Differentiating with respect to \(\tilde\mu\) and \(s\) \[\begin{align} \frac{\partial L}{\partial \tilde\mu}&=\sum_{i = 1}^n\frac{2(\log(2m_i-1)-\tilde\mu)}{2s^2}\\ \frac{\partial L}{\partial s}&=-\frac{n}{s}+\sum_{i = 1}^n\frac{(\log(2m_i-1)-\tilde\mu)^2}{s^3} \end{align}\]
Setting these equal to zero and solving for \(\tilde\mu\) and \(s\) gives the MLE estimators \[\begin{align} \hat{\tilde\mu}&=\frac{1}{n }\sum_{i = 1}^n\log(2m_i-1)\\ \hat{s}^2&=\frac{1}{n}\sum_{i = 1}^n(\log(2m_i-1)-\hat{\tilde\mu})^2 \end{align}\]
MLE estimation:
(muLIN=(1/nn)*sum(log(2*m-1)))
## [1] 1.001645
(sLIN=sqrt((1/nn)*sum((log(2*m-1)-muLIN)^2)))
## [1] 0.7497347
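As a sanity check (a sketch; the helper name `negll_LIN_ln` is ours), the closed-form estimators can be compared with a direct numerical minimization of minus the log-likelihood:
negll_LIN_ln = function(theta, m){
  -sum(log(sqrt(2/pi)*(1/(theta[2]*(2*m-1)))*exp(-((log(2*m-1)-theta[1])^2)/(2*theta[2]^2))))
}
# should be close to the closed-form c(muLIN, sLIN)
optim(par = c(1, 1), negll_LIN_ln, m = m)$par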
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = sqrt(2/pi)*(1/(sLIN*(2*mm-1)))*exp(-((log(2*mm-1)-muLIN)^2)/(2*sLIN^2))
lines(mm, yy, col = 3, lwd = 2)
AIC:
#Two estimated parameters
pLIN_ln = 2
(AIC_lin_ln = 2*pLIN_ln - 2*(sum(log(sqrt(2/pi)*(1/(sLIN*(2*m-1)))*exp(-((log(2*m-1)-muLIN)^2)/(2*sLIN^2))))))
## [1] 7077.213
Again, the fit is not great. Let us now consider the truncated version. The CDF of the markup distribution implied by the assumptions of truncated Lognormal productivity and linear demand is:
\[\begin{align} B(m) &=\frac{\Phi\left(\frac{\log\left(\frac{2m-1}{\alpha}\right)-\mu}{s}\right)-\Phi\left(\frac{\log\left(\frac{2\underline{m}-1}{\alpha}\right)-\mu}{s}\right)}{1-\Phi\left(\frac{\log\left(\frac{2\underline{m}-1}{\alpha}\right)-\mu}{s}\right)} \end{align}\]
where \(\alpha > 0\) and \(s>0\).
Let \(b(m)\) denote the pdf of \(B(m)\):
\[\begin{align} b(m)&=\frac{1}{1-\Phi\left(\frac{\log\left(2\underline{m}-1\right)-\tilde\mu}{s}\right)}\sqrt{\frac{2}{\pi}}\frac{1}{s(2m-1)}e^{-\frac{(\log(2m-1)-\tilde\mu)^2}{2s^2}} \end{align}\]
where \(\tilde\mu=\mu+\log\alpha\).
The log-likelihood function is \[\begin{align} L(\theta) &= \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(\frac{1}{1-\Phi\left(\frac{\log\left(2\underline{m}-1\right)-\tilde\mu}{s}\right)}\sqrt{\frac{2}{\pi}}\frac{1}{s(2m_i-1)}e^{-\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}}\right)\\ &=\sum_{i = 1}^n\left(-\log\left(1-\Phi\left(\frac{\log\left(2\underline{m}-1\right)-\tilde\mu}{s}\right)\right)+\frac{1}{2}\log(\frac{2}{\pi})-\log(2m_i-1)-\log(s)-\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}\right)\\ &=n\left(-\log\left(1-\Phi\left(\frac{\log\left(2\underline{m}-1\right)-\tilde\mu}{s}\right)\right)+\frac{1}{2}\log(\frac{2}{\pi})-\log(s)\right)-\sum_{i = 1}^n\left(\log(2m_i-1)+\frac{(\log(2m_i-1)-\tilde\mu)^2}{2s^2}\right) \end{align}\]
We again calibrate the truncation point to the minimum value from the data, \(\underline{m}=m_{min}\). It is hard to get a closed-form solution for the MLE estimators of \(\tilde\mu\) and \(s\) in the truncated case, so we fit numerically. Define the objective function:
log_likelihood_h_LIN_lnt = function(theta, m){
muLINt = theta[1]
sLINt = theta[2]
-sum(log((1/(1-pnorm((log(2*min(m)-1)-muLINt)/sLINt, 0, 1)))*sqrt(2/pi)*(1/(sLINt*(2*m-1)))*exp(-((log(2*m-1)-muLINt)^2)/(2*sLINt^2))))
}
For starting values, let’s take the MLE estimators from the untruncated case:
# Estimation
(theta_startLIN_lnt = c(muLIN, sLIN))
## [1] 1.0016449 0.7497347
(estimLIN_lnt = optim(par = theta_startLIN_lnt, log_likelihood_h_LIN_lnt, m = m))
## $par
## [1] 0.05394534 1.22821098
##
## $value
## [1] 3087.331
##
## $counts
## function gradient
## 51 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
The fit looks much better than for the untruncated case:
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = (1/(1-pnorm((log(2*min(m)-1)-estimLIN_lnt$par[1])/estimLIN_lnt$par[2], 0, 1)))*sqrt(2/pi)*(1/(estimLIN_lnt$par[2]*(2*mm-1)))*exp(-((log(2*mm-1)-estimLIN_lnt$par[1])^2)/(2*estimLIN_lnt$par[2]^2))
lines(mm, yy, col = 3, lwd = 2)
AIC:
#Three estimated parameters
pLIN_lnt = 3
(AIC_LIN_lnt = 2*pLIN_lnt - 2*(-estimLIN_lnt$value))
## [1] 6180.661
Assume the demand is LES \(p(x)=\frac{\delta}{x+\gamma}\). Let us again start with the untruncated case (for which we can get closed-form expressions of the MLE estimators).
The markup distribution implied by the assumptions of Lognormal productivity and LES demand is:
\[\begin{align} B(m) &= \Phi\left(\frac{\log(\frac{m^2\gamma}{\delta})-\mu}{s}\right)\;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right)\\ &=\Phi\left(\frac{2\log(m)-(\mu+\log(\frac{\delta}{\gamma}))}{s}\right) \end{align}\]
where \(\gamma > 0\), \(\delta > 0\) and \(s>0\).
The PDF is
\[\begin{align} b(m) &= \sqrt{\frac{2}{\pi}}\frac{1}{ms}e^{-\frac{\left(2\log(m)-\tilde\mu\right)^2}{2s^2}} \end{align}\]
with \(\tilde\mu=\mu+\log(\frac{\delta}{\gamma})\).
The log-likelihood function:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))&=\sum_{i = 1}^n\log\left(\sqrt{\frac{2}{\pi}}\frac{1}{m_is}e^{-\frac{\left(2\log(m_i)-\tilde\mu\right)^2}{2s^2}}\right)\\ &=n(\frac{1}{2}\log(\frac{2}{\pi})-\log(s))-\sum_{i = 1}^n\left(\log(m_i)+\frac{(2\log(m_i)-\tilde\mu)^2}{2s^2}\right) \end{align}\]
Similar derivations as for the case of linear demand above yield the MLE estimators of \(\tilde\mu\) and \(s\):
\[\begin{align} \hat{\tilde\mu}&=\frac{2}{n}\sum_{i = 1}^n\log(m_i)\\ \hat{s}^2&=\frac{1}{n}\sum_{i = 1}^n(2\log(m_i)-\hat{\tilde\mu})^2 \end{align}\]
(muLES=(2/nn)*sum(log(m)))
## [1] 1.342019
(sLES=sqrt((1/nn)*sum((2*log(m)-muLES)^2)))
## [1] 1.155128
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = sqrt(2/pi)*(1/(sLES*mm))*exp(-((2*log(mm)-muLES)^2)/(2*sLES^2))
lines(mm, yy, col = 4, lwd = 2)
AIC:
#Two estimated parameters
pLES_ln = 2
(AIC_LES_ln = 2*pLES_ln - 2*(sum(log(sqrt(2/pi)*(1/(sLES*m))*exp(-((2*log(m)-muLES)^2)/(2*sLES^2))))))
## [1] 7576.533
Let us now consider the truncated version. The CDF of the markup distribution implied by the assumptions of truncated Lognormal productivity and LES demand is:
\[\begin{align} B(m) &=\frac{\Phi\left(\frac{2\log(m)-(\mu+\log(\frac{\delta}{\gamma}))}{s}\right)-\Phi\left(\frac{2\log(\underline{m})-(\mu+\log(\frac{\delta}{\gamma}))}{s}\right)}{1-\Phi\left(\frac{2\log(\underline{m})-(\mu+\log(\frac{\delta}{\gamma}))}{s}\right)} \\ &=\frac{\Phi\left(\frac{2\log(m)-\tilde\mu}{s}\right)-\Phi\left(\frac{2\log(\underline{m})-\tilde\mu}{s}\right)}{1-\Phi\left(\frac{2\log(\underline{m})-\tilde\mu}{s}\right)} \end{align}\] with \(\tilde\mu=\mu+\log(\frac{\delta}{\gamma})\).
The pdf is
\[\begin{align} b(m) &=\frac{1}{1-\Phi\left(\frac{2\log(\underline{m})-\tilde\mu}{s}\right)}\sqrt{\frac{2}{\pi}}\frac{1}{ms}e^{-\frac{\left(2\log(m)-\tilde\mu\right)^2}{2s^2}} \end{align}\]
Again, we calibrate the truncation point to the minimum markup in the data, and we optimize numerically to find \(\tilde\mu\) and \(s\).
Define objective function:
log_likelihood_h_LES_lnt = function(theta, m){
muLESt = theta[1]
sLESt = theta[2]
-sum(log((1/(1-pnorm((2*log(min(m))-muLESt)/sLESt, 0, 1)))*sqrt(2/pi)*(1/(sLESt*m))*exp(-((2*log(m)-muLESt)^2)/(2*sLESt^2))))
}
For starting values, let’s take the MLE estimators from the untruncated case.
# Estimation
(theta_startLES_lnt = c(muLES, sLES))
## [1] 1.342019 1.155128
(estimLES_lnt = optim(par = theta_startLES_lnt, log_likelihood_h_LES_lnt, m = m))
## $par
## [1] -3.234377 2.731872
##
## $value
## [1] 3089.392
##
## $counts
## function gradient
## 75 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
The fit again looks much better than in the untruncated case:
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = (1/(1-pnorm((2*log(min(m))-estimLES_lnt$par[1])/estimLES_lnt$par[2], 0, 1)))*sqrt(2/pi)*(1/(estimLES_lnt$par[2]*mm))*exp(-((2*log(mm)-estimLES_lnt$par[1])^2)/(2*estimLES_lnt$par[2]^2))
lines(mm, yy, col = 4, lwd = 2)
AIC:
#Three estimated parameters
pLES_lnt = 3
(AIC_LES_lnt = 2*pLES_lnt - 2*(-estimLES_lnt$value))
## [1] 6184.785
Assume the demand is translog \(x(p)=\frac{1}{p}(\gamma-\eta\log p)\). Let us again start with the untruncated case (for which we can get closed-form expressions of the MLE estimators).
The markup distribution implied by the assumptions of Lognormal productivity and translog demand is:
\[\begin{align} B(m) &=\Phi\left(\frac{\log(e^{m-1-\frac{\gamma}{\eta}}m)-\mu}{s}\right)\;\;\; \text{with} \;\;\; m \in \left(\underline{m}, \infty\right)\\ &=\Phi\left(\frac{\log(e^{m}m)-(\mu+1+\frac{\gamma}{\eta})}{s}\right) \end{align}\]
where \(\gamma>0\), \(\eta > 0\) and \(s>0\).
The PDF is
\[\begin{align} b(m) &= \frac{1}{\sqrt{2\pi}s}\frac{m+1}{m}e^{-\frac{\left(\log(e^{m}m)-\tilde\mu\right)^2}{2s^2}} \end{align}\]
with \(\tilde\mu=\mu+1+\frac{\gamma}{\eta}\).
The log-likelihood function:
\[\begin{align} L(\theta) = \sum_{i = 1}^n \log(b(m_i))&=\sum_{i = 1}^n\log\left(\frac{1}{\sqrt{2\pi}s}\frac{m_i+1}{m_i}e^{-\frac{\left(\log(e^{m_i}m_i)-\tilde\mu\right)^2}{2s^2}}\right)\\ &=n(-\frac{1}{2}\log(2\pi)-\log s)+\sum_{i = 1}^n(\log(m_i+1)-\log(m_i))-\sum_{i = 1}^n\frac{\left(\log(e^{m_i}m_i)-\tilde\mu\right)^2}{2s^2} \end{align}\]
Similar derivations as for the case of linear and LES demands above yield the MLE estimators of \(\tilde\mu\) and \(s\):
\[\begin{align} \hat{\tilde\mu}&=\frac{1}{n }\sum_{i = 1}^n\log(e^{m_i}m_i)\\ \hat{s}^2&=\frac{1}{n}\sum_{i = 1}^n\left(\log(e^{m_i}m_i)-\hat{\tilde\mu}\right)^2 \end{align}\]
(muTLOG=(1/nn)*sum(log(m*exp(m))))
## [1] 3.05678
(sTLOG=sqrt((1/nn)*sum((log(m*exp(m))-muTLOG)^2)))
## [1] 2.39093
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
yy = (1/(sqrt(2*pi)*sTLOG))*((mm+1)/mm)*exp(-((log(mm*exp(mm))-muTLOG)^2)/(2*sTLOG^2))
lines(mm, yy, col = 5, lwd = 2)
AIC:
#Two estimated parameters
pTLOG_ln = 2
(AIC_TLOG_ln = 2*pTLOG_ln - 2*(sum(log((1/(sqrt(2*pi)*sTLOG))*((m+1)/m)*exp(-((log(m*exp(m))-muTLOG)^2)/(2*sTLOG^2))))))
## [1] 9063.13
Let us now consider the truncated version. The CDF of the markup distribution implied by the assumptions of truncated Lognormal productivity and translog demand is:
\[\begin{align} B(m) &=\frac{\Phi\left(\frac{\log(e^{m}m)-(\mu+1+\frac{\gamma}{\eta})}{s}\right)-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-(\mu+1+\frac{\gamma}{\eta})}{s}\right)}{1-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-(\mu+1+\frac{\gamma}{\eta})}{s}\right)} \\ &=\frac{\Phi\left(\frac{\log(e^{m}m)-\tilde\mu}{s}\right)-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-\tilde\mu}{s}\right)}{1-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-\tilde\mu}{s}\right)} \quad\mbox{for} \quad\underline{m}\leq m\leq+\infty \end{align}\]
where \(\gamma>0\), \(\eta > 0\), \(s>0\) and \(\tilde\mu=\mu+1+\frac{\gamma}{\eta}\).
The PDF is
\[\begin{align} b(m)&=\frac{1}{1-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-\tilde\mu}{s}\right)}\frac{1}{\sqrt{2\pi}s}\frac{m+1}{m}e^{-\frac{\left(\log(e^{m}m)-\tilde\mu\right)^2}{2s^2}} \end{align}\]
So to fit this distribution, we need to estimate three parameters: \(\underline{m}\), \(\tilde\mu\) and \(s\).
The log-likelihood function is \[\begin{align} L(\theta)& = \sum_{i = 1}^n \log(b(m_i))=\sum_{i = 1}^n\log\left(\frac{1}{1-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-\tilde\mu}{s}\right)}\frac{1}{\sqrt{2\pi}s}\frac{m_i+1}{m_i}e^{-\frac{\left(\log(e^{m_i}m_i)-\tilde\mu\right)^2}{2s^2}}\right)\\ &=n\left(-\log\left(1-\Phi\left(\frac{\log(e^{\underline{m}}\underline{m})-\tilde\mu}{s}\right)\right)-\frac{1}{2}\log(2\pi)-\log s\right)+\sum_{i = 1}^n(\log(m_i+1)-\log(m_i))-\sum_{i = 1}^n\frac{\left(\log(e^{m_i}m_i)-\tilde\mu\right)^2}{2s^2} \end{align}\]
Again, we calibrate the truncation point to the minimum markup in the data, and we optimize numerically to find \(\tilde\mu\) and \(s\).
Define objective function:
log_likelihood_h_TLOG_lnt = function(theta, m){
  mu = theta[1]
  s = theta[2]   # log of s: exp(s) keeps the scale parameter positive
  mt = min(m)
  -sum(log((1/(1-pnorm((log(mt*exp(mt))-mu)/exp(s), 0, 1)))*(1/(sqrt(2*pi)*exp(s)))*((m+1)/m)*exp(-((log(m*exp(m))-mu)^2)/(2*exp(s)^2))))
}
For starting values, let’s take the MLE estimators from the untruncated case:
# Estimation
(theta_startTLOG_lnt = c(muTLOG, log(sTLOG)))
## [1] 3.0567798 0.8716824
(estimTLOG_lnt = optim(par = theta_startTLOG_lnt, log_likelihood_h_TLOG_lnt, method = "BFGS", m = m))
## $par
## [1] -77.641030 2.569909
##
## $value
## [1] 3138.547
##
## $counts
## function gradient
## 167 46
##
## $convergence
## [1] 0
##
## $message
## NULL
The estimated parameters are:
(muTLOGt=estimTLOG_lnt$par[1])
## [1] -77.64103
(sTLOGt=exp(estimTLOG_lnt$par[2]))
## [1] 13.06463
The fit again looks much better than in the untruncated case:
hist(m, probability = TRUE, col = "lightgrey")
mm = seq(from = min(m), to = max(m), length.out = 10^4)
mt=min(m)
yy = (1/(1-pnorm((log(mt*exp(mt))-muTLOGt)/sTLOGt, 0, 1)))*(1/(sqrt(2*pi)*sTLOGt))*((mm+1)/mm)*exp(-((log(mm*exp(mm))-muTLOGt)^2)/(2*sTLOGt^2))
lines(mm, yy, col = 5, lwd = 2)
AIC:
#Three estimated parameters
pTLOG_lnt = 3
(AIC_TLOG_lnt = 2*pTLOG_lnt - 2*(-estimTLOG_lnt$value))
## [1] 6283.095
hist(m, probability = TRUE, col = "lightgrey", breaks = 20)
mm = seq(from = min(m), to = max(m), length.out = 10^4)
ww = (1/(1-pnorm((log(mt*exp(mt))-muTLOGt)/sTLOGt, 0, 1)))*(1/(sqrt(2*pi)*sTLOGt))*((mm+1)/mm)*exp(-((log(mm*exp(mm))-muTLOGt)^2)/(2*sTLOGt^2))
lines(mm, ww, col = 5, lwd = 2)
xx = (1/(1-pnorm((log(2*min(m)-1)-estimLIN_lnt$par[1])/estimLIN_lnt$par[2], 0, 1)))*sqrt(2/pi)*(1/(estimLIN_lnt$par[2]*(2*mm-1)))*exp(-((log(2*mm-1)-estimLIN_lnt$par[1])^2)/(2*estimLIN_lnt$par[2]^2))
lines(mm, xx, col = 3, lwd = 2)
vv = (1/(1-pnorm((2*log(min(m))-estimLES_lnt$par[1])/estimLES_lnt$par[2], 0, 1)))*sqrt(2/pi)*(1/(estimLES_lnt$par[2]*mm))*exp(-((2*log(mm)-estimLES_lnt$par[1])^2)/(2*estimLES_lnt$par[2]^2))
lines(mm, vv, col = 4, lwd = 2)
yy = ((1/(1-pnorm((log((mt*(sigmaCREMR_lnt-1))/(mt+sigmaCREMR_lnt-mt*sigmaCREMR_lnt))-muCREMR_lnt)/(sigmaCREMR_lnt*sCREMR_lnt),0,1))))*(exp(-((log((mm*sigmaCREMR_lnt-mm)/(sigmaCREMR_lnt+mm-sigmaCREMR_lnt*mm))-muCREMR_lnt)^2/(2*(sCREMR_lnt*sigmaCREMR_lnt)^2))))/(sqrt(2*pi)*mm*sCREMR_lnt*(mm+sigmaCREMR_lnt-mm*sigmaCREMR_lnt))
lines(mm, yy, col = 2, lwd = 2)
legend("topright", c("CREMR","Linear","LES","Translog"), col = 2:5, lwd = 2)
AIC ranking:
library(kableExtra)
dt = data.frame(models = c("CREMR","Linear", "LES", "Translog"), AIC = c(AIC_CREMR_lnt,AIC_LIN_lnt, AIC_LES_lnt, AIC_TLOG_lnt))
kable(dt)
| models | AIC |
|---|---|
| CREMR | 6060.988 |
| Linear | 6180.661 |
| LES | 6184.785 |
| Translog | 6283.095 |
library(kableExtra)
dt = data.frame(models = c("Pareto - CREMR", "LN - CREMR","LN - Linear", "LN - LES", "Pareto-LES","Pareto-Translog", "LN - Translog","Pareto-Linear"), AIC = c(AIC_CREMR,AIC_CREMR_lnt,AIC_LIN_lnt, AIC_LES_lnt,AIC_LES,AIC_TLOG,AIC_TLOG_lnt,AIC_LIN))
kable(dt)
| models | AIC |
|---|---|
| Pareto - CREMR | 6049.433 |
| LN - CREMR | 6060.988 |
| LN - Linear | 6180.661 |
| LN - LES | 6184.785 |
| Pareto - LES | 6244.631 |
| Pareto - Translog | 6258.079 |
| LN - Translog | 6283.095 |
| Pareto - Linear | 6428.425 |
# Helper functions for output distributions under CREMR demand.
# "Market" = the market equilibrium; "Plan" = (presumably) the planner's allocation.
# Pareto productivity case:
CDFxMarketP = function(sigma, gama, omega, k, x){
  # CDF of output x in the market equilibrium
  1-gama^(k/sigma)*omega^k*(x-gama)^(-k/sigma)
}
PDFxMarketP = function(sigma, gama, omega, k, x){
  # pdf of output x in the market equilibrium
  gama^(k/sigma)*(k/sigma)*omega^k*(x-gama)^(-(k+sigma)/sigma)
}
CDFxPlanP = function(sigma, gama, omega, k, x){
  # CDF of output x in the planner's allocation
  1-gama^(k/sigma)*((omega^(sigma-1)/(1+omega^sigma))*x*(x-gama)^((1-sigma)/sigma))^(-k)
}
PDFxPlanP = function(sigma, gama, omega, k, x){
  # pdf of output x in the planner's allocation
  gama^(k/sigma)*(k/sigma)*((x-gama*sigma)/(x*(x-gama)))*((omega^(sigma-1)/(1+omega^sigma))*x*(x-gama)^((1-sigma)/sigma))^(-k)
}
PDFdifferenceP = function(sigma, gama, omega, k, x){
  # difference between the planner and market pdfs
  PDFxPlanP(sigma, gama, omega, k, x)-PDFxMarketP(sigma, gama, omega, k, x)
}
# Lognormal productivity case: untruncated (LN) and truncated (TLN) versions.
CDFxMarketLN = function(sigma, gama, mu, s, x){
  # CDF of output x in the market equilibrium, untruncated lognormal
  pnorm((log(x-gama)-mu)/(sigma*s),0,1)
}
CDFxMarketTLN = function(sigma, gama, mu, s, xbar, x){
  # CDF of output x in the market equilibrium, lognormal left-truncated at xbar
  (pnorm((log(x-gama)-mu)/(sigma*s),0,1)-pnorm((log(xbar-gama)-mu)/(sigma*s),0,1))/(1-pnorm((log(xbar-gama)-mu)/(sigma*s),0,1))
}
PDFxMarketTLN = function(sigma, gama, mu, s, xbar, x){
  # pdf of output x in the market equilibrium, truncated lognormal
  (1/(1-pnorm((log(xbar-gama)-mu)/(sigma*s),0,1)))*(exp(-(mu-log(x-gama))^2/(2*s^2*sigma^2))/(sqrt(2*pi)*s*(x-gama)*sigma))
}
CDFxPlanLN = function(sigma, gama, mu, s, omega, x){
  # CDF of output x in the planner's allocation, untruncated lognormal
  pnorm((log((omega^sigma/(1+omega^sigma))*x*(x-gama)^((1-sigma)/sigma))-mu)/(s),0,1)
}
CDFxPlanTLN = function(sigma, gama, mu, s, omega, xbar, x){
  # CDF of output x in the planner's allocation, truncated lognormal
  (pnorm((log((omega^sigma/(1+omega^sigma))*x*(x-gama)^((1-sigma)/sigma))-mu)/(s),0,1)-pnorm((log((omega^sigma/(1+omega^sigma))*xbar*(xbar-gama)^((1-sigma)/sigma))-mu)/(s),0,1))/(1-pnorm((log((omega^sigma/(1+omega^sigma))*xbar*(xbar-gama)^((1-sigma)/sigma))-mu)/(s),0,1))
}
PDFxPlanTLN = function(sigma, gama, mu, s, omega, xbar, x){
  # pdf of output x in the planner's allocation, truncated lognormal
  (1/(1-pnorm(((log((omega^sigma/(1+omega^sigma))*xbar*(xbar-gama)^((1-sigma)/sigma))-mu)/s),0,1)))*(exp(-(mu-log((omega^sigma/(1+omega^sigma))*x*(x-gama)^(1/sigma-1)))^2/(2*s^2))*(x-gama*sigma)/(sqrt(2*pi)*s*x*(x-gama)*sigma))
}
PDFdifferenceLN = function(sigma, gama, muM, muP, s, omega, xbar, x){
  # difference between the planner and market pdfs, allowing different location parameters
  PDFxPlanTLN(sigma, gama, muP, s, omega, xbar, x)-PDFxMarketTLN(sigma, gama, muM, s, xbar, x)
}
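These helper functions are not called in this section. A minimal usage sketch with purely illustrative parameter values (the values of sigma, gama, omega, and k below are hypothetical, not the estimates obtained above), restricted to \(x>\gamma\sigma\) so that both densities are positive:
# Illustrative values only, not estimates from the data
sigma = 1.1; gama = 1; omega = 0.5; k = 1.2
x = seq(gama*sigma + 0.01, 10, length.out = 200)
plot(x, PDFxMarketP(sigma, gama, omega, k, x), type = "l", col = 2, ylab = "density")
lines(x, PDFxPlanP(sigma, gama, omega, k, x), col = 4)
legend("topright", c("Market", "Plan"), col = c(2, 4), lwd = 1)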