Note

This tutorial was generated from an IPython notebook that can be downloaded here.

# ref: https://radvel.readthedocs.io/en/latest/tutorials/K2-24_Fitting+MCMC.html
# ref: https://arxiv.org/abs/1511.04497

%matplotlib inline
%config InlineBackend.figure_format = "retina"

from matplotlib import rcParams
rcParams["savefig.dpi"] = 100
rcParams["figure.dpi"] = 100
rcParams["font.size"] = 16
rcParams["text.usetex"] = False
rcParams["font.family"] = ["sans-serif"]
rcParams["font.sans-serif"] = ["cmss10"]
rcParams["axes.unicode_minus"] = False

import corner
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

from astropy.io import fits
from astropy.stats import BoxLeastSquares, LombScargle
from astropy import constants, units

import pymc3 as pm
import pymc3.distributions.transforms as tr

import theano
import theano.tensor as tt

import exoplanet
from exoplanet import distributions
from exoplanet.gp import terms, GP
lc_url = "https://archive.stsci.edu/hlsps/everest/v2/c02/203700000/71098/hlsp_everest_k2_llc_203771098-c02_kepler_v2.0_lc.fits"
with fits.open(lc_url) as hdus:
    lc = hdus[1].data
    lc_hdr = hdus[1].header

texp = lc_hdr["FRAMETIM"] * lc_hdr["NUM_FRM"]
texp /= 60.0 * 60.0 * 24.0

m = (np.arange(len(lc)) > 100) & np.isfinite(lc["FLUX"]) & np.isfinite(lc["TIME"])

bad_bits=[1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17]
qual = lc["QUALITY"]
for b in bad_bits:
    m &= qual & 2 ** (b - 1) == 0

x = lc["TIME"][m]
y = lc["FLUX"][m]
mu = np.median(y)
y = (y / mu - 1) * 1e3

smooth = savgol_filter(y, 501, polyorder=5)
resid = y - smooth
sigma = np.sqrt(np.mean(resid**2))
m = resid < 3 * sigma

plt.plot(x, y, "k", label="data")
plt.plot(x, smooth, label="smoothed")
plt.plot(x[~m], y[~m], "xr", label="outliers")
plt.legend(fontsize=12)
plt.xlabel("time")
plt.ylabel("flux")

x_ref = np.min(x[m])
x = np.ascontiguousarray(x[m], dtype=np.float64)
x -= x_ref
y = np.ascontiguousarray(y[m], dtype=np.float64)
smooth = savgol_filter(y, 501, polyorder=5)
../../_images/k2-24-3_1_0.png
data = pd.read_csv("https://raw.githubusercontent.com/California-Planet-Search/radvel/master/example_data/epic203771098.csv", index_col=0)

rv_x = np.array(data.t - x_ref)
rv_y = np.array(data.vel)
rv_yerr = np.array(data.errvel)

plt.errorbar(rv_x, rv_y, yerr=rv_yerr, fmt=".k")

plt.xlabel("time")
plt.ylabel("radial velocity");
../../_images/k2-24-3_2_0.png
m = np.zeros(len(x), dtype=bool)
period_grid = np.exp(np.linspace(np.log(10), np.log(50), 50000))
bls_results = []
periods = []
t0s = []
depths = []

for i in range(2):
    bls = BoxLeastSquares(x[~m], y[~m] - smooth[~m])
    bls_power = bls.power(period_grid, 0.1, oversample=20)
    bls_results.append(bls_power)

    index = np.argmax(bls_power.power)
    periods.append(bls_power.period[index])
    t0s.append(bls_power.transit_time[index])
    depths.append(bls_power.depth[index])

    m |= bls.transit_mask(x, periods[-1], 0.5, t0s[-1])

fig, axes = plt.subplots(len(bls_results), 1, figsize=(10, 6), sharex=True)

for i in range(len(bls_results)):
    ax = axes[i]
    ax.axvline(np.log10(periods[i]), color="C1", lw=5, alpha=0.8)
    ax.plot(np.log10(bls_results[i].period), bls_results[i].power, "k")
    ax.set_ylabel("bls power")
    ax.set_yticks([])

ax = axes[-1]
ax.set_xlim(np.log10(period_grid.min()), np.log10(period_grid.max()))
ax.set_xlabel("$\log_{10}$ period");
../../_images/k2-24-3_3_0.png
fig, axes = plt.subplots(len(bls_results), 1, figsize=(10, 6), sharex=True)

for i in range(len(bls_results)):
    ax = axes[i]
    p = periods[i]
    x_fold = (x - t0s[i] + 0.5*p) % p - 0.5*p
    ax.plot(x_fold, y - smooth, ".k")

    bins = np.linspace(-0.4, 0.4, 20)
    denom, _ = np.histogram(x_fold, bins)
    num, _ = np.histogram(x_fold, bins, weights=y - smooth)
    denom[num == 0] = 1.0
    ax.plot(0.5*(bins[1:] + bins[:-1]), num / denom, color="C1")

ax = axes[-1]
ax.set_xlim(-0.4, 0.4)
ax.set_xlabel("time since transit");
../../_images/k2-24-3_4_0.png
msini = exoplanet.estimators.estimate_minimum_mass(periods, rv_x, rv_y, rv_yerr, t0s=t0s).to(units.M_earth)
print(msini)
[30.3178443  22.08252394] earthMass
N_planets = len(periods)

rv_x_grid = np.linspace(rv_x.min(), rv_x.max(), 1000)

def build_model(mask=None):
    if mask is None:
        mask = np.ones(len(x), dtype=bool)
    with pm.Model() as model:

        # Stellar properties
        mean = pm.Normal("mean", mu=0.0, sd=1.0)
        u = distributions.Triangle("u", shape=2)
        mstar = pm.Bound(pm.Normal, lower=0.0)("mstar", mu=1.12, sd=0.05)
        rstar = pm.Bound(pm.Normal, lower=0.0)("rstar", mu=1.21, sd=0.11)

        t0 = pm.Normal("t0", mu=t0s, sd=0.1, shape=N_planets)
        logperiod = pm.Normal("logperiod", mu=np.log(periods), sd=1e-4, shape=N_planets)
        period = pm.Deterministic("period", tt.exp(logperiod))

        logm = pm.Uniform(
            "logm",
            lower=np.log(10.0),
            upper=np.log(50.0),
            shape=N_planets, testval=np.log(msini.value))
        m = pm.Deterministic("m", tt.exp(logm))

        # Radius/impact parameter joint distribution
        rb_test = 0.5 + np.zeros((2, N_planets))
        rb_test[0, :] = np.sqrt(1e-3)*np.sqrt(depths)
        rb = distributions.RadiusImpactParameter(
            "rb", min_radius=0.01, max_radius=0.1,
            shape=(2, N_planets), testval=rb_test)
        ror = pm.Deterministic("ror", rb[0])
        r = pm.Deterministic("r", ror * rstar)
        b = pm.Deterministic("b", rb[1])

        # Log-uniform prior on r
        pm.Potential("logrprior", -tt.log(r))

        ecc = pm.Beta("ecc", alpha=0.867, beta=3.03, shape=N_planets,
                      testval=np.array([0.1, 0.1]))
        omega = distributions.Angle("omega", shape=N_planets)

        logjitter2 = pm.Normal("logjitter2", mu=np.log(0.05), sd=5.0)
        v0 = pm.Normal("v0", mu=0.0, sd=1.0)
        dvdt = pm.Normal("dvdt", mu=0.0, sd=0.1)
        d2vdt2 = pm.Normal("d2vdt2", mu=0.0, sd=0.01)

        # Set up the orbit
        orbit = exoplanet.orbits.KeplerianOrbit(
            period=period,
            t0=t0,
            b=b,
            ecc=ecc,
            omega=omega,
            m_planet=m,
            m_planet_units=msini.unit,
            r_star=rstar,
            m_star=mstar,
        )
        pm.Deterministic("incl", orbit.incl)
        pm.Deterministic("a", orbit.a)

        lc = exoplanet.StarryLightCurve(u, r_star=rstar)
        transit = lc.get_light_curve(r, orbit, x[mask], texp=texp, oversample=5, order=2) * 1e3
#         transit = tt.zeros(len(y))
#         transit = tt.set_subtensor(transit[mask_near], tt.sum(transits, axis=-1) * 1e3)
        pm.Deterministic("transit", transit)

        # Likelihood
        logs2 = pm.Normal("logs2", mu=-5.0, sd=5.0)
        logw0 = pm.Normal("logw0", mu=0.0, sd=5.0)
        logQ = pm.Normal("logQ", mu=1.0, sd=5.0)
        delta = pm.Normal("delta", mu=0.0, sd=10.0)
        logS0 = pm.Deterministic("logS0", delta - logw0 - logQ)

        kernel = terms.SHOTerm(S0=tt.exp(logS0), w0=tt.exp(logw0), Q=tt.exp(logQ))

        gp = GP(kernel, x[mask], tt.exp(logs2) + np.zeros_like(x[mask]), J=2)

        resid = y[mask] - transit - mean
        pm.Potential("obs", gp.log_likelihood(resid))

        gp_pred = gp.predict()
        pm.Deterministic("gp", mean + gp_pred)
        pm.Deterministic("resid", resid - gp_pred)

        # RV model
        # Convert R_sun / day to m / s
        rv_x_mid = 0.5 * (rv_x.max() + rv_x.min())
        rv_dx = rv_x - rv_x_mid
        rv_dx_grid = rv_x_grid - rv_x_mid
        rv_conv = (1 * units.R_sun / units.day).to(units.m / units.s).value
        vrad = rv_conv * tt.sum(orbit.get_star_velocity(rv_x)[2], axis=-1)
        bkg = v0 + dvdt * rv_dx + d2vdt2 * rv_dx**2
        pm.Normal("rv_obs", mu=vrad + bkg, sd=tt.sqrt(rv_yerr**2 + tt.exp(logjitter2)), observed=rv_y)
        pm.Deterministic("vrad", rv_conv * orbit.get_star_velocity(rv_x_grid)[2])
        pm.Deterministic("bkg", v0 + dvdt * rv_dx_grid + d2vdt2 * rv_dx_grid**2)

        soln = pm.find_MAP(start=model.test_point, vars=[logs2, delta, logw0, logQ, mean])
        soln = pm.find_MAP(start=soln, vars=[logperiod, t0])
        soln = pm.find_MAP(start=soln)

    return model, soln
model0, soln0 = build_model()
logp = 1,875.4, ||grad|| = 1.5226: 100%|██████████| 27/27 [00:00<00:00, 211.07it/s]
logp = 2,069.1, ||grad|| = 702.99: 100%|██████████| 33/33 [00:00<00:00, 58.22it/s]
logp = 4,711.9, ||grad|| = 1,894.7: 100%|██████████| 3307/3307 [00:22<00:00, 143.83it/s]
# Sigma clipping
resid = soln0["resid"]
sigma = np.sqrt(np.mean(resid**2))
mask = np.abs(resid) < 4 * sigma

plt.plot(x, resid)
plt.plot(x[~mask], resid[~mask], "xr")
[<matplotlib.lines.Line2D at 0x1c3b66af98>]
../../_images/k2-24-3_8_1.png
model, soln = build_model(mask)
logp = 1,929.7, ||grad|| = 0.0020057: 100%|██████████| 33/33 [00:00<00:00, 245.05it/s]
logp = 2,143, ||grad|| = 2,207.3: 100%|██████████| 45/45 [00:00<00:00, 93.59it/s]
logp = 5,246.1, ||grad|| = 776.46: 100%|██████████| 3354/3354 [00:20<00:00, 164.11it/s]
fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

ax = axes[0]
ax.plot(x[mask], y[mask], ".k")
ax.plot(x[mask], soln["gp"], color="C1", alpha=0.8, label="gaussian process")
ax.set_ylabel("raw data")
ax.legend()

ax = axes[1]
ax.plot(x[mask], y[mask] - soln["gp"], ".k")
ax.plot(x[mask], soln["transit"], color="C1", alpha=0.8, label="transit")
ax.set_ylabel("de-trended data")
ax.legend()

ax = axes[2]
ax.plot(x[mask], soln["resid"], ".k")
ax.set_ylabel("residuals")
ax.set_xlabel("time")
ax.set_xlim(x[mask].min(), x[mask].max());
../../_images/k2-24-3_10_0.png
plt.plot(rv_x_grid, soln["vrad"], color="k", lw=2, alpha=0.5)
plt.plot(rv_x_grid, soln["bkg"], "--", color="k", lw=2, alpha=0.5)
plt.plot(rv_x_grid, np.sum(soln["vrad"], axis=-1) + soln["bkg"], color="C1", lw=2)
plt.errorbar(rv_x, rv_y, yerr=rv_yerr, fmt=".k")
plt.xlabel("time")
plt.ylabel("radial velocity")
Text(0,0.5,'radial velocity')
../../_images/k2-24-3_11_1.png
schedule = exoplanet.sampling.TuningSchedule(start=50, window=50)

with model:
    burnin = schedule.tune(tune=2000, start=soln, step_kwargs=dict(regular_window=0))
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 104/104 [00:44<00:00,  1.05s/draws]
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 104/104 [00:49<00:00,  1.03s/draws]
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 204/204 [01:42<00:00,  1.79s/draws]
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 404/404 [03:37<00:00,  1.95s/draws]
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 804/804 [06:50<00:00,  1.28draws/s]
Only 2 samples in chain.
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 2404/2404 [24:13<00:00,  1.19draws/s]
plt.plot(burnin["ecc"])
[<matplotlib.lines.Line2D at 0x1c4e950518>,
 <matplotlib.lines.Line2D at 0x1c4e950668>]
../../_images/k2-24-3_13_1.png
samples = pm.trace_to_dataframe(burnin, varnames=["u", "rstar", "mstar", "b"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_14_0.png
samples = pm.trace_to_dataframe(burnin, varnames=["t0", "period", "r", "b", "ecc", "omega"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_15_0.png
samples = pm.trace_to_dataframe(burnin, varnames=["mean", "logs2", "logS0", "logw0", "logQ"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_16_0.png
with model:
    trace = schedule.sample(draws=2000)
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [delta, logQ, logw0, logs2, d2vdt2, dvdt, v0, logjitter2, omega, ecc, rb, logm, logperiod, t0, rstar, mstar, u, mean]
Sampling 2 chains: 100%|██████████| 4100/4100 [38:10<00:00,  1.23s/draws]
There were 70 divergences after tuning. Increase target_accept or reparameterize.
The acceptance probability does not match the target. It is 0.8850118570461237, but should be close to 0.8. Try to increase the number of tuning steps.
There were 318 divergences after tuning. Increase target_accept or reparameterize.
The estimated number of effective samples is smaller than 200 for some parameters.
plt.plot(trace["rstar"])
[<matplotlib.lines.Line2D at 0x1c39a679b0>]
../../_images/k2-24-3_18_1.png
samples = pm.trace_to_dataframe(trace, varnames=["u", "rstar", "mstar"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_19_0.png
samples = pm.trace_to_dataframe(trace, varnames=["t0", "period", "r", "m", "b", "ecc", "omega"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_20_0.png
samples = pm.trace_to_dataframe(trace, varnames=["mean", "logs2", "logS0", "logw0", "logQ"])
samples.columns = [k.replace("_", " ") for k in samples.columns]
corner.corner(samples);
../../_images/k2-24-3_21_0.png
fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

gp_mod = np.median(trace["gp"], axis=0)
transit_mod = np.median(trace["transit"], axis=0)

ax = axes[0]
ax.plot(x[mask], y[mask], ".k")
ax.plot(x[mask], gp_mod, color="C1", alpha=0.8, label="gaussian process")
ax.set_ylabel("raw data")
ax.legend()

ax = axes[1]
ax.plot(x[mask], y[mask] - gp_mod, ".k")
ax.plot(x[mask], transit_mod, color="C1", alpha=0.8, label="transit")
ax.set_ylabel("de-trended data")
ax.legend()

ax = axes[2]
ax.plot(x[mask], np.median(trace["resid"], axis=0), ".k")
ax.set_ylabel("residuals")
ax.set_xlabel("time")
ax.set_xlim(x[mask].min(), x[mask].max());
../../_images/k2-24-3_22_0.png
vrad_mod = np.median(trace["vrad"], axis=0)
bkg_mod = np.median(trace["bkg"], axis=0)

plt.plot(rv_x_grid, vrad_mod, color="k", lw=2, alpha=0.5)
plt.plot(rv_x_grid, bkg_mod, "--", color="k", lw=2, alpha=0.5)
plt.plot(rv_x_grid, np.sum(vrad_mod, axis=-1) + bkg_mod, color="C1", lw=2)
plt.errorbar(rv_x, rv_y, yerr=rv_yerr, fmt=".k")
plt.xlabel("time")
plt.ylabel("radial velocity")
Text(0,0.5,'radial velocity')
../../_images/k2-24-3_23_1.png