{ "cells": [ { "cell_type": "markdown", "id": "a1b2c3d4", "metadata": {}, "source": [ "# Mojito Processing Pipeline\n", "\n", "Demonstrates the full TDI data processing pipeline using the `process_pipeline` utility from `MojitoProcessor`.\n", "\n", "The pipeline applies the following steps in order:\n", "\n", "| Step | Operation | Default |\n", "|------|-----------|------|\n", "| 1 | Band-pass filter | configurable (f_low, f_high), order |\n", "| 2 | Trim edge artefacts | 2.2% from each end |\n", "| 3 | Truncate to working length | configurable |\n", "| 4 | Downsample | configurable target rate |\n", "| 5 | Tukey window | α = 0.025 |\n", "\n", "The final cell verifies consistency by comparing the periodogram against the Mojito L1 noise estimate." ] }, { "cell_type": "code", "execution_count": null, "id": "b2c3d4e5", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from mojito import MojitoL1File\n", "from MojitoProcessor import process_pipeline\n", "\n", "# Show pipeline progress at INFO level\n", "logging.basicConfig(\n", " level=logging.INFO,\n", " format=\"%(name)s | %(message)s\",\n", ")" ] }, { "cell_type": "markdown", "id": "c3d4e5f6", "metadata": {}, "source": [ "## 1. 
Load Data" ] }, { "cell_type": "code", "execution_count": null, "id": "d4e5f6a7", "metadata": {}, "outputs": [], "source": [ "mojito_data_file = (\n", " \"../Mojito_Data/NOISE_731d_0.25s_L1_source0_0_20251206T220508924302Z.h5\"\n", ")\n", "\n", "# ── How many days to load (lazy slicing — only reads what is needed) ───────────\n", "load_days = None # set to None to load the full dataset\n", "\n", "with MojitoL1File(mojito_data_file) as f:\n", " tdi_sampling = f.tdis.time_sampling\n", " ltt_sampling = f.ltts.time_sampling\n", " orbit_sampling = f.orbits.time_sampling\n", "\n", " # Consistent sample counts across all data streams\n", " n_tdi = int(load_days * 86400 * tdi_sampling.fs) if load_days else tdi_sampling.size\n", " n_ltt = int(load_days * 86400 * ltt_sampling.fs) if load_days else ltt_sampling.size\n", " n_orbit = (\n", " int(load_days * 86400 * orbit_sampling.fs) if load_days else orbit_sampling.size\n", " )\n", "\n", " data = {\n", " # ── TDI observables ──────────────────────────────────────────────────\n", " \"tdis\": {\n", " \"X\": f.tdis.x2[:n_tdi],\n", " \"Y\": f.tdis.y2[:n_tdi],\n", " \"Z\": f.tdis.z2[:n_tdi],\n", " \"A\": f.tdis.a2[:n_tdi],\n", " \"E\": f.tdis.e2[:n_tdi],\n", " \"T\": f.tdis.t2[:n_tdi],\n", " },\n", " \"fs\": tdi_sampling.fs,\n", " \"dt\": tdi_sampling.dt,\n", " \"t_tdi\": tdi_sampling.t()[:n_tdi],\n", " # ── Light travel times ───────────────────────────────────────────────\n", " \"ltts\": {\n", " \"12\": f.ltts.ltt_12[:n_ltt],\n", " \"13\": f.ltts.ltt_13[:n_ltt],\n", " \"21\": f.ltts.ltt_21[:n_ltt],\n", " \"23\": f.ltts.ltt_23[:n_ltt],\n", " \"31\": f.ltts.ltt_31[:n_ltt],\n", " \"32\": f.ltts.ltt_32[:n_ltt],\n", " },\n", " \"ltt_derivatives\": {\n", " \"12\": f.ltts.ltt_derivative_12[:n_ltt],\n", " \"13\": f.ltts.ltt_derivative_13[:n_ltt],\n", " \"21\": f.ltts.ltt_derivative_21[:n_ltt],\n", " \"23\": f.ltts.ltt_derivative_23[:n_ltt],\n", " \"31\": f.ltts.ltt_derivative_31[:n_ltt],\n", " \"32\": 
f.ltts.ltt_derivative_32[:n_ltt],\n", " },\n", " \"ltt_times\": ltt_sampling.t()[:n_ltt],\n", " # ── Spacecraft orbits ────────────────────────────────────────────────\n", " \"orbits\": f.orbits.positions[:n_orbit], # (n_orbit, 3, 3)\n", " \"velocities\": f.orbits.velocities[:n_orbit], # (n_orbit, 3, 3)\n", " \"orbit_times\": orbit_sampling.t()[:n_orbit],\n", " # ── Noise estimates (frequency-domain, not truncated) ────────────────\n", " \"noise_estimates\": {\n", " \"xyz\": f.noise_estimates.xyz[:],\n", " \"aet\": f.noise_estimates.aet[:],\n", " \"freqs\": f.noise_estimates.freq_sampling.f(),\n", " },\n", " # ── Metadata ─────────────────────────────────────────────────────────\n", " \"metadata\": {\n", " \"laser_frequency\": f.laser_frequency,\n", " \"pipeline_name\": f.pipeline_names,\n", " },\n", " }\n", "\n", "n_samples = len(data[\"tdis\"][\"X\"])\n", "duration = n_samples * data[\"dt\"]\n", "print(f\"Loaded: {n_samples:,} samples @ {data['fs']} Hz ({duration / 86400:.2f} days)\")\n", "print(f\"TDI channels: {list(data['tdis'].keys())}\")" ] }, { "cell_type": "markdown", "id": "e5f6a7b8", "metadata": {}, "source": [ "## 2. Run the Processing Pipeline\n", "\n", "All pipeline parameters are configurable here. The pipeline runs in this order:\n", "\n", "```\n", "bandpass/highpass filter → downsample → trim → truncate → window\n", "```\n", "\n", "We remark that the time-domain data will be normalised by the central frequency of the laser through the processing pipeline. The units are thus dimensionless. 
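",
    "\n",
    "As a minimal sketch of what this normalisation amounts to (illustrative only, not the `MojitoProcessor` internals; the laser frequency below is an assumed placeholder, whereas the notebook reads the real value from the file metadata), dividing a TDI stream by the laser's central frequency converts it to dimensionless fractional-frequency units:\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "laser_frequency = 2.816e14  # Hz -- assumed placeholder value\n",
    "raw_tdi = np.random.default_rng(0).normal(size=1000)  # stand-in for a TDI stream in Hz\n",
    "\n",
    "# dividing by the carrier frequency yields dimensionless fractional frequency\n",
    "fractional = raw_tdi / laser_frequency\n",
    "```\n",
    "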
" ] }, { "cell_type": "code", "execution_count": null, "id": "f6a7b8c9", "metadata": {}, "outputs": [], "source": [ "# ── Pipeline parameters ───────────────────────────────────────────────────────\n", "\n", "# Downsampling parameters\n", "downsample_kwargs = {\n", " \"target_fs\": 0.2, # Hz — target sampling rate (None = no downsampling).\n", " \"kaiser_window\": 31.0, # Kaiser window beta parameter (higher = more aggressive anti-aliasing)\n", "}\n", "\n", "# Filter parameters\n", "filter_kwargs = {\n", " \"highpass_cutoff\": 5e-6, # Hz — high-pass cutoff (always applied)\n", " \"lowpass_cutoff\": 0.8\n", " * downsample_kwargs[\n", " \"target_fs\"\n", " ], # Hz — low-pass cutoff (set None for high-pass only)\n", " \"order\": 2, # Butterworth filter order\n", "}\n", "\n", "# Trim parameters\n", "trim_kwargs = {\n", " \"fraction\": 0.02, # Fraction of post-downsample duration trimmed from each end.\n", " # Total amount of data remaining is (1 - fraction) * N, for N\n", " # the number of samples after downsampling.\n", "}\n", "\n", "# Segmentation parameters\n", "truncate_kwargs = {\n", " \"days\": 7.0, # Segment length in days (splits dataset into 7-day chunks)\n", "}\n", "\n", "# Window parameters\n", "window_kwargs = {\n", " \"window\": \"tukey\", # Window type: 'tukey', 'hann', 'hamming', 'blackman', 'planck', or None for no windowing\n", " \"alpha\": 0.0125, # Taper fraction for Tukey window\n", "}\n", "# ─────────────────────────────────────────────────────────────────────────────\n", "\n", "processed_segments = process_pipeline(\n", " data,\n", " downsample_kwargs=downsample_kwargs,\n", " filter_kwargs=filter_kwargs,\n", " trim_kwargs=trim_kwargs,\n", " truncate_kwargs=truncate_kwargs,\n", " window_kwargs=window_kwargs,\n", ")\n", "\n", "# For the rest of the notebook, use the first segment\n", "sp_0 = processed_segments[\"segment0\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "ab9d3dba", "metadata": {}, "outputs": [], "source": [ "t_days = 
(sp_0.data[\"t\"] - sp_0.data[\"t\"][0]) / 86400 # relative time in days\n", "\n", "fig, axes = plt.subplots(3, 1, figsize=(14, 7), sharex=True)\n", "\n", "for i, ch in enumerate([\"X\", \"Y\", \"Z\"]):\n", " axes[i].plot(t_days, sp_0.data[ch], linewidth=0.4, color=f\"C{i}\", label=f\"TDI-{ch}\")\n", " axes[i].set_ylabel(\"Frac. freq.\", fontsize=12)\n", " axes[i].legend(loc=\"upper right\", fontsize=11)\n", " axes[i].grid(True, alpha=0.3)\n", " axes[i].set_title(f\"TDI-{ch}\", fontsize=12)\n", "\n", "axes[2].set_xlabel(\"Time [days]\", fontsize=12)\n", "fig.suptitle(\n", " f\"TDI X, Y, Z — segment 0 ({truncate_kwargs['days']:.0f}-day window)\",\n", " fontsize=14,\n", " fontweight=\"bold\",\n", ")\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "a7b8c9d0", "metadata": {}, "source": [ "## 3. Compute FFT and Periodogram\n", "\n", "The one-sided periodogram estimate of the noise Power Spectral Density $S$ is\n", "\n", "$$\\hat{S}(f_k) = \\frac{2\\,\\Delta t}{N} \\left|\\tilde{n}(f_k)\\right|^2$$\n", "\n", "where $\\tilde{n}$ is the FFT of the processed time series, $\\Delta t$ the sampling interval and $N$ the length of the truncated data set." 
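,
    "\n",
    "A minimal self-contained sketch of this estimator (plain NumPy on a synthetic white-noise series standing in for the notebook's processed data):\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "rng = np.random.default_rng(42)\n",
    "dt = 5.0                    # s, sampling interval (illustrative)\n",
    "n = rng.normal(size=4096)   # stand-in for a processed TDI time series\n",
    "N = n.size\n",
    "\n",
    "n_tilde = np.fft.rfft(n)                   # one-sided FFT\n",
    "f_k = np.fft.rfftfreq(N, d=dt)             # frequencies f_k\n",
    "S_hat = 2 * dt / N * np.abs(n_tilde) ** 2  # one-sided periodogram\n",
    "\n",
    "# sanity check: for white noise of variance sigma^2 the one-sided PSD\n",
    "# is 2*dt*sigma^2, and the periodogram scatters around that level\n",
    "```"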
] }, { "cell_type": "code", "execution_count": null, "id": "b8c9d0e1", "metadata": {}, "outputs": [], "source": [ "# Data is already normalised by laser_frequency inside process_pipeline.\n", "# Convert to A, E, T with to_aet(), then form the periodogram by hand from fft().\n", "CENTRAL_FREQ = data[\"metadata\"][\"laser_frequency\"]\n", "\n", "freq, fft_xyz = sp_0.fft()\n", "\n", "sp_0_aet = sp_0.to_aet()\n", "_, fft_aet = sp_0_aet.fft()\n", "\n", "# psd_norm for reference (this is the factor baked into periodogram())\n", "psd_norm = 2 * sp_0.dt / sp_0.N\n", "\n", "psd_xyz = {ch: (np.abs(fft_xyz[ch]) ** 2) * psd_norm for ch in [\"X\", \"Y\", \"Z\"]}\n", "psd_aet = {ch: (np.abs(fft_aet[ch]) ** 2) * psd_norm for ch in [\"A\", \"E\", \"T\"]}\n", "\n", "# Mojito L1 noise estimates, normalised to fractional frequency units\n", "noise_freqs = data[\"noise_estimates\"][\"freqs\"]\n", "noise_cov_xyz = data[\"noise_estimates\"][\"xyz\"]\n", "noise_cov_aet = data[\"noise_estimates\"][\"aet\"]\n", "\n", "l1_xyz = {\n", " ch: noise_cov_xyz[0][:, i, i] / CENTRAL_FREQ**2\n", " for i, ch in enumerate([\"X\", \"Y\", \"Z\"])\n", "}\n", "l1_aet = {\n", " ch: noise_cov_aet[0][:, i, i] / CENTRAL_FREQ**2\n", " for i, ch in enumerate([\"A\", \"E\", \"T\"])\n", "}" ] }, { "cell_type": "markdown", "id": "c9d0e1f2", "metadata": {}, "source": [ "## 4. Periodogram vs Mojito L1 Estimate\n", "\n", "A good processing pipeline produces a periodogram (coloured lines) that traces the Mojito L1 noise model (red dashed) throughout the science band (1×10⁻⁴ to 1×10⁻¹ Hz). Deviations at the lowest frequencies typically indicate residual artefacts from filtering or insufficient trimming." 
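,
    "\n",
    "The visual comparison can be made quantitative by interpolating the model onto the periodogram grid and taking the median per-bin ratio over the science band. The sketch below is self-contained, with toy arrays standing in for the notebook's `freq`, `psd_xyz` and L1 estimates; note that periodogram bins of Gaussian noise scatter as S·χ²₂/2, so a consistent model gives a median ratio near ln 2 ≈ 0.69 rather than 1:\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "rng = np.random.default_rng(1)\n",
    "freq = np.linspace(1e-5, 0.1, 2000)                  # toy periodogram frequency grid\n",
    "model_level = 4e-41                                  # flat toy noise PSD, 1/Hz\n",
    "psd = model_level * rng.exponential(size=freq.size)  # periodogram ~ S * Exp(1) per bin\n",
    "\n",
    "noise_freqs = np.linspace(1e-5, 0.1, 200)            # model on a coarser grid\n",
    "model_coarse = np.full(noise_freqs.size, model_level)\n",
    "\n",
    "band = (freq >= 1e-4) & (freq <= 1e-1)\n",
    "model_on_grid = np.interp(freq[band], noise_freqs, model_coarse)\n",
    "ratio = np.median(psd[band] / model_on_grid)\n",
    "print(f'Median periodogram/model ratio in science band: {ratio:.2f}')\n",
    "```"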
] }, { "cell_type": "code", "execution_count": null, "id": "d0e1f2a3", "metadata": {}, "outputs": [], "source": [ "nyquist = sp_0.fs / 2\n", "xyz_colors = [\"C0\", \"C1\", \"C2\"]\n", "aet_colors = [\"#e377c2\", \"#9467bd\", \"#8c564b\"]\n", "\n", "fig, axes = plt.subplots(3, 2, figsize=(16, 10), sharex=False, sharey=True)\n", "\n", "for i, ch in enumerate([\"X\", \"Y\", \"Z\"]):\n", " ax = axes[i, 0]\n", " ax.loglog(\n", " freq[1:],\n", " psd_xyz[ch][1:],\n", " linewidth=1.0,\n", " color=xyz_colors[i],\n", " label=f\"TDI-{ch}\",\n", " )\n", " ax.loglog(\n", " noise_freqs,\n", " l1_xyz[ch],\n", " linestyle=\"dashed\",\n", " color=\"red\",\n", " linewidth=2.0,\n", " label=\"Mojito L1 estimate\",\n", " )\n", " ax.axvspan(1e-4, 1e-1, alpha=0.15, color=\"green\", label=\"Science band\")\n", " ax.set_xlim(1e-5, nyquist)\n", " ax.set_ylim(1e-53, 1e-35)\n", " ax.set_ylabel(\"PSD [1/Hz]\", fontsize=20)\n", " ax.grid(True, which=\"both\", alpha=0.3)\n", " ax.legend(loc=\"upper left\", fontsize=14, framealpha=0.95)\n", " ax.tick_params(axis=\"both\", which=\"major\", labelsize=16)\n", "for i, ch in enumerate([\"A\", \"E\", \"T\"]):\n", " ax = axes[i, 1]\n", " ax.loglog(\n", " freq[1:],\n", " psd_aet[ch][1:],\n", " linewidth=1.0,\n", " color=aet_colors[i],\n", " label=f\"TDI-{ch}\",\n", " )\n", " ax.loglog(noise_freqs, l1_aet[ch], linestyle=\"dashed\", color=\"red\", linewidth=2.0)\n", " ax.axvspan(1e-4, 1e-1, alpha=0.15, color=\"green\")\n", " ax.set_xlim(1e-5, nyquist)\n", " ax.set_ylim(1e-53, 1e-35)\n", " ax.grid(True, which=\"both\", alpha=0.3)\n", " ax.legend(loc=\"upper left\", fontsize=14, framealpha=0.95)\n", " ax.tick_params(axis=\"both\", which=\"major\", labelsize=16)\n", "\n", "axes[2, 0].set_xlabel(\"Frequency [Hz]\", fontsize=20)\n", "axes[2, 1].set_xlabel(\"Frequency [Hz]\", fontsize=20)\n", "\n", "fig.suptitle(\n", " f\"Processed Mojito Periodogram vs L1 Estimate (segment0)\\n\"\n", " f\"HP={filter_kwargs['highpass_cutoff']:.0e} Hz, \"\n", " 
f\"LP={filter_kwargs['lowpass_cutoff']} Hz, \"\n", " f\"trim={trim_kwargs['fraction']:.1%}, \"\n", " f\"{truncate_kwargs['days']} days/segment, \"\n", " f\"fs={sp_0.fs} Hz, \"\n", " f\"window={window_kwargs['window']} α={window_kwargs.get('alpha', 'N/A')}\",\n", " fontsize=20,\n", " fontweight=\"bold\",\n", ")\n", "plt.tight_layout()\n", "plt.show()" ] } ], "metadata": {}, "nbformat": 4, "nbformat_minor": 5 }