| name | molecular-clock |
| description | Estimate divergence times and evolutionary rates using molecular clock methods and Bayesian dating. |
Molecular Clock Analysis
Estimate the timing of evolutionary events using molecular sequence data and fossil calibrations.
BEAST2 Analysis
<!-- Example BEAST2 XML configuration -->
<beast version="2.6">
<data id="alignment" dataType="nucleotide">
<!-- Sequence data goes here -->
</data>
<!-- Substitution model -->
<substModel id="hky" spec="HKY">
<kappa idref="kappa"/>
<frequencies id="freqs" spec="Frequencies" data="@alignment"/>
</substModel>
<!-- Clock model -->
<branchRateModel id="strictClock" spec="StrictClockModel">
<clock.rate id="clockRate" spec="parameter.RealParameter" value="1.0"/>
</branchRateModel>
<!-- Tree prior (coalescent) -->
<distribution id="coalescent" spec="Coalescent">
<populationModel id="constantPop" spec="ConstantPopulation">
<popSize id="popSize" spec="parameter.RealParameter" value="1.0"/>
</populationModel>
<treeIntervals spec="TreeIntervals" tree="@tree"/>
</distribution>
</beast>
Running BEAST2
# Run BEAST analysis
beast -threads 4 analysis.xml
# Analyze trace (check convergence)
tracer analysis.log
# Summarize trees
treeannotator -burnin 10 -heights mean analysis.trees summary.tree
# Visualize tree
figtree summary.tree
Calibration Points
def create_calibration_prior(min_age, max_age, distribution='uniform'):
"""Create calibration prior for molecular dating."""
calibrations = {
'uniform': {
'type': 'Uniform',
'lower': min_age,
'upper': max_age
},
'normal': {
'type': 'Normal',
'mean': (min_age + max_age) / 2,
'sigma': (max_age - min_age) / 4
},
'lognormal': {
'type': 'LogNormal',
'M': np.log((min_age + max_age) / 2),
'S': 0.5,
'offset': min_age
}
}
return calibrations[distribution]
# Example calibration
calibration = create_calibration_prior(
min_age=65, # Million years ago
max_age=75,
distribution='normal'
)
RelTime Analysis (MEGA)
import subprocess
def run_reltime(alignment_file, tree_file, calibrations_file):
"""Run RelTime for molecular dating."""
# RelTime is part of MEGA
cmd = [
'megacc',
'-a', 'reltime_ml.mao',
'-d', alignment_file,
'-t', tree_file,
'-c', calibrations_file,
'-o', 'reltime_output'
]
subprocess.run(cmd)
r8s Analysis
# r8s input file format
#NEXUS
begin trees;
tree mytree = ((A:0.1,B:0.2):0.3,(C:0.1,D:0.2):0.4);
end;
begin rates;
blformat lengths=persite nsites=1000;
collapse;
mrca calibration_node A B;
fixage taxon=calibration_node age=100;
divtime method=pl algorithm=tn;
showage;
end;
Python Molecular Clock Calculations
import numpy as np
from scipy import stats
def estimate_substitution_rate(divergence, time_mya, ci=0.95):
"""Estimate substitution rate from divergence and time."""
# Rate = divergence / (2 * time) for bidirectional divergence
rate = divergence / (2 * time_mya)
# Bootstrap confidence interval
return {
'rate': rate,
'rate_per_site_per_my': rate,
'rate_per_site_per_year': rate / 1e6
}
def test_molecular_clock(branch_lengths, tree_topology):
"""Test for clock-like behavior using likelihood ratio test."""
# Compare likelihoods of clock vs. no-clock models
# Returns p-value for clock hypothesis
# Chi-square test with df = number of tips - 2
pass
def calculate_tmrca(sequences, mutation_rate, generation_time=1):
"""
Estimate time to most recent common ancestor.
Parameters:
- sequences: aligned sequences
- mutation_rate: mutations per site per generation
- generation_time: years per generation
"""
# Calculate average pairwise differences
n_diffs = calculate_pairwise_differences(sequences)
mean_diff = np.mean(n_diffs)
# TMRCA estimation
tmrca_generations = mean_diff / (2 * mutation_rate)
tmrca_years = tmrca_generations * generation_time
return {
'tmrca_generations': tmrca_generations,
'tmrca_years': tmrca_years
}
Relaxed Clock Models
def uncorrelated_lognormal_rate(mean_rate, sigma):
"""
Generate branch rates under uncorrelated lognormal relaxed clock.
Used in BEAST analyses.
"""
# Each branch rate drawn independently
branch_rate = np.random.lognormal(
mean=np.log(mean_rate) - sigma**2/2,
sigma=sigma
)
return branch_rate
def autocorrelated_rate(parent_rate, sigma, branch_length):
"""
Generate branch rate under autocorrelated relaxed clock.
Rates correlated with parent branch.
"""
variance = sigma**2 * branch_length
child_rate = np.random.lognormal(
mean=np.log(parent_rate) - variance/2,
sigma=np.sqrt(variance)
)
return child_rate
Visualization
import matplotlib.pyplot as plt
from Bio import Phylo
def plot_timetree(tree_file, time_scale='mya'):
"""Plot phylogenetic tree with time axis."""
tree = Phylo.read(tree_file, 'nexus')
fig, ax = plt.subplots(figsize=(12, 8))
Phylo.draw(tree, axes=ax, do_show=False)
# Add time scale
max_depth = max(tree.depths().values())
if time_scale == 'mya':
ax.set_xlabel('Time (Million years ago)')
ax.invert_xaxis()
plt.tight_layout()
plt.savefig('timetree.png', dpi=150)
Key Concepts
- Strict clock: Constant rate across all branches
- Relaxed clock: Rates vary among branches
- Calibration: Fossil or biogeographic constraints
- Prior: Probability distribution on parameters
- Posterior: Updated probabilities after seeing data