| name | newick |
| description | Parse, manipulate, and visualize phylogenetic trees in Newick format. |
Newick Format
The Newick format is the standard text representation for phylogenetic trees. This skill covers parsing, manipulating, and visualizing Newick trees.
Newick Format Syntax
# Basic formats:
(A,B); # Topology only, no branch lengths
(A:0.1,B:0.2); # With branch lengths
(A:0.1,B:0.2):0.3; # With root branch length
((A,B),C); # Nested clades
((A:0.1,B:0.2):0.3,C:0.4); # Nested clades with branch lengths
((A,B)0.95:0.3,(C,D)0.99:0.2); # With support values
BioPython Phylo Module
from Bio import Phylo
from io import StringIO
# Load a single tree from a Newick file on disk
tree = Phylo.read('tree.nwk', 'newick')

# Parse a tree held in memory: Phylo.read takes a file name or handle,
# so the Newick text is wrapped in a StringIO handle first
newick_str = "((A:0.1,B:0.2):0.3,(C:0.1,D:0.2):0.3);"
handle = StringIO(newick_str)
tree = Phylo.read(handle, 'newick')

# Print a plain-text rendering of the tree
Phylo.draw_ascii(tree)

# Save the tree to disk in Newick format
Phylo.write(tree, 'output.nwk', 'newick')
Tree Navigation
# Get all terminal nodes (leaves)
terminals = tree.get_terminals()
for term in terminals:
    print(f"Leaf: {term.name}")

# Get all internal nodes
internals = tree.get_nonterminals()

# Find the first clade whose name is 'A'
clade = tree.find_any(name='A')

# Get path between two nodes. trace() lists the clades from 'A' to 'B'
# (excluding the start, including the finish). get_path() is not suitable
# here: it accepts a single target and returns the root-to-target path.
path = tree.trace('A', 'B')

# Sum of branch lengths between the two nodes
distance = tree.distance('A', 'B')

# Most recent common ancestor of the two nodes
common = tree.common_ancestor('A', 'B')
Tree Manipulation
from Bio.Phylo import BaseTree
# Re-root the tree. The two calls below are alternative rooting strategies;
# run back to back as written, the midpoint rooting is applied last.
# Root using the clade named 'A' as the outgroup
tree.root_with_outgroup('A')
# Root at the midpoint between the two most distant tips
tree.root_at_midpoint()
# Collapse branches
def collapse_short_branches(tree, threshold=0.01):
    """Set every branch shorter than ``threshold`` to length zero.

    The topology is left untouched: short branches are zeroed, not removed,
    so the affected clades remain in the tree.

    Args:
        tree: Object exposing ``find_clades()`` whose items carry a
            ``branch_length`` attribute (e.g. a Bio.Phylo tree).
        threshold: Branches with a length strictly below this value are
            zeroed.

    Returns:
        The same ``tree`` object, modified in place.
    """
    for clade in tree.find_clades():
        length = clade.branch_length
        # A falsy length (None or already 0) has nothing to shorten
        if length and length < threshold:
            clade.branch_length = 0
    return tree
# Prune taxa
def prune_taxa(tree, taxa_to_remove):
    """Remove the given taxa from ``tree`` by calling ``tree.prune`` on each.

    Args:
        tree: Object exposing ``prune(target)`` (e.g. a Bio.Phylo tree).
        taxa_to_remove: Iterable of taxa to prune. A single string is
            treated as one taxon name rather than iterated character by
            character.

    Returns:
        The same ``tree`` object, modified in place.

    Any exception raised by ``tree.prune`` (for example when a taxon is not
    present) propagates to the caller.
    """
    if isinstance(taxa_to_remove, str):
        taxa_to_remove = [taxa_to_remove]
    for taxon in taxa_to_remove:
        tree.prune(taxon)
    return tree
# Rename tips: replace underscores in leaf names with spaces
for tip in tree.get_terminals():
    # Unnamed tips have name None; skip them instead of failing on .replace
    if tip.name is not None:
        tip.name = tip.name.replace('_', ' ')
Using ete3 Library
from ete3 import Tree
# Read tree from a Newick file
tree = Tree('tree.nwk')

# Or from a Newick string
tree = Tree('((A:0.1,B:0.2):0.3,(C:0.1,D:0.2):0.3);')

# Tree traversal: print each node's name and the length of its branch
for node in tree.traverse('preorder'):  # or 'postorder', 'levelorder'
    print(node.name, node.dist)

# Get leaf names
leaves = tree.get_leaf_names()

# Calculate Robinson-Foulds distance between trees.
# robinson_foulds() returns a 7-tuple; only rf (and max_rf, its upper
# bound) are used here.
tree1 = Tree('((A,B),(C,D));')
tree2 = Tree('((A,C),(B,D));')
rf, max_rf, common, parts1, parts2, set1, set2 = tree1.robinson_foulds(tree2)
print(f"RF distance: {rf}")

# Midpoint rooting: find the midpoint node, then use it as the outgroup
midpoint = tree.get_midpoint_outgroup()
tree.set_outgroup(midpoint)
Tree Visualization
from Bio import Phylo
import matplotlib.pyplot as plt
# Basic plot
fig, ax = plt.subplots(figsize=(10, 8))
# do_show=False stops Phylo.draw from calling plt.show() itself, which
# would block and can leave nothing on the current figure for savefig
Phylo.draw(tree, axes=ax, do_show=False)
# Save through the figure object so the right figure is always written
fig.savefig('tree.png', dpi=150)
# Circular tree with ete3
from ete3 import Tree, TreeStyle, NodeStyle

tree = Tree('((A,B),(C,D));')
ts = TreeStyle()
ts.mode = 'c'  # Circular
ts.show_leaf_name = True
ts.show_branch_length = True
tree.render('circular_tree.png', tree_style=ts)

# Customize node styles.
# NOTE: styles are assigned after the render() call above, so they do not
# change the image already written; render again to see them.
for node in tree.traverse():
    nstyle = NodeStyle()
    if node.is_leaf():
        # Only leaves get the custom colour and size; internal nodes keep
        # the NodeStyle defaults
        nstyle['fgcolor'] = 'blue'
        nstyle['size'] = 10
    node.set_style(nstyle)
Tree Statistics
def tree_statistics(tree):
    """Calculate basic tree statistics.

    Args:
        tree: Object exposing ``get_terminals()``, ``get_nonterminals()``,
            ``find_clades()`` and ``depths()`` (e.g. a Bio.Phylo tree).

    Returns:
        dict with the keys ``num_tips``, ``num_internal``,
        ``total_branch_length``, ``mean_branch_length`` and ``max_depth``.
        The mean and the maximum depth fall back to 0 when there is no
        branch length / depth to aggregate.
    """
    terminals = tree.get_terminals()
    internals = tree.get_nonterminals()

    # Clades without a branch length (None) are left out of the totals
    branch_lengths = [
        clade.branch_length
        for clade in tree.find_clades()
        if clade.branch_length is not None
    ]
    total_length = sum(branch_lengths)

    # depths() maps each clade to its depth; the largest value is the
    # tree depth
    depths = tree.depths()
    max_depth = max(depths.values()) if depths else 0

    return {
        'num_tips': len(terminals),
        'num_internal': len(internals),
        'total_branch_length': total_length,
        'mean_branch_length': (
            total_length / len(branch_lengths) if branch_lengths else 0
        ),
        'max_depth': max_depth,
    }
# Compute the statistics for an already-loaded `tree` and print a summary
stats = tree_statistics(tree)
print(f"Tips: {stats['num_tips']}")
# Total branch length is shown with four decimal places
print(f"Total branch length: {stats['total_branch_length']:.4f}")
Converting Between Formats
from Bio import Phylo
# Read the Newick tree once, then write it out as Nexus and as PhyloXML
tree = Phylo.read('tree.nwk', 'newick')
for out_path, out_format in (('tree.nex', 'nexus'), ('tree.xml', 'phyloxml')):
    Phylo.write(tree, out_path, out_format)

# Convert a multi-tree file: parse() returns an iterator over the trees,
# which is handed straight to write()
trees = Phylo.parse('bootstrap_trees.nwk', 'newick')
Phylo.write(trees, 'bootstrap_trees.nex', 'nexus')
Bootstrap Support Values
def add_bootstrap_support(main_tree, bootstrap_trees):
    """Map bootstrap support values to main tree.

    Args:
        main_tree: Tree whose branches should receive support values.
        bootstrap_trees: Iterable of bootstrap replicate trees. Lists and
            lazy iterators (such as the result of ``Phylo.parse``) are both
            accepted.

    Returns:
        The tree returned by ``Bio.Phylo.Consensus.get_support``.
    """
    from Bio.Phylo.Consensus import get_support

    # get_support() needs len() of the replicates; materializing the
    # iterable lets callers pass a generator without a TypeError
    replicates = list(bootstrap_trees)
    return get_support(main_tree, replicates)
# Read every bootstrap replicate into a list (Phylo.parse alone is lazy)
boot_trees = list(Phylo.parse('bootstrap.nwk', 'newick'))
# Read the single tree that will receive the support values
main_tree = Phylo.read('best_tree.nwk', 'newick')
# Add support values computed from the replicates
supported_tree = add_bootstrap_support(main_tree, boot_trees)