An example of loading a model from the model database and running inference with it

Load model & set up genome

from pathlib import Path
import numpy as np
import keras
import crested
# Look up the BICCN model in the CREsted model database: this yields the path
# to the saved model and the names of its output classes.
model_path, output_names_biccn = crested.get_model("BICCN")
model_biccn = keras.models.load_model(model_path)

# Set the genome: build it from the mm10 FASTA and chromosome-sizes files and
# register it with CREsted.
genome_dir = Path("../../../../mouse/biccn/")
fasta_file = genome_dir / "mm10.fa"
chrom_sizes_file = genome_dir / "mm10.chrom.sizes"
genome = crested.Genome(fasta_file, chrom_sizes_file)
crested.register_genome(genome)
2025-02-04T13:16:07.490469+0100 INFO Genome mm10 registered.

Region predictions and contribution scores

# Example region on chr3: a 500 bp window (72535878-72536378) extended by
# 807 bp on each side, i.e. 2114 bp in total.
chrom = "chr3"
flank = 807
start = 72535878 - flank
end = 72536378 + flank

# Fetch the sequence from the genome, uppercase it, and run the BICCN model.
sequence = genome.fetch(chrom, start, end).upper()
prediction = crested.tl.predict(sequence, model_biccn)
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step
%matplotlib inline
crested.pl.bar.prediction(prediction, classes=output_names_biccn, x_label_rotation=90)
../_images/2dc435c42d86e14a6fdfb6f3b4d7cbee8d7e7bd0ce6e3955bf7a127906cf2d72.png
# Select the class with the highest prediction (argmax over the prediction
# array) and compute contribution scores for that class only.
top_class = np.argmax(prediction)
classes_of_interest = [output_names_biccn[top_class]]
class_idx = [top_class]

scores, one_hot_encoded_sequences = crested.tl.contribution_scores(
    sequence, target_idx=class_idx, model=model_biccn
)
2025-02-04T13:16:08.698614+0100 INFO Calculating contribution scores for 1 class(es) and 1 region(s).
%matplotlib inline
crested.pl.patterns.contribution_scores(
    scores,
    one_hot_encoded_sequences,
    sequence_labels=[''],
    class_labels=classes_of_interest,
    zoom_n_bases=500,
    title="Example region",
)  # zoom in on the center 500bp
../_images/7546c43616e3670ab3102de62badb93c9ce5528c58a99c03ce303f1ed50a8042.png

Same region with another model

# Load the DeepMouseBrain3 model the same way. `model_path` is rebound here,
# and `prediction` now holds this model's output for the same `sequence`.
model_path, output_names_dmb3 = crested.get_model("DeepMouseBrain3")
model_dmb3 = keras.models.load_model(model_path)
prediction = crested.tl.predict(sequence, model_dmb3)
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 984ms/step
# Bar plot of the DeepMouseBrain3 prediction; a larger width is passed than
# for the BICCN plot.
crested.pl.bar.prediction(
    prediction,
    classes=output_names_dmb3,
    x_label_rotation=90,
    width=45,
)
../_images/85cf39ee2335839eec3b28e2777ca8ba422e1acf096f18dbe152b8c9668a7598.png
# Select the class with the highest DeepMouseBrain3 prediction and compute
# contribution scores for that class only.
top_class = np.argmax(prediction)
classes_of_interest = [output_names_dmb3[top_class]]
class_idx = [top_class]

scores, one_hot_encoded_sequences = crested.tl.contribution_scores(
    sequence, target_idx=class_idx, model=model_dmb3
)
2025-02-04T13:16:23.557007+0100 INFO Calculating contribution scores for 1 class(es) and 1 region(s).
%matplotlib inline
crested.pl.patterns.contribution_scores(
    scores,
    one_hot_encoded_sequences,
    sequence_labels=[''],
    class_labels=classes_of_interest,
    zoom_n_bases=500,
    title="Example region",
)  # zoom in on the center 500bp
../_images/407c4d571e6138f9d29898ea91ec18fd18282b6d2fc6f685e2bf9c0d975ef45e.png

Gene locus predictions

With BICCN model

# Gene locus to score: chr4:91209533-91374781 on the minus strand.
chrom = "chr4"
start = 91209533
end = 91374781
strand = "-"

# Cell type of interest and its position among the BICCN output classes.
cell_type = "Sst"
class_idx = output_names_biccn.index(cell_type)

# Flank sizes around the gene; these are reused when reading the bigwig track.
upstream = 50000
downstream = 25000

# Score the locus with the BICCN model for the chosen class, using a step
# size of 100.
scores, coordinates, min_loc, max_loc, tss_position = crested.tl.score_gene_locus(
    chr_name=chrom,
    gene_start=start,
    gene_end=end,
    strand=strand,
    upstream=upstream,
    downstream=downstream,
    target_idx=class_idx,
    model=model_biccn,
    step_size=100,
)
2382/2382 ━━━━━━━━━━━━━━━━━━━━ 2s 687us/step
# Optional: read the bigwig track of the selected cell type over the scored
# locus so it can be plotted next to the predictions.
bigwig = f"../../../../mouse/biccn/bigwigs/bws/{cell_type}.bw"

# The flanks are strand-relative: on the plus strand `upstream` extends before
# `start` and `downstream` after `end`; on the minus strand the two paddings
# swap sides.
if strand == "+":
    region = (chrom, start - upstream, end + downstream)
else:
    region = (chrom, start - downstream, end + upstream)

# Single read for whichever region applies (previously the call was duplicated
# in both branches of a one-line conditional).
values = crested.utils.read_bigwig_region(bigwig, region)
bw_values = values[0]
midpoints = values[1]
%matplotlib inline
crested.pl.hist.locus_scoring(
    scores,
    (min_loc, max_loc),
    gene_start=start,
    gene_end=end,
    title="CREsted prediction around Elavl2 gene locus for Sst",
    bigwig_values=bw_values,
    bigwig_midpoints=midpoints,
)
../_images/d18ab43df839d77e7c8438c4e47ff55845221c873e825fd6eba738760c987632.png

With DeepMouseBrain3

# Score the same gene locus with the DeepMouseBrain3 model, reusing the
# coordinates, strand and flank sizes defined for the BICCN run. The results
# overwrite `scores`, `coordinates`, `min_loc`, `max_loc` and `tss_position`.
# NOTE(review): `class_idx` was looked up in `output_names_biccn`, not in
# `output_names_dmb3`. Confirm that this index refers to the intended 'Sst'
# class of DeepMouseBrain3; otherwise derive it from `output_names_dmb3`.
scores, coordinates, min_loc, max_loc, tss_position = crested.tl.score_gene_locus(
    chr_name=chrom,
    gene_start=start,
    gene_end=end,
    target_idx=class_idx,
    model=model_dmb3,
    strand=strand,
    upstream=upstream,
    downstream=downstream,
    step_size=100,
)
2382/2382 ━━━━━━━━━━━━━━━━━━━━ 4s 1ms/step
%matplotlib inline
crested.pl.hist.locus_scoring(
    scores,
    (min_loc, max_loc),
    gene_start=start,
    gene_end=end,
    title="CREsted prediction around Elavl2 gene locus for Sst",
    bigwig_values=bw_values,
    bigwig_midpoints=midpoints,
)
../_images/55d39e0dc7dc4b3042596db908c2091c8a9f8a329b5b83bf3878322ada1c93f4.png