"""
An experiment that uses hosts which implements all currently existing modules.
"""
from __future__ import annotations
from typing import Optional, List, Tuple, Literal
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from Bio.Seq import Seq
import pandas as pd
from silvio import (
DATADIR, # config
Experiment, ExperimentException, Host, HostException, # base classes
alldef, coalesce, first, Generator, # utilities
DataOutcome, DataWithPlotOutcome, combine_data, # outcome
InsertGeneEvent, # events
GenomeList, GenomeExpression, GrowthBehaviour, # modules
Gene, # records
)
# additional non-renamed utilities
from silvio.extensions.utils.misc import check_primer_integrity_and_recombination
from silvio.extensions.utils.visual import Help_Progressbar
from silvio.extensions.utils.laboratory import ErrorRate
[docs]class RecExperiment (Experiment) :
"""
Recombinant Expression Experiment with culture growth and genetic recombination.
Has a budget system to limit bruteforcing.
"""
budget: int
suc_rate: float
# Keep track of how many hosts were created
host_counter: int
def __init__ ( self, seed:Optional[int] = None, equipment_investment:int = 0, max_budget:int = 10000 ) :
if equipment_investment > max_budget :
raise ExperimentException("Investment cost is higher than maximal budget.")
super().__init__(seed=seed)
self.budget = max_budget - equipment_investment
self.suc_rate = ErrorRate(equipment_investment, max_budget)
self.host_counter = 0
[docs] def create_host ( self, species:Literal['ecol','pput'], name:Optional[str] ) -> RecHost:
self.host_counter += 1
seed = self.rnd_gen.pick_seed() # The experiment provides stable seed generation for hosts.
new_host = None
chosen_name = coalesce( name, species + str(self.host_counter) )
if species == 'ecol' :
new_host = self.build_ecol_host( name=chosen_name, seed=seed )
elif species == 'pput' :
new_host = self.build_ecol_host( name=chosen_name, seed=seed )
if species is None :
raise ExperimentException("Invalid species provided. The possible species are: 'ecol', 'pput'")
self.bind_host(new_host)
return new_host
[docs] def build_ecol_host ( self, name:str, seed:int ) -> RecHost :
gen = Generator( seed )
host = RecHost( name=name, seed=seed )
host.make(
opt_growth_temp= gen.pick_integer(25,40), # unit: degree celsius, source: https://application.wiley-vch.de/books/sample/3527335153_c01.pdf
max_biomass= gen.pick_integer(30,100), # unit: in gDCW/l, source (german): https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=2&cad=rja&uact=8&ved=2ahUKEwjzt_aJ9pzpAhWGiqQKHb1jC6MQFjABegQIAhAB&url=https%3A%2F%2Fwww.repo.uni-hannover.de%2Fbitstream%2Fhandle%2F123456789%2F3512%2FDissertation.pdf%3Fsequence%3D1&usg=AOvVaw2XfGH11P9gK2F2B63mY4IM
infl_prom_str= gen.pick_integer(30,50), # explanation see Plot_ExpressionRate
species_prom_str= 0.057,
opt_primer_len= gen.pick_integer(16,28), # unit: nt, source: https://link.springer.com/article/10.1007/s10529-013-1249-8
regressor_file= DATADIR / 'expression_predictor' / 'Ecol-Promoter-predictor.pkl',
addparams_file= DATADIR / 'expression_predictor' / 'Ecol-Promoter-AddParams.pkl',
)
host.sync()
return host
[docs] def build_pput_host ( self, name:str, seed:int ) -> RecHost :
gen = Generator( seed )
host = RecHost( name=name, seed=seed )
host.make(
opt_growth_temp= gen.pick_integer(25,40), # unit: degree celsius, source: https://application.wiley-vch.de/books/sample/3527335153_c01.pdf
max_biomass= gen.pick_integer(45,145), # unit: in gDCW/l, source 1: https://onlinelibrary.wiley.com/doi/pdf/10.1002/bit.25474, source 2: https://link.springer.com/article/10.1385/ABAB:119:1:51
infl_prom_str= gen.pick_integer(30,50), # explanation see Plot_ExpressionRate
species_prom_str= 0.04,
opt_primer_len= gen.pick_integer(16,28), # unit: nt, source: https://link.springer.com/article/10.1007/s10529-013-1249-8
regressor_file= DATADIR / 'expression_predictor' / 'Ptai-Promoter-predictor.pkl',
addparams_file= DATADIR / 'expression_predictor' / 'Ptai-Promoter-AddParams.pkl',
)
host.sync()
return host
[docs] def find_host_or_abort ( self, host_name:str ) -> RecHost :
"""
Find a host by a given name or abort with an exception.
Throws: ExperimentException
"""
host:Optional[RecHost] = first( self.hosts, lambda h: h.name == host_name )
# Find the first host matching the name.
if host is None :
raise ExperimentException("Experiment has not found the host '{}'".format(host_name))
return host
[docs] def spend_budget_or_abort ( self, amount:int ) -> None :
"""
Each operation may use up a certain amount of the budget.
When the budget is not sufficient those operations will all fail.
By using error handling via raised exceptions we don't need to check each method for a
success flag on their return value.
Throws: ExperimentException
"""
if amount > self.budget :
raise ExperimentException("Experiment has surpassed its budget. No more operations are allowed.")
self.budget -= amount
[docs] def simulate_growth ( self, host_name:str, temps:List[int], wait:float = 0.001 ) -> GrowthOutcome :
""" Simulate a growth under multiple temperatures and return expected biomasses over time. """
self.spend_budget_or_abort(100)
host = self.find_host_or_abort(host_name)
( df, pauses ) = host.growth.Make_TempGrowthExp(CultTemps=temps, exp_suc_rate=self.suc_rate)
# wait = 0.001 # has to be adjusted, waiting time for loading bar
for pause in pauses :
loading_time = wait * pause.loading_len
Help_Progressbar(45, loading_time, pause.exp)
return GrowthOutcome(value=df)
[docs] def clone_with_recombination ( self, host_name:str, primer:Seq, gene:Gene, tm:int ) -> Tuple[RecHost,str] :
"""
Clone the host while trying to insert a gene. The Clone might or might not contain
the added Gene.
Returns: ( new_host, outcome_message )
"""
self.spend_budget_or_abort(200)
host = self.find_host_or_abort(host_name)
rnd = host.make_generator()
# A clone is always made.
# TODO: Maybe there is a failure to clone and the return should be `Optional[RecHost]`
new_host = RecHost( ref=host ) # Clone the host but with different seed.
new_host.copy( ref=host )
self.bind_host(new_host)
if rnd.pick_uniform(0,1) < self.suc_rate: # experiment failure depending on investment to equipment
return( new_host, 'Cloning failed: Bad Equipment' )
primer_integrity = check_primer_integrity_and_recombination(
gene.prom, primer, tm, RecHost.ref_prom, host.genexpr.opt_primer_len
)
if not primer_integrity.value :
return ( new_host, "Primer Failed: " + primer_integrity.error )
# Both primer integrity and recombination succeeded. Insert the new gene into the clone.
new_host.insert_gene(gene)
return ( new_host, "Cloning with recombination succeeded." )
[docs] def simulate_vaccine_production (
self, host_names:List[str], gene_name:Gene, cult_temp:int, growth_rate:float, biomass:int
) -> DataOutcome :
"""
Simulate the protein production of multiple host cultures.
"""
self.spend_budget_or_abort(500)
# Equipment failure can prematurely end the simulation.
if self.rnd_gen.pick_uniform(0,1) < self.suc_rate :
return DataOutcome( None, 'Experiment failed, bad equipment.' )
all_outcomes:List[DataOutcome] = []
for host_name in host_names :
# Skip this host if it does not exist.
host = first( self.hosts, lambda h: h.name == host_name )
if host is None :
outcome = DataOutcome( pd.Series({ 'Host': host_name }), 'Host does not exist.' )
all_outcomes.append(outcome)
continue
# Skip this host if no matching gene is found.
gene = first( host.genome.genes, lambda g: g.name == gene_name )
if gene is None :
outcome = DataOutcome( pd.Series({ 'Host': host.name, 'Gene_Name': gene_name }), 'Gene does not exist.' )
all_outcomes.append(outcome)
continue
# Simulate the production for this host.
production = host.growth.Make_ProductionExperiment(
gene=gene,
CultTemp=cult_temp,
GrowthRate=growth_rate,
Biomass=biomass,
ref_prom=RecHost.ref_prom,
accuracy_Test=.9
)
outcome = DataOutcome(
value=pd.Series({
'Host': host.name,
'Gene_Name': gene.name,
'Promoter_Sequence': str(gene.prom),
'Promoter_GC-content': (gene.prom.count('C') + gene.prom.count('G')) / len(gene.prom),
'Expression_Temperature': cult_temp,
'Expression_Biomass': biomass,
'Expression_Rate': production.value,
}),
error=production.error
)
all_outcomes.append(outcome)
return combine_data(all_outcomes)
[docs] def print_status ( self ) -> None :
print("Experiment:")
print(" budget = {}".format( self.budget ))
print(" failure rate = {}".format( round(self.suc_rate, 2) ))
print(" hosts = [ {} ]".format( " , ".join([h.name for h in self.hosts]) ))
# Could display the status of each host if wanted.
[docs]class RecHost (Host) :
growth: GrowthBehaviour
genome: GenomeList
genexpr: GenomeExpression
ref_prom:str = 'GCCCATTGACAAGGCTCTCGCGGCCAGGTATAATTGCACG'
[docs] def make ( self,
opt_growth_temp:int, max_biomass:int,
infl_prom_str:int, species_prom_str:int, opt_primer_len:int,
regressor_file:Path, addparams_file:Path
) -> None :
if not alldef( opt_growth_temp, max_biomass, infl_prom_str, species_prom_str, opt_primer_len, regressor_file, addparams_file ) :
raise HostException("Host not initialized. Reason: incomplete arguments.")
# Setup GenomeList module
self.genome = GenomeList()
self.genome.make()
self.genome.bind( host=self )
# Setup GenomeExpression module
self.genexpr = GenomeExpression()
self.genexpr.make(
opt_primer_len=opt_primer_len,
infl_prom_str=infl_prom_str,
species_prom_str=species_prom_str,
regressor_file=regressor_file,
addparams_file=addparams_file
)
self.genexpr.bind( host=self, genome=self.genome )
# Setup GrowthBehaviour module
self.growth = GrowthBehaviour()
self.growth.make(
opt_growth_temp=opt_growth_temp,
max_biomass=max_biomass
)
self.growth.bind( host=self, genexpr=self.genexpr )
[docs] def copy ( self, ref:RecHost ) -> None :
# Setup GenomeList module using the ref
self.genome = GenomeList()
self.genome.copy( ref=ref.genome )
self.genome.bind( host=self )
# Setup GenomeExpression module using the ref
self.genexpr = GenomeExpression()
self.genexpr.copy( ref=ref.genexpr )
self.genexpr.bind( host=self, genome=self.genome )
# Setup GrowthBehaviour module using the ref
self.growth = GrowthBehaviour()
self.growth.copy( ref=ref.growth )
self.growth.bind( host=self, genexpr=self.genexpr )
[docs] def sync ( self ) -> None :
self.sync_modules([ self.genome, self.genexpr, self.growth ])
[docs] def find_gene_or_abort ( self, gene_name:str ) -> Gene :
"""
Find a gene by a given name or abort with an exception.
Throws: HostException
"""
gene:Optional[Gene] = first( self.genome.genes, lambda g: g.name == gene_name )
# Find the first gene matching the name.
if gene is None :
raise HostException("Host has not found the gene '{}'".format(gene_name))
return gene
[docs] def insert_gene ( self, gene:Gene ) -> None :
self.emit( InsertGeneEvent( gene=gene, locus=0 ) )
[docs] def print_status ( self ) -> None :
print("Host [{}]:".format( self.name ))
print(" seed plus counter = {} + {}".format( self.rnd_seed, self.rnd_counter ))
print(" optimal growth temperature = {}".format( self.growth.opt_growth_temp ))
print(" max biomass = {}".format( self.growth.max_biomass ))
print(" optimal primer length = {}".format( self.genexpr.opt_primer_len ))
print(" Gene List: {} genes".format(len(self.genome.genes)))
for gene in self.genome.genes :
print(" - {} = {} * {}".format(gene.name, gene.prom, gene.orf))
print(" Event History: {} events".format(len(self.event_log)))
for el in self.event_log :
print(" - {}".format(el))
[docs]class GrowthOutcome ( DataWithPlotOutcome ) :
[docs] def make_plot ( self ) -> plt.Figure :
"""
Plotting with pyplot is unfortunately unintuitive. You cannot display a single figure by
using the object-oriented API. When you do `plt.subplots` (or create a plot by any other
means) it will be stored in the global context. You can only display things from the glbbal
context, and displaying it will remove it from there.
"""
Time, Biomass = self.value.iloc[:,0], self.value.iloc[:,1:]
LnBiomass = np.log(Biomass)
fig, ax = plt.subplots()
for Exp,X in LnBiomass.iteritems() :
ax.scatter(Time, X, label=Exp)
ax.legend()
return fig