2018 |
Barghi, Neda; Tobler, Raymond; Nolte, Viola; Jaksic, Ana Marija; Mallard, Francois; Otte, Kathrin; Dolezal, Marlies; Taus, Thomas; Kofler, Robert; Schlötterer, Christian Polygenic adaptation fuels genetic redundancy in Drosophila Artikel bioRxiv, S. 332122, 2018. @article{Barghi2018, title = {Polygenic adaptation fuels genetic redundancy in Drosophila}, author = {Neda Barghi and Raymond Tobler and Viola Nolte and Ana Marija Jaksic and Francois Mallard and Kathrin Otte and Marlies Dolezal and Thomas Taus and Robert Kofler and Christian Schlötterer}, url = {https://www.biorxiv.org/content/early/2018/05/28/332122?%3Fcollection=}, doi = {10.1101/332122}, year = {2018}, date = {2018-01-01}, journal = {bioRxiv}, pages = {332122}, abstract = {The genetic architecture of adaptive traits is of key importance to predict evolutionary responses. Most adaptive traits are polygenic - i.e. result from selection on a large number of genetic loci - but most molecularly characterized traits have a simple genetic basis. This discrepancy is best explained by the difficulty in detecting small allele frequency changes across many contributing loci. To resolve this, we use laboratory natural selection, a framework that is powerful enough to detect signatures for selective sweeps and polygenic adaptation. We exposed 10 replicates of a Drosophila simulans population to a new temperature regime and uncovered a polygenic architecture of an adaptive trait with high genetic redundancy among adaptive alleles. We observed convergent phenotypic responses, e.g. fitness, metabolic rate and fat content, and a strong polygenic response (99 selected alleles; mean s=0.061). However, each of these selected alleles increased in frequency only in a subset of the evolving replicates. Our results show that natural D. simulans populations harbor a vast reservoir of adaptive variation facilitating rapid evolutionary responses. 
The observed genetic redundancy potentiates this genotypic variation through multiple genetic pathways leading to phenotypic convergence. This key property of adaptive alleles requires the modification of testing strategies in natural populations beyond the search for convergence on the molecular level.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The genetic architecture of adaptive traits is of key importance to predict evolutionary responses. Most adaptive traits are polygenic - i.e. result from selection on a large number of genetic loci - but most molecularly characterized traits have a simple genetic basis. This discrepancy is best explained by the difficulty in detecting small allele frequency changes across many contributing loci. To resolve this, we use laboratory natural selection, a framework that is powerful enough to detect signatures for selective sweeps and polygenic adaptation. We exposed 10 replicates of a Drosophila simulans population to a new temperature regime and uncovered a polygenic architecture of an adaptive trait with high genetic redundancy among adaptive alleles. We observed convergent phenotypic responses, e.g. fitness, metabolic rate and fat content, and a strong polygenic response (99 selected alleles; mean s=0.061). However, each of these selected alleles increased in frequency only in a subset of the evolving replicates. Our results show that natural D. simulans populations harbor a vast reservoir of adaptive variation facilitating rapid evolutionary responses. The observed genetic redundancy potentiates this genotypic variation through multiple genetic pathways leading to phenotypic convergence. This key property of adaptive alleles requires the modification of testing strategies in natural populations beyond the search for convergence on the molecular level. |
Kofler, Robert; Nolte, Viola; Tobler, Ray; Schlötterer, Christian Molecular dissection of a natural transposable element invasion Artikel Genome Research, 2018. @article{Kofler2018, title = {Molecular dissection of a natural transposable element invasion}, author = {Robert Kofler and Viola Nolte and Ray Tobler and Christian Schlötterer}, url = {http://www.ncbi.nlm.nih.gov/pubmed/29757461}, year = {2018}, date = {2018-01-01}, journal = {Genome Research}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2017 |
Kofler, Robert SimulaTE: simulating complex landscapes of transposable elements of populations Artikel Bioinformatics, (November 2017), S. 1–2, 2017, ISSN: 1367-4803. @article{Kofler2017, title = {SimulaTE: simulating complex landscapes of transposable elements of populations}, author = {Robert Kofler}, url = {http://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btx772/4665422}, doi = {10.1093/bioinformatics/btx772}, issn = {1367-4803}, year = {2017}, date = {2017-01-01}, journal = {Bioinformatics}, number = {November 2017}, pages = {1--2}, abstract = {Motivation: Estimating the abundance of transposable elements (TEs) in populations (or tissues) promises to answer many open research questions. However, progress is hampered by the lack of concordance between different approaches for TE identification and thus potentially unreliable results. Results: To address this problem, we developed SimulaTE a tool that generates TE landscapes for populations using a newly developed domain specific language (DSL). The simple syntax of our DSL allows for easily building even complex TE landscapes that have, for example, nested, truncated and highly diverged TE insertions. Reads may be simulated for the populations using different sequencing technologies (PacBio, Illumina paired-ends) and strategies (sequencing individuals and pooled populations). The comparison between the expected (i.e. simulated) and the observed results will guide researchers in finding the most suitable approach for a particular research question.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Estimating the abundance of transposable elements (TEs) in populations (or tissues) promises to answer many open research questions. However, progress is hampered by the lack of concordance between different approaches for TE identification and thus potentially unreliable results. 
Results: To address this problem, we developed SimulaTE a tool that generates TE landscapes for populations using a newly developed domain specific language (DSL). The simple syntax of our DSL allows for easily building even complex TE landscapes that have, for example, nested, truncated and highly diverged TE insertions. Reads may be simulated for the populations using different sequencing technologies (PacBio, Illumina paired-ends) and strategies (sequencing individuals and pooled populations). The comparison between the expected (i.e. simulated) and the observed results will guide researchers in finding the most suitable approach for a particular research question. |
Oppold, Ann Marie; Schmidt, Hanno; Rose, Marcel; Hellmann, Sören Lukas; Dolze, Florian; Ripp, Fabian; Weich, Bettina; Schmidt-Ott, Urs; Schmidt, Erwin; Kofler, Robert; Hankeln, Thomas; Pfenninger, Markus Molecular Ecology, 26 (12), S. 3256–3275, 2017, ISSN: 1365294X. @article{Oppold2017, title = {Chironomus riparius (Diptera) genome sequencing reveals the impact of minisatellite transposable elements on population divergence}, author = {Ann Marie Oppold and Hanno Schmidt and Marcel Rose and Sören Lukas Hellmann and Florian Dolze and Fabian Ripp and Bettina Weich and Urs Schmidt-Ott and Erwin Schmidt and Robert Kofler and Thomas Hankeln and Markus Pfenninger}, doi = {10.1111/mec.14111}, issn = {1365294X}, year = {2017}, date = {2017-01-01}, journal = {Molecular Ecology}, volume = {26}, number = {12}, pages = {3256--3275}, abstract = {Active transposable elements (TEs) may result in divergent genomic insertion and abundance patterns among conspecific populations. Upon secondary contact, such divergent genetic backgrounds can theoretically give rise to classical Dobzhansky-Muller incompatibilities (DMI), a way how TEs can contribute to the evolution of endogenous genetic barriers and eventually population divergence. We investigated whether differential TE activity created endogenous selection pressures among conspecific populations of the non-biting midge Chironomus riparius, focussing on a Chironomus-specific TE, the minisatellite-like Cla-element, whose activity is associated with speciation in the genus. Using an improved and annotated draft genome for a genomic study with five natural C. riparius populations, we found highly population-specific TE insertion patterns with many private insertions. A highly significant correlation of pairwise population FST from genome-wide SNPs with the FST estimated from TEs suggests drift as the major force driving TE population differentiation. 
However, the significantly higher Cla-element FST level due to a high proportion of differentially fixed Cla-element insertions indicates that segregating, i.e. heterozygous insertions are selected against. With reciprocal crossing experiments and fluorescent in-situ hybridisation of Cla-elements to polytene chromosomes, we documented phenotypic effects on female fertility and chromosomal mispairings that might be linked to DMI in hybrids. We propose that the inferred negative selection on heterozygous Cla-element insertions causes endogenous genetic barriers and therefore acts as DMI among C. riparius populations. The intrinsic genomic turnover exerted by TEs, thus, may have a direct impact on population divergence that is operationally different from drift and local adaptation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Active transposable elements (TEs) may result in divergent genomic insertion and abundance patterns among conspecific populations. Upon secondary contact, such divergent genetic backgrounds can theoretically give rise to classical Dobzhansky-Muller incompatibilities (DMI), a way how TEs can contribute to the evolution of endogenous genetic barriers and eventually population divergence. We investigated whether differential TE activity created endogenous selection pressures among conspecific populations of the non-biting midge Chironomus riparius, focussing on a Chironomus-specific TE, the minisatellite-like Cla-element, whose activity is associated with speciation in the genus. Using an improved and annotated draft genome for a genomic study with five natural C. riparius populations, we found highly population-specific TE insertion patterns with many private insertions. A highly significant correlation of pairwise population FST from genome-wide SNPs with the FST estimated from TEs suggests drift as the major force driving TE population differentiation. 
However, the significantly higher Cla-element FST level due to a high proportion of differentially fixed Cla-element insertions indicates that segregating, i.e. heterozygous insertions are selected against. With reciprocal crossing experiments and fluorescent in-situ hybridisation of Cla-elements to polytene chromosomes, we documented phenotypic effects on female fertility and chromosomal mispairings that might be linked to DMI in hybrids. We propose that the inferred negative selection on heterozygous Cla-element insertions causes endogenous genetic barriers and therefore acts as DMI among C. riparius populations. The intrinsic genomic turnover exerted by TEs, thus, may have a direct impact on population divergence that is operationally different from drift and local adaptation. |
Jaksic, Ana Marija; Kofler, Robert; Schlötterer, Christian Molecular Ecology, (May), 2017, ISSN: 09621083. @article{Jaksic2017, title = {Regulation of transposable elements: interplay between TE-encoded regulatory sequences and host-specific trans-acting factors in Drosophila melanogaster}, author = {Ana Marija Jaksic and Robert Kofler and Christian Schlötterer}, url = {http://doi.wiley.com/10.1111/mec.14259}, doi = {10.1111/mec.14259}, issn = {09621083}, year = {2017}, date = {2017-01-01}, journal = {Molecular Ecology}, number = {May}, abstract = {Transposable elements (TEs) are mobile genetic elements that can move around the genome and their expression is one precondition for this mobility. Because the insertion of TEs in new genomic positions is largely deleterious, the molecular mechanisms for transcriptional suppression have been extensively studied. In contrast, very little is known about their primary transcriptional regulation. Here, we characterize the expression dynamics of TE families in Drosophila melanogaster across a broad temperature range (13-29°C). In 71% of the expressed TE families the expression is modulated by temperature. We show that this temperature dependent regulation is specific for TE families and strongly affected by the genetic background. We deduce that TEs carry family-specific regulatory sequences, which are targeted by host-specific trans-acting factors, such as transcription factors. Consistent with the widespread dominant inheritance of gene expression, we also find prevailing dominance of TE family expression. We conclude that TE family expression across a range of temperatures is regulated by an interaction between TE family specific regulatory elements and trans-acting factors of the host. This article is protected by copyright. 
All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Transposable elements (TEs) are mobile genetic elements that can move around the genome and their expression is one precondition for this mobility. Because the insertion of TEs in new genomic positions is largely deleterious, the molecular mechanisms for transcriptional suppression have been extensively studied. In contrast, very little is known about their primary transcriptional regulation. Here, we characterize the expression dynamics of TE families in Drosophila melanogaster across a broad temperature range (13-29°C). In 71% of the expressed TE families the expression is modulated by temperature. We show that this temperature dependent regulation is specific for TE families and strongly affected by the genetic background. We deduce that TEs carry family-specific regulatory sequences, which are targeted by host-specific trans-acting factors, such as transcription factors. Consistent with the widespread dominant inheritance of gene expression, we also find prevailing dominance of TE family expression. We conclude that TE family expression across a range of temperatures is regulated by an interaction between TE family specific regulatory elements and trans-acting factors of the host. This article is protected by copyright. All rights reserved. |
2016 |
Kofler, Robert; Langmüller, Anna Maria; Nouhaud, Pierre; Otte, Anna; Schlötterer, Christian Suitability of different mapping algorithms for genome-wide polymorphism scans with Pool-Seq data Artikel Submitted, 6 (November), S. 1–20, 2016, ISSN: 2160-1836. @article{Kofler2016, title = {Suitability of different mapping algorithms for genome-wide polymorphism scans with Pool-Seq data}, author = {Robert Kofler and Anna Maria Langmüller and Pierre Nouhaud and Anna Otte and Christian Schlötterer}, doi = {10.1101/052845}, issn = {2160-1836}, year = {2016}, date = {2016-01-01}, journal = {Submitted}, volume = {6}, number = {November}, pages = {1--20}, abstract = {The cost-effectiveness of sequencing pools of individuals (Pool-Seq) provides the basis for the popularity and wide-spread use of this method for many research questions, ranging from unravelling the genetic basis of complex traits to the clonal evolution of cancer cells. Because the accuracy of Pool-Seq could be affected by many potential sources of error, several studies determined, for example, the influence of the sequencing technology, the library preparation protocol, and mapping parameters. Nevertheless, the impact of the mapping tools has not yet been evaluated. Using simulated and real Pool-Seq data, we demonstrate a substantial impact of the mapping tools leading to characteristic false positives in genome-wide scans. The problem of false positives was particularly pronounced when data with different read lengths and insert sizes were compared. Out of 14 evaluated algorithms novoalign, bwa mem and clc4 are most suitable for mapping Pool-Seq data. Nevertheless, no single algorithm is sufficient for avoiding all false positives. We show that the intersection of the results of two mapping algorithms provides a simple, yet effective strategy to eliminate false positives. 
We propose that the implementation of a consistent Pool-seq bioinformatics pipeline building on the recommendations of this study can substantially increase the reliability of Pool-Seq results, in particular when libraries generated with different protocols are being compared.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The cost-effectiveness of sequencing pools of individuals (Pool-Seq) provides the basis for the popularity and wide-spread use of this method for many research questions, ranging from unravelling the genetic basis of complex traits to the clonal evolution of cancer cells. Because the accuracy of Pool-Seq could be affected by many potential sources of error, several studies determined, for example, the influence of the sequencing technology, the library preparation protocol, and mapping parameters. Nevertheless, the impact of the mapping tools has not yet been evaluated. Using simulated and real Pool-Seq data, we demonstrate a substantial impact of the mapping tools leading to characteristic false positives in genome-wide scans. The problem of false positives was particularly pronounced when data with different read lengths and insert sizes were compared. Out of 14 evaluated algorithms novoalign, bwa mem and clc4 are most suitable for mapping Pool-Seq data. Nevertheless, no single algorithm is sufficient for avoiding all false positives. We show that the intersection of the results of two mapping algorithms provides a simple, yet effective strategy to eliminate false positives. We propose that the implementation of a consistent Pool-seq bioinformatics pipeline building on the recommendations of this study can substantially increase the reliability of Pool-Seq results, in particular when libraries generated with different protocols are being compared. |
Kofler, Robert; Gómez-Sánchez, Daniel; Schlötterer, Christian PoPoolationTE2: Comparative Population Genomics of Transposable Elements Using Pool-Seq Artikel Molecular Biology and Evolution, 33 (10), S. 2759–2764, 2016, ISSN: 15371719. @article{Kofler2016b, title = {PoPoolationTE2: Comparative Population Genomics of Transposable Elements Using Pool-Seq}, author = {Robert Kofler and Daniel Gómez-Sánchez and Christian Schlötterer}, doi = {10.1093/molbev/msw137}, issn = {15371719}, year = {2016}, date = {2016-01-01}, journal = {Molecular Biology and Evolution}, volume = {33}, number = {10}, pages = {2759--2764}, abstract = {The evolutionary dynamics of transposable elements (TEs) are still poorly understood. One reason is that TE abundance needs to be studied at the population level, but sequencing individuals on a population scale is still too expensive to characterize TE abundance in multiple populations. While sequencing pools of individuals (Pool-Seq) dramatically reduces sequencing costs, a comparison of TE abundance between pooled samples has been difficult, if not impossible, due to various biases. Here, we introduce a novel bioinformatic tool, PoPoolationTE2, which is specifically tailored for the comparison of TE abundance among pooled population samples or different tissues. Using computer simulations we demonstrate that PoPoolationTE2 not only faithfully recovers TE insertion frequencies and positions but, by homogenizing the power to identify TEs across samples, it provides an unbiased comparison of TE abundance between pooled population samples. We anticipate that PoPoolationTE2 will greatly facilitate the analysis of TE insertion patterns in a broad range of applications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The evolutionary dynamics of transposable elements (TEs) are still poorly understood. 
One reason is that TE abundance needs to be studied at the population level, but sequencing individuals on a population scale is still too expensive to characterize TE abundance in multiple populations. While sequencing pools of individuals (Pool-Seq) dramatically reduces sequencing costs, a comparison of TE abundance between pooled samples has been difficult, if not impossible, due to various biases. Here, we introduce a novel bioinformatic tool, PoPoolationTE2, which is specifically tailored for the comparison of TE abundance among pooled population samples or different tissues. Using computer simulations we demonstrate that PoPoolationTE2 not only faithfully recovers TE insertion frequencies and positions but, by homogenizing the power to identify TEs across samples, it provides an unbiased comparison of TE abundance between pooled population samples. We anticipate that PoPoolationTE2 will greatly facilitate the analysis of TE insertion patterns in a broad range of applications. |
Franssen, Susanne U; Kofler, Robert; Schlötterer, Christian (August), S. 1–10, 2016, ISSN: 0018-067X. @article{Franssen2016, title = {Uncovering the genetic signature of quantitative trait evolution with replicated time series data}, author = {Susanne U Franssen and Robert Kofler and Christian Schlötterer}, doi = {10.1038/hdy.2016.98}, issn = {0018-067X}, year = {2016}, date = {2016-01-01}, number = {August}, pages = {1--10}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Kofler, Robert; Nolte, Viola; Schlötterer, Christian Molecular ecology resources, 16 (1), S. 118–122, 2016, ISSN: 1755-0998. @article{Kofler2016c, title = {The impact of library preparation protocols on the consistency of allele frequency estimates in Pool-Seq data.}, author = {Robert Kofler and Viola Nolte and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4744716&tool=pmcentrez&rendertype=abstract}, doi = {10.1111/1755-0998.12432}, issn = {1755-0998}, year = {2016}, date = {2016-01-01}, journal = {Molecular ecology resources}, volume = {16}, number = {1}, pages = {118--122}, abstract = {Sequencing pools of individuals (Pool-Seq) is a cost-effective method to determine genome-wide allele frequency estimates. Given the importance of meta-analyses combining data sets, we determined the influence of different genomic library preparation protocols on the consistency of allele frequency estimates. We found that typically no more than 1% of the variation in allele frequency estimates could be attributed to differences in library preparation. Also read length had only a minor effect on the consistency of allele frequency estimates. By far, the most pronounced influence could be attributed to sequence coverage. Increasing the coverage from 30- to 50-fold improved the consistency of allele frequency estimates by at least 27%. We conclude that Pool-Seq data can be easily combined across different library preparation methods, but sufficient sequence coverage is key to reliable results.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Sequencing pools of individuals (Pool-Seq) is a cost-effective method to determine genome-wide allele frequency estimates. Given the importance of meta-analyses combining data sets, we determined the influence of different genomic library preparation protocols on the consistency of allele frequency estimates. 
We found that typically no more than 1% of the variation in allele frequency estimates could be attributed to differences in library preparation. Also read length had only a minor effect on the consistency of allele frequency estimates. By far, the most pronounced influence could be attributed to sequence coverage. Increasing the coverage from 30- to 50-fold improved the consistency of allele frequency estimates by at least 27%. We conclude that Pool-Seq data can be easily combined across different library preparation methods, but sufficient sequence coverage is key to reliable results. |
2015 |
Kofler, Robert; Nolte, Viola; Schlötterer, Christian Tempo and Mode of Transposable Element Activity in Drosophila. Artikel PLoS genetics, 11 (7), S. e1005406, 2015, ISSN: 1553-7404. @article{Kofler2015b, title = {Tempo and Mode of Transposable Element Activity in Drosophila.}, author = {Robert Kofler and Viola Nolte and Christian Schlötterer}, url = {http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005406}, doi = {10.1371/journal.pgen.1005406}, issn = {1553-7404}, year = {2015}, date = {2015-07-01}, journal = {PLoS genetics}, volume = {11}, number = {7}, pages = {e1005406}, publisher = {Public Library of Science}, abstract = {The evolutionary dynamics of transposable element (TE) insertions have been of continued interest since TE activity has important implications for genome evolution and adaptation. Here, we infer the transposition dynamics of TEs by comparing their abundance in natural D. melanogaster and D. simulans populations. Sequencing pools of more than 550 South African flies to at least 320-fold coverage, we determined the genome wide TE insertion frequencies in both species. We suggest that the predominance of low frequency insertions in the two species (>80% of the insertions have a frequency <0.2) is probably due to a high activity of more than 58 families in both species. We provide evidence for 50% of the TE families having temporally heterogenous transposition rates with different TE families being affected in the two species. While in D. melanogaster retrotransposons were more active, DNA transposons showed higher activity levels in D. simulans. Moreover, we suggest that LTR insertions are mostly of recent origin in both species, while DNA and non-LTR insertions are older and more frequently vertically transmitted since the split of D. melanogaster and D. simulans. 
We propose that the high TE activity is of recent origin in both species and a consequence of the demographic history, with habitat expansion triggering a period of rapid evolution.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The evolutionary dynamics of transposable element (TE) insertions have been of continued interest since TE activity has important implications for genome evolution and adaptation. Here, we infer the transposition dynamics of TEs by comparing their abundance in natural D. melanogaster and D. simulans populations. Sequencing pools of more than 550 South African flies to at least 320-fold coverage, we determined the genome wide TE insertion frequencies in both species. We suggest that the predominance of low frequency insertions in the two species (>80% of the insertions have a frequency <0.2) is probably due to a high activity of more than 58 families in both species. We provide evidence for 50% of the TE families having temporally heterogenous transposition rates with different TE families being affected in the two species. While in D. melanogaster retrotransposons were more active, DNA transposons showed higher activity levels in D. simulans. Moreover, we suggest that LTR insertions are mostly of recent origin in both species, while DNA and non-LTR insertions are older and more frequently vertically transmitted since the split of D. melanogaster and D. simulans. We propose that the high TE activity is of recent origin in both species and a consequence of the demographic history, with habitat expansion triggering a period of rapid evolution. |
Schlötterer, C; Kofler, R; Versace, E; Tobler, R; Franssen, S U Heredity, 114 (5), S. 431–40, 2015, ISSN: 1365-2540. @article{Schlotterer2015, title = {Combining experimental evolution with next-generation sequencing: a powerful tool to study adaptation from standing genetic variation.}, author = {C Schlötterer and R Kofler and E Versace and R Tobler and S U Franssen}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25269380}, doi = {10.1038/hdy.2014.86}, issn = {1365-2540}, year = {2015}, date = {2015-05-01}, journal = {Heredity}, volume = {114}, number = {5}, pages = {431--40}, abstract = {Evolve and resequence (E&R) is a new approach to investigate the genomic responses to selection during experimental evolution. By using whole genome sequencing of pools of individuals (Pool-Seq), this method can identify selected variants in controlled and replicable experimental settings. Reviewing the current state of the field, we show that E&R can be powerful enough to identify causative genes and possibly even single-nucleotide polymorphisms. We also discuss how the experimental design and the complexity of the trait could result in a large number of false positive candidates. We suggest experimental and analytical strategies to maximize the power of E&R to uncover the genotype-phenotype link and serve as an important research tool for a broad range of evolutionary questions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Evolve and resequence (E&R) is a new approach to investigate the genomic responses to selection during experimental evolution. By using whole genome sequencing of pools of individuals (Pool-Seq), this method can identify selected variants in controlled and replicable experimental settings. Reviewing the current state of the field, we show that E&R can be powerful enough to identify causative genes and possibly even single-nucleotide polymorphisms. 
We also discuss how the experimental design and the complexity of the trait could result in a large number of false positive candidates. We suggest experimental and analytical strategies to maximize the power of E&R to uncover the genotype-phenotype link and serve as an important research tool for a broad range of evolutionary questions. |
Kofler, Robert; Hill, Tom; Nolte, Viola; Betancourt, Andrea J; Schlötterer, Christian The recent invasion of natural Drosophila simulans populations by the P-element. Artikel Proceedings of the National Academy of Sciences of the United States of America, 112 (21), S. 6659–63, 2015, ISSN: 1091-6490. @article{Kofler2015a, title = {The recent invasion of natural Drosophila simulans populations by the P-element.}, author = {Robert Kofler and Tom Hill and Viola Nolte and Andrea J Betancourt and Christian Schlötterer}, url = {http://www.pnas.org/content/112/21/6659.abstract}, doi = {10.1073/pnas.1500758112}, issn = {1091-6490}, year = {2015}, date = {2015-05-01}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {112}, number = {21}, pages = {6659--63}, abstract = {The P-element is one of the best understood eukaryotic transposable elements. It invaded Drosophila melanogaster populations within a few decades but was thought to be absent from close relatives, including Drosophila simulans. Five decades after the spread in D. melanogaster, we provide evidence that the P-element has also invaded D. simulans. P-elements in D. simulans appear to have been acquired recently from D. melanogaster probably via a single horizontal transfer event. Expression data indicate that the P-element is processed in the germ line of D. simulans, and genomic data show an enrichment of P-element insertions in putative origins of replication, similar to that seen in D. melanogaster. This ongoing spread of the P-element in natural populations provides a unique opportunity to understand the dynamics of transposable element spread and the associated piwi-interacting RNAs defense mechanisms.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The P-element is one of the best understood eukaryotic transposable elements. 
It invaded Drosophila melanogaster populations within a few decades but was thought to be absent from close relatives, including Drosophila simulans. Five decades after the spread in D. melanogaster, we provide evidence that the P-element has also invaded D. simulans. P-elements in D. simulans appear to have been acquired recently from D. melanogaster probably via a single horizontal transfer event. Expression data indicate that the P-element is processed in the germ line of D. simulans, and genomic data show an enrichment of P-element insertions in putative origins of replication, similar to that seen in D. melanogaster. This ongoing spread of the P-element in natural populations provides a unique opportunity to understand the dynamics of transposable element spread and the associated piwi-interacting RNAs defense mechanisms. |
Topa, Hande; Jónás, Ágnes; Kofler, Robert; Kosiol, Carolin; Honkela, Antti Bioinformatics, 31 (11), S. 1762–1770, 2015, ISSN: 14602059. @article{Topa2015, title = {Gaussian process test for high-throughput sequencing time series: Application to experimental evolution}, author = {Hande Topa and Ágnes Jónás and Robert Kofler and Carolin Kosiol and Antti Honkela}, doi = {10.1093/bioinformatics/btv014}, issn = {14602059}, year = {2015}, date = {2015-01-01}, journal = {Bioinformatics}, volume = {31}, number = {11}, pages = {1762--1770}, abstract = {Motivation: Recent advances in high-throughput sequencing (HTS) have made it possible to monitor genomes in great detail. New experiments not only use HTS to measure genomic features at one time point but to monitor them changing over time with the aim of identifying significant changes in their abundance. In population genetics, for example, allele frequencies are monitored over time to detect significant frequency changes that indicate selection pressures. Previous attempts at analysing data from HTS experiments have been limited as they could not simultaneously include data at intermediate time points, replicate experiments and sources of uncertainty specific to HTS such as sequencing depth. Results: We present the beta-binomial Gaussian process (BBGP) model for ranking features with significant non-random variation in abundance over time. The features are assumed to represent proportions, such as proportion of an alternative allele in a population. We use the beta-binomial model to capture the uncertainty arising from finite sequencing depth and combine it with a Gaussian process model over the time series. In simulations that mimic the features of experimental evolution data, the proposed method clearly outperforms classical testing in average precision of finding selected alleles. 
We also present simulations exploring different experimental design choices and results on real data from Drosophila experimental evolution experiment in temperature adaptation. Availability: R software implementing the test is available at https://github.com/handetopa/BBGP. Contact: hande.topa@aalto.fi, agnes.jonas@vetmeduni.ac.at, carolin.kosiol@vetmeduni.ac.at, antti.honkela@hiit.fi.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Recent advances in high-throughput sequencing (HTS) have made it possible to monitor genomes in great detail. New experiments not only use HTS to measure genomic features at one time point but to monitor them changing over time with the aim of identifying significant changes in their abundance. In population genetics, for example, allele frequencies are monitored over time to detect significant frequency changes that indicate selection pressures. Previous attempts at analysing data from HTS experiments have been limited as they could not simultaneously include data at intermediate time points, replicate experiments and sources of uncertainty specific to HTS such as sequencing depth. Results: We present the beta-binomial Gaussian process (BBGP) model for ranking features with significant non-random variation in abundance over time. The features are assumed to represent proportions, such as proportion of an alternative allele in a population. We use the beta-binomial model to capture the uncertainty arising from finite sequencing depth and combine it with a Gaussian process model over the time series. In simulations that mimic the features of experimental evolution data, the proposed method clearly outperforms classical testing in average precision of finding selected alleles. 
We also present simulations exploring different experimental design choices and results on real data from Drosophila experimental evolution experiment in temperature adaptation. Availability: R software implementing the test is available at https://github.com/handetopa/BBGP. Contact: hande.topa@aalto.fi, agnes.jonas@vetmeduni.ac.at, carolin.kosiol@vetmeduni.ac.at, antti.honkela@hiit.fi. |
2014 |
Schlötterer, Christian; Tobler, Raymond; Kofler, Robert; Nolte, Viola Sequencing pools of individuals-mining genome-wide polymorphism data without big funding Artikel Nature Reviews Genetics, 15 (11), S. 749–763, 2014, ISSN: 14710064. @article{Schlotterer2014, title = {Sequencing pools of individuals-mining genome-wide polymorphism data without big funding}, author = {Christian Schlötterer and Raymond Tobler and Robert Kofler and Viola Nolte}, url = {http://dx.doi.org/10.1038/nrg3803}, doi = {10.1038/nrg3803}, issn = {14710064}, year = {2014}, date = {2014-09-01}, journal = {Nature Reviews Genetics}, volume = {15}, number = {11}, pages = {749--763}, publisher = {Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, abstract = {The analysis of polymorphism data is becoming increasingly important as a complementary tool to classical genetic analyses. Nevertheless, despite plunging sequencing costs, genomic sequencing of individuals at the population scale is still restricted to a few model species. Whole-genome sequencing of pools of individuals (Pool-seq) provides a cost-effective alternative to sequencing individuals separately. With the availability of custom-tailored software tools, Pool-seq is being increasingly used for population genomic research on both model and non-model organisms. In this Review, we not only demonstrate the breadth of questions that are being addressed by Pool-seq but also discuss its limitations and provide guidelines for users.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The analysis of polymorphism data is becoming increasingly important as a complementary tool to classical genetic analyses. Nevertheless, despite plunging sequencing costs, genomic sequencing of individuals at the population scale is still restricted to a few model species. Whole-genome sequencing of pools of individuals (Pool-seq) provides a cost-effective alternative to sequencing individuals separately. 
With the availability of custom-tailored software tools, Pool-seq is being increasingly used for population genomic research on both model and non-model organisms. In this Review, we not only demonstrate the breadth of questions that are being addressed by Pool-seq but also discuss its limitations and provide guidelines for users. |
Tobler, Ray; Franssen, Susanne U; Kofler, Robert; Orozco-Terwengel, Pablo; Nolte, Viola; Hermisson, Joachim; Schlötterer, Christian Molecular Biology and Evolution, 31 (2), S. 364–375, 2014, ISSN: 07374038. @article{Tobler2014, title = {Massive habitat-specific genomic response in D. melanogaster populations during experimental evolution in hot and cold environments}, author = {Ray Tobler and Susanne U Franssen and Robert Kofler and Pablo Orozco-Terwengel and Viola Nolte and Joachim Hermisson and Christian Schlötterer}, doi = {10.1093/molbev/mst205}, issn = {07374038}, year = {2014}, date = {2014-01-01}, journal = {Molecular Biology and Evolution}, volume = {31}, number = {2}, pages = {364--375}, abstract = {Experimental evolution in combination with whole-genome sequencing (evolve and resequence [E&R]) is a promising approach to define the genotype-phenotype map and to understand adaptation in evolving populations. Many previous studies have identified a large number of putative selected sites (i.e., candidate loci), but it remains unclear to what extent these loci are genuine targets of selection or experimental noise. To address this question, we exposed the same founder population to two different selection regimes-a hot environment and a cold environment-and quantified the genomic response in each. We detected large numbers of putative selected loci in both environments, albeit with little overlap between the two sets of candidates, indicating that most resulted from habitat-specific selection. By quantifying changes across multiple independent biological replicates, we demonstrate that most of the candidate SNPs were false positives that were linked to selected sites over distances much larger than the typical linkage disequilibrium range of Drosophila melanogaster. 
We show that many of these mid- to long-range associations were attributable to large segregating inversions and confirm by computer simulations that such patterns could be readily replicated when strong selection acts on rare haplotypes. In light of our findings, we outline recommendations to improve the performance of future Drosophila E&R studies which include using species with negligible inversion loads, such as D. mauritiana and D. simulans, instead of D. melanogaster.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Experimental evolution in combination with whole-genome sequencing (evolve and resequence [E&R]) is a promising approach to define the genotype-phenotype map and to understand adaptation in evolving populations. Many previous studies have identified a large number of putative selected sites (i.e., candidate loci), but it remains unclear to what extent these loci are genuine targets of selection or experimental noise. To address this question, we exposed the same founder population to two different selection regimes-a hot environment and a cold environment-and quantified the genomic response in each. We detected large numbers of putative selected loci in both environments, albeit with little overlap between the two sets of candidates, indicating that most resulted from habitat-specific selection. By quantifying changes across multiple independent biological replicates, we demonstrate that most of the candidate SNPs were false positives that were linked to selected sites over distances much larger than the typical linkage disequilibrium range of Drosophila melanogaster. We show that many of these mid- to long-range associations were attributable to large segregating inversions and confirm by computer simulations that such patterns could be readily replicated when strong selection acts on rare haplotypes. 
In light of our findings, we outline recommendations to improve the performance of future Drosophila E&R studies which include using species with negligible inversion loads, such as D. mauritiana and D. simulans, instead of D. melanogaster. |
2013 |
Kofler, Robert; Schlötterer, Christian A Guide for the Design of Evolve and Resequencing Studies. Artikel Molecular biology and evolution, 31 (2), S. 474–483, 2013, ISSN: 1537-1719. @article{Kofler2013, title = {A Guide for the Design of Evolve and Resequencing Studies.}, author = {Robert Kofler and Christian Schlötterer}, url = {http://mbe.oxfordjournals.org/content/31/2/474}, doi = {10.1093/molbev/mst221}, issn = {1537-1719}, year = {2013}, date = {2013-11-01}, journal = {Molecular biology and evolution}, volume = {31}, number = {2}, pages = {474--483}, abstract = {Standing genetic variation provides a rich reservoir of potentially useful mutations facilitating the adaptation to novel environments. Experimental evolution studies have demonstrated that rapid and strong phenotypic responses to selection can also be obtained in the laboratory. When combined with the next-generation sequencing technology, these experiments promise to identify the individual loci contributing to adaption. Nevertheless, until now, very little is known about the design of such evolve & resequencing (E&R) studies. Here, we use forward simulations of entire genomes to evaluate different experimental designs that aim to maximize the power to detect selected variants. We show that low linkage disequilibrium in the starting population, population size, duration of the experiment, and the number of replicates are the key factors in determining the power and accuracy of E&R studies. Furthermore, replication of E&R is more important for detecting the targets of selection than increasing the population size. Using an optimized design, beneficial loci with a selective advantage as low as s = 0.005 can be identified at the nucleotide level. Even when a large number of loci are selected simultaneously, up to 56% can be reliably detected without incurring large numbers of false positives. 
Our computer simulations suggest that, with an adequate experimental design, E&R studies are a powerful tool to identify adaptive mutations from standing genetic variation and thereby provide an excellent means to analyze the trajectories of selected alleles in evolving populations.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Standing genetic variation provides a rich reservoir of potentially useful mutations facilitating the adaptation to novel environments. Experimental evolution studies have demonstrated that rapid and strong phenotypic responses to selection can also be obtained in the laboratory. When combined with the next-generation sequencing technology, these experiments promise to identify the individual loci contributing to adaption. Nevertheless, until now, very little is known about the design of such evolve & resequencing (E&R) studies. Here, we use forward simulations of entire genomes to evaluate different experimental designs that aim to maximize the power to detect selected variants. We show that low linkage disequilibrium in the starting population, population size, duration of the experiment, and the number of replicates are the key factors in determining the power and accuracy of E&R studies. Furthermore, replication of E&R is more important for detecting the targets of selection than increasing the population size. Using an optimized design, beneficial loci with a selective advantage as low as s = 0.005 can be identified at the nucleotide level. Even when a large number of loci are selected simultaneously, up to 56% can be reliably detected without incurring large numbers of false positives. Our computer simulations suggest that, with an adequate experimental design, E&R studies are a powerful tool to identify adaptive mutations from standing genetic variation and thereby provide an excellent means to analyze the trajectories of selected alleles in evolving populations. |
Llorens, Franc; Hummel, Manuela; Pantano, Lorena; Pastor, Xavier; Vivancos, Ana; Castillo, Ester; Matllin, Heidi; Ferrer, Anna; Ingham, Matthew; Noguera, Marc; Kofler, Robert; Dohm, Juliane C; Pluvinet, Raquel; Bayés, Mònica; Himmelbauer, Heinz; del Rio, José Antonio; Martí, Eulàlia; Sumoy, Lauro BMC Genomics, 14 (1), S. 371, 2013, ISSN: 1471-2164. @article{Llorens2013, title = {Microarray and deep sequencing cross-platform analysis of the mirRNome and isomiR variation in response to epidermal growth factor}, author = {Franc Llorens and Manuela Hummel and Lorena Pantano and Xavier Pastor and Ana Vivancos and Ester Castillo and Heidi Matllin and Anna Ferrer and Matthew Ingham and Marc Noguera and Robert Kofler and Juliane C Dohm and Raquel Pluvinet and Mònica Bayés and Heinz Himmelbauer and José Antonio del Rio and Eulàlia Martí and Lauro Sumoy}, url = {http://www.biomedcentral.com/1471-2164/14/371}, doi = {10.1186/1471-2164-14-371}, issn = {1471-2164}, year = {2013}, date = {2013-01-01}, journal = {BMC Genomics}, volume = {14}, number = {1}, pages = {371}, abstract = {BACKGROUND: Epidermal Growth Factor (EGF) plays an important function in the regulation of cell growth, proliferation, and differentiation by binding to its receptor (EGFR) and providing cancer cells with increased survival responsiveness. Signal transduction carried out by EGF has been extensively studied at both transcriptional and post-transcriptional levels. Little is known about the involvement of microRNAs (miRNAs) in the EGF signaling pathway. miRNAs have emerged as major players in the complex networks of gene regulation, and cancer miRNA expression studies have evidenced a direct involvement of miRNAs in cancer progression. RESULTS: In this study, we have used an integrative high content analysis approach to identify the specific miRNAs implicated in EGF signaling in HeLa cells as potential mediators of cancer mediated functions. 
We have used microarray and deep-sequencing technologies in order to obtain a global view of the EGF miRNA transcriptome with a robust experimental cross-validation. By applying a procedure based on Rankprod tests, we have delimited a solid set of EGF-regulated miRNAs. After validating regulated miRNAs by reverse transcription quantitative PCR, we have derived protein networks and biological functions from the predicted targets of the regulated miRNAs to gain insight into the potential role of miRNAs in EGF-treated cells. In addition, we have analyzed sequence heterogeneity due to editing relative to the reference sequence (isomiRs) among regulated miRNAs. CONCLUSIONS: We propose that the use of global genomic miRNA cross-validation derived from high throughput technologies can be used to generate more reliable datasets inferring more robust networks of co-regulated predicted miRNA target genes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND: Epidermal Growth Factor (EGF) plays an important function in the regulation of cell growth, proliferation, and differentiation by binding to its receptor (EGFR) and providing cancer cells with increased survival responsiveness. Signal transduction carried out by EGF has been extensively studied at both transcriptional and post-transcriptional levels. Little is known about the involvement of microRNAs (miRNAs) in the EGF signaling pathway. miRNAs have emerged as major players in the complex networks of gene regulation, and cancer miRNA expression studies have evidenced a direct involvement of miRNAs in cancer progression. RESULTS: In this study, we have used an integrative high content analysis approach to identify the specific miRNAs implicated in EGF signaling in HeLa cells as potential mediators of cancer mediated functions. We have used microarray and deep-sequencing technologies in order to obtain a global view of the EGF miRNA transcriptome with a robust experimental cross-validation. 
By applying a procedure based on Rankprod tests, we have delimited a solid set of EGF-regulated miRNAs. After validating regulated miRNAs by reverse transcription quantitative PCR, we have derived protein networks and biological functions from the predicted targets of the regulated miRNAs to gain insight into the potential role of miRNAs in EGF-treated cells. In addition, we have analyzed sequence heterogeneity due to editing relative to the reference sequence (isomiRs) among regulated miRNAs. CONCLUSIONS: We propose that the use of global genomic miRNA cross-validation derived from high throughput technologies can be used to generate more reliable datasets inferring more robust networks of co-regulated predicted miRNA target genes. |
Boitard, Simon; Kofler, Robert; Françoise, Pierre; Robelin, David; Schlötterer, Christian; Futschik, Andreas Molecular Ecology Resources, S. n/a–n/a, 2013, ISSN: 1755098X. @article{Boitard2013, title = {Pool-hmm: a Python program for estimating the allele frequency spectrum and detecting selective sweeps from next generation sequencing of pooled samples}, author = {Simon Boitard and Robert Kofler and Pierre Françoise and David Robelin and Christian Schlötterer and Andreas Futschik}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23311589}, doi = {10.1111/1755-0998.12063}, issn = {1755098X}, year = {2013}, date = {2013-01-01}, journal = {Molecular Ecology Resources}, pages = {n/a--n/a}, abstract = {Due to its cost effectiveness, next generation sequencing of pools of individuals (Pool-Seq) is becoming a popular strategy for genome-wide estimation of allele frequencies in population samples. As the allele frequency spectrum provides information about past episodes of selection, Pool-seq is also a promising design for genomic scans for selection. However, no software tool has yet been developed for selection scans based on Pool-Seq data. We introduce Pool-hmm, a Python program for the estimation of allele frequencies and the detection of selective sweeps in a Pool-Seq sample. Pool-hmm includes several options that allow a flexible analysis of Pool-Seq data, and can be run in parallel on several processors. Source code and documentation for Pool-hmm is freely available at https://qgsp.jouy.inra.fr/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Due to its cost effectiveness, next generation sequencing of pools of individuals (Pool-Seq) is becoming a popular strategy for genome-wide estimation of allele frequencies in population samples. As the allele frequency spectrum provides information about past episodes of selection, Pool-seq is also a promising design for genomic scans for selection. 
However, no software tool has yet been developed for selection scans based on Pool-Seq data. We introduce Pool-hmm, a Python program for the estimation of allele frequencies and the detection of selective sweeps in a Pool-Seq sample. Pool-hmm includes several options that allow a flexible analysis of Pool-Seq data, and can be run in parallel on several processors. Source code and documentation for Pool-hmm is freely available at https://qgsp.jouy.inra.fr/. |
2012 |
Nolte, V; Pandey, R V; Kofler, R; Schlotterer, C Genome Research, 23 (1), S. 99–110, 2012, ISSN: 1088-9051. @article{Nolte2012, title = {Genome-wide patterns of natural variation reveal strong selective sweeps and ongoing genomic conflict in Drosophila mauritiana}, author = {V Nolte and R V Pandey and R Kofler and C Schlotterer}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23051690}, doi = {10.1101/gr.139873.112}, issn = {1088-9051}, year = {2012}, date = {2012-10-01}, journal = {Genome Research}, volume = {23}, number = {1}, pages = {99--110}, abstract = {While it is well understood that selection shapes the polymorphism pattern in Drosophila, signatures of classic selective sweeps are scarce. Here, we focus on D. mauritiana, an island endemic, which is closely related to D. melanogaster. Based on a new, annotated genome sequence we characterized the genome-wide polymorphism by sequencing pooled individuals (Pool-Seq). We show that the interplay between selection and recombination results in a genome-wide polymorphism pattern characteristic for D. mauritiana. Two large genomic regions ({\textgreater} 500 kb) showed the signature of almost complete selective sweeps. We propose that the absence of population structure and limited geographic distribution could explain why such pronounced sweep patterns are restricted to D. mauritiana. Further evidence for strong adaptive evolution was detected for several nucleoporin genes, some of which were not previously identified as genes involved in genomic conflict. Since this adaptive evolution is continuing after the split of D. mauritiana and D. simulans, we conclude that genomic conflict is not restricted to short episodes, but rather an ongoing process in Drosophila.}, keywords = {}, pubstate = {published}, tppubtype = {article} } While it is well understood that selection shapes the polymorphism pattern in Drosophila, signatures of classic selective sweeps are scarce. Here, we focus on D. 
mauritiana, an island endemic, which is closely related to D. melanogaster. Based on a new, annotated genome sequence we characterized the genome-wide polymorphism by sequencing pooled individuals (Pool-Seq). We show that the interplay between selection and recombination results in a genome-wide polymorphism pattern characteristic for D. mauritiana. Two large genomic regions (> 500 kb) showed the signature of almost complete selective sweeps. We propose that the absence of population structure and limited geographic distribution could explain why such pronounced sweep patterns are restricted to D. mauritiana. Further evidence for strong adaptive evolution was detected for several nucleoporin genes, some of which were not previously identified as genes involved in genomic conflict. Since this adaptive evolution is continuing after the split of D. mauritiana and D. simulans, we conclude that genomic conflict is not restricted to short episodes, but rather an ongoing process in Drosophila. |
Fabian, Daniel K; Kapun, Martin; Nolte, Viola; Kofler, Robert; Schmidt, Paul S; Schlötterer, Christian; Flatt, Thomas Molecular Ecology, S. n/a–n/a, 2012, ISSN: 09621083. @article{Fabian2012, title = {Genome-wide patterns of latitudinal differentiation among populations of Drosophila melanogaster from North America}, author = {Daniel K Fabian and Martin Kapun and Viola Nolte and Robert Kofler and Paul S Schmidt and Christian Schlötterer and Thomas Flatt}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22913798}, doi = {10.1111/j.1365-294X.2012.05731.x}, issn = {09621083}, year = {2012}, date = {2012-08-01}, journal = {Molecular Ecology}, pages = {n/a--n/a}, abstract = {Understanding the genetic underpinnings of adaptive change is a fundamental but largely unresolved problem in evolutionary biology. Drosophila melanogaster, an ancestrally tropical insect that has spread to temperate regions and become cosmopolitan, offers a powerful opportunity for identifying the molecular polymorphisms underlying clinal adaptation. Here, we use genome-wide next-generation sequencing of DNA pools ('pool-seq') from three populations collected along the North American east coast to examine patterns of latitudinal differentiation. Comparing the genomes of these populations is particularly interesting since they exhibit clinal variation in a number of important life history traits. We find extensive latitudinal differentiation, with many of the most strongly differentiated genes involved in major functional pathways such as the insulin/TOR, ecdysone, torso, EGFR, TGF$\beta$/BMP, JAK/STAT, immunity and circadian rhythm pathways. We observe particularly strong differentiation on chromosome 3R, especially within the cosmopolitan inversion In(3R)Payne, which contains a large number of clinally varying genes. While much of the differentiation might be driven by clinal differences in the frequency of In(3R)P, we also identify genes that are likely independent of this inversion. 
Our results provide genome-wide evidence consistent with pervasive spatially variable selection acting on numerous loci and pathways along the well-known North American cline, with many candidates implicated in life history regulation and exhibiting parallel differentiation along the previously investigated Australian cline.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Understanding the genetic underpinnings of adaptive change is a fundamental but largely unresolved problem in evolutionary biology. Drosophila melanogaster, an ancestrally tropical insect that has spread to temperate regions and become cosmopolitan, offers a powerful opportunity for identifying the molecular polymorphisms underlying clinal adaptation. Here, we use genome-wide next-generation sequencing of DNA pools ('pool-seq') from three populations collected along the North American east coast to examine patterns of latitudinal differentiation. Comparing the genomes of these populations is particularly interesting since they exhibit clinal variation in a number of important life history traits. We find extensive latitudinal differentiation, with many of the most strongly differentiated genes involved in major functional pathways such as the insulin/TOR, ecdysone, torso, EGFR, TGFβ/BMP, JAK/STAT, immunity and circadian rhythm pathways. We observe particularly strong differentiation on chromosome 3R, especially within the cosmopolitan inversion In(3R)Payne, which contains a large number of clinally varying genes. While much of the differentiation might be driven by clinal differences in the frequency of In(3R)P, we also identify genes that are likely independent of this inversion. 
Our results provide genome-wide evidence consistent with pervasive spatially variable selection acting on numerous loci and pathways along the well-known North American cline, with many candidates implicated in life history regulation and exhibiting parallel differentiation along the previously investigated Australian cline. |
Orozco-Terwengel, Pablo; Kapun, Martin; Nolte, Viola; Kofler, Robert; Flatt, Thomas; Schlötterer, Christian Molecular Ecology, 21 (20), S. 4931–4941, 2012, ISSN: 09621083. @article{Orozco-Terwengel2012, title = {Adaptation of Drosophila to a novel laboratory environment reveals temporally heterogeneous trajectories of selected alleles}, author = {Pablo Orozco-Terwengel and Martin Kapun and Viola Nolte and Robert Kofler and Thomas Flatt and Christian Schlötterer}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22726122}, doi = {10.1111/j.1365-294X.2012.05673.x}, issn = {09621083}, year = {2012}, date = {2012-06-01}, journal = {Molecular Ecology}, volume = {21}, number = {20}, pages = {4931--4941}, abstract = {The genomic basis of adaptation to novel environments is a fundamental problem in evolutionary biology that has gained additional importance in the light of the recent global change discussion. Here, we combined laboratory natural selection (experimental evolution) in Drosophila melanogaster with genome-wide next generation sequencing of DNA pools (Pool-Seq) to identify alleles that are favourable in a novel laboratory environment and traced their trajectories during the adaptive process. Already after 15 generations, we identified a pronounced genomic response to selection, with almost 5000 single nucleotide polymorphisms (SNP; genome-wide false discovery rates {\textless} 0.005%) deviating from neutral expectation. Importantly, the evolutionary trajectories of the selected alleles were heterogeneous, with the alleles falling into two distinct classes: (i) alleles that continuously rise in frequency; and (ii) alleles that at first increase rapidly but whose frequencies then reach a plateau. 
Our data thus suggest that the genomic response to selection can involve a large number of selected SNPs that show unexpectedly complex evolutionary trajectories, possibly due to nonadditive effects.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The genomic basis of adaptation to novel environments is a fundamental problem in evolutionary biology that has gained additional importance in the light of the recent global change discussion. Here, we combined laboratory natural selection (experimental evolution) in Drosophila melanogaster with genome-wide next generation sequencing of DNA pools (Pool-Seq) to identify alleles that are favourable in a novel laboratory environment and traced their trajectories during the adaptive process. Already after 15 generations, we identified a pronounced genomic response to selection, with almost 5000 single nucleotide polymorphisms (SNP; genome-wide false discovery rates < 0.005%) deviating from neutral expectation. Importantly, the evolutionary trajectories of the selected alleles were heterogeneous, with the alleles falling into two distinct classes: (i) alleles that continuously rise in frequency; and (ii) alleles that at first increase rapidly but whose frequencies then reach a plateau. Our data thus suggest that the genomic response to selection can involve a large number of selected SNPs that show unexpectedly complex evolutionary trajectories, possibly due to nonadditive effects. |
Kofler, Robert; Schlötterer, Christian Gowinda: unbiased analysis of gene set enrichment for Genome Wide Association Studies. Artikel Bioinformatics (Oxford, England), 2012, ISSN: 1367-4811. @article{Kofler2012a, title = {Gowinda: unbiased analysis of gene set enrichment for Genome Wide Association Studies.}, author = {Robert Kofler and Christian Schlötterer}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22635606}, doi = {10.1093/bioinformatics/bts315}, issn = {1367-4811}, year = {2012}, date = {2012-05-01}, journal = {Bioinformatics (Oxford, England)}, abstract = {SUMMARY: An analysis of gene set (e.g.: Gene Ontology) enrichment assumes that all genes are sampled independently from each other with the same probability. These assumptions are violated in Genome Wide Association (GWA) studies since (i) longer genes typically have more SNPs resulting in a higher probability of being sampled and (ii) overlapping genes are sampled in clusters. Here we introduce Gowinda, a software specifically designed to test for enrichment of gene sets in GWA studies. We show that Gene Ontology (GO) tests on GWA data could result in a substantial number of false positive GO terms. Permutation tests implemented in Gowinda eliminate these biases, but maintain sufficient power to detect enrichment of GO terms. 
Since, sufficient resolution for large data sets requires millions of permutations, we use multi-threading to keep computation times reasonable. Availability and implementation: Gowinda is implemented in Java (v1.6) and freely available on http://code.google.com/p/gowinda/ CONTACT: christian.schloetterer@vetmeduni.ac.at SUPPLEMENTARY INFORMATION: Manual: http://code.google.com/p/gowinda/wiki/Manual Test data and tutorial: http://code.google.com/p/gowinda/wiki/Tutorial Validation: http://code.google.com/p/gowinda/wiki/Validation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } SUMMARY: An analysis of gene set (e.g.: Gene Ontology) enrichment assumes that all genes are sampled independently from each other with the same probability. These assumptions are violated in Genome Wide Association (GWA) studies since (i) longer genes typically have more SNPs resulting in a higher probability of being sampled and (ii) overlapping genes are sampled in clusters. Here we introduce Gowinda, a software specifically designed to test for enrichment of gene sets in GWA studies. We show that Gene Ontology (GO) tests on GWA data could result in a substantial number of false positive GO terms. Permutation tests implemented in Gowinda eliminate these biases, but maintain sufficient power to detect enrichment of GO terms. Since, sufficient resolution for large data sets requires millions of permutations, we use multi-threading to keep computation times reasonable. Availability and implementation: Gowinda is implemented in Java (v1.6) and freely available on http://code.google.com/p/gowinda/ CONTACT: christian.schloetterer@vetmeduni.ac.at SUPPLEMENTARY INFORMATION: Manual: http://code.google.com/p/gowinda/wiki/Manual Test data and tutorial: http://code.google.com/p/gowinda/wiki/Tutorial Validation: http://code.google.com/p/gowinda/wiki/Validation. |
Kofler, Robert; Betancourt, Andrea J; Schlötterer, Christian PLoS genetics, 8 (1), S. e1002487, 2012, ISSN: 1553-7404. @article{Kofler2012, title = {Sequencing of Pooled DNA Samples (Pool-Seq) Uncovers Complex Dynamics of Transposable Element Insertions in Drosophila melanogaster.}, author = {Robert Kofler and Andrea J Betancourt and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3266889&tool=pmcentrez&rendertype=abstract}, doi = {10.1371/journal.pgen.1002487}, issn = {1553-7404}, year = {2012}, date = {2012-01-01}, journal = {PLoS genetics}, volume = {8}, number = {1}, pages = {e1002487}, abstract = {Transposable elements (TEs) are mobile genetic elements that parasitize genomes by semi-autonomously increasing their own copy number within the host genome. While TEs are important for genome evolution, appropriate methods for performing unbiased genome-wide surveys of TE variation in natural populations have been lacking. Here, we describe a novel and cost-effective approach for estimating population frequencies of TE insertions using paired-end Illumina reads from a pooled population sample. Importantly, the method treats insertions present in and absent from the reference genome identically, allowing unbiased TE population frequency estimates. We apply this method to data from a natural Drosophila melanogaster population from Portugal. Consistent with previous reports, we show that low recombining genomic regions harbor more TE insertions and maintain insertions at higher frequencies than do high recombining regions. We conservatively estimate that there are almost twice as many "novel" TE insertion sites as sites known from the reference sequence in our population sample (6,824 novel versus 3,639 reference sites, with on average a 31-fold coverage per insertion site). Different families of transposable elements show large differences in their insertion densities and population frequencies. 
Our analyses suggest that the history of TE activity significantly contributes to this pattern, with recently active families segregating at lower frequencies than those active in the more distant past. Finally, using our high-resolution TE abundance measurements, we identified 13 candidate positively selected TE insertions based on their high population frequencies and on low Tajima's D values in their neighborhoods.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Transposable elements (TEs) are mobile genetic elements that parasitize genomes by semi-autonomously increasing their own copy number within the host genome. While TEs are important for genome evolution, appropriate methods for performing unbiased genome-wide surveys of TE variation in natural populations have been lacking. Here, we describe a novel and cost-effective approach for estimating population frequencies of TE insertions using paired-end Illumina reads from a pooled population sample. Importantly, the method treats insertions present in and absent from the reference genome identically, allowing unbiased TE population frequency estimates. We apply this method to data from a natural Drosophila melanogaster population from Portugal. Consistent with previous reports, we show that low recombining genomic regions harbor more TE insertions and maintain insertions at higher frequencies than do high recombining regions. We conservatively estimate that there are almost twice as many "novel" TE insertion sites as sites known from the reference sequence in our population sample (6,824 novel versus 3,639 reference sites, with on average a 31-fold coverage per insertion site). Different families of transposable elements show large differences in their insertion densities and population frequencies. 
Our analyses suggest that the history of TE activity significantly contributes to this pattern, with recently active families segregating at lower frequencies than those active in the more distant past. Finally, using our high-resolution TE abundance measurements, we identified 13 candidate positively selected TE insertions based on their high population frequencies and on low Tajima's D values in their neighborhoods. |
2011 |
Kofler, Robert; Pandey, Ram Vinay; Schlötterer, Christian Bioinformatics (Oxford, England), 27 (24), S. 3435–6, 2011, ISSN: 1367-4811. @article{Kofler2011a, title = {PoPoolation2: identifying differentiation between populations using sequencing of pooled DNA samples (Pool-Seq).}, author = {Robert Kofler and Ram Vinay Pandey and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3232374&tool=pmcentrez&rendertype=abstract}, doi = {10.1093/bioinformatics/btr589}, issn = {1367-4811}, year = {2011}, date = {2011-12-01}, journal = {Bioinformatics (Oxford, England)}, volume = {27}, number = {24}, pages = {3435--6}, abstract = {Sequencing pooled DNA samples (Pool-Seq) is the most cost-effective approach for the genome-wide comparison of population samples. Here, we introduce PoPoolation2, the first software tool specifically designed for the comparison of populations with Pool-Seq data. PoPoolation2 implements a range of commonly used measures of differentiation (F(ST), Fisher's exact test and Cochran-Mantel-Haenszel test) that can be applied on different scales (windows, genes, exons, SNPs). The result may be visualized with the widely used Integrated Genomics Viewer. AVAILABILITY AND IMPLEMENTATION: PoPoolation2 is implemented in Perl and R. It is freely available on http://code.google.com/p/popoolation2/}, keywords = {}, pubstate = {published}, tppubtype = {article} } Sequencing pooled DNA samples (Pool-Seq) is the most cost-effective approach for the genome-wide comparison of population samples. Here, we introduce PoPoolation2, the first software tool specifically designed for the comparison of populations with Pool-Seq data. PoPoolation2 implements a range of commonly used measures of differentiation (F(ST), Fisher's exact test and Cochran-Mantel-Haenszel test) that can be applied on different scales (windows, genes, exons, SNPs). The result may be visualized with the widely used Integrated Genomics Viewer. 
AVAILABILITY AND IMPLEMENTATION: PoPoolation2 is implemented in Perl and R. It is freely available on http://code.google.com/p/popoolation2/ |
Esteve-Codina, A; Kofler, R; Himmelbauer, H; Ferretti, L; Vivancos, A P; Groenen, M A M; Folch, J M; Rodríguez, M C; Pérez-Enciso, M Partial short-read sequencing of a highly inbred Iberian pig and genomics inference thereof. Artikel Heredity, 107 (3), S. 256–64, 2011, ISSN: 1365-2540. @article{Esteve-Codina2011b, title = {Partial short-read sequencing of a highly inbred Iberian pig and genomics inference thereof.}, author = {A Esteve-Codina and R Kofler and H Himmelbauer and L Ferretti and A P Vivancos and M A M Groenen and J M Folch and M C Rodríguez and M Pérez-Enciso}, url = {http://www.ncbi.nlm.nih.gov/pubmed/21407255}, doi = {10.1038/hdy.2011.13}, issn = {1365-2540}, year = {2011}, date = {2011-09-01}, journal = {Heredity}, volume = {107}, number = {3}, pages = {256--64}, abstract = {Despite dramatic reduction in sequencing costs with the advent of next generation sequencing technologies, obtaining a complete mammalian genome sequence at sufficient depth is still costly. An alternative is partial sequencing. Here, we have sequenced a reduced representation library of an Iberian sow from the Guadyerbas strain, a highly inbred strain that has been used in numerous QTL studies because of its extreme phenotypic characteristics. Using the Illumina Genome Analyzer II (San Diego, CA, USA), we resequenced ∼ 1% of the genome with average 4 × depth, identifying 68,778 polymorphisms. Of these, 55,457 were putative fixed differences with respect to the assembly, based on the genome of a Duroc pig, and 13,321 were heterozygous positions within Guadyerbas. Despite being highly inbred, the estimate of heterozygosity within Guadyerbas was ∼ 0.78 kb(-1) in autosomes, after correcting for low depth. Nucleotide variability was consistently higher at the telomeric regions than on the rest of the chromosome, likely a result of increased recombination rates. 
Further, variability was 50% lower in the X-chromosome than in autosomes, which may be explained by a recent bottleneck or by selection. We divided the whole genome in 500 kb windows and we analyzed overrepresented gene ontology terms in regions of low and high variability. Multi organism process, pigmentation and cell killing were overrepresented in high variability regions and metabolic process ontology, within low variability regions. Further, a genome wide Hudson-Kreitman-Aguadé test was carried out per window; overall, variability was in agreement with neutral expectations.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Despite dramatic reduction in sequencing costs with the advent of next generation sequencing technologies, obtaining a complete mammalian genome sequence at sufficient depth is still costly. An alternative is partial sequencing. Here, we have sequenced a reduced representation library of an Iberian sow from the Guadyerbas strain, a highly inbred strain that has been used in numerous QTL studies because of its extreme phenotypic characteristics. Using the Illumina Genome Analyzer II (San Diego, CA, USA), we resequenced ∼ 1% of the genome with average 4 × depth, identifying 68,778 polymorphisms. Of these, 55,457 were putative fixed differences with respect to the assembly, based on the genome of a Duroc pig, and 13,321 were heterozygous positions within Guadyerbas. Despite being highly inbred, the estimate of heterozygosity within Guadyerbas was ∼ 0.78 kb(-1) in autosomes, after correcting for low depth. Nucleotide variability was consistently higher at the telomeric regions than on the rest of the chromosome, likely a result of increased recombination rates. Further, variability was 50% lower in the X-chromosome than in autosomes, which may be explained by a recent bottleneck or by selection. We divided the whole genome in 500 kb windows and we analyzed overrepresented gene ontology terms in regions of low and high variability. 
Multi organism process, pigmentation and cell killing were overrepresented in high variability regions and metabolic process ontology, within low variability regions. Further, a genome wide Hudson-Kreitman-Aguadé test was carried out per window; overall, variability was in agreement with neutral expectations. |
Kofler, Robert; Orozco-terWengel, Pablo; De Maio, Nicola; Pandey, Ram Vinay; Nolte, Viola; Futschik, Andreas; Kosiol, Carolin; Schlötterer, Christian PloS one, 6 (1), S. e15925, 2011, ISSN: 1932-6203. @article{Kofler2011, title = {PoPoolation: a toolbox for population genetic analysis of next generation sequencing data from pooled individuals.}, author = {Robert Kofler and Pablo Orozco-terWengel and Nicola {De Maio} and Ram Vinay Pandey and Viola Nolte and Andreas Futschik and Carolin Kosiol and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3017084&tool=pmcentrez&rendertype=abstract}, doi = {10.1371/journal.pone.0015925}, issn = {1932-6203}, year = {2011}, date = {2011-01-01}, journal = {PloS one}, volume = {6}, number = {1}, pages = {e15925}, abstract = {Recent statistical analyses suggest that sequencing of pooled samples provides a cost effective approach to determine genome-wide population genetic parameters. Here we introduce PoPoolation, a toolbox specifically designed for the population genetic analysis of sequence data from pooled individuals. PoPoolation calculates estimates of $\theta$(Watterson), $\theta$($\pi$), and Tajima's D that account for the bias introduced by pooling and sequencing errors, as well as divergence between species. Results of genome-wide analyses can be graphically displayed in a sliding window plot. PoPoolation is written in Perl and R and it builds on commonly used data formats. Its source code can be downloaded from http://code.google.com/p/popoolation/. Furthermore, we evaluate the influence of mapping algorithms, sequencing errors, and read coverage on the accuracy of population genetic parameter estimates from pooled data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Recent statistical analyses suggest that sequencing of pooled samples provides a cost effective approach to determine genome-wide population genetic parameters. 
Here we introduce PoPoolation, a toolbox specifically designed for the population genetic analysis of sequence data from pooled individuals. PoPoolation calculates estimates of $\theta$(Watterson), $\theta$($\pi$), and Tajima's D that account for the bias introduced by pooling and sequencing errors, as well as divergence between species. Results of genome-wide analyses can be graphically displayed in a sliding window plot. PoPoolation is written in Perl and R and it builds on commonly used data formats. Its source code can be downloaded from http://code.google.com/p/popoolation/. Furthermore, we evaluate the influence of mapping algorithms, sequencing errors, and read coverage on the accuracy of population genetic parameter estimates from pooled data. |
Pandey, Ram Vinay; Kofler, Robert; Orozco-terWengel, Pablo; Nolte, Viola; Schlötterer, Christian BMC genetics, 12 , S. 27, 2011, ISSN: 1471-2156. @article{Pandey2011, title = {PoPoolation DB: a user-friendly web-based database for the retrieval of natural polymorphisms in Drosophila.}, author = {Ram Vinay Pandey and Robert Kofler and Pablo Orozco-terWengel and Viola Nolte and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3060855&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2156-12-27}, issn = {1471-2156}, year = {2011}, date = {2011-01-01}, journal = {BMC genetics}, volume = {12}, pages = {27}, abstract = {The enormous potential of natural variation for the functional characterization of genes has been neglected for a long time. Only since recently, functional geneticists are starting to account for natural variation in their analyses. With the new sequencing technologies it has become feasible to collect sequence information for multiple individuals on a genomic scale. In particular sequencing pooled DNA samples has been shown to provide a cost-effective approach for characterizing variation in natural populations. While a range of software tools have been developed for mapping these reads onto a reference genome and extracting SNPs, linking this information to population genetic estimators and functional information still poses a major challenge to many researchers.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The enormous potential of natural variation for the functional characterization of genes has been neglected for a long time. Only since recently, functional geneticists are starting to account for natural variation in their analyses. With the new sequencing technologies it has become feasible to collect sequence information for multiple individuals on a genomic scale. 
In particular sequencing pooled DNA samples has been shown to provide a cost-effective approach for characterizing variation in natural populations. While a range of software tools have been developed for mapping these reads onto a reference genome and extracting SNPs, linking this information to population genetic estimators and functional information still poses a major challenge to many researchers. |
Llorens, Franc; Hummel, Manuela; Pastor, Xavier; Ferrer, Anna; Pluvinet, Raquel; Vivancos, Ana; Castillo, Ester; Iraola, Susana; Mosquera, Ana M; González, Eva; Lozano, Juanjo; Ingham, Matthew; Dohm, Juliane C; Noguera, Marc; Kofler, Robert; del Río, Jose Antonio; Bayés, Mònica; Himmelbauer, Heinz; Sumoy, Lauro BMC genomics, 12 , S. 326, 2011, ISSN: 1471-2164. @article{Llorens2011, title = {Multiple platform assessment of the EGF dependent transcriptome by microarray and deep tag sequencing analysis.}, author = {Franc Llorens and Manuela Hummel and Xavier Pastor and Anna Ferrer and Raquel Pluvinet and Ana Vivancos and Ester Castillo and Susana Iraola and Ana M Mosquera and Eva González and Juanjo Lozano and Matthew Ingham and Juliane C Dohm and Marc Noguera and Robert Kofler and Jose Antonio del Río and M{ò}nica Bayés and Heinz Himmelbauer and Lauro Sumoy}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3141672&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2164-12-326}, issn = {1471-2164}, year = {2011}, date = {2011-01-01}, journal = {BMC genomics}, volume = {12}, pages = {326}, abstract = {Epidermal Growth Factor (EGF) is a key regulatory growth factor activating many processes relevant to normal development and disease, affecting cell proliferation and survival. Here we use a combined approach to study the EGF dependent transcriptome of HeLa cells by using multiple long oligonucleotide based microarray platforms (from Agilent, Operon, and Illumina) in combination with digital gene expression profiling (DGE) with the Illumina Genome Analyzer.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Epidermal Growth Factor (EGF) is a key regulatory growth factor activating many processes relevant to normal development and disease, affecting cell proliferation and survival. 
Here we use a combined approach to study the EGF dependent transcriptome of HeLa cells by using multiple long oligonucleotide based microarray platforms (from Agilent, Operon, and Illumina) in combination with digital gene expression profiling (DGE) with the Illumina Genome Analyzer. |
Esteve-Codina, Anna; Kofler, Robert; Palmieri, Nicola; Bussotti, Giovanni; Notredame, Cedric; Pérez-Enciso, Miguel Exploring the gonad transcriptome of two extreme male pigs with RNA-seq. Artikel BMC genomics, 12 , S. 552, 2011, ISSN: 1471-2164. @article{Esteve-Codina2011a, title = {Exploring the gonad transcriptome of two extreme male pigs with RNA-seq.}, author = {Anna Esteve-Codina and Robert Kofler and Nicola Palmieri and Giovanni Bussotti and Cedric Notredame and Miguel Pérez-Enciso}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3221674&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2164-12-552}, issn = {1471-2164}, year = {2011}, date = {2011-01-01}, journal = {BMC genomics}, volume = {12}, pages = {552}, abstract = {Although RNA-seq greatly advances our understanding of complex transcriptome landscapes, such as those found in mammals, complete RNA-seq studies in livestock and in particular in the pig are still lacking. Here, we used high-throughput RNA sequencing to gain insight into the characterization of the poly-A RNA fraction expressed in pig male gonads. An expression analysis comparing different mapping approaches and detection of allele specific expression is also discussed in this study.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Although RNA-seq greatly advances our understanding of complex transcriptome landscapes, such as those found in mammals, complete RNA-seq studies in livestock and in particular in the pig are still lacking. Here, we used high-throughput RNA sequencing to gain insight into the characterization of the poly-A RNA fraction expressed in pig male gonads. An expression analysis comparing different mapping approaches and detection of allele specific expression is also discussed in this study. |
2009 |
Kofler, Robert; Teixeira Torres, Tatiana; Lelley, Tamas; Schlötterer, Christian PanGEA: identification of allele specific gene expression using the 454 technology. Artikel BMC bioinformatics, 10 , S. 143, 2009, ISSN: 1471-2105. @article{Kofler2009, title = {PanGEA: identification of allele specific gene expression using the 454 technology.}, author = {Robert Kofler and Tatiana {Teixeira Torres} and Tamas Lelley and Christian Schlötterer}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2693439&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2105-10-143}, issn = {1471-2105}, year = {2009}, date = {2009-01-01}, journal = {BMC bioinformatics}, volume = {10}, pages = {143}, abstract = {Next generation sequencing technologies hold great potential for many biological questions. While mainly used for genomic sequencing, they are also very promising for gene expression profiling. Sequencing of cDNA does not only provide an estimate of the absolute expression level, it can also be used for the identification of allele specific gene expression.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Next generation sequencing technologies hold great potential for many biological questions. While mainly used for genomic sequencing, they are also very promising for gene expression profiling. Sequencing of cDNA does not only provide an estimate of the absolute expression level, it can also be used for the identification of allele specific gene expression. |
2008 |
Kofler, Robert; Bartos, Jan; Gong, Li; Stift, Gertraud; Suchánková, Pavla; Simková, Hana; Berenyi, Maria; Burg, Kornel; Dolezel, Jaroslav; Lelley, Tamas TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik, 117 (6), S. 915–26, 2008, ISSN: 0040-5752. @article{Kofler2008a, title = {Development of microsatellite markers specific for the short arm of rye (Secale cereale L.) chromosome 1.}, author = {Robert Kofler and Jan Bartos and Li Gong and Gertraud Stift and Pavla Suchánková and Hana Simková and Maria Berenyi and Kornel Burg and Jaroslav Dolezel and Tamas Lelley}, url = {http://www.ncbi.nlm.nih.gov/pubmed/18626624}, doi = {10.1007/s00122-008-0831-2}, issn = {0040-5752}, year = {2008}, date = {2008-10-01}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {117}, number = {6}, pages = {915--26}, abstract = {We developed 74 microsatellite marker primer pairs yielding 76 polymorphic loci, specific for the short arm of rye chromosome 1R (1RS) in wheat background. Four libraries enriched for microsatellite motifs AG, AAG, AC and AAC were constructed from DNA of flow-sorted 1RS chromosomes and 1,290 clones were sequenced. Additionally, 2,778 BAC-end-sequences from a 1RS specific BAC library were used for microsatellite screening and marker development. From 724 designed primer pairs, 119 produced 1RS specific bands and 74 of them showed polymorphism in a set of ten rye genotypes. We show that this high attrition rate was due to the highly repetitive nature of the rye genome consisting of a large number of transposable elements. We mapped the 76 polymorphic loci physically into three regions (bins) on 1RS; 29, 30 and 17 loci were assigned to the distal, intercalary and proximal regions of the 1RS arm, respectively. The average polymorphism information content increases with distance from the centromere, which could be due to an increased recombination rate along the chromosome arm towards the telomere. 
Additionally, we demonstrate, using the data of the whole rice genome, that the intra-genomic length variation of microsatellites correlates (r = 0.87) with microsatellite polymorphism. Based on these results we suggest that an analysis of the microsatellite length variation is conducted for each species prior to microsatellite development, provided that sufficient sequence information is available. This will allow to selectively design microsatellite markers for motifs likely to yield a high level of polymorphism.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We developed 74 microsatellite marker primer pairs yielding 76 polymorphic loci, specific for the short arm of rye chromosome 1R (1RS) in wheat background. Four libraries enriched for microsatellite motifs AG, AAG, AC and AAC were constructed from DNA of flow-sorted 1RS chromosomes and 1,290 clones were sequenced. Additionally, 2,778 BAC-end-sequences from a 1RS specific BAC library were used for microsatellite screening and marker development. From 724 designed primer pairs, 119 produced 1RS specific bands and 74 of them showed polymorphism in a set of ten rye genotypes. We show that this high attrition rate was due to the highly repetitive nature of the rye genome consisting of a large number of transposable elements. We mapped the 76 polymorphic loci physically into three regions (bins) on 1RS; 29, 30 and 17 loci were assigned to the distal, intercalary and proximal regions of the 1RS arm, respectively. The average polymorphism information content increases with distance from the centromere, which could be due to an increased recombination rate along the chromosome arm towards the telomere. Additionally, we demonstrate, using the data of the whole rice genome, that the intra-genomic length variation of microsatellites correlates (r = 0.87) with microsatellite polymorphism. 
Based on these results we suggest that an analysis of the microsatellite length variation is conducted for each species prior to microsatellite development, provided that sufficient sequence information is available. This will allow to selectively design microsatellite markers for motifs likely to yield a high level of polymorphism. |
Gong, L; Stift, G; Kofler, R; Pachner, M; Lelley, T Microsatellites for the genus Cucurbita and an SSR-based genetic linkage map of Cucurbita pepo L. Artikel TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik, 117 (1), S. 37–48, 2008, ISSN: 0040-5752. @article{Gong2008, title = {Microsatellites for the genus Cucurbita and an SSR-based genetic linkage map of Cucurbita pepo L.}, author = {L Gong and G Stift and R Kofler and M Pachner and T Lelley}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2413107&tool=pmcentrez&rendertype=abstract}, doi = {10.1007/s00122-008-0750-2}, issn = {0040-5752}, year = {2008}, date = {2008-06-01}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {117}, number = {1}, pages = {37--48}, abstract = {Until recently, only a few microsatellites have been available for Cucurbita, thus their development is highly desirable. The Austrian oil-pumpkin variety Gleisdorfer Olkürbis (C. pepo subsp. pepo) and the C. moschata cultivar Soler (Puerto Rico) were used for SSR development. SSR-enriched partial genomic libraries were established and 2,400 clones were sequenced. Of these 1,058 (44%) contained an SSR at least four repeats long. Primers were designed for 532 SSRs; 500 primer pairs produced fragments of expected size. Of these, 405 (81%) amplified polymorphic fragments in a set of 12 genotypes: three C. moschata, one C. ecuadorensis, and eight C. pepo representing all eight cultivar groups. On an average, C. pepo and C. moschata produced 3.3 alleles per primer pair, showing high inter-species transferability. There were 187 SSR markers detecting polymorphism between the USA oil-pumpkin variety "Lady Godiva" (O5) and the Italian crookneck variety "Bianco Friulano" (CN), which are the parents of our previous F(2) mapping population. It has been used to construct the first published C. pepo map, containing mainly RAPD and AFLP markers. 
Now the updated map comprises 178 SSRs, 244 AFLPs, 230 RAPDs, five SCARs, and two morphological traits (h and B). It contains 20 linkage groups with a map density of 2.9 cM. The observed genome coverage (Co) is 86.8%.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Until recently, only a few microsatellites have been available for Cucurbita, thus their development is highly desirable. The Austrian oil-pumpkin variety Gleisdorfer Olkürbis (C. pepo subsp. pepo) and the C. moschata cultivar Soler (Puerto Rico) were used for SSR development. SSR-enriched partial genomic libraries were established and 2,400 clones were sequenced. Of these 1,058 (44%) contained an SSR at least four repeats long. Primers were designed for 532 SSRs; 500 primer pairs produced fragments of expected size. Of these, 405 (81%) amplified polymorphic fragments in a set of 12 genotypes: three C. moschata, one C. ecuadorensis, and eight C. pepo representing all eight cultivar groups. On an average, C. pepo and C. moschata produced 3.3 alleles per primer pair, showing high inter-species transferability. There were 187 SSR markers detecting polymorphism between the USA oil-pumpkin variety "Lady Godiva" (O5) and the Italian crookneck variety "Bianco Friulano" (CN), which are the parents of our previous F(2) mapping population. It has been used to construct the first published C. pepo map, containing mainly RAPD and AFLP markers. Now the updated map comprises 178 SSRs, 244 AFLPs, 230 RAPDs, five SCARs, and two morphological traits (h and B). It contains 20 linkage groups with a map density of 2.9 cM. The observed genome coverage (Co) is 86.8%. |
Bartos, Jan; Paux, Etienne; Kofler, Robert; Havránková, Miroslava; Kopecký, David; Suchánková, Pavla; Safár, Jan; Simková, Hana; Town, Christopher D; Lelley, Tamas; Feuillet, Catherine; Dolezel, Jaroslav BMC plant biology, 8 , S. 95, 2008, ISSN: 1471-2229. @article{Bartos2008, title = {A first survey of the rye (Secale cereale) genome composition through BAC end sequencing of the short arm of chromosome 1R.}, author = {Jan Bartos and Etienne Paux and Robert Kofler and Miroslava Havránková and David Kopeck{ý} and Pavla Suchánková and Jan Safár and Hana Simková and Christopher D Town and Tamas Lelley and Catherine Feuillet and Jaroslav Dolezel}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2565679&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2229-8-95}, issn = {1471-2229}, year = {2008}, date = {2008-01-01}, journal = {BMC plant biology}, volume = {8}, pages = {95}, abstract = {Rye (Secale cereale L.) belongs to tribe Triticeae and is an important temperate cereal. It is one of the parents of man-made species Triticale and has been used as a source of agronomically important genes for wheat improvement. The short arm of rye chromosome 1 (1RS), in particular is rich in useful genes, and as it may increase yield, protein content and resistance to biotic and abiotic stress, it has been introgressed into wheat as the 1BL.1RS translocation. A better knowledge of the rye genome could facilitate rye improvement and increase the efficiency of utilizing rye genes in wheat breeding.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Rye (Secale cereale L.) belongs to tribe Triticeae and is an important temperate cereal. It is one of the parents of man-made species Triticale and has been used as a source of agronomically important genes for wheat improvement. 
The short arm of rye chromosome 1 (1RS), in particular is rich in useful genes, and as it may increase yield, protein content and resistance to biotic and abiotic stress, it has been introgressed into wheat as the 1BL.1RS translocation. A better knowledge of the rye genome could facilitate rye improvement and increase the efficiency of utilizing rye genes in wheat breeding. |
Kofler, Robert; Schlötterer, Christian; Luschützky, Evita; Lelley, Tamas BMC genomics, 9 , S. 612, 2008, ISSN: 1471-2164. @article{Kofler2008, title = {Survey of microsatellite clustering in eight fully sequenced species sheds light on the origin of compound microsatellites.}, author = {Robert Kofler and Christian Schlötterer and Evita Luschützky and Tamas Lelley}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2644718&tool=pmcentrez&rendertype=abstract}, doi = {10.1186/1471-2164-9-612}, issn = {1471-2164}, year = {2008}, date = {2008-01-01}, journal = {BMC genomics}, volume = {9}, pages = {612}, abstract = {Compound microsatellites are a special variation of microsatellites in which two or more individual microsatellites are found directly adjacent to each other. Until now, such composite microsatellites have not been investigated in a comprehensive manner.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Compound microsatellites are a special variation of microsatellites in which two or more individual microsatellites are found directly adjacent to each other. Until now, such composite microsatellites have not been investigated in a comprehensive manner. |
2007 |
Kofler, Robert; Schlötterer, Christian; Lelley, Tamas SciRoKo: a new tool for whole genome microsatellite search and investigation. Artikel Bioinformatics (Oxford, England), 23 (13), S. 1683–5, 2007, ISSN: 1367-4811. @article{Kofler2007, title = {SciRoKo: a new tool for whole genome microsatellite search and investigation.}, author = {Robert Kofler and Christian Schlötterer and Tamas Lelley}, url = {http://www.ncbi.nlm.nih.gov/pubmed/17463017}, doi = {10.1093/bioinformatics/btm157}, issn = {1367-4811}, year = {2007}, date = {2007-07-01}, journal = {Bioinformatics (Oxford, England)}, volume = {23}, number = {13}, pages = {1683--5}, abstract = {SciRoKo is a user-friendly software tool for the identification of microsatellites in genomic sequences. The combination of an extremely fast search algorithm with a built-in summary statistic tool makes SciRoKo an excellent tool for full genome analysis. Compared to other already existing tools, SciRoKo also allows the analysis of compound microsatellites. AVAILABILITY: free for use: www.kofler.or.at/Bioinformatics. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } SciRoKo is a user-friendly software tool for the identification of microsatellites in genomic sequences. The combination of an extremely fast search algorithm with a built-in summary statistic tool makes SciRoKo an excellent tool for full genome analysis. Compared to other already existing tools, SciRoKo also allows the analysis of compound microsatellites. AVAILABILITY: free for use: www.kofler.or.at/Bioinformatics. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online. |
Publications
2018 |
Polygenic adaptation fuels genetic redundancy in Drosophila Artikel bioRxiv, S. 332122, 2018. |
Molecular dissection of a natural transposable element invasion Artikel Genome Research, 2018. |
2017 |
SimulaTE: simulating complex landscapes of transposable elements of populations Artikel Bioinformatics, (November 2017), S. 1–2, 2017, ISSN: 1367-4803. |
Molecular Ecology, 26 (12), S. 3256–3275, 2017, ISSN: 1365294X. |
Molecular Ecology, (May), 2017, ISSN: 09621083. |
2016 |
Suitability of different mapping algorithms for genome-wide polymorphism scans with Pool-Seq data Artikel Submitted, 6 (November), S. 1–20, 2016, ISSN: 2160-1836. |
PoPoolationTE2: Comparative Population Genomics of Transposable Elements Using Pool-Seq Artikel Molecular Biology and Evolution, 33 (10), S. 2759–2764, 2016, ISSN: 15371719. |
(August), S. 1–10, 2016, ISSN: 0018-067X. |
Molecular ecology resources, 16 (1), S. 118–122, 2016, ISSN: 1755-0998. |
2015 |
Tempo and Mode of Transposable Element Activity in Drosophila. Artikel PLoS genetics, 11 (7), S. e1005406, 2015, ISSN: 1553-7404. |
Heredity, 114 (5), S. 431–40, 2015, ISSN: 1365-2540. |
The recent invasion of natural Drosophila simulans populations by the P-element. Artikel Proceedings of the National Academy of Sciences of the United States of America, 112 (21), S. 6659–63, 2015, ISSN: 1091-6490. |
Bioinformatics, 31 (11), S. 1762–1770, 2015, ISSN: 14602059. |
2014 |
Sequencing pools of individuals-mining genome-wide polymorphism data without big funding Artikel Nature Reviews Genetics, 15 (11), S. 749–763, 2014, ISSN: 14710064. |
Molecular Biology and Evolution, 31 (2), S. 364–375, 2014, ISSN: 07374038. |
2013 |
A Guide for the Design of Evolve and Resequencing Studies. Artikel Molecular biology and evolution, 31 (2), S. 474–483, 2013, ISSN: 1537-1719. |
BMC Genomics, 14 (1), S. 371, 2013, ISSN: 1471-2164. |
Molecular Ecology Resources, S. n/a–n/a, 2013, ISSN: 1755098X. |
2012 |
Genome Research, 23 (1), S. 99–110, 2012, ISSN: 1088-9051. |
Molecular Ecology, S. n/a–n/a, 2012, ISSN: 09621083. |
Molecular Ecology, 21 (20), S. 4931–4941, 2012, ISSN: 09621083. |
Gowinda: unbiased analysis of gene set enrichment for Genome Wide Association Studies. Artikel Bioinformatics (Oxford, England), 2012, ISSN: 1367-4811. |
PLoS genetics, 8 (1), S. e1002487, 2012, ISSN: 1553-7404. |
2011 |
Bioinformatics (Oxford, England), 27 (24), S. 3435–6, 2011, ISSN: 1367-4811. |
Partial short-read sequencing of a highly inbred Iberian pig and genomics inference thereof. Artikel Heredity, 107 (3), S. 256–64, 2011, ISSN: 1365-2540. |
PloS one, 6 (1), S. e15925, 2011, ISSN: 1932-6203. |
BMC genetics, 12 , S. 27, 2011, ISSN: 1471-2156. |
BMC genomics, 12 , S. 326, 2011, ISSN: 1471-2164. |
Exploring the gonad transcriptome of two extreme male pigs with RNA-seq. Artikel BMC genomics, 12 , S. 552, 2011, ISSN: 1471-2164. |
2009 |
PanGEA: identification of allele specific gene expression using the 454 technology. Artikel BMC bioinformatics, 10 , S. 143, 2009, ISSN: 1471-2105. |
2008 |
TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik, 117 (6), S. 915–26, 2008, ISSN: 0040-5752. |
Microsatellites for the genus Cucurbita and an SSR-based genetic linkage map of Cucurbita pepo L. Artikel TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik, 117 (1), S. 37–48, 2008, ISSN: 0040-5752. |
BMC plant biology, 8 , S. 95, 2008, ISSN: 1471-2229. |
BMC genomics, 9 , S. 612, 2008, ISSN: 1471-2164. |
2007 |
SciRoKo: a new tool for whole genome microsatellite search and investigation. Artikel Bioinformatics (Oxford, England), 23 (13), S. 1683–5, 2007, ISSN: 1367-4811. |