% Bibliography of HPG / OpenCB tool papers (four journal articles).
% Citation keys are the original numeric export ids and are kept unchanged
% so that existing citations keep resolving.
% Conventions used below: month holds the standard three-letter macro only
% (the year lives in the year field), page ranges use a double hyphen with
% full end pages, and acronyms / proper names in titles are brace-protected.

% HPG Pore: nanopore sequencing data toolkit (BMC Bioinformatics, 2016).
% The exported month field contained only the year, so it is omitted here.
@article{6,
  author   = {Tarraga, Joaquin and Gallego, Asunci{\'o}n and Arnau, Vicente and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
  title    = {{HPG} pore: an efficient and scalable framework for nanopore sequencing data},
  journal  = {BMC Bioinformatics},
  volume   = {17},
  year     = {2016},
  pages    = {107},
  issn     = {1471-2105},
  doi      = {10.1186/s12859-016-0966-0},
  abstract = {BACKGROUND: The use of nanopore technologies is expected to spread in the future because they are portable and can sequence long fragments of DNA molecules without prior amplification. The first nanopore sequencer available, the MinION{\texttrademark} from Oxford Nanopore Technologies, is a USB-connected, portable device that allows real-time DNA analysis. In addition, other new instruments are expected to be released soon, which promise to outperform the current short-read technologies in terms of throughput. Despite the flood of data expected from this technology, the data analysis solutions currently available are only designed to manage small projects and are not scalable.

RESULTS: Here we present HPG Pore, a toolkit for exploring and analysing nanopore sequencing data. HPG Pore can run on both individual computers and in the Hadoop distributed computing framework, which allows easy scale-up to manage the large amounts of data expected to result from extensive use of nanopore technologies in the future.

CONCLUSIONS: HPG Pore allows for virtually unlimited sequencing data scalability, thus guaranteeing its continued management in near future scenarios. HPG Pore is available in GitHub at http://github.com/opencb/hpg-pore .},
}

% HPG Aligner SA: RNA short-read mapping on multicore processors (2015).
% The issue spans September-October, expressed with concatenated month macros.
@article{10,
  author   = {Mart{\'\i}nez, H{\'e}ctor and Tarraga, Joaquin and Medina, Ignacio and Barrachina, Sergio and Castillo, Maribel and Dopazo, Joaqu{\'\i}n and Quintana-Ort{\'\i}, Enrique S.},
  title    = {Concurrent and Accurate Short Read Mapping on Multicore Processors},
  journal  = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume   = {12},
  year     = {2015},
  month    = sep # "--" # oct,
  pages    = {995--1007},
  issn     = {1557-9964},
  doi      = {10.1109/TCBB.2015.2392077},
  abstract = {We introduce a parallel aligner with a work-flow organization for fast and accurate mapping of RNA sequences on servers equipped with multicore processors. Our software, HPG Aligner SA (HPG Aligner SA is an open-source application. The software is available at http://www.opencb.org), exploits a suffix array to rapidly map a large fraction of the RNA fragments (reads), as well as leverages the accuracy of the Smith-Waterman algorithm to deal with conflictive reads. The aligner is enhanced with a careful strategy to detect splice junctions based on an adaptive division of RNA reads into small segments (or seeds), which are then mapped onto a number of candidate alignment locations, providing crucial information for the successful alignment of the complete reads. The experimental results on a platform with Intel multicore technology report the parallel performance of HPG Aligner SA, on RNA reads of 100-400 nucleotides, which excels in execution time/sensitivity to state-of-the-art aligners such as TopHat 2+Bowtie 2, MapSplice, and STAR.},
}

% HPG Aligner: suffix-array DNA read mapping (Bioinformatics, 1 Dec 2014).
% The day of publication from the export is kept in the separate day field.
@article{8,
  author   = {Tarraga, Joaquin and Arnau, Vicente and Mart{\'\i}nez, H{\'e}ctor and Moreno, Raul and Cazorla, Diego and Salavert-Torres, Jos{\'e} and Blanquer-Espert, Ignacio and Dopazo, Joaqu{\'\i}n and Medina, Ignacio},
  title    = {Acceleration of short and long {DNA} read mapping without loss of accuracy using suffix array},
  journal  = {Bioinformatics},
  volume   = {30},
  year     = {2014},
  month    = dec,
  day      = {1},
  pages    = {3396--3398},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btu553},
  keywords = {Algorithms, Animals, DNA, Drosophila, High-Throughput Nucleotide Sequencing, Humans, Sequence Alignment, Sequence Analysis, Software},
  abstract = {UNLABELLED: HPG Aligner applies suffix arrays for DNA read mapping. This implementation produces a highly sensitive and extremely fast mapping of DNA reads that scales up almost linearly with read length. The approach presented here is faster (over 20{\texttimes} for long reads) and more sensitive (over 98\% in a wide range of read lengths) than the current state-of-the-art mappers. HPG Aligner is not only an optimal alternative for current sequencers but also the only solution available to cope with longer reads and growing throughputs produced by forthcoming sequencing technologies.

AVAILABILITY AND IMPLEMENTATION: https://github.com/opencb/hpg-aligner.},
}

% CellBase: RESTful web services over integrated biological data sources
% (Nucleic Acids Research web-server issue, July 2012).
@article{3,
  author   = {Bleda, Marta and Tarraga, Joaquin and de Maria, Alejandro and Salavert, Francisco and Garcia-Alonso, Luz and Celma, Matilde and Martin, Ainoha and Dopazo, Joaqu{\'\i}n and Medina, Ignacio},
  title    = {{CellBase}, a comprehensive collection of {RESTful} web services for retrieving relevant biological information from heterogeneous sources},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  month    = jul,
  pages    = {W609--W614},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks575},
  keywords = {Animals, Databases, Gene Regulatory Networks, Genetic, Genetic Variation, Humans, Internet, Mice, MicroRNAs, Molecular Sequence Annotation, Protein Interaction Mapping, Rats, Software, Systems Biology, Systems Integration, Transcription Factors},
  abstract = {During the past years, the advances in high-throughput technologies have produced an unprecedented growth in the number and size of repositories and databases storing relevant biological data. Today, there is more biological information than ever but, unfortunately, the current status of many of these repositories is far from being optimal. Some of the most common problems are that the information is spread out in many small databases; frequently there are different standards among repositories and some databases are no longer supported or they contain too specific and unconnected information. In addition, data size is increasingly becoming an obstacle when accessing or storing biological data. All these issues make very difficult to extract and integrate information from different sources, to analyze experiments or to access and query this information in a programmatic way. CellBase provides a solution to the growing necessity of integration by easing the access to biological data. CellBase implements a set of RESTful web services that query a centralized database containing the most relevant biological data sources. The database is hosted in our servers and is regularly updated.
CellBase documentation can be found at http://docs.bioinfo.cipf.es/projects/cellbase.},
}