{ "items": [ { "label": "RKB", "type": "Publication", "pub-type": "Journal Article", "keywords": ["semantic web","database", "bioinformatics", "ontology"], "title": "RKB: A Semantic Web Knowledge Base for RNA", "author" : [ "Jose Cruz-Toledo", "Michel Dumontier", "Marc Parisien", "Francois Major" ], "journal": "Journal of Biomedical Semantics", "volume": "Proceedings of the Bio-Ontologies Special Interest Group Meeting 2009: Knowledge in Biology. Volume 1", "issue": "Supplement 1", "pages": "S2 (10 pages)", "year": "2010", "pub-date": "2010-06-22", "abstract": "Increasingly sophisticated knowledge about RNA structure and function requires an inclusive knowledge representation that facilitates the integration of independently-generated information arising from such efforts as genome sequencing projects, microarray analyses, structure determination and RNA SELEX experiments. While RNAML, an XML-based representation, has been proposed as an exchange format for a select subset of information, it lacks domain-specific semantics that are essential for answering questions that require expert knowledge. Here, we describe an RNA knowledge base (RKB) for structure-based knowledge using RDF/OWL Semantic Web technologies. RKB extends a number of ontologies and contains basic terminology for nucleic acid composition along with context/model-specific structural features such as sugar conformations, base pairings and base stackings. RKB (available at http://semanticscience.org/projects/rkb) is populated with PDB entries and MC-Annotate structural annotation. 
We show queries to the RKB using description logic reasoning, thus opening the door to question answering over independently-published RNA knowledge using Semantic Web technologies.", "pdf":"2010_JSW_RKB.pdf", "publisher_url": "http://www.jbiomedsem.com/content/pdf/2041-1480-1-S1-S2.pdf", "doi": "10.1186/2041-1480-1-S1-S2" }, { "label": "HyQue", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","bioinformatics"], "title": "HyQue: Evaluating hypotheses using Semantic Web technologies", "author": [ "Alison Callahan", "Michel Dumontier", "Nigam Shah" ], "pub-date": "2010-07-09", "year": "2010", "workshop": "Bio-Ontologies 2010: Semantic Applications in Life Sciences", "workshop_url": "http://www.bio-ontologies.org.uk/", "conference": "Intelligent Systems in Molecular Biology (ISMB)", "conference_url": "http://www.iscb.org/ismb2010", "city": "Boston", "country": "USA", "abstract": "Key to the success of e-Science is the ability to evaluate complex knowledge and expert-composed hypotheses by checking their validity against supporting experimental data. Motivated by the HyBrow Hypothesis Browser, we present HyQue, a Semantic Web-based tool for querying and evaluating scientific hypotheses. HyQue features a flexible knowledge model to accommodate diverse hypotheses structured as events and represented using Semantic Web languages (RDF/OWL). Hypothesis validity is evaluated according to experimental and literature-sourced evidence through a unique combination of SPARQL queries and query evaluation rules. Inference over OWL ontologies (for type specifications, subclass assertions and parthood relations) and retrieval of facts stored as Bio2RDF linked data provide support for a given hypothesis. Unlike HyBrow, this system is capable of executing queries where participating entities or event types are unspecified, thus opening the door to a significantly wider range of hypothesis support. 
We use HyQue to evaluate hypotheses with varying levels of detail in the context of the galactose gene network in Saccharomyces cerevisiae, and demonstrate its feasibility over a growing body of knowledge.", "publisher_url": "http://www.bio-ontologies.org.uk/programme", "pdf": "2010_BIOONT_hyque.pdf" }, { "label": "OWLDEF", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology"], "title": "OWLDEF: Integrating OBO and OWL", "author": [ "Robert Hoehndorf", "Anika Oellrich", "Michel Dumontier", "Janet Kelso", "Heinrich Herre", "Dietrich Rebholz-Schuhmann" ], "pub-date": "2010-07-10", "year": "2010", "workshop": "Bio-Ontologies 2010: Semantic Applications in Life Sciences", "workshop_url": "http://www.bio-ontologies.org.uk/", "conference": "Intelligent Systems in Molecular Biology (ISMB)", "conference_url": "http://www.iscb.org/ismb2010", "city": "Boston", "country": "USA", "abstract": "An integration of the OBO Flatfile Format and the Web Ontology Language (OWL) would enable automated reasoning, inferences and consistency checking of biomedical ontologies and support the development and maintenance of ontologies developed in the OBO Flatfile Format. So far, the translation of relations in the OBO language to OWL is performed according to a single rigid pattern and in violation of the relation definitions of the OBO Relationship Ontology. We extend both the OBO Flatfile Format and the Manchester OWL Syntax to accommodate relation definitions. Based on these extensions, we implemented and evaluated two software applications. The first converts the OBO Flatfile Format to an OWL representation. The second uses automated inferences to convert OWL ontologies back to a representation in the OBO Flatfile Format. The OWLDEF method is generally applicable whenever ontologies are developed primarily using patterns and not a detailed knowledge representation language. 
The tools and libraries we developed for the OWLDEF method are available from http://bioonto.de/obo2owl.", "publisher_url": "http://www.bio-ontologies.org.uk/programme", "pdf": "2010_BIOONT_OWLDEF.pdf" }, { "label": "TMO", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","drug discovery"], "title": "The Translational Medicine Ontology: Driving personalized medicine by bridging the gap from bedside to bench", "author": [ "Michel Dumontier", "Bosse Andersson", "Colin Batchelor", "Christine Denney", "Christopher Domarew", "Anja Jentzsch", "Joanne Luciano", "Elgar Pichler", "Eric Prud'hommeaux", "Patricia L. Whetzel", "Olivier Bodenreider", "Tim Clark", "Lee Harland", "Vipul Kashyap", "Peter Kos", "Julia Kozlovsky", "James McGurk","Chimezie Ogbuji", "Matthias Samwald", "Lynn Schriml", "Peter J. Tonellato", "Jun Zhao", "Susie Stephens" ], "pub-date": "2010-06-21", "year": "2010", "workshop": "Bio-Ontologies 2010: Semantic Applications in Life Sciences", "workshop_url": "http://www.bio-ontologies.org.uk/", "conference": "Intelligent Systems in Molecular Biology (ISMB)", "conference_url": "http://www.iscb.org/ismb2010", "city": "Boston", "country": "USA", "abstract": "The Translational Medicine Ontology provides terminology that bridges diverse areas of translational medicine including hypothesis management, discovery research, drug development and formulation, clinical research, and clinical practice. Designed primarily from use cases, the ontology consists of essential terms that are mapped to other ontologies. It serves as a global schema for data integration while simultaneously facilitating the formulation of complex queries across heterogeneous sources. We demonstrate the utility of the ontology through question answering over a prototype knowledge base composed of sample patient data integrated with linked open data. 
This work forms a basis for the development of a computational platform for managing information relevant to personalized medicine.", "publisher_url": "http://www.bio-ontologies.org.uk/programme", "pdf": "2010_BIOONT_TMO.pdf" }, { "label": "OBO2OWL", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology"], "title": "Relational patterns in OWL and their application to OBO", "author": [ "Robert Hoehndorf", "Anika Oellrich", "Michel Dumontier", "Janet Kelso", "Heinrich Herre", "Dietrich Rebholz-Schuhmann" ], "pub-date": "2010-06-21", "year": "2010", "workshop": "OWL Experiences and Directions (OWLED 2010)", "workshop_url": "http://www.webont.org/owled/2010/", "conference": "Semantic Technology Conference", "conference_url": "http://semtech2010.semanticuniverse.com/", "city": "San Francisco", "country": "USA", "abstract": "Directed acyclic graphs are commonly used to represent ontologies in the biomedical domain. They provide an intuitive means to formalize relations that hold between ontological categories. However, their semantics is usually not explicit. We provide a semantics for a part of the OBO Flatfile Format by extending OWL with a method to express relational patterns. These patterns are OWL axioms with variables for classes. The variables can only be filled with named classes. Additionally, we provide a semantics for open patterns in OWL. Our method is applicable to the OBO Flatfile Format, and provides a means to design OWL ontologies using complex ontology design patterns. Therefore, it leads not only to an integration of the OBO Flatfile Format and OWL, but extends OWL with an intuitive interface for designing ontologies using complex definition patterns. 
A prototypic implementation and test results are available at http://bioonto.de/obo2owl.", "publisher_url": "http://www.webont.org/owled/2010/papers/owled2010_submission_3.pdf", "pdf": "2010_OWLED_OBO2OWL.pdf", "presentation": "http://www.slideshare.net/micheldumontier/relational-patterns-in-owl-and-their-application-to-obo" }, { "label": "DG", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","chemistry"], "title": "Representing Chemicals using OWL, Description Graphs and Rules", "author": [ "Janna Hastings", "Michel Dumontier", "Duncan Hull", "Matthew Horridge", "Christoph Steinbeck", "Ulrike Sattler", "Robert Stevens", "Tertia Horne", "Katarina Britz" ], "pub-date": "2010-06-21", "year": "2010", "workshop": "OWL Experiences and Directions (OWLED 2010)", "workshop_url": "http://www.webont.org/owled/2010/", "conference": "Semantic Technology Conference", "conference_url": "http://semtech2010.semanticuniverse.com/", "city": "San Francisco", "country": "USA", "abstract": "Objects can be said to be structured when their representation also contains their parts. While OWL in general can describe structured objects, description graphs are a recent, decidable extension to OWL which support the description of classes of structured objects whose parts are related in complex ways. Classes of chemical entities such as molecules, ions and groups (parts of molecules) are often characterised by the way in which the constituent atoms of their instances are connected via chemical bonds. For chemoinformatics tools and applications, this internal structure is represented using chemical graphs. We here present a chemical knowledge base based on the standard chemical graph model using description graphs, OWL and rules. We include in our ontology chemical classes, groups, and molecules, together with their structures encoded as description graphs. 
We show how role-safe rules can be used to determine parthood between groups and molecules based on the graph structures and to determine basic chemical properties. Finally, we investigate the scalability of the technology used through the development of an automatic utility to convert standard chemical graphs into description graphs, and converting a large number of diverse graphs obtained from a publicly available chemical database.", "publisher_url": "http://www.webont.org/owled/2010/papers/owled2010_submission_13.pdf", "pdf": "2010_OWLED_DG.pdf", "presentation": "http://www.slideshare.net/micheldumontier/owled-descgraphs" }, { "label": "realism", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["ontology","bioinformatics"], "title": "Realism for scientific ontologies", "author" : [ "Michel Dumontier", "Robert Hoehndorf" ], "pages": "12", "pub-date": "2010-05-14", "year": "2010", "conference": "6th International Conference on Formal Ontology in Information Systems", "conference_url": "http://fois2010.mie.utoronto.ca/", "city": "Toronto", "country": "Canada", "abstract": "Science aims to develop an accurate understanding of reality through a variety of rigorously empirical and formal methods. Ontologies are used to formalize the meaning of terms within a domain of discourse. The Basic Formal Ontology is an ontology of particular importance in the biomedical domains, where it provides the top-level for numerous ontologies, including those admitted as part of the OBO Foundry collection. The Basic Formal Ontology requires that all classes in an ontology are actually instantiated in reality. Despite the fact that it is hard to show whether entities of some kind exist or do not exist in reality (especially for unobservable entities like elementary particles), this criterion fails to satisfy the need of scientists to communicate their findings and theories unambiguously. 
We discuss the problems that arise due to the Basic Formal Ontology's realism criterion and suggest viable alternatives.", "pdf": "2010_FOIS_realism.pdf", "presentation": "http://www.slideshare.net/micheldumontier/realism-for-scientific-ontologies" }, { "label": "statistical graphs", "type": "Publication", "pub-type": "Journal Article", "title": "Modeling and querying graphical representations of statistical data", "keywords" : ["semantic web","ontology"], "author" : [ "Michel Dumontier", "Leo Ferres", "Natalia Villanueva-Rosales" ], "journal": "Journal of Web Semantics", "volume": "8", "issue": "2-3", "pages": "241-254", "year": "2010", "pub-date": "2010-01-04", "abstract" : "Although statistical graphs are ubiquitous, few techniques and standards exist to exchange, search and query these graphical representations. We present several improvements to human-graph interaction including i) a new approach to manage statistical graph knowledge by semantic annotation of graphs that bridges the gap between Web 2.0 social tagging and formal, logic-based approaches, ii) knowledge management and discovery across a non-trivial graph knowledge base and iii) sophisticated question answering that requires background knowledge using Semantic Web technology.", "doi": "10.1016/j.websem.2009.12.002" }, { "label": "semantic vectors", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","NLP","science map"], "title": "Semantic Journal Mapping for Search Visualization in a Large Scale Article Digital Library", "author" : [ "Glen Newton", "Alison Callahan", "Michel Dumontier" ], "pages": "7", "pub-date": "2009-10-27", "year": "2009", "workshop": "Second Workshop on Very Large Digital Libraries (VLDL2009)", "workshop_url": "http://www.ionio.gr/conferences/ecdl2009/ws_vldl.php", "conference": "European Conference on Digital Libraries (ECDL2009)", "conference_url": 
"http://www.ionio.gr/conferences/ecdl2009/index.php", "city": "Corfu", "country": "Greece", "abstract": "In this paper, we examine the scalability and utility of semantically mapping (visualizing) journals in a large scale (5.7+ million) science, technology and medical article digital library. This work is part of a larger research effort to evaluate semantic journal and article mapping for search query results refinement and visual contextualization in a large scale digital library. In this work the Semantic Vectors software package is parallelized and evaluated to create semantic distances between 2365 journals, from the sum of their full-text. This is used to create a journal semantic map whose production does scale and whose results are comparable to other maps of the scientific literature.", "publisher_url": "http://cuvier.cisti.nrc.ca/~gnewton/events/2009/ecdl2009Newton_20090723.pdf", "pdf": "2009_ECDL_semantic_journal_mapping" }, { "label": "RNA ontology", "type": "Publication", "pub-type": "Poster", "keywords": ["semantic web","bioinformatics"], "title": "An Ontology for RNA Structure and Interaction", "author" : [ "Michel Dumontier", "Jose Toledo Cruz", "Marc Parisien", "Francois Major" ], "pages": "1", "pub-date": "2009-07-24", "year": "2009", "poster": "", "workshop": "International Conference on Biomedical Ontology", "workshop_url": "http://icbo.buffalo.edu/", "city": "Buffalo", "country": "USA", "abstract": "Increasingly sophisticated knowledge about RNA structure and function requires ontologies to facilitate the integration of information arising from genome sequencing projects, microarray analyses and structure determination experiments. Here, we describe an ontology for nucleic acid composition along with context/model-specific representation of structural features such as sugar conformations, base pairings and base stackings. 
The ontology supports the goals of the RNA Ontology Consortium towards discovery of new knowledge from independently-published RNA data.", "pdf":"2009_NCBO_rna_ontology.pdf" }, { "label": "auto-identifiers", "type": "Publication", "pub-type": "Poster", "keywords": ["semantic web","bioinformatics"], "title": "Accurate biochemical knowledge representation with precise, structure-based identifiers", "author" : [ "Michel Dumontier", "Leonid L. Chepelev" ], "pages": "1", "pub-date": "2009-07-24", "year": "2009", "poster": "", "workshop": "International Conference on Biomedical Ontology", "workshop_url": "http://icbo.buffalo.edu/", "city": "Buffalo", "country": "USA", "abstract": "Biochemical ontologies aim to represent biochemical entities and the relations that exist between them in an accurate and precise manner. A fundamental starting point is the use of identifiers that precisely and uniquely identify some biochemical entity. Yet, our current approach for generating identifiers is often haphazard and incomplete. We describe plausible structure-based strategies for biochemical identity, ultimately to generate identifiers in an automatic and curator/database independent fashion, whether it is at molecular level or some part thereof. ", "pdf":"2009_NCBO_biochemical_identifier.pdf" }, { "label": "identifiers", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","database"], "title": "Increasingly accurate biochemical knowledge representation with precise, structure-based chemical identifiers", "author" : [ "Michel Dumontier", "Leonid C. Chepelev" ], "pages": "4", "pub-date": "2009-06-28", "year": "2009", "workshop": "Bio-ontologies 2009", "workshop_url": "http://www.bio-ontologies.org.uk/", "city": "Stockholm", "country": "Sweden", "abstract": "Biochemical ontologies aim to represent biochemical entities and the relations that exist between them in an accurate and precise manner. 
A fundamental starting point is the use of identifiers that precisely and uniquely identify some biochemical entity, whether it be a substance, a quality or some biological process. Yet, our current approach for generating identifiers is often haphazard and incomplete. This prevents us from accurately integrating knowledge and also leads to under-specification of our knowledge. This paper describes plausible structure-based strategies for biochemical identity, ultimately to generate identifiers in an automatic and curator/database independent fashion, whether it be at molecular level or some part thereof (e.g. residues, collection of residues, atoms, collection of atoms, functional groups). With structure-based identifiers in hand, we will be in a position to accurately capture specific biochemical knowledge, such as how a set of residues in a binding site are involved in a chemical reaction including, for example, the fact that a key nitrogen atom must first be de-protonated. Thus, this will enhance our current representation of biochemical knowledge and make it fundamentally more useful in describing (sub) molecular-based events. ", "pdf":"2009_BIOONT_BIOID.pdf" }, { "label": "rkb", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","database"], "title": "RKB: A Semantic Web Knowledge Base for RNA", "author" : [ "Michel Dumontier", "Jose Cruz-Toledo", "Marc Parisien", "Francois Major" ], "pages": "4", "pub-date": "2009-06-28", "year": "2009", "workshop": "Bio-ontologies 2009", "workshop_url": "http://www.bio-ontologies.org.uk/", "city": "Stockholm", "country": "Sweden", "abstract": "Increasingly sophisticated knowledge about RNA structure and function requires an inclusive knowledge representation that facilitates the integration of independently-generated information arising from such efforts as genome sequencing projects, microarray analyses, structure determination and RNA SELEX experiments. 
While RNAML, an XML-based representation, has been proposed as an exchange format for a select subset of information, it lacks machine-understandable semantics that make it arbitrarily user-extensible, as is the case for formal logic based languages. Here, we describe an RNA knowledge base (RKB) for structure-based knowledge using RDF/OWL Semantic Web technologies. RKB contains basic terminology for nucleic acid composition along with context/model-specific representation of structural features such as sugar conformations, base pairings and base stackings. RKB is populated with RNA PDB entries and MC-Annotate structural annotation. The use of semantic web technologies addresses the reality of diverse interests of the RNA Ontology Consortium and supports knowledge discovery over independently-published RNA knowledge.", "pdf":"2009_BIOONT_RKB.pdf" }, { "label": "extraction", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","database"], "title": "Semi-Automated Extraction of Biological Knowledge Bases using Semantic Web", "author" : [ "Natalia Villanueva-Rosales", "Michel Dumontier" ], "pages": "7", "pub-date": "2009-05-24", "year": "2009", "workshop": "The 2nd Canadian Semantic Web Working Symposium (CSWWS09)", "workshop_url": "http://isel.cs.unb.ca/~cswwc09/", "city": "Kelowna", "country": "Canada", "abstract": "The promise of Semantic Web to seamlessly retrieve and integrate relevant biological data in order to enable semantic query answering has yet to be realized. In particular, the design of bio-ontologies remains a manual, cumbersome task that together with the limited support for automated population of ontologies results in a knowledge acquisition bottleneck. At present, the lack of expressive reason-able biological ontologies is slowing the adoption of Semantic Web. Our research aims to develop semi-automated approaches for building reasoning-capable biological knowledge bases from relational databases. 
Developments towards this goal are expected to revolutionize the way biologists interact with computer systems to store and retrieve their increasingly sophisticated knowledge.", "pdf":"2009_CSWWS_dbextraction.pdf" }, { "label": "DOS", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","parallel computing"], "title": "A Framework for Distributed Ontology Systems", "author" : [ "Xueying Chen", "Michel Dumontier" ], "pages": "12", "pub-date": "2009-05-24", "year": "2009", "workshop": "The 2nd Canadian Semantic Web Working Symposium (CSWWS09)", "workshop_url": "http://isel.cs.unb.ca/~cswwc09/", "city": "Kelowna", "country": "Canada", "abstract": "In recent years, both the number and size of web ontologies has been fervently growing. Although much attention has been paid to ontology integration so as to enable interoperability between different ontologies, the problem of how to handle ontologies of large size has not been sufficiently addressed. In this paper, we propose a general framework for peer-to-peer distributed ontology systems which manages large ontologies size by partitioning an ontology into ontology fragments and distributing them over multiple autonomous nodes. Taking advantage of peer-to-peer computer networks, a distributed ontology system promises distributed reasoning as well as collaborative ontology development.", "pdf":"2009_CSWWS_framework_DOS.pdf" }, { "label": "digestion", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["bioinformatics","parallel computing"], "title": "Modeling Tryptic Digestion On The Cell BE Processor", "author" : [ "James Green", "Hanna Mahmoud", "Michel Dumontier" ], "pages": "5", "pub-date": "2009-05-03", "year": "2009", "conference": "22nd Canadian Conference on Electrical and Computer Engineering", "conference_url": "http://www.ieee.ca/ccece09/", "city": "St. 
John's", "country": "Canada", "abstract": "The Cell BE is a heterogeneous multi-core processor offering multiple levels of parallelism. When these are properly leveraged, the Cell BE demonstrates impressive performance acceleration for several high performance computing applications, including exact string matching on streaming data. The present study investigates the suitability of the Cell BE for a string matching problem of relevance to proteomics – the identification of tryptic digest points based on the presence of a short sequence motif. Three string matching algorithms are implemented and evaluated over several proteomic datasets. In its first application to bioinformatics, Parabix, a method of high-throughput XML stream processing which relies on bit transposition and the effective use of single-instruction multiple-data (SIMD) instructions, is applied here with great success. This method performs very well when the protein database is pre-processed in the form of parallel bit streams. Double buffering is also critical to hide the latency of DMA data transfers. Performance results are computed for both the cycle-accurate Cell BE simulator and also using real hardware. This problem is also placed in the larger context of using the Cell BE to achieve hypothesis-driven protein identification.", "pdf": "2009_CCECE_trypticCELLBE.pdf" }, { "label": "pharmacogenomics", "type": "Publication", "pub-type": "Journal Article", "title": "Towards Pharmacogenomics Knowledge Discovery on the Semantic Web", "keywords" : ["semantic web","ontology", "drug discovery"], "author" : [ "Michel Dumontier", "Natalia Villanueva-Rosales" ], "journal": "Briefings in Bioinformatics", "volume": "10", "issue": "2", "pages": "153-163", "year": "2009", "pub-date": "2009-02-24", "abstract" : "Pharmacogenomics aims to understand pharmacological response with respect to genetic variation. 
Essential to the delivery of better health care is the use of pharmacogenomics knowledge to answer questions about therapeutic, pharmacological or genetic aspects. Several XML markup languages have been developed to capture pharmacogenomic and related information so as to facilitate data sharing. However, recent advances in semantic web technologies have presented exciting new opportunities for pharmacogenomics knowledge discovery by representing the information with machine understandable semantics. Progress in this area is illustrated with reference to the personalized medicine project that aims to facilitate pharmacogenomics knowledge discovery through intuitive knowledge capture and sophisticated question answering using automated reasoning over expressive ontologies.", "pmid": "19240125", "doi": "10.1093/bib/bbn056", "pdf": "2008_BIB_pharmacogenomics.pdf" }, { "label": "sm profiling", "type": "Publication", "pub-type": "Book Chapter", "keywords": ["drug discovery","bioinformatics"], "title": "Target Profiling of Small Molecules", "author": [ "Leonid Chepelev", "Michel Dumontier" ], "year": "2009", "note": ["In Press"], "book_title": "Small Molecules for Protein Targeting", "editor": ["Hiroyuki Osada"], "publisher": "John Wiley & Sons, Inc.", "isbn": "0470120533 / 9780470120538 / 0-470-12053-3" }, { "label": "ligands & inhibitors", "type": "Publication", "pub-type": "Book Chapter", "keywords": ["drug discovery","bioinformatics"], "title": "Development of Small Molecule Ligands and Inhibitors", "author": [ "Leonid Chepelev", "Nikolai Chepelev", "Hooman Shadnia", "Bill Willmore", "James Wright", "Michel Dumontier" ], "year": "2009", "note": ["In Press"], "book_title": "Small Molecules for Protein Targeting", "editor": ["Hiroyuki Osada"], "publisher": "John Wiley & Sons, Inc.", "isbn": "0470120533 / 9780470120538 / 0-470-12053-3" }, { "label": "bio2rdf", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["semantic web","open 
source","database","bioinformatics"], "title": "Bio2RDF Network Of Linked Data", "author": [ "Marc-Alexandre Nolin", "Peter Ansell", "Francois Belleau", "Kingsley Idehen", "Philippe Rigault", "Nicole Tourigny", "Paul Roe", "James M Hogan", "Michel Dumontier" ], "pub-date": "2008-10-26", "year": "2008", "workshop": "Semantic Web Challenge and Billion Triples Tracks", "workshop_url": "http://challenge.semanticweb.org/", "conference": "7th International Semantic Web Conference", "conference_url": "http://iswc2008.semanticweb.org/", "city": "Karlsruhe", "country": "Germany", "abstract": "Background: The Bio2RDF project (http://bio2rdf.org) work to create a network of coherent linked data across the life sciences databases. The project is open source and the following result is from the input of this community. Results: Databases have been converted and linked together with semantic web technologies. The process is to normalize any external URIs in each databases while we do the conversion from the original format to RDF. Conclusion: Each converted databases have his own SPARQL point provided by a Virtuoso Triplestore. Every documents can be retrieve using a REST interface similar for any databases. 
The REST URL also happen to be the URI. Other tools are also available using the REST interface.", "publisher_url": "http://www.cs.vu.nl/~pmika/swc-2008/Bio2RDF-Bio2RDF_submission.pdf", "pdf": "2008_ISWC_SWC_Bio2RDF.pdf" }, { "label": "situational modeling", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","bioinformatics"], "title": "Biological Situational Modeling: Defining Molecular Roles in Pathways and Reactions", "author": [ "Michel Dumontier" ], "pub-date": "2008-10-26", "year": "2008", "workshop": "OWL Experiences and Directions (OWLED-EU 2008)", "workshop_url": "http://www.webont.org/owled/2008/", "conference": "7th International Semantic Web Conference", "conference_url": "http://iswc2008.semanticweb.org/", "city": "Karlsruhe", "country": "Germany", "abstract": "Central to a coherent understanding of cellular biology is a faithful representation of biochemical processes as it pertains to its molecular participants. Current representations underspecify our knowledge because they fail to indicate the roles of the molecular components during relevant processes. 
Here, we describe a knowledge representation using OWL2 that overcomes previous limitations in modeling biochemical events and has clear implications for the accurate functional/role based annotation of molecular components.", "publisher_url": "http://www.webont.org/owled/2008/papers/owled2008eu_submission_24.pdf", "pdf": "2008_OWLEDEU_MR.pdf" }, { "label": "statistical graph", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["semantic web","ontology"], "title": "Semantic annotation and question answering of statistical graphs", "author": [ "Natalia Villanueva-Rosales", "Leo Ferres", "Michel Dumontier" ], "journal": "Lecture Notes in Computer Science", "volume": "5317", "pages": "100-110", "book_title": "MICAI 2008: Advances in Artificial Intelligence", "publisher": "Springer Berlin / Heidelberg", "pub-date": "2008-10-27", "year": "2008", "conference": "7th Mexican International Conference on Artificial Intelligence (MICAI-2008)", "conference_url": "http://www.micai.org/", "city": "Mexico City", "country": "Mexico", "abstract": "Although statistical graphs are ubiquitous, few techniques and standards exist to exchange, search and query these graphical representations. The gSem project aims to improve human-graph interaction by developing new approaches to manage statistical graph knowledge. Two specific objectives of this project are addressed in this paper: (1) improve the efficiency of searching statistical graph knowledge and (2) facilitate sophisticated question answering about statistical graph knowledge. 
In particular, semantic annotation and query answering across statistical graphs with OWL ontologies are described.", "publisher_url": "http://www.springer.com/computer/artificial/book/978-3-540-88635-8", "doi": "10.1007/978-3-540-88636-5", "pdf": "2008_MICAI_graph.pdf" }, { "label": "ppi", "type": "Publication", "pub-type": "Journal Article", "keywords": ["bioinformatics","cell biology","parallel computing"], "title": "Global investigation of protein-protein interactions in yeast Saccharomyces cerevisiae using re-occurring short polypeptide sequences", "author": [ "Sylvain Pitre", "Chris North", "Md Alamgir", "M Jessulat", "Adrian Chan", "Xuemei Luo", "James Green", "Michel Dumontier", "Frank Dehne", "Ashkan Golshani" ], "journal": "Nucleic Acids Research", "year": "2008", "volume": "36", "issue": "13", "pages": "4286-4294", "pub-date": "2008-06-27", "abstract": "Protein–protein interaction (PPI) maps provide insight into cellular biology and have received considerable attention in the post-genomic era. While large-scale experimental approaches have generated large collections of experimentally determined PPIs, technical limitations preclude certain PPIs from detection. Recently, we demonstrated that yeast PPIs can be computationally predicted using re-occurring short polypeptide sequences between known interacting protein pairs. However, the computational requirements and low specificity made this method unsuitable for large-scale investigations. Here, we report an improved approach, which exhibits a specificity of 99.95% and executes 16 000 times faster. Importantly, we report the first all-to-all sequence-based computational screen of PPIs in yeast, Saccharomyces cerevisiae in which we identify 29 589 high confidence interactions of 2 x 10^7 possible pairs. Of these, 14 438 PPIs have not been previously reported and may represent novel interactions. 
In particular, these results reveal a richer set of membrane protein interactions, not readily amenable to experimental investigations. From the novel PPIs, a novel putative protein complex comprised largely of membrane proteins was revealed. In addition, two novel gene functions were predicted and experimentally confirmed to affect the efficiency of non-homologous end-joining, providing further support for the usefulness of the identified PPIs in biological investigations.", "pmid": "18562252", "doi": "10.1093/nar/gkn390", "pdf": "2008_NAR_PIPE2.pdf" }, { "label": "gridcell", "type": "Publication", "pub-type": "Journal Article", "keywords": ["bioinformatics","cell simulation","parallel computing"], "title": "GridCell : A Stochastic Particle-Based Biological System Simulator", "author": [ "Laurier Boulianne", "Sevine Assaad", "Michel Dumontier", "Warren Gross" ], "journal": "BMC Systems Biology", "year": "2008", "volume": "2", "pages": "66", "pub-date": "2008-07-23", "abstract": "Background: Realistic biochemical simulators aim to improve our understanding of many biological processes that would be otherwise very difficult to monitor in experimental studies. Increasingly accurate simulators may provide insights into the regulation of biological processes due to stochastic or spatial effects. Results: We have developed GridCell as a three-dimensional simulation environment for investigating the behaviour of biochemical networks under a variety of spatial influences including crowding, recruitment and localization. GridCell enables the tracking and characterization of individual particles, leading to insights on the behaviour of low copy number molecules participating in signaling networks. The simulation space is divided into a discrete 3D grid that provides ideal support for particle collisions without distance calculation and particle search. SBML support enables existing networks to be simulated and visualized. 
The user interface provides intuitive navigation that facilitates insights into species behaviour across spatial and temporal dimensions. We demonstrate the effect of crowding on a Michaelis-Menten system. Conclusion: GridCell is an effective stochastic particle simulator designed to track the progress of individual particles in a three-dimensional space in which spatial influences such as crowding, co-localization and recruitment may be investigated.", "pmid": "18651956", "publisher_url": "http://www.biomedcentral.com/1752-0509/2/66", "pdf": "2008_BMCSB_gridcell.pdf" }, { "label": "yowl", "type": "Publication", "pub-type": "Journal Article", "keywords": ["bioinformatics","ontology","semantic web","database"], "title": "yOWL: an Ontology-Driven Knowledge Base for Yeast Biologists", "author": [ "Natalia Villanueva-Rosales", "Michel Dumontier" ], "journal": "Journal of Biomedical Informatics", "year": "2008", "volume": "41", "issue": "5", "pages": "779-789", "pub-date": "2008-05-11", "abstract": "Knowledge management is an ongoing challenge for the biological community such that large, diverse and continuously growing information requires more sophisticated methods to store, integrate and query their knowledge. The semantic web initiative provides a new knowledge engineering framework to represent, share and discover information. In this paper, we describe our efforts towards the development of an ontology-based knowledge base, including aspects from ontology design and population using \"semantic\" data mashup, to automated reasoning and semantic query answering. 
Based on yeast data obtained from the Saccharomyces Genome Database and UniProt, we discuss the challenges encountered during the building of the knowledge base and how they were overcome.", "pmid": "18562252", "doi": "10.1016/j.jbi.2008.05.001", "pdf": "2008_JBI_YOWL.pdf" }, { "label": "chemical ontology", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["semantic web","ontology","drug discovery","open source","database","bioinformatics"], "title": "Chemical Knowledge for the Semantic Web", "author": [ "Mykola Konyk", "Alexander De Leon", "Michel Dumontier" ], "journal": "LNBI", "year": "2008", "volume": "5109", "pages": "169-176", "pub-date": "2008-06-17", "book_title": "Lecture Notes in Computer Science", "publisher": "Springer Berlin / Heidelberg", "isbn": "978-3-540-69827-2", "conference": "Data Integration in the Life Sciences (DILS2008)", "conference_url": "http://dils2008.lri.fr/", "city": "Evry", "country": "France", "abstract": "With over 80 file formats to represent various chemical attributes, the conversion between one format and another is invariably lossy due to informal specifications. In contrast, the use of a formal knowledge representation language such as the Web Ontology Language (OWL) enables precise molecular descriptions that can be reasoned about in a logically valid manner. In this paper, we describe a chemical knowledge representation using OWL. We demonstrate its utility in querying a new drug repository created from PubChem, DrugBank and DBpedia. 
By leveraging Semantic Web technologies, it becomes possible to integrate chemical information at differing levels of detail and granularity, opening new avenues for life science knowledge discovery.", "doi": "10.1007/978-3-540-69828-9_17", "pdf": "2008_DILS_chemicalknowledge.pdf" }, { "label": "owl11 life science", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","bioinformatics"], "title": "Modeling Life Science Knowledge with OWL 1.1", "author": [ "Michel Dumontier", "Natalia Villanueva-Rosales" ], "year": "2008", "pub-date": "2008-04-01", "workshop": "OWL: Experiences and Directions (OWLED-DC 2008)", "workshop_url": "http://www.webont.org/owled/2008dc/", "city": "Washington DC", "country": "USA", "abstract": "The OWL 1.1 specification has created new opportunities for the design of increasingly expressive and useful ontologies in the modeling of life science knowledge. Here, we describe the application of expressive features in the design of an ontology of basic relations and how, in combination with an upper level ontology, they can be used to guide the formulation of life science knowledge. We report on our experiences to enhance existing ontologies so as to facilitate knowledge representation and question answering. 
Finally, we identify some outstanding challenges towards building an ontology-based semantic web.", "publisher_url": "http://www.webont.org/owled/2008dc/papers/owled2008dc_paper_20.pdf", "pdf": "2008_OWLEDDC_LS_OWL11.pdf" }, { "label": "presto", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology"], "title": "Publishing ontologies with Presto", "author": [ "Michel Dumontier", "Alexander De Leon" ], "year": "2008", "pub-date": "2008-04-02", "workshop": "OWL: Experiences and Directions (OWLED-DC 2008)", "workshop_url": "http://www.webont.org/owled/2008dc/", "city": "Washington DC", "country": "USA", "abstract": "Publishing RDF/OWL ontologies on the Semantic Web typically starts by placing the document in a web accessible location and ends with redirects of ontological components (classes, properties, individuals) to that document. Unfortunately, this is seldom sufficient for expressive OWL ontologies in which reasoning is essential to determine the full extent of the entity in question. Moreover, the ability to dynamically query expressive ontologies yields new applications over static publishing including the possibility that these queries may be equivalent to new ontological entities. 
Here, we describe the design of a new tool for publishing OWL ontologies in a dynamic manner such that the ontology and all of its entities are web resolvable and queryable, hence opening new avenues for knowledge management on the Semantic Web.", "publisher_url": "http://www.webont.org/owled/2008dc/papers/owled2008dc_paper_19.pdf", "pdf": "2008_OWLEDDC_PRESTO.pdf" } ,{ "label": "depression", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","drug discovery","database","bioinformatics"], "title": "Modeling the Pharmacogenomics of Depression", "author": [ "Michel Dumontier", "Muhammed Faizan", "Joseph Obeng", "Natalia Villanueva-Rosales" ], "year": "2008", "pub-date": "2008-04-20", "workshop": "Semantic Web for Health Care and Life Sciences (HCLS 2008)", "workshop_url": "http://esw.w3.org/topic/HCLS/WWW2008", "conference": "World Wide Web Conference (WWW2008)", "conference_url": "http://www2008.org/", "city": "Beijing", "country": "China", "abstract": "Pharmacogenomics aims to better understand the pharmacological response of a drug with respect to genetic variation. Essential to the delivery of better health care is the use of pharmacogenomics knowledge to answer questions about therapeutic, pharmacological or genetic aspects. In this preliminary work, we present a knowledge base designed with semantic web technologies capable of capturing essential aspects including genes, gene variants, SNPs, drugs, measures and outcomes, as well as gene-drug interactions and drug treatments. We populate the knowledge base from a primary source (PharmGKB), and augment it with literature curated pharmacogenomics knowledge of depression. 
We demonstrate the use of OWL 1.1 role chains to facilitate question answering and pharmacogenomics knowledge discovery.", "ppt": "http://esw.w3.org/topic/HCLS/WWW2008?action=AttachFile&do=get&target=mdumontier.ppt", "pdf": "2008_HCLS_pharmacogenomics.pdf" }, { "label": "ppi chapter", "type": "Publication", "pub-type": "Book Chapter", "keywords": ["bioinformatics"], "title": "Computational Methods For Predicting Protein-Protein Interactions", "author": [ "Sylvain Pitre", "Md Alamgir", "James Green", "Michel Dumontier", "Frank Dehne", "Ashkan Golshani" ], "journal": "Advances in Biochemical Engineering / Biotechnology", "year": "2008", "volume": "110", "pages": "247-267", "pub-date": "2008-01-18", "book_title": "Protein – Protein Interaction", "publisher": "Springer Berlin / Heidelberg", "isbn": "978-3-540-68817-4", "abstract": "Protein–protein interactions (PPIs) play a critical role in many cellular functions. A number of experimental techniques have been applied to discover PPIs; however, these techniques are expensive in terms of time, money, and expertise. There are also large discrepancies between the PPI data collected by the same or different techniques in the same organism. We therefore turn to computational techniques for the prediction of PPIs. Computational techniques have been applied to the collection, indexing, validation, analysis, and extrapolation of PPI data. This chapter will focus on computational prediction of PPI, reviewing a number of techniques including PIPE, developed in our own laboratory. For comparison, the conventional large-scale approaches to predict PPIs are also briefly discussed. 
The chapter concludes with a discussion of the limitations of both experimental and computational methods of determining PPIs.", "doi": "10.1007/978-3-540-68820-4", "pmid": "18202838", "pdf": "2008_PPI.pdf" }, { "label": "SMART", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["semantic web","ontology","open source","bioinformatics"], "title": "SMART: A Web-Based, Ontology-Driven, Semantic Web Query Answering Application", "author": [ "Alexander De Leon", "Natalia Villanueva-Rosales", "Myroslav Palenychka", "Michel Dumontier" ], "year": "2007", "pub-date": "2007-11-11", "workshop": "Semantic Web Challenge", "workshop_url": "http://iswc2007.semanticweb.org/callfor/SemanticWebChallenge.asp", "conference": "International Semantic Web Conference (ISWC 2007)", "conference_url": "http://iswc2007.semanticweb.org", "city": "Busan", "country": "South Korea", "abstract": "SMART (Semantic web information Management with automated Reasoning Tool) is an open-source project, which aims to provide intuitive tools for life scientists to represent, integrate, manage and query heterogeneous and distributed biological knowledge. SMART was designed with interoperability and extensibility in mind and uses AJAX, SVG and JSF technologies, RDF, OWL, SPARQL semantic web languages, triple stores (i.e. Jena) and DL reasoners (i.e. Pellet) for the automated reasoning. Features include semantic query composition and validation using DL reasoners, a graphical representation of the query, a mapping of DL queries to SPARQL, and the retrieval of pre-computed inferences from an RDF triple store. With a use case scenario, we illustrate how a biological scientist can intuitively query the yeast knowledge base and navigate the results. 
Continued development of this web-based resource for the biological semantic web will enable new information retrieval opportunities for the life sciences.", "url": "http://smart.dumontierlab.com", "pdf": "2007_ISWC_SWC_SMART.pdf" }, { "label": "3layer design", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology"], "title": "Three-Layer OWL Ontology Design", "author": [ "Michel Dumontier", "Natalia Villanueva-Rosales" ], "year": "2007", "pub-date": "2007-10-28", "workshop": "2nd International Workshop on Modular Ontologies (WOMO07)", "workshop_url": "http://webrum.uni-mannheim.de/math/lski/WoMO07/", "conference": "4th International Conference on Knowledge Capture (K-CAP2007)", "conference_url": "http://www.k-cap.org/kcap07/kcap07/", "city": "Whistler", "country": "Canada", "abstract": "Ontology reuse is often predicated on agreement to the semantics wholly defined within an ontology document. But when faced with overly constrained semantics that one might partially reject, might one instead prefer reduced ontological commitment? 
In this paper, we describe how three layered ontology design promotes maximal reuse of domain ontologies by separating taxonomically organized domain terminology from i) disjointness, ii) complex expressions that define a world view, and iii) application specific requirements that impose a specific data model for data exchange and document validation.", "url": "http://dumontierlab.com/?page=ontologies", "pdf": "2007_WOMO_3layerdesign.pdf" }, { "label": "time-series", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology"], "title": "Semantic Query Answering with Time-Series Graphs", "author": [ "Leo Ferres", "Natalia Villanueva-Rosales", "Michel Dumontier" ], "year": "2007", "pub-date": "2007-10-15", "workshop": "3rd International Workshop on Vocabularies, Ontologies and Rules for The Enterprise (VORTE 2007)", "workshop_url": "http://oxygen.informatik.tu-cottbus.de/VORTE/?q=node/10", "conference": "12th IEEE International EDOC Conference", "conference_url": "http://edoc.mitre.org/", "city": "Annapolis", "country": "USA", "abstract": "Statistical graphs are ubiquitous mechanisms for data visualization such that most, if not all, enterprises communicate information through them. However, many graphs are stored as unstructured images or proprietary binary objects, making them difficult to work with beyond the reports in which they are embedded. While graphs can be mapped to more common XML representations, these lack expressive semantics to discover new knowledge about them or to answer queries at various levels of granularity. This paper describes an OWL ontology that facilitates the representation, exchange, reasoning and query answering of statistical graph data. 
We illustrate the advantages of using an ontological approach to discover and query about time-series statistical graphs.", "ppt": "http://oxygen.informatik.tu-cottbus.de/VORTE/files/Ferres_VORTE2007.pdf", "pdf": "2007_VORTE_graph.pdf" }, { "label": "grid simulator", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["cell simulation","open source","bioinformatics","cell biology","parallel computing"], "title": "A Stochastic Particle-Based Biological System Simulator", "author": [ "Laurier Boulianne", "Michel Dumontier", "Warren Gross" ], "year": "2007", "pub-date": "2007-07-15", "conference": "Summer Computer Simulation Conference (SCSC 2007)", "conference_url": "http://www.sce.carleton.ca/faculty/wainer/SCSC07/SCSC'07.htm", "city": "San Diego", "country": "USA", "abstract": "The simulation and visualization of biological systems is expected to enhance our understanding of biological processes towards the development of effective therapeutic treatments. Biological systems are inherently stochastic at the molecular level, exhibit modified behavior under crowded conditions and may be affected by spatial locality. Common simulation approaches fail to account for these important aspects of biological systems, in part because they are computationally expensive. Here, we describe a stochastic, particle-based simulator that takes spatial locality into account. Each particle in the system is represented explicitly on a 3D grid where only one particle can occupy a grid location. The grid structure and stochastic approach removes the need for distance calculation and particle search. We demonstrate the effect of molecular crowding and spatial locality for a simple biological system. We anticipate that this system will be useful in examining more complex systems. 
Finally, this system is expected to be suitable for acceleration with parallel customizable hardware, a necessary requirement towards the simulation of an entire cell.", "url": "http://iml.ece.mcgill.ca/GridCell/", "pdf": "2007_SCSC_gridcell.pdf" }, { "label": "OWL function groups", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","drug discovery","bioinformatics"], "title": "Describing chemical functional groups in OWL-DL for the classification of chemical compounds", "author": [ "Natalia Villanueva-Rosales", "Michel Dumontier" ], "year": "2007", "pub-date": "2007-06-06", "workshop": "OWL: Experiences and Directions (OWLED 2007)", "workshop_url": "http://www.webont.org/owled/2007/", "conference": "European Semantic Web Conference (ESWC2007)", "conference_url": "http://www.eswc2007.org/", "city": "Innsbruck", "country": "Austria", "abstract": "Functional groups describe the semantics of chemical reactivity in terms of atoms and their connectivity, which exhibit characteristic chemical behavior when present in a compound. In this paper, we take a first step towards designing an OWL-DL ontology of functional groups for the classification of chemical compounds. We highlight the capabilities and limitations of OWL 1.0 and the proposed OWL 1.1 in terms of our domain requirements. We also illustrate how cyclic structures may be identified from SWRL rules and suggest extensions for reasoners to achieve this objective. 
This work represents a preliminary step towards describing, reasoning and querying about structure and function of molecules.", "pdf": "2007_OWLED_CFG.pdf" }, { "label": "ykb", "type": "Publication", "pub-type": "Workshop Paper", "keywords": ["semantic web","ontology","database","bioinformatics"], "title": "Towards a Semantic Knowledge Base for Yeast Biologists", "author": [ "Natalia Villanueva-Rosales", "Kevin Osbahr", "Michel Dumontier" ], "year": "2007", "pub-date": "2007-05-08", "workshop": "1st International Workshop on Health Care and Life Sciences Data Integration for the Semantic Web (HCLS-DI 2007)", "workshop_url": "http://www2007.org/workshop-W2.php", "conference": "World Wide Web Conference (WWW2007)", "conference_url": "http://www2007.org/", "city": "Banff", "country": "Canada", "abstract": "The integration of data from heterogeneous sources is an ongoing challenge for the scientific community. The semantic web initiative provides a new knowledge engineering framework to represent, query and share information. In this paper, we describe our efforts towards the development of an ontology-driven knowledge base that allows semantic query answering of yeast knowledge.", "pdf": "2007_HCLS_yOWL.pdf" }, { "label": "biodevs", "type": "Publication", "pub-type": "Conference Paper", "keywords": ["cell simulation","open source","bioinformatics","parallel computing"], "title": "Advanced DEVS models with applications to biomedicine", "author": [ "Gabriel Wainer", "Shafagh Jafer", "Banan Al-aubidy", "Alex Dias", "Roderick Bain", "Michel Dumontier", "James Cheetham" ], "year": "2007", "pub-date": "2007-02-08", "conference": "AI, Simulation and Planning in High Autonomy Systems (AIS 2007)", "conference_url": "http://www.lsis.org/imsm07/", "city": "Buenos Aires", "country": "Argentina", "abstract": "Simulation is becoming increasingly important in the analysis and design of complex systems such as those involving biological processes. 
The complexity of these systems makes computer simulation an adequate tool to study them under particular experimental conditions. We presented a simulation model for the metabolic pathways in the cells (namely, the Glycolysis and Krebs cycle), using the DEVS formalism and the CD++ tool. Here, we extend this experience, and we show a model on synapsin and vesicles interaction in nerve cells, and another one on the liver functions using similar techniques. Our long term goal is to provide realistic simulations of different biological processes when specific internal or external parameters are applied.", "pdf": "2007_AIS07_DEVS.pdf" }, { "label": "smid", "type": "Publication", "pub-type": "Journal Article", "keywords": ["drug discovery","open source","database","bioinformatics"], "title": "Domain-based Small Molecule Binding Site Annotation", "author": [ "Kevin Snyder","Howard Feldman", "Michel Dumontier", "John Salama", "Christopher Hogue" ], "journal": "BMC Bioinformatics", "year": "2006", "volume": "7", "pages": "152", "pub-date": "2006-03-17", "abstract": "Background: Accurate small molecule binding site information for a protein can facilitate studies in drug docking, drug discovery and function prediction, but small molecule binding site protein sequence annotation is sparse. The Small Molecule Interaction Database (SMID), a database of protein domain-small molecule interactions, was created using structural data from the Protein Data Bank (PDB). More importantly it provides a means to predict small molecule binding sites on proteins with a known or unknown structure and unlike prior approaches, removes large numbers of false positive hits arising from transitive alignment errors, non-biologically significant small molecules and crystallographic conditions that overpredict ion binding sites. 
Description: Using a set of co-crystallized protein-small molecule structures as a starting point, SMID interactions were generated by identifying protein domains that bind to small molecules, using NCBI's Reverse Position Specific BLAST (RPS-BLAST) algorithm. SMID records are available for viewing at http://smid.blueprint.org. The SMID-BLAST tool provides accurate transitive annotation of small-molecule binding sites for proteins not found in the PDB. Given a protein sequence, SMID-BLAST identifies domains using RPS-BLAST and then lists potential small molecule ligands based on SMID records, as well as their aligned binding sites. A heuristic ligand score is calculated based on E-value, ligand residue identity and domain entropy to assign a level of confidence to hits found. SMID-BLAST predictions were validated against a set of 793 experimental small molecule interactions from the PDB, of which 472 (60%) of predicted interactions identically matched the experimental small molecule and of these, 344 had greater than 80% of the binding site residues correctly identified. Further, we estimate that 45% of predictions which were not observed in the PDB validation set may be true positives. Conclusion: By focusing on protein domain-small molecule interactions, SMID is able to cluster similar interactions and detect subtle binding patterns that would not otherwise be obvious. Using SMID-BLAST, small molecule targets can be predicted for any protein sequence, with the only limitation being that the small molecule must exist in the PDB. 
Validation results and specific examples within illustrate that SMID-BLAST has a high degree of accuracy in terms of predicting both the small molecule ligand and binding site residue positions for a query protein.", "pmid": "16545112", "publisher_url": "http://www.biomedcentral.com/1471-2105/7/152/", "pdf": "2006_BMCB_SMID.pdf" }, { "label": "co", "type": "Publication", "pub-type": "Journal Article", "keywords": ["ontology","drug discovery","bioinformatics"], "title": "CO: A Chemical Ontology for Identification of Functional Groups and Semantic Comparison of Small Molecules", "author": [ "Howard Feldman", "Michel Dumontier", "Susan Ling", "Christopher Hogue" ], "journal": "FEBS Letters", "year": "2005", "volume": "579", "issue": "21", "pages": "4685-4691", "pub-date": "2005-08-02", "abstract": "A novel chemical ontology based on chemical functional groups automatically, objectively assigned by a computer program, was developed to categorize small molecules. It has been applied to PubChem and the small molecule interaction database to demonstrate its utility as a basic pharmacophore search system. Molecules can be compared using a semantic similarity score based on functional group assignments rather than 3D shape, which succeeds in identifying small molecules known to bind a common binding site. 
This ontology will serve as a powerful tool for searching chemical databases and identifying key functional groups responsible for biological activities.", "pmid": "16098521", "publisher_url": "http://www.febsletters.org/article/S0014-5793(05)00894-X/", "pdf": "2005_FEBS_CO.pdf" }, { "label": "armadillo", "type": "Publication", "pub-type": "Journal Article", "keywords": ["bioinformatics"], "title": "Domain Linker Prediction by Amino acid Composition", "author": [ "Michel Dumontier", "Rong Yao", "Howard Feldman", "Christopher Hogue" ], "journal": "Journal of Molecular Biology", "year": "2005", "volume": "350", "issue": "5", "pages": "1061-1074", "pub-date": "2005-06-06", "abstract": "The identification and annotation of protein domains provides a critical step in the accurate determination of molecular function. Both computational and experimental methods of protein structure determination may be deterred by large multi-domain proteins or flexible linker regions. Knowledge of domains and their boundaries may reduce the experimental cost of protein structure determination by allowing researchers to work on a set of smaller and possibly more successful alternatives. Current domain prediction methods often rely on sequence similarity to conserved domains and as such are poorly suited to detect domain structure in poorly conserved or orphan proteins. We present here a simple computational method to identify protein domain linkers and their boundaries from sequence information alone. Our domain predictor, Armadillo (http://armadillo.blueprint.org), uses any amino acid index to convert a protein sequence to a smoothed numeric profile from which domains and domain boundaries may be predicted. We derived an amino acid index called the domain linker propensity index (DLI) from the amino acid composition of domain linkers using a non-redundant structure dataset. The index indicates that Pro and Gly show a propensity for linker residues while small hydrophobic residues do not. 
Armadillo predicts domain linker boundaries from Z-score distributions and obtains 35% sensitivity with DLI in a two-domain, single-linker dataset (within +/- 20 residues from linker). The combination of DLI and an entropy-based amino acid index increases the overall Armadillo sensitivity to 56% for two domain proteins. Moreover, Armadillo achieves 37% sensitivity for multi-domain proteins, surpassing most other prediction methods. Armadillo provides a simple, but effective method by which prediction of domain boundaries can be obtained with reasonable sensitivity. Armadillo should prove to be a valuable tool for rapidly delineating protein domains in poorly conserved proteins or those with no sequence neighbors. As a first-line predictor, domain meta-predictors could yield improved results with Armadillo predictions.", "pmid": "15978619", "doi": "10.1016/j.jmb.2005.05.037", "pdf": "2005_JMB_ARMADILLO.pdf" }, { "label": "FPGA MS", "type": "Publication", "pub-type": "Journal Article", "keywords": ["bioinformatics"], "title": "Hardware accelerated protein identification for mass spectrometry", "author": [ "Anish Alex", "Michel Dumontier", "Jonathan Rose", "Christopher Hogue" ], "journal": "Rapid Communications in Mass Spectrometry", "year": "2005", "volume": "19", "issue": "6", "pages": "833-837", "pub-date": "2005-01-19", "abstract": "An ongoing issue in mass spectrometry is the time it takes to search DNA sequences with MS/MS peptide fragments (see, e.g., Choudary et al., Proteomics 2001; 1: 651-667.) Search times are far longer than spectra acquisition time, and parallelization of search software on clusters requires doubling the size of a conventional computing cluster to cut the search time in half. Field programmable gate arrays (FPGAs) are used to create hardware-accelerated algorithms that reduce operating costs and improve search speed compared to large clusters. 
We present a novel hardware design that takes full spectra and computes 6-frame translation word searches on DNA databases at a rate of approximately 3 billion base pairs per second, with queries of up to 10 amino acids in length and arbitrary wildcard positions. Hardware post-processing identifies in silico tryptic peptides and scores them using a variety of techniques including mass frequency expected values. With faster FPGAs protein identifications from the human genome can be achieved in less than a second, and this makes it an ideal solution for a number of proteome-scale applications.", "pmid": "15723443", "doi": "10.1002/rcm.1853", "pdf": "2005_RCM_HAPI.pdf" }, { "label": "BIND", "type": "Publication", "pub-type": "Journal Article", "keywords": ["open source","database","bioinformatics"], "title": "The Biomolecular Interaction Network Database and related tools 2005 update", "author": [ "C Alfarano", "CE Andrade", "K Anthony", "N Bahroos", "M Bajec", "K Bantoft", "D Betel", "B Bobechko", "K Boutilier", "E Burgess", "K Buzadzija", "R Cavero", "C D'Abreo", "I Donaldson", "D Dorairajoo", "Michel Dumontier","Marc Dumontier","V Earles","R Farrall","Howard Feldman","E Garderman", "Y Gong","R Gonzaga","V Grytsan", "E Gryz","V Gu","E Haldorsen", "A Halupa","R Haw","A Hrvojic", "L Hurrell","R Isserlin","F Jack", "F Juma","A Khan","T Kon","S Konopinsky","V Le","E Lee","Susan Ling","M Magidin","J Moniakis","J Montojo","S Moore","B Muskat","I Ng","JP Paraiso","B Parker","G Pintilie","R Pirone","JJ Salama", "S Sgro","T Shan","Y Shu","J Siew","D Skinner","Kevin Snyder","R Stasiuk","D Strumpf","B Tuekam","S Tao","Z Wang","M White","R Willis","C Wolting","S Wong","A Wrong","C Xin","Rong Yao","B Yates","S Zhang","K Zheng","T Pawson","BF Ouellette","Christopher Hogue" ], "journal": "Nucleic Acids Research", "year": "2005", "volume": "33", "pages": "D418-D424", "pub-date": "2005-01-01", "abstract": "The Biomolecular Interaction Network Database (BIND) (http://bind.ca) 
archives biomolecular interaction, reaction, complex and pathway information. Our aim is to curate the details about molecular interactions that arise from published experimental research and to provide this information, as well as tools to enable data analysis, freely to researchers worldwide. BIND data are curated into a comprehensive machine-readable archive of computable information and provides users with methods to discover interactions and molecular mechanisms. BIND has worked to develop new methods for visualization that amplify the underlying annotation of genes and proteins to facilitate the study of molecular interaction networks. BIND has maintained an open database policy since its inception in 1999. Data growth has proceeded at a tremendous rate, approaching over 100 000 records. New services provided include a new BIND Query and Submission interface, a Standard Object Access Protocol service and the Small Molecule Interaction Database (http://smid.blueprint.org) that allows users to determine probable small molecule binding sites of new sequences and examine conserved binding residues.", "pmid": "15608229", "doi": "10.1093/nar/gki051", "pdf": "2005_NAR_BIND.pdf" }, { "label": "MD thesis", "type": "Publication", "pub-type": "PhD Thesis", "keywords": ["bioinformatics"], "title": "Species-Specific Optimizations of Sequence and Structure", "author": [ "Michel Dumontier" ], "year": "2004", "pub-date": "2004-11-01", "school": "University of Toronto", "city": "Toronto", "country": "Canada", "abstract": "How does life adapt to the diverse environmental conditions found on earth? In the ongoing mission to answer this question, genome sequencing efforts identify the genes and proteins that provide the mechanism for growth, reproduction and adaptation. In this study, we took advantage of this exceptional opportunity to investigate whether sequences and structures from different organisms exhibited features that were species-specific. 
We analyzed the amino acid composition of sequences and structures of up to 150 completely sequenced genomes and explored the effectiveness of predictive methods that take advantage of amino acid composition bias. As a first case, we demonstrate a fast and simple method to predict domain linkers from sequence composition alone. Since the number of solved protein structures for any complete genome is rather nominal, we built a database of conservative, domain-based homology models called the species specific fold database. Our analysis indicates that species-specific sequence and structure optimizations are significantly attributable to lifestyle and environment. Predictive scoring functions based on genome composition biases are shown to be effective in the identification of the taxonomic origin from sequence composition alone. Our predictive contact potentials derived from residue contacts in the structure models captures species-specific contact preferences that may be useful in evaluating structure optimizations. Finally, we demonstrate the ability of novel, asymmetric, species-specific substitution matrices in the alignment of compositionally biased sequences. Taken together, this work presents an original exploration into the adaptation of organisms from diverse environments using both sequence and structure information.", "pdf": "thesis_phd_michel_dumontier.pdf" }, { "label": "optimizations", "type": "Publication", "pub-type": "Journal Article", "keywords": ["database","bioinformatics"], "title": "Species-Specific Protein Sequence and Fold Optimizations", "author": [ "Michel Dumontier", "Katerina Michalickova", "Christopher Hogue" ], "journal": "BMC Bioinformatics", "year": "2002", "volume": "3", "pages": "39", "pub-date": "2002-12-17", "abstract": "BACKGROUND: An organism's ability to adapt to its particular environmental niche is of fundamental importance to its survival and proliferation. 
In the largest study of its kind, we sought to identify and exploit the amino-acid signatures that make species-specific protein adaptation possible across 100 complete genomes. RESULTS: Environmental niche was determined to be a significant factor in variability from correspondence analysis using the amino acid composition of over 360,000 predicted open reading frames (ORFs) from 17 archaea, 76 bacteria and 7 eukaryote complete genomes. Additionally, we found clusters of phylogenetically unrelated archaea and bacteria that share similar environments by amino acid composition clustering. Composition analyses of conservative, domain-based homology modeling suggested an enrichment of small hydrophobic residues Ala, Gly, Val and charged residues Asp, Glu, His and Arg across all genomes. However, larger aromatic residues Phe, Trp and Tyr are reduced in folds, and these results were not affected by low complexity biases. We derived two simple log-odds scoring functions from ORFs (CG) and folds (CF) for each of the complete genomes. CF achieved an average cross-validation success rate of 85 +/- 8% whereas the CG detected 73 +/- 9% species-specific sequences when competing against all other non-redundant CG. Continuously updated results are available at http://genome.mshri.on.ca. CONCLUSION: Our analysis of amino acid compositions from the complete genomes provides stronger evidence for species-specific and environmental residue preferences in genomic sequences as well as in folds. 
Scoring functions derived from this work will be useful in future protein engineering experiments and possibly in identifying horizontal transfer events.", "pmid": "12487631", "doi": "10.1186/1471-2105-3-39", "pdf": "2002_BMCB_SFCOMP.pdf" }, { "label": "seqhound", "type": "Publication", "pub-type": "Journal Article", "keywords": ["drug discovery","open source","database","bioinformatics"], "title": "SeqHound: biological sequence and structure database as a platform for bioinformatics research", "author": [ "Katerina Michalickova", "Gary Bader", "Michel Dumontier", "Hao Lieu", "Doron Betel", "Ruth Isserlin", "Christopher Hogue" ], "journal": "BMC Bioinformatics", "year": "2002", "volume": "3", "pages": "32", "pub-date": "2002-10-25", "abstract": "BACKGROUND: SeqHound has been developed as an integrated biological sequence, taxonomy, annotation and 3-D structure database system. It provides a high-performance server platform for bioinformatics research in a locally-hosted environment. RESULTS: SeqHound is based on the National Center for Biotechnology Information data model and programming tools. It offers daily updated contents of all Entrez sequence databases in addition to 3-D structural data and information about sequence redundancies, sequence neighbours, taxonomy, complete genomes, functional annotation including Gene Ontology terms and literature links to PubMed. SeqHound is accessible via a web server through a Perl, C or C++ remote API or an optimized local API. It provides functionality necessary to retrieve specialized subsets of sequences, structures and structural domains. Sequences may be retrieved in FASTA, GenBank, ASN.1 and XML formats. Structures are available in ASN.1, XML and PDB formats. Emphasis has been placed on complete genomes, taxonomy, domain and functional annotation as well as 3-D structural functionality in the API, while fielded text indexing functionality remains under development. 
SeqHound also offers a streamlined WWW interface for simple web-user queries. CONCLUSIONS: The system has proven useful in several published bioinformatics projects such as the BIND database and offers a cost-effective infrastructure for research. SeqHound will continue to develop and be provided as a service of the Blueprint Initiative at the Samuel Lunenfeld Research Institute. The source code and examples are available under the terms of the GNU public license at the Sourceforge site http://sourceforge.net/projects/slritools/ in the SLRI Toolkit.", "pmid": "12401134", "doi": "10.1186/1471-2105-3-32", "pdf": "2002_BMCB_SEQHOUND.pdf" }, { "label": "nblast", "type": "Publication", "pub-type": "Journal Article", "keywords": ["open source","database","bioinformatics","parallel computing"], "title": "NBLAST: A Cluster Computing Variant of BLAST", "author": [ "Michel Dumontier", "Christopher Hogue" ], "journal": "BMC Bioinformatics", "year": "2002", "volume": "3", "pages": "13", "pub-date": "2002-05-08", "abstract": "BACKGROUND: The BLAST algorithm compares biological sequences to one another in order to determine shared motifs and common ancestry. However, the comparison of all non-redundant (NR) sequences against all other NR sequences is a computationally intensive task. We developed NBLAST as a cluster computer implementation of the BLAST family of sequence comparison programs for the purpose of generating pre-computed BLAST alignments and neighbour lists of NR sequences. RESULTS: NBLAST performs the heuristic BLAST algorithm and generates an exhaustive database of alignments, but it only computes alignments (i.e. the upper triangle) of a possible N^2 alignments, where N is the set of all sequences to be compared. A task-partitioning algorithm allows for cluster computing across all cluster nodes and the NBLAST master process produces a BLAST sequence alignment database and a list of sequence neighbours for each sequence record. 
The resulting sequence alignment and neighbour databases are used to serve the SeqHound query system through a C/C++ and PERL Application Programming Interface (API). CONCLUSIONS: NBLAST offers a local alternative to the NCBI's remote Entrez system for pre-computed BLAST alignments and neighbour queries. On our 216-processor 450 MHz PIII cluster, NBLAST requires ~24 hrs to compute neighbours for 850000 proteins currently in the non-redundant protein database.", "pmid": "12019022", "doi": "10.1186/1471-2105-3-13", "pdf": "2002_BMCB_NBLAST.pdf" }, { "label": "Dicty Rac1", "type": "Publication", "pub-type": "Journal Article", "keywords": ["cell biology"], "title": "Rac1 GTPases are key regulators of cellular activities dependent on actin re-organization in Dictyostelium", "author": [ "Michel Dumontier", "Petra Hoecht", "Ursala Mintert", "Jan Faix" ], "journal": "Journal of Cell Science", "year": "2000", "volume": "113", "issue": "12", "pages": "2253-2265", "pub-date": "2000-09-01", "abstract": "The function of the highly homologous Rac1A, Rac1B, and Rac1C GTPases of the Dictyostelium Rac1 group was investigated. All three GTPases bound with an equal capacity to the IQGAP-related protein DGAP1, with a preference for the activated GTP-bound form. Strong overexpression of wild-type Rac1 GTPases N-terminally tagged with green fluorescent protein (GFP), predominantly induced the formation of numerous long filopodia. Remarkably, expression of the constitutively-activated GTPases resulted in dominant-negative phenotypes: these Rac1-V12 mutants completely lacked filopodia but formed numerous crown shaped structures resembling macropinosomes. Moreover, these mutants were severely impaired in cell motility, colony growth, phagocytosis, pinocytosis, cytokinesis and development. Transformants expressing constitutively-inactivated Rac1-N17 proteins were similar to wild-type cells, but displayed abundant and short filopodia and exhibited a moderate defect in cytokinesis. 
Taken together, our results indicate that the three GTPases play an identical role in signaling pathways and are key regulators of cellular activities that depend on the re-organization of the actin cytoskeleton in Dictyostelium.", "pmid": "10825297", "publisher_url": "http://jcs.biologists.org/cgi/reprint/113/12/2253", "pdf": "2000_JCS_RAC.pdf" }, { "label": "3D Golgi", "type": "Publication", "pub-type": "Journal Article", "keywords": ["cell biology"], "title": "3D Topography of non-compact zone Golgi tubules in rat spermatids: a computer-assisted serial section reconstruction study", "author": [ "Gro Thorne-Tjomsland", "Michel Dumontier", "James Jamieson" ], "journal": "Anatomical Record", "year": "1998", "volume": "250", "issue": "4", "pages": "381-396", "pub-date": "1998-12-06", "abstract": "BACKGROUND: In the Golgi apparatus, the 3D topography of saccules in the compact zones (CZs) is better understood than that of tubules in the noncompact zones (NCZs). The positioning of NCZ tubules relative to each other and to CZ saccules was studied in rat spermatids by computer-assisted serial section microscopy. METHODS: Twenty-four (semi) serials (3-6 consecutive sections each) in total were collected from untreated tissues and from tissues treated for glucose-6-phosphatase (G6P) cytochemistry as an alignment tool. The serials proceeded along either the cis-trans or the medial-lateral axes of the Golgi and collectively sampled all portions of this organelle. Selected serials were computer reconstructed and the final models displayed in red-green/red-blue stereo. RESULTS: In single thin sections, NCZ tubules typically appeared randomly oriented; however, in serial sections a high degree of organization was evident. 
Most tubules were traceable to the type of tubular networks (TNs) that interconnect equivalent CZ saccules (see review Rambourg and Clermont, 1990). Such TNs were present at consecutive saccular levels through each NCZ, were stacked like the saccules from which they originate, and in many regions were aligned from cis-trans. The cis-most of the TNs projected above the cis-pole of the stacked saccules and were penetrated by coated and uncoated ER buds. CONCLUSIONS: The function of the extensive NCZ tubular domain, consisting of the stacked and aligned TNs, will have to be addressed in future studies. However, the specific topography of the cis-most TNs make them candidates to serve as acceptor membranes in ER-Golgi transport.", "pmid": "9566528", "publisher_url": "http://www3.interscience.wiley.com/journal/28286/", "pdf": "1998_AR_3DGOLGI.pdf" } ] }